내가 나름대로 알고 있는 gdb 에 대한 기술적인 내용들을 정리했다.
워낙 gdb 가 내용이 방대한 프로그램이기 때문에 모두 다룰 수는 없고 여기서는 새롭게 알게된 것들을 중심으로 FAQ 형식으로 설명하도록 하겠다.

FAQ

ui_out Functions 는 무엇이고 어떻게 사용하는가

gdb 의 출력 코드를 추상화(abstract)시켜 줌으로써, 프로그래머가 출력 내부 루틴을 알지 못하더라도 원하는 출력 형태를 만들 수 있다.
바로 이런 특징이 ddd 와 같은 front-end 프로그램을 만들 수 있는 계기를 제공한다. 여기서는 breakpoint 의 정보를 출력해주는 루틴을 예로 들어 설명하겠다.

(gdb) info breakpoints
Num Type           Disp Enb Address    What
1   breakpoint     keep y   0x08048472 in main at main.c:24

위와 같은 결과가 출력되었을 때, 마치 표 처럼 Num, Type, Disp, Enb, Address, What 항목에 맞게 출력되는 것을 볼 수 있다.

Num	Type	Disp	Enb	Address	What
1	breakpoint	keep	y	0x08048472	in main at main.c:24

앞에서 설명한 대로, 이 출력 형태를 마음대로 바꿀 수 있다. 예를 들면, 각 컬럼의 이름이나 정렬 방식을 변경할 수 있다.
이와 관련한 루틴은 gdb/breakpoint.c 파일의 breakpoints_info() 함수에 정의되어 있다. 'info breakpoint' 명령을 실행하면, 호출되는 함수이다.

static void
breakpoints_info (char *bnum_exp, int from_tty)
{
  int bnum = -1;
 
  if (bnum_exp)
    bnum = parse_and_eval_long (bnum_exp);
 
  breakpoint_1 (bnum, 0);   
}

다음은 breakpoint_1() 함수의 내용이다.

/* Show the breakpoint table.  BNUM selects a single breakpoint
   (watchpoint, etc.) by number, or every breakpoint when it is -1.
   Breakpoints that are not user settable are listed only when
   ALLFLAG is non-zero.  */

static void
breakpoint_1 (int bnum, int allflag)
{
  struct breakpoint *b;
  struct cleanup *bkpttbl_chain;
  CORE_ADDR last_addr = (CORE_ADDR) -1;
  int nr_printable_breakpoints = 0;
  int have_rows;

  /* First pass: count the rows the table will hold.  */
  ALL_BREAKPOINTS (b)
    if ((bnum == -1 || bnum == b->number)
	&& (allflag || user_settable_breakpoint (b)))
      nr_printable_breakpoints++;

  /* Open the table: six columns when addresses are printed, five
     otherwise.  */
  bkpttbl_chain
    = make_cleanup_ui_out_table_begin_end (uiout, addressprint ? 6 : 5,
					   nr_printable_breakpoints,
					   "BreakpointTable");

  /* Define the name and attributes of each column in turn.  The
     annotation calls are made only when at least one row follows.  */
  have_rows = nr_printable_breakpoints > 0;
  if (have_rows)
    {
      annotate_breakpoints_headers ();
      annotate_field (0);
    }
  ui_out_table_header (uiout, 3, ui_left, "number", "Num");	/* column 1 */
  if (have_rows)
    annotate_field (1);
  ui_out_table_header (uiout, 14, ui_left, "type", "Type");	/* column 2 */
  if (have_rows)
    annotate_field (2);
  ui_out_table_header (uiout, 4, ui_left, "disp", "Disp");	/* column 3 */
  if (have_rows)
    annotate_field (3);
  ui_out_table_header (uiout, 3, ui_left, "enabled", "Enb");	/* column 4 */
  if (addressprint)
    {
      /* Column 5 exists only when addresses are printed; its header
	 is declared with 10 or 18 depending on TARGET_ADDR_BIT.  */
      if (have_rows)
	annotate_field (4);
      ui_out_table_header (uiout, TARGET_ADDR_BIT <= 32 ? 10 : 18,
			   ui_left, "addr", "Address");
    }
  if (have_rows)
    annotate_field (5);
  ui_out_table_header (uiout, 40, ui_noalign, "what", "What");	/* last column */
  ui_out_table_body (uiout);
  if (have_rows)
    annotate_breakpoints_table ();

  /* Second pass: print_one_breakpoint fills in the columns of one row
     for every breakpoint selected above.  Only user settable
     breakpoints are printed unless ALLFLAG is set.  */
  ALL_BREAKPOINTS (b)
    if ((bnum == -1 || bnum == b->number)
	&& (allflag || user_settable_breakpoint (b)))
      print_one_breakpoint (b, &last_addr);

  do_cleanups (bkpttbl_chain);

  if (have_rows)
    {
      /* Compare against (CORE_ADDR)-1 in case some compiler decides
	 that a comparison of an unsigned with -1 is always false.  */
      if (last_addr != (CORE_ADDR) -1)
	set_next_address (last_addr);
    }
  else if (bnum == -1)
    /* Nothing matched and no specific number was requested.  */
    ui_out_message (uiout, 0, "No breakpoints or watchpoints.\n");
  else
    ui_out_message (uiout, 0, "No breakpoint or watchpoint number %d.\n",
		    bnum);

  /* FIXME? Should this be moved up so that it is only called when
     there have been breakpoints? */
  annotate_breakpoints_table_end ();
}

다음은 print_one_breakpoint() 함수이다.

/* Print one row of the breakpoint table for breakpoint B through
   uiout, followed by any detail lines (frame, condition, thread, hit
   count, ignore count, commands).

   B          the breakpoint to describe.
   LAST_ADDR  for the address-bearing breakpoint types, *LAST_ADDR is
	      set to B's location address; it is also handed to
	      B->ops->print_one when that hook is present.

   An internal_error is raised when B->type is not described by the
   local bptypes table, or when B->type is bp_none.  */
static void
print_one_breakpoint (struct breakpoint *b,
		      CORE_ADDR *last_addr)
{
  struct command_line *l;
  struct symbol *sym;
  struct ep_type_description
    {
      enum bptype type;
      char *description;
    };
  /* Indexed by enum bptype; each entry repeats its own type so the
     lookup below can verify that table and enum are in step.  */
  static struct ep_type_description bptypes[] =
  {
    {bp_none, "?deleted?"},
    {bp_breakpoint, "breakpoint"},
    {bp_hardware_breakpoint, "hw breakpoint"},
    {bp_until, "until"},
    {bp_finish, "finish"},
    {bp_watchpoint, "watchpoint"},
    {bp_hardware_watchpoint, "hw watchpoint"},
    {bp_read_watchpoint, "read watchpoint"},
    {bp_access_watchpoint, "acc watchpoint"},
    {bp_longjmp, "longjmp"},
    {bp_longjmp_resume, "longjmp resume"},
    {bp_step_resume, "step resume"},
    {bp_through_sigtramp, "sigtramp"},
    {bp_watchpoint_scope, "watchpoint scope"},
    {bp_call_dummy, "call dummy"},
    {bp_shlib_event, "shlib events"},
    {bp_thread_event, "thread events"},
    {bp_overlay_event, "overlay events"},
    {bp_catch_load, "catch load"},
    {bp_catch_unload, "catch unload"},
    {bp_catch_fork, "catch fork"},
    {bp_catch_vfork, "catch vfork"},
    {bp_catch_exec, "catch exec"},
    {bp_catch_catch, "catch catch"},
    {bp_catch_throw, "catch throw"}
  };

  /* Display strings indexed by B->disposition and B->enable_state.  */
  static char *bpdisps[] =
  {"del", "dstp", "dis", "keep"};
  static char bpenables[] = "nynny";
  char wrap_indent[80];
  struct ui_stream *stb = ui_out_stream_new (uiout);
  struct cleanup *old_chain = make_cleanup_ui_out_stream_delete (stb);
  struct cleanup *bkpt_chain;

  annotate_record ();
  bkpt_chain = make_cleanup_ui_out_tuple_begin_end (uiout, "bkpt");

  /* Column 1: the breakpoint number.  */
  annotate_field (0);
  ui_out_field_int (uiout, "number", b->number);

  /* Column 2: the type description.  The index must be strictly less
     than the number of table entries; using ">=" here rejects an
     index equal to the table size before bptypes[] is read.  */
  annotate_field (1);
  if (((int) b->type >= (sizeof (bptypes) / sizeof (bptypes[0])))
      || ((int) b->type != bptypes[(int) b->type].type))
    internal_error (__FILE__, __LINE__,
		    _("bptypes table does not describe type #%d."),
		    (int) b->type);
  ui_out_field_string (uiout, "type", bptypes[(int) b->type].description);

  /* Column 3: the disposition.  */
  annotate_field (2);
  ui_out_field_string (uiout, "disp", bpdisps[(int) b->disposition]);

  /* Column 4: the enabled flag.  */
  annotate_field (3);
  ui_out_field_fmt (uiout, "enabled", "%c", bpenables[(int) b->enable_state]);
  ui_out_spaces (uiout, 2);

  /* Columns 5 and 6: address and "what".  WRAP_INDENT is the
     indentation passed as the wrap hint; it is extended when the
     address column is printed.  */
  strcpy (wrap_indent, "                           ");
  if (addressprint)
    {
      if (TARGET_ADDR_BIT <= 32)
	strcat (wrap_indent, "           ");
      else
	strcat (wrap_indent, "                   ");
    }

  if (b->ops != NULL && b->ops->print_one != NULL)
    b->ops->print_one (b, last_addr);
  else
    switch (b->type)    /* The output format depends on the type.  */
      {
      case bp_none:
	internal_error (__FILE__, __LINE__,
			_("print_one_breakpoint: bp_none encountered\n"));
	break;

      case bp_watchpoint:
      case bp_hardware_watchpoint:
      case bp_read_watchpoint:
      case bp_access_watchpoint:
	/* Field 4, the address, is omitted (which makes the columns
	   not line up too nicely with the headers, but the effect
	   is relatively readable).  */
	if (addressprint)
	  ui_out_field_skip (uiout, "addr");
	annotate_field (5);
	print_expression (b->exp, stb->stream);
	ui_out_field_stream (uiout, "what", stb);
	break;

      case bp_catch_load:
      case bp_catch_unload:
	/* Field 4, the address, is omitted (which makes the columns
	   not line up too nicely with the headers, but the effect
	   is relatively readable).  */
	if (addressprint)
	  ui_out_field_skip (uiout, "addr");
	annotate_field (5);
	if (b->dll_pathname == NULL)
	  {
	    ui_out_field_string (uiout, "what", "<any library>");
	    ui_out_spaces (uiout, 1);
	  }
	else
	  {
	    ui_out_text (uiout, "library \"");
	    ui_out_field_string (uiout, "what", b->dll_pathname);
	    ui_out_text (uiout, "\" ");
	  }
	break;

      case bp_catch_fork:
      case bp_catch_vfork:
	/* Field 4, the address, is omitted (which makes the columns
	   not line up too nicely with the headers, but the effect
	   is relatively readable).  */
	if (addressprint)
	  ui_out_field_skip (uiout, "addr");
	annotate_field (5);
	if (b->forked_inferior_pid != 0)
	  {
	    ui_out_text (uiout, "process ");
	    ui_out_field_int (uiout, "what", b->forked_inferior_pid);
	    ui_out_spaces (uiout, 1);
	  }
	break;

      case bp_catch_exec:
	/* Field 4, the address, is omitted (which makes the columns
	   not line up too nicely with the headers, but the effect
	   is relatively readable).  */
	if (addressprint)
	  ui_out_field_skip (uiout, "addr");
	annotate_field (5);
	if (b->exec_pathname != NULL)
	  {
	    ui_out_text (uiout, "program \"");
	    ui_out_field_string (uiout, "what", b->exec_pathname);
	    ui_out_text (uiout, "\" ");
	  }
	break;

      case bp_catch_catch:
	/* Field 4, the address, is omitted (which makes the columns
	   not line up too nicely with the headers, but the effect
	   is relatively readable).  */
	if (addressprint)
	  ui_out_field_skip (uiout, "addr");
	annotate_field (5);
	ui_out_field_string (uiout, "what", "exception catch");
	ui_out_spaces (uiout, 1);
	break;

      case bp_catch_throw:
	/* Field 4, the address, is omitted (which makes the columns
	   not line up too nicely with the headers, but the effect
	   is relatively readable).  */
	if (addressprint)
	  ui_out_field_skip (uiout, "addr");
	annotate_field (5);
	ui_out_field_string (uiout, "what", "exception throw");
	ui_out_spaces (uiout, 1);
	break;

      case bp_breakpoint:
      case bp_hardware_breakpoint:
      case bp_until:
      case bp_finish:
      case bp_longjmp:
      case bp_longjmp_resume:
      case bp_step_resume:
      case bp_through_sigtramp:
      case bp_watchpoint_scope:
      case bp_call_dummy:
      case bp_shlib_event:
      case bp_thread_event:
      case bp_overlay_event:
	if (addressprint)
	  {
	    annotate_field (4);
	    if (b->pending)
	      ui_out_field_string (uiout, "addr", "<PENDING>");
	    else
	      ui_out_field_core_addr (uiout, "addr", b->loc->address);
	  }
	annotate_field (5);
	*last_addr = b->loc->address;
	if (b->source_file)
	  {
	    /* "in FUNC at FILE:LINE"; the "in FUNC at" part appears
	       only when a function symbol is found for the address.  */
	    sym = find_pc_sect_function (b->loc->address, b->loc->section);
	    if (sym)
	      {
		ui_out_text (uiout, "in ");
		ui_out_field_string (uiout, "func",
				     SYMBOL_PRINT_NAME (sym));
		ui_out_wrap_hint (uiout, wrap_indent);
		ui_out_text (uiout, " at ");
	      }
	    ui_out_field_string (uiout, "file", b->source_file);
	    ui_out_text (uiout, ":");

	    /* The "fullname" field is emitted only for MI-like
	       output.  */
	    if (ui_out_is_mi_like_p (uiout))
	      {
		struct symtab_and_line sal = find_pc_line (b->loc->address, 0);
		char *fullname = symtab_to_fullname (sal.symtab);

		if (fullname)
		  ui_out_field_string (uiout, "fullname", fullname);
	      }

	    ui_out_field_int (uiout, "line", b->line_number);
	  }
	else if (b->pending)
	  {
	    ui_out_field_string (uiout, "pending", b->addr_string);
	  }
	else
	  {
	    print_address_symbolic (b->loc->address, stb->stream, demangle, "");
	    ui_out_field_stream (uiout, "at", stb);
	  }
	break;
      }

  if (b->thread != -1)
    {
      /* FIXME: This seems to be redundant and lost here; see the
	 "stop only in" line a little further down. */
      ui_out_text (uiout, " thread ");
      ui_out_field_int (uiout, "thread", b->thread);
    }

  ui_out_text (uiout, "\n");

  /* Everything below is optional detail printed on its own
     tab-indented line after the table row.  */

  if (frame_id_p (b->frame_id))
    {
      annotate_field (6);
      ui_out_text (uiout, "\tstop only in stack frame at ");
      /* FIXME: cagney/2002-12-01: Shouldn't be poeking around inside
         the frame ID.  */
      ui_out_field_core_addr (uiout, "frame", b->frame_id.stack_addr);
      ui_out_text (uiout, "\n");
    }

  if (b->cond)
    {
      annotate_field (7);
      ui_out_text (uiout, "\tstop only if ");
      print_expression (b->cond, stb->stream);
      ui_out_field_stream (uiout, "cond", stb);
      ui_out_text (uiout, "\n");
    }

  /* A pending breakpoint shows its condition from the saved string.  */
  if (b->pending && b->cond_string)
    {
      annotate_field (7);
      ui_out_text (uiout, "\tstop only if ");
      ui_out_field_string (uiout, "cond", b->cond_string);
      ui_out_text (uiout, "\n");
    }

  if (b->thread != -1)
    {
      /* FIXME should make an annotation for this */
      ui_out_text (uiout, "\tstop only in thread ");
      ui_out_field_int (uiout, "thread", b->thread);
      ui_out_text (uiout, "\n");
    }

  if (show_breakpoint_hit_counts && b->hit_count)
    {
      /* FIXME should make an annotation for this */
      if (ep_is_catchpoint (b))
	ui_out_text (uiout, "\tcatchpoint");
      else
	ui_out_text (uiout, "\tbreakpoint");
      ui_out_text (uiout, " already hit ");
      ui_out_field_int (uiout, "times", b->hit_count);
      if (b->hit_count == 1)
	ui_out_text (uiout, " time\n");
      else
	ui_out_text (uiout, " times\n");
    }

  /* Output the count also if it is zero, but only if this is
     mi. FIXME: Should have a better test for this. */
  if (ui_out_is_mi_like_p (uiout))
    if (show_breakpoint_hit_counts && b->hit_count == 0)
      ui_out_field_int (uiout, "times", b->hit_count);

  if (b->ignore_count)
    {
      annotate_field (8);
      ui_out_text (uiout, "\tignore next ");
      ui_out_field_int (uiout, "ignore", b->ignore_count);
      ui_out_text (uiout, " hits\n");
    }

  if ((l = b->commands))
    {
      struct cleanup *script_chain;

      annotate_field (9);
      script_chain = make_cleanup_ui_out_tuple_begin_end (uiout, "script");
      print_command_lines (uiout, l, 4);
      do_cleanups (script_chain);
    }
  /* Run the "bkpt" tuple cleanup first, then the stream cleanup.  */
  do_cleanups (bkpt_chain);
  do_cleanups (old_chain);
}

필자의 이해를 돕기 위해서 몇 가지 테스트를 해봤다.

  bkpttbl_chain 
      = make_cleanup_ui_out_table_begin_end (uiout, 3, nr_printable_breakpoints,
                                             "BreakpointTable");   /* 3 으로 수정 */

위와 같이 처음에 테이블을 만들 때 컬럼 수를 3 으로 수정하고, 뒤에서 6 개의 컬럼을 정의하면 수행 도중 에러가 발생한다.
이번에는 ui_out_table_header 함수의 4 번째 인자인 col_name 에 임의값 'TESTXX' 을 넣었다.

  ui_out_table_header (uiout, 3, ui_left, "TESTXX", "Num");		/* 1 */

수행 결과, CLI 출력은 예전과 같았다. 즉 이 값은 사람이 보는 화면 출력에는 영향을 주지 않는다. (다만 MI 와 같이 front-end 프로그램을 위한 출력에서는 이 이름이 필드 이름으로 쓰이므로, 완전히 의미 없는 값은 아니다.)
아래 함수 역시, 수행 결과 마찬가지 였다.

  ui_out_field_string (uiout, "TESTXX", bptypes[(int) b->type].description);

3 가지 타입의 Symbol table 은 무엇이고, 어떻게 만들어지는가

gdb 는 심볼 정보를 나타내기 위해서 3 가지 타입의 심볼 테이블을 가진다.

Full symbol tables(symtabs) : 읽어들인 symbol 파일의 symbol 과 address 에 관한 전체 정보를 가짐
Partial symbol tables(psymtabs) : 전체 symbol table 을 모두 읽지 않고도 디버깅을 빠르게 시작할 수 있도록, 실행파일의 debugging 정보 중 일부만 담아 둔다. 나중에 symbol table 의 특정 부분이 필요해지면 그 부분만 작은 단위로 나누어 읽어들이므로, 사용자는 지연(delay)을 거의 느끼지 못한다.
Minimal symbol tables(msymtabs) : Non-debugging symbols 로부터 얻은 정보를 가짐

hello world 를 출력하는 프로그램이 있다고 하자. 이 파일을 디버깅하기 위해서는 gdb 에서 데이터 값을 읽어야 한다.

(gdb) list
1       #include <stdio.h>
2       int main(void)
3       {
4         printf("hello!!");
5         return 0;
6       }

심볼테이블을 살펴보기 위해 list 명령어를 이용한다. 이 때는 gdb/cli/cli-cmds.c 파일의 list_command 함수가 호출된다.

/* Handler for the "list" command: print source lines.

   ARG is the command argument text, or 0 when none was given.  The
   forms handled below are: no argument or "+" (continue after the
   current position), "-" (lines before those last listed), a single
   line specification, and "BEG,END" where either side may be empty.
   FROM_TTY non-zero causes ARG to be truncated in place before
   printing, so that a repeat of the command acts as the no-argument
   form.

   Every error path calls error (); the early returns cover line
   specifications that decode to zero or to several locations.  */
static void
list_command (char *arg, int from_tty)
{
  struct symtabs_and_lines sals, sals_end;
  struct symtab_and_line sal = { 0 };
  struct symtab_and_line sal_end = { 0 };
  struct symtab_and_line cursal = { 0 };
  struct symbol *sym;
  char *arg1;
  int no_end = 1;		/* Cleared once a ',' is seen.  */
  int dummy_end = 0;		/* Set when the text after ',' is empty.  */
  int dummy_beg = 0;		/* Set when ARG starts with ','.  */
  int linenum_beg = 0;		/* Set when the BEG text is all digits.  */
  char *p;
 
  /* Pull in the current default source line if necessary */
  if (arg == 0 || arg[0] == '+' || arg[0] == '-')
    {
      set_default_source_symtab_and_line ();
      cursal = get_current_source_symtab_and_line ();
    }
 
  /* "l" or "l +" lists next ten lines.  */
 
  if (arg == 0 || strcmp (arg, "+") == 0)
    {
      print_source_lines (cursal.symtab, cursal.line,
			  cursal.line + get_lines_to_list (), 0);
      return;
    }
 
  /* "l -" lists previous ten lines, the ones before the ten just listed.  */
  if (strcmp (arg, "-") == 0)
    {
      print_source_lines (cursal.symtab,
			  max (get_first_line_listed () - get_lines_to_list (), 1),
			  get_first_line_listed (), 0);
      return;
    }
 
  /* Now if there is only one argument, decode it in SAL
     and set NO_END.
     If there are two arguments, decode them in SAL and SAL_END
     and clear NO_END; however, if one of the arguments is blank,
     set DUMMY_BEG or DUMMY_END to record that fact.  */
 
  if (!have_full_symbols () && !have_partial_symbols ())
    error (_("No symbol table is loaded.  Use the \"file\" command."));
 
  /* ARG1 is the parse cursor; decode_line_1 is given its address, and
     the code below relies on it having moved past the decoded text.  */
  arg1 = arg;
  if (*arg1 == ',')
    dummy_beg = 1;
  else
    {
      sals = decode_line_1 (&arg1, 0, 0, 0, 0, 0);
 
      if (!sals.nelts)
	return;			/*  C++  */
      if (sals.nelts > 1)
	{
	  ambiguous_line_spec (&sals);
	  xfree (sals.sals);
	  return;
	}
 
      /* Exactly one match: copy it out before freeing the array.  */
      sal = sals.sals[0];
      xfree (sals.sals);
    }
 
  /* Record whether the BEG arg is all digits.  */
 
  /* The loop stops at the first non-digit or at ARG1; reaching ARG1
     means every character of the BEG text was a digit.  */
  for (p = arg; p != arg1 && *p >= '0' && *p <= '9'; p++);
  linenum_beg = (p == arg1);
 
  /* Skip blanks, then look for the ',' that introduces an END part.  */
  while (*arg1 == ' ' || *arg1 == '\t')
    arg1++;
  if (*arg1 == ',')
    {
      no_end = 0;
      arg1++;
      while (*arg1 == ' ' || *arg1 == '\t')
	arg1++;
      if (*arg1 == 0)
	dummy_end = 1;
      else
	{
	  /* With a BEG part present, END is decoded with BEG's symtab
	     and line passed as defaults.  */
	  if (dummy_beg)
	    sals_end = decode_line_1 (&arg1, 0, 0, 0, 0, 0);
	  else
	    sals_end = decode_line_1 (&arg1, 0, sal.symtab, sal.line, 0, 0);
	  if (sals_end.nelts == 0)
	    return;
	  if (sals_end.nelts > 1)
	    {
	      ambiguous_line_spec (&sals_end);
	      xfree (sals_end.sals);
	      return;
	    }
	  sal_end = sals_end.sals[0];
	  xfree (sals_end.sals);
	}
    }
 
  /* Anything left unparsed at this point is an error.  */
  if (*arg1)
    error (_("Junk at end of line specification."));
 
  if (!no_end && !dummy_beg && !dummy_end
      && sal.symtab != sal_end.symtab)
    error (_("Specified start and end are in different files."));
  if (dummy_beg && dummy_end)
    error (_("Two empty args do not say what lines to list."));
 
  /* if line was specified by address,
     first print exactly which line, and which file.
     In this case, sal.symtab == 0 means address is outside
     of all known source files, not that user failed to give a filename.  */
  if (*arg == '*')
    {
      if (sal.symtab == 0)
	/* FIXME-32x64--assumes sal.pc fits in long.  */
	error (_("No source file for address %s."),
	       hex_string ((unsigned long) sal.pc));
      sym = find_pc_function (sal.pc);
      if (sym)
	{
	  deprecated_print_address_numeric (sal.pc, 1, gdb_stdout);
	  printf_filtered (" is in ");
	  fputs_filtered (SYMBOL_PRINT_NAME (sym), gdb_stdout);
	  printf_filtered (" (%s:%d).\n", sal.symtab->filename, sal.line);
	}
      else
	{
	  deprecated_print_address_numeric (sal.pc, 1, gdb_stdout);
	  printf_filtered (" is at %s:%d.\n",
			   sal.symtab->filename, sal.line);
	}
    }
 
  /* If line was not specified by just a line number,
     and it does not imply a symtab, it must be an undebuggable symbol
     which means no source code.  */
 
  if (!linenum_beg && sal.symtab == 0)
    error (_("No line number known for %s."), arg);
 
  /* If this command is repeated with RET,
     turn it into the no-arg variant.  */
 
  if (from_tty)
    *arg = 0;
 
  /* Print the selected range.  The cases are: ",END" (lines ending at
     END), a single position (lines centered on it), and "BEG," or
     "BEG,END".  */
  if (dummy_beg && sal_end.symtab == 0)
    error (_("No default source file yet.  Do \"help list\"."));
  if (dummy_beg)
    print_source_lines (sal_end.symtab,
			max (sal_end.line - (get_lines_to_list () - 1), 1),
			sal_end.line + 1, 0);
  else if (sal.symtab == 0)
    error (_("No default source file yet.  Do \"help list\"."));
  else if (no_end)
    {
      int first_line = sal.line - get_lines_to_list () / 2;
 
      /* Clamp so the listing never starts before line 1.  */
      if (first_line < 1) first_line = 1;
 
      print_source_lines (sal.symtab,
		          first_line,
			  first_line + get_lines_to_list (),
			  0);
    }
  else
    print_source_lines (sal.symtab, sal.line,
			(dummy_end
			 ? sal.line + get_lines_to_list ()
			 : sal_end.line + 1),
			0);
}

다음은 symtab 구조체이다.

/* A full symbol table.  Each source file or header is represented by
   one struct symtab, and all of them are chained through the `next'
   field.  */
 
struct symtab
{
 
  /* Next entry in the chain of all existing symtabs.  */
 
  struct symtab *next;
 
  /* List of all symbol scope blocks for this symtab.  May be shared
     between different symtabs (and normally is for all the symtabs
     in a given compilation unit).  */
 
  struct blockvector *blockvector;
 
  /* Table mapping core addresses to line numbers for this file.
     Can be NULL if there is none.  Never shared between different
     symtabs.  */
 
  struct linetable *linetable;
 
  /* Section in objfile->section_offsets for the blockvector and
     the linetable.  Probably always SECT_OFF_TEXT.  */
 
  int block_line_section;
 
  /* If several symtabs share a blockvector, exactly one of them
     should be designated the primary, so that the blockvector
     is relocated exactly once by objfile_relocate.  */
 
  int primary;
 
  /* The macro table for this symtab.  Like the blockvector, this
     may be shared between different symtabs --- and normally is for
     all the symtabs in a given compilation unit.  */
  struct macro_table *macro_table;
 
  /* Name of this source file.  */
 
  char *filename;
 
  /* Directory in which it was compiled, or NULL if we don't know.  */
 
  char *dirname;
 
  /* How to free the data this symtab points to:
     free_contents  => do a tree walk and free each object.
     free_nothing   => do nothing; some other symtab will free
		       the data this one uses.
     free_linetable => free just the linetable.
     FIXME: Is this redundant with the primary field?  */
 
  enum free_code
  {
    free_nothing, free_contents, free_linetable
  }
  free_code;
 
  /* A function to call to free space, if necessary.  This is IN
     ADDITION to the action indicated by free_code.  */
 
  void (*free_func)(struct symtab *symtab);
 
  /* Total number of lines found in the source file.  */
 
  int nlines;
 
  /* line_charpos[N] is the position of the (N-1)th line of the
     source file.  "position" means something we can lseek() to; it
     is not guaranteed to be useful any other way.  */
 
  int *line_charpos;
 
  /* Language of this source file.  */
 
  enum language language;
 
  /* String that identifies the format of the debugging information,
     such as "stabs", "dwarf 1", "dwarf 2", "coff", etc.  This is
     mostly useful for automated testing of gdb but may also be
     information that is useful to the user.  */
 
  char *debugformat;
 
  /* String of version information.  May be zero.  */
 
  char *version;
 
  /* Full name of the file as found by searching the source path.
     NULL if not yet known.  */
 
  char *fullname;
 
  /* Object file from which this symbol information was read.  */
 
  struct objfile *objfile;
 
};

다음은 psymtabs 구조체이다.

/* A partial symbol table.  Each source file that has not been fully
   read in is represented by a partial_symtab.  It records where in
   the executable the debugging symbols for that file are, plus a
   list of names of global symbols located in the file.  All of them
   are chained on partial symtab lists.
 
   Even after the source file has been read into a symtab, the
   partial_symtab remains around.  They are allocated on an obstack,
   objfile_obstack.  FIXME, this is bad for dynamic linking or VxWorks-
   style execution of a bunch of .o's.  */
 
struct partial_symtab
{
 
  /* Next entry in the chain of all existing partial symtabs.  */
 
  struct partial_symtab *next;
 
  /* Name of the source file which this partial_symtab defines.  */
 
  char *filename;
 
  /* Full path of the source file.  NULL if not known.  */
 
  char *fullname;
 
  /* Directory in which it was compiled, or NULL if we don't know.  */
 
  char *dirname;
 
  /* Information about the object file from which symbols should be
     read.  */
 
  struct objfile *objfile;
 
  /* Set of relocation offsets to apply to each section.  */
 
  struct section_offsets *section_offsets;
 
  /* Range of text addresses covered by this file; texthigh is the
     beginning of the next section.  */
 
  CORE_ADDR textlow;
  CORE_ADDR texthigh;
 
  /* Array of pointers to all of the partial_symtab's which this one
     depends on.  Since this array can only be set to previous or
     the current (?) psymtab, this dependency tree is guaranteed not
     to have any loops.  "depends on" means that symbols must be read
     for the dependencies before being read for this psymtab; this is
     for type references in stabs, where if foo.c includes foo.h,
     declarations in foo.h may use type numbers defined in foo.c.
     For other debugging formats there may be no need to use
     dependencies.  */
 
  struct partial_symtab **dependencies;
 
  /* Number of entries in the dependencies array (per the field
     name).  */
  int number_of_dependencies;
 
  /* Global symbol list.  This list will be sorted after readin to
     improve access.  Binary search will be the usual method of
     finding a symbol within it.  globals_offset is an integer offset
     within global_psymbols[].  */
 
  int globals_offset;
  int n_global_syms;
 
  /* Static symbol list.  This list will *not* be sorted after readin;
     to find a symbol in it, exhaustive search must be used.  This is
     reasonable because searches through this list will eventually
     lead to either the read in of a file's symbols for real (assumed
     to take a *lot* of time; check) or an error (and we don't care
     how long errors take).  This is an offset and size within
     static_psymbols[].  */
 
  int statics_offset;
  int n_static_syms;
 
  /* Pointer to the symtab eventually allocated for this source file;
     0 if !readin or if we haven't looked for the symtab after it was
     readin.  */
 
  struct symtab *symtab;
 
  /* Pointer to the function which will read in the symtab
     corresponding to this psymtab.  */
 
  void (*read_symtab) (struct partial_symtab *);
 
  /* Information that lets read_symtab() locate the part of the symbol
     table that this psymtab corresponds to.  This information is
     private to the format-dependent symbol reading routines.  For
     further detail examine the various symbol reading modules.
     Should really be (void *) but is (char *) as with other such gdb
     variables.  (FIXME) */
 
  char *read_symtab_private;
 
  /* Non-zero if the symtab corresponding to this psymtab has been
     read in.  */
 
  unsigned char readin;
};

maintenance 를 이용한 분석

gdb 최신버전(6.6)의 경우, maintenance 명령어를 이용해서 symtabs 와 psymtabs 의 내용을 출력해 볼 수 있다.
여기서는 이것을 통해 symtabs 와 psymtabs 의 기능을 알아보도록 하겠다.

symtabs

디버깅 도중에 현재 symtabs 의 내용을 보고 싶으면, 아래와 같이 실행하면 된다.

(gdb) maintenance info symtabs

symtabs 는 list 명령어를 사용해서 심볼 테이블을 읽어들이지 않으면(= list 명령어를 사용하지 않으면) 생성되지 않는다.

#gdb a.out
(gdb) maintenance info symtabs 
(gdb)

이번에는 list 명령어를 사용해서 symtabs 가 출력된 것을 볼 수 있다.

#gdb a.out
(gdb) b main
(gdb) r
(gdb) list
(gdb) maintenance info symtabs 
{ objfile /usr/local/gdb/bin/testg ((struct objfile *) 0x830da30)
  { symtab test.c ((struct symtab *) 0x832af30)
    dirname /root
    fullname /root/test.c
    blockvector ((struct blockvector *) 0x832af18) (primary)
    debugformat DWARF 2
  }
}

symtabs 의 특징이라고 한다면, 한번 저장된 내용은 변하지 않는다는 것이다. 또한 현재 디버깅에 필요한 파일 자체에 대한 정보를 가지고 있다.
앞에서 예를 든 것은 하나의 파일로 만들어진 간단한 프로그램이지만, 예를 들어 총 3 개의 파일로 만든 프로그램의 경우(main.c, add.c, sub.c), 가장 먼저 디버깅이 시작되는 main.c 에 대한 symtabs 가 만들어지고, 함수가 호출되는 순서에 따라서 차례대로 add.c, sub.c 의 symtabs 가 만들어진다.

{ objfile /usr/local/gdb/bin/test_man ((struct objfile *) 0x830da30)
  { symtab sub.c ((struct symtab *) 0x83d67c0)
    dirname /root/arm
    fullname /root/arm/sub.c
    blockvector ((struct blockvector *) 0x83d67b0) (primary)
    debugformat DWARF 2
  }
  { symtab add.c ((struct symtab *) 0x83d3d18)
    dirname /root/arm
    fullname /root/arm/add.c
    blockvector ((struct blockvector *) 0x83d3d08) (primary)
    debugformat DWARF 2
  }
  { symtab main.c ((struct symtab *) 0x832d8f8)
    dirname /root/arm
    fullname /root/arm/main.c
    blockvector ((struct blockvector *) 0x832d8e8) (primary)
    debugformat DWARF 2
  }
}

blockvector 는 symtab 을 위한 모든 심볼 영역(scope) block 의 리스트로서, 서로 다른 symtabs 사이에서 공유될 수 있다(보통 같은 컴파일 단위에 속한 symtabs 끼리 공유한다).

psymtabs

디버깅 도중에 현재 psymtabs 의 내용을 보고 싶으면, 아래와 같이 실행하면 된다.

(gdb)  maintenance info psymtabs

symtab 과 다른 것이 있다면, list 명령어를 사용하지 않더라도 psymtab 을 출력한다는 점이다.

#gdb a.out
(gdb) maintenance info psymtabs 
{ objfile /usr/local/gdb/bin/testg ((struct objfile *) 0x830da50)
  { psymtab /usr/src/build/231499-i386/BUILD/glibc-2.3.2-20030313/build-i386-linux/csu/crtn.S
    ((struct partial_symtab *) 0x831c61c)
    readin no
    fullname (null)
    text addresses 0x0 -- 0x0
    globals (none)
    statics (none)
    dependencies (none)
  }
  { psymtab test.c ((struct partial_symtab *) 0x831c5c8)
    readin no
    fullname (null)
    text addresses 0x0 -- 0x0
    globals (none)
    statics (none)
    dependencies {
      psymtab test.c ((struct partial_symtab *) 0x831c498)
    }
  }
  { psymtab test.c ((struct partial_symtab *) 0x831c498)
    readin no
    fullname (null)
    text addresses 0x8048328 -- 0x80483ce
    globals (* (struct partial_symbol **) 0x82fb1a4 @ 3)
    statics (* (struct partial_symbol **) 0x83174b8 @ 108)
    dependencies (none)
  }
  { psymtab /usr/src/build/231499-i386/BUILD/glibc-2.3.2-20030313/build-i386-linux/csu/crti.S
    ((struct partial_symtab *) 0x831c3fc)
    readin no
    fullname (null)
    text addresses 0x0 -- 0x0
    globals (none)
    statics (none)
    dependencies (none)
  }
  { psymtab init.c ((struct partial_symtab *) 0x831be98)
    readin no
    fullname (null)
    text addresses 0x0 -- 0x0
    globals (* (struct partial_symbol **) 0x82fb1a0 @ 1)
    statics (* (struct partial_symbol **) 0x8317348 @ 92)
    dependencies (none)
  }
  { psymtab ../sysdeps/i386/elf/start.S ((struct partial_symtab *) 0x831be30)
    readin no
    fullname (null)
    text addresses 0x0 -- 0x0
    globals (none)
    statics (none)
    dependencies {
      psymtab ../sysdeps/i386/elf/start.S ((struct partial_symtab *) 0x8317080)
    }
  }
  { psymtab ../sysdeps/i386/elf/start.S ((struct partial_symtab *) 0x8317080)
    readin no
    fullname (null)
    text addresses 0x8048278 -- 0x804829c
    globals (none)
    statics (none)
    dependencies (none)
  }
}

symtab 과 비교해봐도 내용이 확실히 많다. 이 테이블은 crtn.S, test.c, crti.S, init.c, start.S 에 대한 내용을 담고 있다. 여기서 디버깅 할 파일인 test.c 를 제외한 나머지는, 어떤 오브젝트 파일이든지 main 함수가 수행되기 전에 반드시 수행되어야 하는 파일들이다.
psymtabs 의 특징이라고 한다면 현재 루틴 실행시에 필요한 의존성을 갖는 모든 파일들(.c, .h)에 대한 정보를 가진다는 것이다.
예를 들어, 3 개의 파일로 만들어진 프로그램이 있다면, 디버깅 시에 psymtabs 에 모두 저장한다. 다음은 main 함수의 가장 마지막 행에서 psymtabs 를 출력한 것이다.

{ objfile /usr/local/gdb/bin/test_man ((struct objfile *) 0x830da50)
  { psymtab /usr/src/build/231499-i386/BUILD/glibc-2.3.2-20030313/build-i386-linux/csu/crtn.S
    ((struct partial_symtab *) 0x831f920)
    readin no
    fullname (null)
    text addresses 0x0 -- 0x0
    globals (none)
    statics (none)
    dependencies (none)
  }
  { psymtab main.c ((struct partial_symtab *) 0x831f8cc)
    readin no
    fullname (null)
    text addresses 0x0 -- 0x0
    globals (none)
    statics (none)
    dependencies {
      psymtab main.c ((struct partial_symtab *) 0x831f87c)
    }
  }
  { psymtab main.c ((struct partial_symtab *) 0x831f87c)
    readin yes
    fullname (null)
    text addresses 0x8048364 -- 0x80483c2
    globals (* (struct partial_symbol **) 0x82fb1ac @ 1)
    statics (* (struct partial_symbol **) 0x8305040 @ 108)
    dependencies (none)
  }
  { psymtab sub.c ((struct partial_symtab *) 0x831f828)
    readin no
    fullname (null)
    text addresses 0x0 -- 0x0
    globals (none)
    statics (none)
    dependencies {
      psymtab sub.c ((struct partial_symtab *) 0x831f7d8)
    }
  }
  { psymtab sub.c ((struct partial_symtab *) 0x831f7d8)
    readin yes
    fullname (null)
    text addresses 0x8048344 -- 0x8048361
    globals (* (struct partial_symbol **) 0x82fb1a8 @ 1)
    statics (* (struct partial_symbol **) 0x8304e90 @ 108)
    dependencies (none)
  }
  { psymtab add.c ((struct partial_symtab *) 0x831f784)
    ... (이 지점부터 출력의 중간 내용 일부가 누락되어 있음) ...
    text addresses 0x8048278 -- 0x804829c
    globals (none)
    statics (none)
    dependencies (none)
  }
}

위에서 출력된 결과에 대한 설명을 하자면, 아래의 표와 같다.

readin	해당 파일을 수행했는지 여부를 나타내는 항목으로(정확히는 이 psymtab 에 대응하는 symtab 을 이미 읽어들였는지를 나타낸다), 만일 yes 로 되어있다면 해당 파일의 함수를 실제로 수행했다는 의미이다. 위의 예에서는 add 함수(add.c) 와 sub 함수(sub.c) 를 수행했기 때문에, yes 가 되었다.
globals	global symbol list 이다. 이 리스트는 심볼 파일을 읽어들인 후에 정렬된다. 따라서 이 리스트 안에서 심볼을 찾을 때는 주로 binary 검색을 사용한다. globals_offset 은 global_psymbols[] 안에서의 정수 오프셋이다.
statics	static symbol list 이다. 이 리스트는 심볼 파일을 읽어들인 후에도 정렬하지 않으므로, 심볼을 찾으려면 리스트 전체를 순차적으로 검색해야 한다. 이 리스트를 검색한 결과는 결국 해당 파일의 심볼을 실제로 읽어들이거나(많은 시간이 소비되는 작업이다) 에러로 끝나게 되므로, 전체 검색으로도 충분하다. 이 항목은 static_psymbols[] 안에서의 오프셋과 사이즈로 표현된다.

요약

써보고 나니, 너무 장황하게 설명한 것 같아서 제대로 이해를 했는지 모르겠다. 단순히 명령어를 이용해서 현재 테이블이 가지고 있는 정보를 보고 이해를 한 내용이라, 아무래도 부족한 점이 많이 있을 줄로 안다. gdb internal 문서에는 총 3 개의 테이블로 구성된다고 설명하고 있지만, 실제로 심볼 테이블과 관련있는 테이블은 symtabs 와 psymtabs 이다.
처음에는 symtabs(full symbol tables) 와 psymtabs(partial symbol tables) 가 각각 심볼의 전체와 부분을 저장하는 테이블인 것으로 알고 있었다.
하지만, 다음과 같이 요약할 수 있다.

symtab : 현재 디버깅하고 있는 파일의 정보를 저장, 여러 개의 파일로 나뉘어졌을 경우에는 각 파일이 실행될 때마다, 파일의 정보를 저장
psymtab : 디버깅하려는 프로그램이 실행하기 위해서 참조하는 파일들(.h, .c)의 정보를 모두 테이블에 각각 저장, 그리고 readin 변수를 이용해서 이미 수행했던 파일을 표시함. 또한 global 심볼 리스트와 static 심볼 리스트를 사용하여 검색 효율을 높임

ABI 가 무엇인가

ABI 는 Application Binary Interface 의 약자로서, application program 과 operating system 사이, application 과 library 사이, 또는 application 의 component part 들 사이의 low-level 인터페이스를 정의한다. ABI 는 source code 와 library 사이의 인터페이스를 정의하는 API(application programming interface) 와는 다르다.
API 는 같은 source code 가 그 API 를 지원하는 어떤 시스템에서든 컴파일될 수 있도록 해 주는 반면, ABI 는 컴파일된 오브젝트 코드가 호환성있는 ABI 를 사용하는 시스템에서 수정없이 실행될 수 있도록 해 준다. ABI 가 정의하는 내용으로는, 예를 들면 함수 인자를 어떻게 전달하고 리턴값을 어떻게 받아오는지를 결정하는 호출 규약(calling convention), 시스템 콜 number 와 application 이 operating system 에 시스템 콜을 요청하는 방법, 그리고 완전한 operating system ABI 의 경우에는 오브젝트 파일의 바이너리 포맷과 프로그램 library 등이 있다.

완전한 ABI 의 경우, 예를 들어 Intel Binary Compatibility Standard(iBCS) 를 지원하는 시스템의 프로그램은 같은 ABI 를 지원하는 다른 시스템에서 수정없이 실행이 가능하다.
다른 ABI 들은 C++ name decoration 이나 같은 플랫폼 상의 컴파일러들 사이의 호출 규약과 같은 세부 사항을 표준화한다. 그러나, cross-platform 호환성까지 요구하지는 않는다.

Unix-like operating system 들 중에는 하나의 하드웨어 플랫폼 상에서 동작하면서도 서로 호환성이 없는 operating system 들이 많이 있다. 그래서 프로그램을 다른 시스템으로 포팅하려는 application vendor 에게 요구되는 노력을 줄이기 위해 ABI 표준화가 시도되고 있다. 그러나 이것은 리눅스 진영에서만 성공적으로 이루어지고 있다.

위 그림에서와 마찬가지로, ABI 는 하드웨어 플랫폼과 operating system 을 합친 개념이다. API 를 한 단계 넘어서, application 에서 operating system 으로 호출하는 방법을 정의한다. ABI 는 API + 특정 CPU 계열을 위한 machine language 로 정의된다.
API 는 runtime 호환성을 보장하지 않는다. 그러나 ABI 는 machine language, 즉 runtime format 을 정의하고 있기 때문에 runtime 호환성을 보장한다.

성능을 위해서 대부분의 컴파일러는 특정 메모리 영역에 레코드의 필드를 배치한다. 정확한 세부 내용은 언어의 종류와 컴파일러의 구현, CPU 에 따라서 달라지지만, 대부분의 컴파일러는 레코드에 할당된 메모리 내부에서 접근이 쉬운 위치에 필드를 배치한다. 예를 들어 80×86 에서, 인텔 ABI 를 따르는 컴파일러는 1 바이트 객체는 레코드 내의 임의의 오프셋에 배치할 것이고, 워드는 짝수 오프셋에만, 더블 워드나 좀더 큰 오브젝트는 더블 워드 경계(4 의 배수)에 해당하는 오프셋에만 배치할 것이다.

비록 모든 80×86 의 컴파일러가 인텔 ABI 를 지원하는 것은 아니지만, 대부분의 컴파일러는 ABI 를 지원하기 때문에, 서로 다른 언어에 의해서 쓰여진 함수들 사이에서도 레코드를 공유할 수 있다. 다른 CPU 제조사들 역시 그들 자신의 프로세서를 위한 ABI 를 제공해서, 그 ABI 를 준수하는 프로그램끼리는 런타임에 바이너리 데이터를 공유할 수 있도록 해준다.

DDD 로 GDB 디버깅 하기

DDD 는 GDB 의 GUI Frontend 로서 사용자에게 좀더 익숙한 인터페이스를 제공한다. GDB 의 루틴을 분석하기 위해서는 GDB 를 GDB 로 돌려야 하는 아이러니한 상황이 발생한다.
http://www.gnu.org/software/ddd/ 에서 최신버전을 다운로드 한다. 앞서 설명했듯이 Frontend 이기 때문에 gdb 가 사전에 설치되어 있어야 한다.
ddd 를 설치했다면, gdb 를 gdb 로 돌려보자!! 설치 경로는 /usr/local/ddd 이다.
여기서는 이름 때문에 혼동이 일어날 수 있으므로 대소문자로 구분한다. gdb 로 gdb 를 디버깅하기 위해서는 몇 가지 필요한 것들이 있다.

GDB : 디버거, 여기서는 기존의 RPM 형태로 사전에 설치되어 있다. GDB 본래의 기능을 사용한다.
gdb : 루틴을 분석하기 위한 -g 옵션으로 컴파일 된 오브젝트 파일. gdb 소스를 직접 컴파일해서 사용한다.
hello : 'hello world' 를 출력하는 프로그램의 오브젝트 파일. gcc -g 옵션을 사용해서 만들었다.

gdb 와 hello 는 편의상 /usr/local/ddd/bin 아래에 저장한다. 참고로 X-Window manager 는 Gnome 을 사용한다.
모든 준비가 끝났으면, 디버깅을 시작한다.

#/usr/local/ddd/bin/ddd &

'File → Open Program' 에서 gdb 를 선택한다. 아래 입출력 창에 (gdb) 프롬프트가 나타날 것이다.

(gdb) set args hello
(gdb) b main
(gdb) r

위와 같이 입력한다. gdb/gdb.c 파일의 main() 함수에 breakpoint 가 잡혀 멈추면 제대로 수행된 것이다.
다른 파일에 breakpoint 를 잡고 싶다면, 'File → Open Source' 에서 선택하면 된다.

마치며

GDB 핵심가이드 라는 제목이 무색해질 정도로 나름대로 궁금했던 내용을 정확하게 기술하려 여러가지 시도를 했었다.
결론은, GDB 를 이해하려면 프로그램 코드를 이해하기 전에 먼저 알아야 하는 선수 지식(object format 등)이 많이 있다는 것이다.
아무래도 소스코드를 따라가는 것만으로는 왜 이렇게 사용을 했는지 이해하기가 힘들었다.

나중에 시간이 허락한다면, 오브젝트 파일에서 디버깅 정보를 추출하는 방법에 대해서 알아봐야 하겠다.
추가적으로 gdb 를 이해하기 위한 문서들을 링크한 페이지는 http://sourceware.org/gdb/documentation/ 이다.

Table of Contents

FAQ