zoukankan      html  css  js  c++  java
  • 基于GCC和GDB的简单C程序的汇编分析和运行时堆栈分析

    学号:212

    一、从源代码文件到可执行文件

            从C文件到可执行文件,一般来说需要两步:先将每个C文件编译成.o文件,再把多个.o文件和链接库一起链接成可执行文件。但具体来说,其实分为四步(预处理、编译、汇编、链接),下面以example.c为例进行说明。
    #define MYINT int
    
    short addend1 = 1;
    static int addend2 = 2;
    const static long addend3 = 3;
    
    static MYINT g(MYINT x)
    {
        return x + addend1;
    }
    
    static const MYINT f(MYINT x)
    {
        return g(x + addend2);
    }
    
    MYINT main(void)
    {
        return f(8) + addend3;
    }

            第一步: 预处理,进行宏替换等工作。执行gcc -E -o example.cpp example.c,得到example.cpp如下:

    # 1 "example.c"
    # 1 "<built-in>"
    # 1 "<命令行>"
    # 1 "example.c"
    
    short addend1 = 1;
    static int addend2 = 2;
    const static long addend3 = 3;
    
    static int g(int x)
    {
        return x + addend1;
    }
    
    static const int f(int x)
    {
        return g(x + addend2);
    }
    
    int main(void)
    {
        return f(8) + addend3;
    }

            第二步:将预处理文件编译成汇编文件。执行 gcc -x cpp-output -S -fno-asynchronous-unwind-tables -o example.s example.cpp,加入 -fno-asynchronous-unwind-tables是为了禁止生成.cfi代码。生成的汇编代码如下:

        .file    "example.c"       ; C文件的文件名
        .globl    addend1          ; 全局变量
        .data                      ; 数据段
                                   ; short addend1 = 1;开始
        .align 2                   ; 地址对齐,按2的整数倍对齐
        .type    addend1, @object  ; 类型是对象
        .size    addend1, 2        ; 占两个字节
    addend1:                       ; 起始地址
        .value    1                ; 初始值
                                   ; static int addend2 = 2;开始
        .align 4 
        .type    addend2, @object
        .size    addend2, 4
    addend2:
        .long    2
        .section    .rodata        ; 常量存储区开始
        .align 4
        .type    addend3, @object
        .size    addend3, 4
    addend3:
        .long    3
        .text                      ; 代码段开始
        .type    g, @function      ; 函数g
    g:                             ; g的起始地址
        pushl    %ebp              ; %ebp入栈
        movl    %esp, %ebp         ; 当前函数栈从%esp开始
        movzwl    addend1, %eax    ; 把short放入%eax
        cwtl
        addl    8(%ebp), %eax      ; int + short
        popl    %ebp
        ret
        .size    g, .-g
        .type    f, @function
    f:
        pushl    %ebp
        movl    %esp, %ebp
        subl    $4, %esp           ; 为调用g时传递参数准备空间
        movl    addend2, %eax      ; 在%eax中计算实参
        addl    8(%ebp), %eax
        movl    %eax, (%esp)       ; 实参入栈
        call    g
        leave
        ret
        .size    f, .-f
        .globl    main             ; main未加static,是全局可见的
        .type    main, @function
    main:
        pushl    %ebp
        movl    %esp, %ebp
        subl    $4, %esp
        movl    $8, (%esp)
        call    f
        movl    addend3, %edx
        addl    %edx, %eax
        leave
        ret
        .size    main, .-main
        .ident    "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
        .section    .note.GNU-stack,"",@progbits
      
            由汇编代码可见:1. 未加static的全局变量和函数都生成了相应的.globl指令,表示该符号全局可见;2. 在这个32位(i386)平台上,int和long都是4字节;3. const变量放在只读数据段.rodata中。
            第三步,将汇编代码汇编成二进制目标文件:执行gcc -x assembler -c example.s,生成example.o文件。用objdump -D example.o察看,得到如下信息:
    example.o:     file format elf32-i386
    
    
    Disassembly of section .text:
    
    00000000 <g>:
       0:    55                       push   %ebp
       1:    89 e5                    mov    %esp,%ebp
       3:    0f b7 05 00 00 00 00     movzwl 0x0,%eax
       a:    98                       cwtl   
       b:    03 45 08                 add    0x8(%ebp),%eax
       e:    5d                       pop    %ebp
       f:    c3                       ret    
    
    00000010 <f>:
      10:    55                       push   %ebp
      11:    89 e5                    mov    %esp,%ebp
      13:    83 ec 04                 sub    $0x4,%esp
      16:    a1 04 00 00 00           mov    0x4,%eax
      1b:    03 45 08                 add    0x8(%ebp),%eax
      1e:    89 04 24                 mov    %eax,(%esp)
      21:    e8 da ff ff ff           call   0 <g>
      26:    c9                       leave  
      27:    c3                       ret    
    
    00000028 <main>:
      28:    55                       push   %ebp
      29:    89 e5                    mov    %esp,%ebp
      2b:    83 ec 04                 sub    $0x4,%esp
      2e:    c7 04 24 08 00 00 00     movl   $0x8,(%esp)
      35:    e8 d6 ff ff ff           call   10 <f>
      3a:    8b 15 00 00 00 00        mov    0x0,%edx
      40:    01 d0                    add    %edx,%eax
      42:    c9                       leave  
      43:    c3                       ret    
    
    Disassembly of section .data:
    
    00000000 <addend1>:
       0:    01 00                    add    %eax,(%eax)
        ...
    
    00000004 <addend2>:
       4:    02 00                    add    (%eax),%al
        ...
    
    Disassembly of section .rodata:
    
    00000000 <addend3>:
       0:    03 00                    add    (%eax),%eax
        ...
    
    Disassembly of section .comment:
    
    00000000 <.comment>:
       0:    00 47 43                 add    %al,0x43(%edi)
       3:    43                       inc    %ebx
       4:    3a 20                    cmp    (%eax),%ah
       6:    28 55 62                 sub    %dl,0x62(%ebp)
       9:    75 6e                    jne    79 <main+0x51>
       b:    74 75                    je     82 <main+0x5a>
       d:    2f                       das    
       e:    4c                       dec    %esp
       f:    69 6e 61 72 6f 20 34     imul   $0x34206f72,0x61(%esi),%ebp
      16:    2e 36 2e 33 2d 31 75     cs ss xor %cs:%ss:0x75627531,%ebp
      1d:    62 75 
      1f:    6e                       outsb  %ds:(%esi),(%dx)
      20:    74 75                    je     97 <main+0x6f>
      22:    35 29 20 34 2e           xor    $0x2e342029,%eax
      27:    36 2e 33 00              ss xor %cs:%ss:(%eax),%eax

            第四步,将目标文件链接成可执行文件:执行gcc -o example example.o。此时可以继续用objdump -D example > example.objdump察看,可见example.objdump文件有728行,链接时已经加入了大量的其他代码,其中我们自己写的部分是:

    080483b4 <g>:
     80483b4:    55                       push   %ebp
     80483b5:    89 e5                    mov    %esp,%ebp
     80483b7:    0f b7 05 10 a0 04 08     movzwl 0x804a010,%eax
     80483be:    98                       cwtl   
     80483bf:    03 45 08                 add    0x8(%ebp),%eax
     80483c2:    5d                       pop    %ebp
     80483c3:    c3                       ret    
    
    080483c4 <f>:
     80483c4:    55                       push   %ebp
     80483c5:    89 e5                    mov    %esp,%ebp
     80483c7:    83 ec 04                 sub    $0x4,%esp
     80483ca:    a1 14 a0 04 08           mov    0x804a014,%eax
     80483cf:    03 45 08                 add    0x8(%ebp),%eax
     80483d2:    89 04 24                 mov    %eax,(%esp)
     80483d5:    e8 da ff ff ff           call   80483b4 <g>
     80483da:    c9                       leave  
     80483db:    c3                       ret    
    
    080483dc <main>:
     80483dc:    55                       push   %ebp
     80483dd:    89 e5                    mov    %esp,%ebp
     80483df:    83 ec 04                 sub    $0x4,%esp
     80483e2:    c7 04 24 08 00 00 00     movl   $0x8,(%esp)
     80483e9:    e8 d6 ff ff ff           call   80483c4 <f>
     80483ee:    8b 15 d0 84 04 08        mov    0x80484d0,%edx
     80483f4:    01 d0                    add    %edx,%eax
     80483f6:    c9                       leave  
     80483f7:    c3                       ret    
     80483f8:    90                       nop
     80483f9:    90                       nop
     80483fa:    90                       nop
     80483fb:    90                       nop
     80483fc:    90                       nop
     80483fd:    90                       nop
     80483fe:    90                       nop
     80483ff:    90                       nop
    
    ...
    ...
    ...
    
    Disassembly of section .data:
    
    0804a008 <__data_start>:
     804a008:    00 00                    add    %al,(%eax)
        ...
    
    0804a00c <__dso_handle>:
     804a00c:    00 00                    add    %al,(%eax)
        ...
    
    0804a010 <addend1>:
     804a010:    01 00                    add    %eax,(%eax)
        ...
    
    0804a014 <addend2>:
     804a014:    02 00                    add    (%eax),%al
        ...
    
    Disassembly of section .bss:
    
    0804a018 <completed.6159>:
     804a018:    00 00                    add    %al,(%eax)
        ...
    
    0804a01c <dtor_idx.6161>:
     804a01c:    00 00                    add    %al,(%eax)
        ...
            可见此时的代码已经有了它运行时的实际地址。上面的摘录中没有列出.rodata段,但它并没有消失:main中读取addend3所用的地址0x80484d0就落在.rodata段内(见下面readelf输出中的第15号节,起始地址0x080484c8,大小0xc)。
            然后还可以用readelf -a example > example.elf 察看该可执行文件的ELF头部信息,共221行,这里只摘录前57行:
    ELF Header:
      Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 
      Class:                             ELF32
      Data:                              2's complement, little endian
      Version:                           1 (current)
      OS/ABI:                            UNIX - System V
      ABI Version:                       0
      Type:                              EXEC (Executable file)
      Machine:                           Intel 80386
      Version:                           0x1
      Entry point address:               0x8048300
      Start of program headers:          52 (bytes into file)
      Start of section headers:          4416 (bytes into file)
      Flags:                             0x0
      Size of this header:               52 (bytes)
      Size of program headers:           32 (bytes)
      Number of program headers:         9
      Size of section headers:           40 (bytes)
      Number of section headers:         30
      Section header string table index: 27
    
    Section Headers:
      [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
      [ 0]                   NULL            00000000 000000 000000 00      0   0  0
      [ 1] .interp           PROGBITS        08048154 000154 000013 00   A  0   0  1
      [ 2] .note.ABI-tag     NOTE            08048168 000168 000020 00   A  0   0  4
      [ 3] .note.gnu.build-i NOTE            08048188 000188 000024 00   A  0   0  4
      [ 4] .gnu.hash         GNU_HASH        080481ac 0001ac 000020 04   A  5   0  4
      [ 5] .dynsym           DYNSYM          080481cc 0001cc 000040 10   A  6   1  4
      [ 6] .dynstr           STRTAB          0804820c 00020c 000045 00   A  0   0  1
      [ 7] .gnu.version      VERSYM          08048252 000252 000008 02   A  5   0  2
      [ 8] .gnu.version_r    VERNEED         0804825c 00025c 000020 00   A  6   1  4
      [ 9] .rel.dyn          REL             0804827c 00027c 000008 08   A  5   0  4
      [10] .rel.plt          REL             08048284 000284 000010 08   A  5  12  4
      [11] .init             PROGBITS        08048294 000294 00002e 00  AX  0   0  4
      [12] .plt              PROGBITS        080482d0 0002d0 000030 04  AX  0   0 16
      [13] .text             PROGBITS        08048300 000300 0001ac 00  AX  0   0 16
      [14] .fini             PROGBITS        080484ac 0004ac 00001a 00  AX  0   0  4
      [15] .rodata           PROGBITS        080484c8 0004c8 00000c 00   A  0   0  4
      [16] .eh_frame_hdr     PROGBITS        080484d4 0004d4 00002c 00   A  0   0  4
      [17] .eh_frame         PROGBITS        08048500 000500 0000a4 00   A  0   0  4
      [18] .ctors            PROGBITS        08049f14 000f14 000008 00  WA  0   0  4
      [19] .dtors            PROGBITS        08049f1c 000f1c 000008 00  WA  0   0  4
      [20] .jcr              PROGBITS        08049f24 000f24 000004 00  WA  0   0  4
      [21] .dynamic          DYNAMIC         08049f28 000f28 0000c8 08  WA  6   0  4
      [22] .got              PROGBITS        08049ff0 000ff0 000004 04  WA  0   0  4
      [23] .got.plt          PROGBITS        08049ff4 000ff4 000014 04  WA  0   0  4
      [24] .data             PROGBITS        0804a008 001008 000010 00  WA  0   0  4
      [25] .bss              NOBITS          0804a018 001018 000008 00  WA  0   0  4
      [26] .comment          PROGBITS        00000000 001018 00002a 01  MS  0   0  1
      [27] .shstrtab         STRTAB          00000000 001042 0000fc 00      0   0  1
      [28] .symtab           SYMTAB          00000000 0015f0 000450 10     29  49  4
      [29] .strtab           STRTAB          00000000 001a40 000209 00      0   0  1
    Key to Flags:
      W (write), A (alloc), X (execute), M (merge), S (strings)
      I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown)
      O (extra OS processing required) o (OS specific), p (processor specific)

    二、运行时堆栈分析

            为了使用gdb进行调试,用gcc -g example.c -o example重新编译代码,然后执行gdb example进入gdb调试。

            在main函数入口处设置断点,运行程序,然后察看运行到的汇编指令、此时的寄存器数据和堆栈:

    (gdb) b 17
    Breakpoint 1 at 0x80483e2: file example.c, line 17.
    (gdb) r
    Starting program: /home/qpx/操作系统/example 
    
    Breakpoint 1, main () at example.c:19
    19        return f(8) + addend3;
    (gdb) disassemble 
    Dump of assembler code for function main:
       0x080483dc <+0>:    push   %ebp
       0x080483dd <+1>:    mov    %esp,%ebp
       0x080483df <+3>:    sub    $0x4,%esp
    => 0x080483e2 <+6>:    movl   $0x8,(%esp)
       0x080483e9 <+13>:    call   0x80483c4 <f>
       0x080483ee <+18>:    mov    0x80484d0,%edx
       0x080483f4 <+24>:    add    %edx,%eax
       0x080483f6 <+26>:    leave  
       0x080483f7 <+27>:    ret    
    End of assembler dump.
    (gdb) info registers 
    eax            0x1    1
    ecx            0xbffff394    -1073745004
    edx            0xbffff324    -1073745116
    ebx            0xb7fc2ff4    -1208209420
    esp            0xbffff2f4    0xbffff2f4
    ebp            0xbffff2f8    0xbffff2f8
    esi            0x0    0
    edi            0x0    0
    eip            0x80483e2    0x80483e2 <main+6>
    eflags         0x200282    [ SF IF ID ]
    cs             0x73    115
    ss             0x7b    123
    ds             0x7b    123
    es             0x7b    123
    fs             0x0    0
    gs             0x33    51

      (gdb) x/2 0xbffff2f4
      0xbffff2f4: 0x00000000 0x00000000

    可见此时主函数的栈基址为0xbffff2f8,而%esp已经下移4字节准备为函数 f 传递参数8,但目前%esp所指堆栈内容为0,%ebp所指内容也为0。下面展示每一步时%esp、%ebp和堆栈内容的变化:

    call指令将下一条指令的地址入栈:

    将上一个函数的基址入栈,从当前%esp开始作为新基址:

    先为传参做准备:

    实参的计算在%eax中进行:

    实参入栈:

    call指令将下一条指令的地址入栈:

    计算short+int:

    pop %ebp指令将栈顶弹到%ebp中,同时%esp增加4字节:

    ret指令将栈顶(返回地址)弹给%eip,同时%esp增加4字节:

    因为函数 f 修改了%esp,所以用leave指令恢复。leave指令先将%esp对齐到%ebp(相当于mov %ebp,%esp),然后把栈顶弹给%ebp(相当于pop %ebp):

     程序最终结束。

  • 相关阅读:
    bzoj2564: 集合的面积(闵可夫斯基和 凸包)
    省选前做题记录
    loj#2978. 「THUSCH 2017」杜老师(乱搞)
    loj#6437. 「PKUSC2018」PKUSC(计算几何)
    洛谷P4501/loj#2529 [ZJOI2018]胖(ST表+二分)
    loj#6436. 「PKUSC2018」神仙的游戏(NTT)
    洛谷P4459/loj#2511 [BJOI2018]双人猜数游戏(博弈论)
    洛谷P4458 /loj#2512.[BJOI2018]链上二次求和(线段树)
    洛谷P4457/loj#2513 [BJOI2018]治疗之雨(高斯消元+概率期望)
    loj#6435. 「PKUSC2018」星际穿越(倍增)
  • 原文地址:https://www.cnblogs.com/yding9/p/3084113.html
Copyright © 2011-2022 走看看