zoukankan      html  css  js  c++  java
  • 高级C代码的汇编分析

    在windows上,常用的函数调用方式有:

    Pascal方式,WINAPI(_stdcall)方式 和C方式(_cdecl)

    _cdecl调用规则:

    1,参数从右到左入堆栈

    2,在函数返回后,调用者要负责清除堆栈

    所以这种调用常会生成较大的可执行文件。

    _stdcall又称为WINAPI调用方式,规则:

    1,参数从右向左入堆栈

    2,被调用的函数在返回前自行清理堆栈

    所以这种调用会生成比cdecl小的代码

    Pascal调用方式,主要用在WIN16函数库中,现在基本不用

    规则:

    1,参数从左向右入堆栈

    2,被调用函数在返回前自行清理堆栈

    此外,在Windows内核中还常见的有快速调用方式(_fastcall)

    在C++编译的代码中有this call方式(_thiscall)

    在windows的32位x86调用约定中,不管哪种方式,整型和指针返回值都写在eax中(64位整数用edx:eax,浮点数用st(0)),外部从中获取返回值

    _cdecl方式步骤

    1,保存ebp

    2,保存esp到ebp

    3,在堆栈中腾出一个区域来保存局部变量

    4,保存ebx,esi,edi到堆栈中,函数调用完后恢复

    5,把局部变量区域的每个字节初始化为0CCh(即用rep stos按dword填充0CCCCCCCCh),0CCh实际上是int 3指令的机器码,这是一个断点软中断

    6,做函数里应该做的事情

    7,恢复ebx,esi,edi,esp,ebp,最后返回

    2:    int func(int a,int b)
    3:    {
    00401010   push        ebp
    00401011   mov         ebp,esp
    00401013   sub         esp,44h
    00401016   push        ebx
    00401017   push        esi
    00401018   push        edi
    00401019   lea         edi,[ebp-44h]
    0040101C   mov         ecx,11h
    00401021   mov         eax,0CCCCCCCCh
    00401026   rep stos    dword ptr [edi]
    4:        int c = a + b;
    00401028   mov         eax,dword ptr [ebp+8]
    0040102B   add         eax,dword ptr [ebp+0Ch]
    0040102E   mov         dword ptr [ebp-4],eax
    5:        return c;
    00401031   mov         eax,dword ptr [ebp-4]
    6:    }
    00401034   pop         edi
    00401035   pop         esi
    00401036   pop         ebx
    00401037   mov         esp,ebp
    00401039   pop         ebp
    0040103A   ret
    

    for循环的汇编代码分析:

    6:        int i;
    7:        for(i = 0 ;i < 50 ; i ++)
    0040B501   mov         dword ptr [ebp-8],0
    0040B508   jmp         func+33h (0040b513)
    0040B50A   mov         ecx,dword ptr [ebp-8]
    0040B50D   add         ecx,1
    0040B510   mov         dword ptr [ebp-8],ecx
    0040B513   cmp         dword ptr [ebp-8],32h
    0040B517   jge         func+44h (0040b524)
    8:            c = c + i;
    0040B519   mov         edx,dword ptr [ebp-4]
    0040B51C   add         edx,dword ptr [ebp-8]
    0040B51F   mov         dword ptr [ebp-4],edx
    0040B522   jmp         func+2Ah (0040b50a)
    9:
    10:       return c;
    0040B524   mov         eax,dword ptr [ebp-4]
    11:   }
    

     从上面的汇编代码可以分析出,for循环就是cmp指令+jmp指令

    根据cmp的判断结果,跳转到相应的位置执行代码

    do...while循环分析

    5:
    6:        int i = 0;
    0040B501   mov         dword ptr [ebp-8],0
    7:
    8:        do {
    9:            c = c +i;
    0040B508   mov         ecx,dword ptr [ebp-4]
    0040B50B   add         ecx,dword ptr [ebp-8]
    0040B50E   mov         dword ptr [ebp-4],ecx
    10:       }while(c < 50);
    0040B511   cmp         dword ptr [ebp-4],32h
    0040B515   jl          func+28h (0040b508)
    11:
    12:       return c;
    0040B517   mov         eax,dword ptr [ebp-4]
    13:   }
    0040B51A   pop         edi
    0040B51B   pop         esi
    0040B51C   pop         ebx
    0040B51D   mov         esp,ebp
    0040B51F   pop         ebp
    0040B520   ret
    

     从上面代码可以看出

    本质上do...while循环和for差不多,只是先执行一次循环体,再用cmp判断是否跳回循环体开头

    while循环:

    6:        int i = 0;
    0040B501   mov         dword ptr [ebp-8],0
    7:
    8:        while(i < 50)
    0040B508   cmp         dword ptr [ebp-8],32h
    0040B50C   jge         func+39h (0040b519)
    9:        {
    10:           c = c +i;
    0040B50E   mov         ecx,dword ptr [ebp-4]
    0040B511   add         ecx,dword ptr [ebp-8]
    0040B514   mov         dword ptr [ebp-4],ecx
    11:       };
    0040B517   jmp         func+28h (0040b508)
    12:
    13:       return c;
    0040B519   mov         eax,dword ptr [ebp-4]
    14:   }
    0040B51C   pop         edi
    0040B51D   pop         esi
    0040B51E   pop         ebx
    0040B51F   mov         esp,ebp
    0040B521   pop         ebp
    0040B522   ret
    

    if...else if...else语句分析

    :
    6:        int i = 0;
    0040B501   mov         dword ptr [ebp-8],0
    7:
    8:        if(c>0 && c < 10)
    0040B508   cmp         dword ptr [ebp-4],0
    0040B50C   jle         func+43h (0040b523)
    0040B50E   cmp         dword ptr [ebp-4],0Ah
    0040B512   jge         func+43h (0040b523)
    9:        {
    10:           printf("c > 0");
    0040B514   push        offset string "c > 0" (0041ff5c)
    0040B519   call        printf (0040b780)
    0040B51E   add         esp,4
    11:       }
    12:       else if(c>10 && c<00)
    0040B521   jmp         func+6Bh (0040b54b)
    0040B523   cmp         dword ptr [ebp-4],0Ah
    0040B527   jle         func+5Eh (0040b53e)
    0040B529   cmp         dword ptr [ebp-4],0
    0040B52D   jge         func+5Eh (0040b53e)
    13:       {
    14:           printf("c>10 && c<100");
    0040B52F   push        offset string "c>10 && c<100" (0041ff4c)
    0040B534   call        printf (0040b780)
    0040B539   add         esp,4
    15:       }
    16:       else
    0040B53C   jmp         func+6Bh (0040b54b)
    17:       {
    18:           printf("c>10 && c < 100");
    0040B53E   push        offset string "c>10 && c < 100" (0041ff3c)
    0040B543   call        printf (0040b780)
    0040B548   add         esp,4
    19:       }
    20:
    21:       return c;
    0040B54B   mov         eax,dword ptr [ebp-4]
    22:   }
    0040B54E   pop         edi
    0040B54F   pop         esi
    0040B550   pop         ebx
    0040B551   add         esp,48h
    0040B554   cmp         ebp,esp
    0040B556   call        __chkesp (0040b4a0)
    0040B55B   mov         esp,ebp
    0040B55D   pop         ebp
    0040B55E   ret
    

    switch...case 代码分析

    4:        int c = a + b;
    0040B4F8   mov         eax,dword ptr [ebp+8]
    0040B4FB   add         eax,dword ptr [ebp+0Ch]
    0040B4FE   mov         dword ptr [ebp-4],eax
    5:
    6:        switch(c)
    7:        {
    0040B501   mov         ecx,dword ptr [ebp-4]
    0040B504   mov         dword ptr [ebp-8],ecx
    0040B507   cmp         dword ptr [ebp-8],0
    0040B50B   je          func+35h (0040b515)
    0040B50D   cmp         dword ptr [ebp-8],1
    0040B511   je          func+42h (0040b522)
    0040B513   jmp         func+51h (0040b531)
    8:        case 0:
    9:            printf("c>0");
    0040B515   push        offset string "c>0" (0041ff4c)
    0040B51A   call        printf (0040b780)
    0040B51F   add         esp,4
    10:       case 1:
    11:           printf("c>10 && c<100");
    0040B522   push        offset string "c>10 && c<100" (0041ff3c)
    0040B527   call        printf (0040b780)
    0040B52C   add         esp,4
    12:           break;
    0040B52F   jmp         func+5Eh (0040b53e)
    13:       default:
    14:           printf("c>10 && c<100");
    0040B531   push        offset string "c>10 && c<100" (0041ff3c)
    0040B536   call        printf (0040b780)
    0040B53B   add         esp,4
    15:       }
    16:
    17:       return c;
    0040B53E   mov         eax,dword ptr [ebp-4]
    18:   }
    0040B541   pop         edi
    0040B542   pop         esi
    0040B543   pop         ebx
    0040B544   add         esp,48h
    0040B547   cmp         ebp,esp
    0040B549   call        __chkesp (0040b4a0)
    0040B54E   mov         esp,ebp
    0040B550   pop         ebp
    0040B551   ret
    

    结构体分析

    1:
    2:    typedef struct {
    3:        int a;
    4:        int b;
    5:        int c;
    6:    }mystruct;
    7:
    8:    int func(int a,int b)
    9:    {
    0040B800   push        ebp
    0040B801   mov         ebp,esp
    0040B803   sub         esp,1D8h
    0040B809   push        ebx
    0040B80A   push        esi
    0040B80B   push        edi
    0040B80C   lea         edi,[ebp-1D8h]
    0040B812   mov         ecx,76h
    0040B817   mov         eax,0CCCCCCCCh
    0040B81C   rep stos    dword ptr [edi]
    10:
    11:       unsigned char *buf[100];
    12:       mystruct *strs = (mystruct *)buf;
    0040B81E   lea         eax,[ebp-190h]
    0040B824   mov         dword ptr [ebp-194h],eax
    13:       int i;
    14:       for(i=0; i<5; i++)
    0040B82A   mov         dword ptr [ebp-198h],0
    0040B834   jmp         func+45h (0040b845)
    0040B836   mov         ecx,dword ptr [ebp-198h]
    0040B83C   add         ecx,1
    0040B83F   mov         dword ptr [ebp-198h],ecx
    0040B845   cmp         dword ptr [ebp-198h],5
    0040B84C   jge         func+94h (0040b894)
    15:       {
    16:           strs[i].a=0;
    0040B84E   mov         edx,dword ptr [ebp-198h]
    0040B854   imul        edx,edx,0Ch
    0040B857   mov         eax,dword ptr [ebp-194h]
    0040B85D   mov         dword ptr [eax+edx],0
    17:           strs[i].b=1;
    0040B864   mov         ecx,dword ptr [ebp-198h]
    0040B86A   imul        ecx,ecx,0Ch
    0040B86D   mov         edx,dword ptr [ebp-194h]
    0040B873   mov         dword ptr [edx+ecx+4],1
    18:           strs[i].c=2;
    0040B87B   mov         eax,dword ptr [ebp-198h]
    0040B881   imul        eax,eax,0Ch
    0040B884   mov         ecx,dword ptr [ebp-194h]
    0040B88A   mov         dword ptr [ecx+eax+8],2
    19:       }
    0040B892   jmp         func+36h (0040b836)
    20:
    21:       return 0;
    0040B894   xor         eax,eax
    22:   }
    0040B896   pop         edi
    0040B897   pop         esi
    0040B898   pop         ebx
    0040B899   mov         esp,ebp
    0040B89B   pop         ebp
    0040B89C   ret
    

    从上面不难看出,对结构体数组赋值时,先把数组的基址存放到一个局部变量(strs,即[ebp-194h])中

    然后用下标乘以结构体大小(0Ch)计算出每个结构体的偏移量,再加上各成员固定的偏移(0、4、8)完成赋值

    枚举,联合,结构结合分析:

    1:    typedef enum {
    2:        ENUM_1 = 1,
    3:        ENUM_2 = 2,
    4:        ENUM_3,
    5:        ENUM_4
    6:    }myenum;
    7:
    8:    typedef struct {
    9:        int a;
    10:       int b;
    11:       int c;
    12:   }mystruct;
    13:
    14:   typedef union {
    15:       mystruct s;
    16:       myenum e[3];
    17:   }myunion;
    18:
    19:   int func(int a,int b)
    20:   {
    00401020   push        ebp
    00401021   mov         ebp,esp
    00401023   sub         esp,0ACh
    00401029   push        ebx
    0040102A   push        esi
    0040102B   push        edi
    0040102C   lea         edi,[ebp-0ACh]
    00401032   mov         ecx,2Bh
    00401037   mov         eax,0CCCCCCCCh
    0040103C   rep stos    dword ptr [edi]
    21:       unsigned char buf[100] = {0};
    0040103E   mov         byte ptr [ebp-64h],0
    00401042   mov         ecx,18h
    00401047   xor         eax,eax
    00401049   lea         edi,[ebp-63h]
    0040104C   rep stos    dword ptr [edi]
    0040104E   stos        word ptr [edi]
    00401050   stos        byte ptr [edi]
    22:       myunion *uns = (myunion *)buf;
    00401051   lea         eax,[ebp-64h]
    00401054   mov         dword ptr [ebp-68h],eax
    23:
    24:       int i;
    25:
    26:       for(i = 0; i < 5; i++)
    00401057   mov         dword ptr [ebp-6Ch],0
    0040105E   jmp         func+49h (00401069)
    00401060   mov         ecx,dword ptr [ebp-6Ch]
    00401063   add         ecx,1
    00401066   mov         dword ptr [ebp-6Ch],ecx
    00401069   cmp         dword ptr [ebp-6Ch],5
    0040106D   jge         func+83h (004010a3)
    27:       {
    28:           uns[i].s.a=0;
    0040106F   mov         edx,dword ptr [ebp-6Ch]
    00401072   imul        edx,edx,0Ch
    00401075   mov         eax,dword ptr [ebp-68h]
    00401078   mov         dword ptr [eax+edx],0
    29:           uns[i].s.b = 1;
    0040107F   mov         ecx,dword ptr [ebp-6Ch]
    00401082   imul        ecx,ecx,0Ch
    00401085   mov         edx,dword ptr [ebp-68h]
    00401088   mov         dword ptr [edx+ecx+4],1
    30:           uns[i].e[2] = ENUM_4;
    00401090   mov         eax,dword ptr [ebp-6Ch]
    00401093   imul        eax,eax,0Ch
    00401096   mov         ecx,dword ptr [ebp-68h]
    00401099   mov         dword ptr [ecx+eax+8],4
    31:       }
    004010A1   jmp         func+40h (00401060)
    32:
    33:       return 0;
    004010A3   xor         eax,eax
    34:   }
    004010A5   pop         edi
    004010A6   pop         esi
    004010A7   pop         ebx
    004010A8   mov         esp,ebp
    004010AA   pop         ebp
    004010AB   ret
    

     我们发现这段代码和上面的汇编后代码基本一样,因此我们知道,汇编中对共用体和枚举类型没有特别的处理

    并不会引入新的代码,因为共用体和枚举都是方便给程序员用的,本质没什么改变

     其实上面这些控制语句,对反汇编来说很容易分析,逆向工程中最令人蛋疼的是算法

    一个3*3矩阵算法的逆向分析

    main函数

     int main()
    13:   {
    0040B640   push        ebp
    0040B641   mov         ebp,esp
    0040B643   sub         esp,0ACh
    0040B649   push        ebx
    0040B64A   push        esi
    0040B64B   push        edi
    0040B64C   lea         edi,[ebp-0ACh]
    0040B652   mov         ecx,2Bh
    0040B657   mov         eax,0CCCCCCCCh
    0040B65C   rep stos    dword ptr [edi]
    14:       int a[3][3] = {{1,2,3},{2,3,4},{3,4,5}};
    0040B65E   mov         dword ptr [ebp-24h],1
    0040B665   mov         dword ptr [ebp-20h],2
    0040B66C   mov         dword ptr [ebp-1Ch],3
    0040B673   mov         dword ptr [ebp-18h],2
    0040B67A   mov         dword ptr [ebp-14h],3
    0040B681   mov         dword ptr [ebp-10h],4
    0040B688   mov         dword ptr [ebp-0Ch],3
    0040B68F   mov         dword ptr [ebp-8],4
    0040B696   mov         dword ptr [ebp-4],5
    15:       int b[3][3] = {{2,3,4},{2,4,1},{6,2,1}};
    0040B69D   mov         dword ptr [ebp-48h],2
    0040B6A4   mov         dword ptr [ebp-44h],3
    0040B6AB   mov         dword ptr [ebp-40h],4
    0040B6B2   mov         dword ptr [ebp-3Ch],2
    0040B6B9   mov         dword ptr [ebp-38h],4
    0040B6C0   mov         dword ptr [ebp-34h],1
    0040B6C7   mov         dword ptr [ebp-30h],6
    0040B6CE   mov         dword ptr [ebp-2Ch],2
    0040B6D5   mov         dword ptr [ebp-28h],1
    16:       int c[3][3];
    17:
    18:       func(a,b,c);
    0040B6DC   lea         eax,[ebp-6Ch]
    0040B6DF   push        eax
    0040B6E0   lea         ecx,[ebp-48h]
    0040B6E3   push        ecx
    0040B6E4   lea         edx,[ebp-24h]
    0040B6E7   push        edx
    0040B6E8   call        @ILT+5(_func) (0040100a)
    0040B6ED   add         esp,0Ch
    19:
    20:       return 0;
    0040B6F0   xor         eax,eax
    21:   }
    0040B6F2   pop         edi
    0040B6F3   pop         esi
    0040B6F4   pop         ebx
    0040B6F5   add         esp,0ACh
    0040B6FB   cmp         ebp,esp
    0040B6FD   call        __chkesp (00401130)
    0040B702   mov         esp,ebp
    0040B704   pop         ebp
    0040B705   ret
    

    算法函数:

    1:    int func(int a[3][3],int b[3][3],int c[3][3])
    2:    {
    0040B580   push        ebp
    0040B581   mov         ebp,esp
    0040B583   sub         esp,48h
    0040B586   push        ebx
    0040B587   push        esi
    0040B588   push        edi
    0040B589   lea         edi,[ebp-48h]
    0040B58C   mov         ecx,12h
    0040B591   mov         eax,0CCCCCCCCh
    0040B596   rep stos    dword ptr [edi]
    3:        int i,j;
    4:        for(i = 0 ; i < 3; i++)
    0040B598   mov         dword ptr [ebp-4],0
    0040B59F   jmp         func+2Ah (0040b5aa)
    0040B5A1   mov         eax,dword ptr [ebp-4]
    0040B5A4   add         eax,1
    0040B5A7   mov         dword ptr [ebp-4],eax
    0040B5AA   cmp         dword ptr [ebp-4],3
    0040B5AE   jge         func+0AAh (0040b62a)
    5:        {
    6:            for(j = 0 ; j < 3; j ++)
    0040B5B0   mov         dword ptr [ebp-8],0
    0040B5B7   jmp         func+42h (0040b5c2)
    0040B5B9   mov         ecx,dword ptr [ebp-8]
    0040B5BC   add         ecx,1
    0040B5BF   mov         dword ptr [ebp-8],ecx
    0040B5C2   cmp         dword ptr [ebp-8],3
    0040B5C6   jge         func+0A5h (0040b625)
    7:                c[i][j] = a[i][0]*b[0][j]+a[i][1]*b[1][j]+a[i][2]*b[2][j];
    0040B5C8   mov         edx,dword ptr [ebp-4]
    0040B5CB   imul        edx,edx,0Ch
    0040B5CE   mov         eax,dword ptr [ebp+8]
    0040B5D1   mov         ecx,dword ptr [ebp-8]
    0040B5D4   mov         esi,dword ptr [ebp+0Ch]
    0040B5D7   mov         edx,dword ptr [eax+edx]
    0040B5DA   imul        edx,dword ptr [esi+ecx*4]
    0040B5DE   mov         eax,dword ptr [ebp-4]
    0040B5E1   imul        eax,eax,0Ch
    0040B5E4   mov         ecx,dword ptr [ebp+8]
    0040B5E7   mov         esi,dword ptr [ebp-8]
    0040B5EA   mov         edi,dword ptr [ebp+0Ch]
    0040B5ED   mov         eax,dword ptr [ecx+eax+4]
    0040B5F1   imul        eax,dword ptr [edi+esi*4+0Ch]
    0040B5F6   add         edx,eax
    0040B5F8   mov         ecx,dword ptr [ebp-4]
    0040B5FB   imul        ecx,ecx,0Ch
    0040B5FE   mov         eax,dword ptr [ebp+8]
    0040B601   mov         esi,dword ptr [ebp-8]
    0040B604   mov         edi,dword ptr [ebp+0Ch]
    0040B607   mov         ecx,dword ptr [eax+ecx+8]
    0040B60B   imul        ecx,dword ptr [edi+esi*4+18h]
    0040B610   add         edx,ecx
    0040B612   mov         eax,dword ptr [ebp-4]
    0040B615   imul        eax,eax,0Ch
    0040B618   mov         ecx,dword ptr [ebp+10h]
    0040B61B   add         ecx,eax
    0040B61D   mov         eax,dword ptr [ebp-8]
    0040B620   mov         dword ptr [ecx+eax*4],edx
    0040B623   jmp         func+39h (0040b5b9)
    8:        }
    0040B625   jmp         func+21h (0040b5a1)
    9:        return 0;
    0040B62A   xor         eax,eax
    10:   }
    0040B62C   pop         edi
    0040B62D   pop         esi
    0040B62E   pop         ebx
    0040B62F   mov         esp,ebp
    0040B631   pop         ebp
    0040B632   ret
    

    从上面的代码我们可以看出,汇编对Debug模式的二维数组操作方式如下:

    mov		eax,<数组元素下标>
    imul	eax,eax,<结构体的大小>
    mov		ecx,<结构体开始地址>
    mov		eax,dword ptr [ecx+eax]
    访问内部变量(结构体成员或一行中的某个元素)的时候,还要再加上该变量的偏移量,例如:
    
    mov     eax,dword ptr [ecx+eax+0CH]
    
  • 相关阅读:
    LAMP安装配置过程
    【校招面试 之 C/C++】第31题 C++ 11新特性(二)之nullptr关键字
    【校招面试 之 C/C++】第30题 C++ 11新特性(一)之auto关键字
    【校招面试 之 C/C++】第29题 C/C++ 关键字extern
    【校招面试 之 剑指offer】第18题 删除链表中的节点
    【校招面试 之 剑指offer】第17题 打印从1到最大的n位数
    【校招面试 之 剑指offer】第16题 数值的整数次方
    【校招面试 之 剑指offer】第11题 旋转数组中的最小数字
    【Linux 进程】之关于父子进程之间的数据共享分析
    【校招面试 之 剑指offer】第10-3题 矩阵覆盖问题
  • 原文地址:https://www.cnblogs.com/lfsblack/p/3763993.html
Copyright © 2011-2022 走看看