zoukankan      html  css  js  c++  java
  • pwnable.kr memcpy之write up

      1 // compiled with : gcc -o memcpy memcpy.c -m32 -lm
      2 #include <stdio.h>
      3 #include <string.h>
      4 #include <stdlib.h>
      5 #include <signal.h>
      6 #include <unistd.h>
      7 #include <sys/mman.h>
      8 #include <math.h>
      9 
     /*
      * Read the CPU time-stamp counter (x86 only).
      *
      * The rdtsc instruction leaves the low 32 bits of the counter in EAX and
      * the high 32 bits in EDX. Both halves are captured through explicit
      * output operands and combined, instead of falling off the end of a
      * non-void function and hoping the registers survive as the return value.
      *
      * Returns the 64-bit counter value.
      */
     unsigned long long rdtsc(void){
         unsigned int lo, hi;
         __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
         return ((unsigned long long)hi << 32) | lo;
     }
     13 
     /*
      * Copy len bytes from src to dest one byte at a time.
      *
      * dest and src must not overlap. Returns dest.
      */
     char* slow_memcpy(char* dest, const char* src, size_t len){
         size_t i;   /* same type as len: no signed/unsigned comparison, no int overflow */
         for (i=0; i<len; i++) {
             dest[i] = src[i];
         }
         return dest;
     }
     21 
     /*
      * Copy len bytes from src to dest, moving whole 64-byte blocks through
      * the SSE registers xmm0-xmm3 and handing any remainder (< 64 bytes) to
      * slow_memcpy().
      *
      * Precondition: when len >= 64, both src and dest must be 16-byte
      * aligned. movdqa (load) and movntps (non-temporal store) fault on a
      * memory operand that is not 16-byte aligned; this function does not
      * check or correct the alignment.
      *
      * Returns the original dest pointer, like slow_memcpy().
      */
     char* fast_memcpy(char* dest, const char* src, size_t len){
         char* const start = dest;   /* dest is advanced below; remember what to return */
         size_t i;
         // 64-byte block fast copy
         if(len >= 64){
             i = len / 64;
             len &= (64-1);
             while(i-- > 0){
                 __asm__ __volatile__ (
                 "movdqa (%0), %%xmm0\n"
                 "movdqa 16(%0), %%xmm1\n"
                 "movdqa 32(%0), %%xmm2\n"
                 "movdqa 48(%0), %%xmm3\n"
                 "movntps %%xmm0, (%1)\n"
                 "movntps %%xmm1, 16(%1)\n"
                 "movntps %%xmm2, 32(%1)\n"
                 "movntps %%xmm3, 48(%1)\n"
                 ::"r"(src),"r"(dest)
                 /* tell the compiler which registers the block overwrites */
                 :"memory","xmm0","xmm1","xmm2","xmm3");
                 dest += 64;
                 src += 64;
             }
         }

         // byte-to-byte slow copy
         if(len) slow_memcpy(dest, src, len);
         return start;
     }
     48 
     /*
      * Interactive memcpy benchmark.
      *
      * Reads ten copy sizes from stdin (one per power-of-two range from
      * 8~16 up to 4096~8192), then for each size allocates a heap buffer and
      * times slow_memcpy() and fast_memcpy() into it with rdtsc(). Prints the
      * flag line once all ten experiments have completed.
      *
      * Exits with status 0 on out-of-range or unparsable input and with
      * status 1 if mmap() or malloc() fails.
      */
     int main(void){

         setvbuf(stdout, 0, _IONBF, 0);
         setvbuf(stdin, 0, _IOLBF, 0);

         printf("Hey, I have a boring assignment for CS class.. :(\n");
         printf("The assignment is simple.\n");

         printf("-----------------------------------------------------\n");
         printf("- What is the best implementation of memcpy?        -\n");
         printf("- 1. implement your own slow/fast version of memcpy -\n");
         printf("- 2. compare them with various size of data         -\n");
         printf("- 3. conclude your experiment and submit report     -\n");
         printf("-----------------------------------------------------\n");

         printf("This time, just help me out with my experiment and get flag\n");
         printf("No fancy hacking, I promise :D\n");

         unsigned long long t1, t2;
         int e;
         char* src;
         char* dest;
         unsigned int low, high;
         unsigned int size;
         // allocate memory (protection 7 = read | write | exec)
         char* cache1 = mmap(0, 0x4000, 7, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
         char* cache2 = mmap(0, 0x4000, 7, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
         src = mmap(0, 0x2000, 7, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
         if( cache1 == MAP_FAILED || cache2 == MAP_FAILED || src == MAP_FAILED ){
             perror("mmap");
             exit(1);
         }

         size_t sizes[10];
         int i=0;

         // setup experiment parameters
         for(e=4; e<14; e++){    // 2^13 = 8K
             low = 1u << (e-1);
             high = 1u << e;
             printf("specify the memcpy amount between %u ~ %u : ", low, high);
             // a failed parse is rejected the same way as an out-of-range value
             if( scanf("%u", &size) != 1 || size < low || size > high ){
                 printf("don't mess with the experiment.\n");
                 exit(0);
             }
             sizes[i++] = size;
         }

         sleep(1);
         printf("ok, lets run the experiment with your configuration\n");
         sleep(1);

         // run experiment
         for(i=0; i<10; i++){
             size = sizes[i];
             printf("experiment %d : memcpy with buffer size %u\n", i+1, size);
             // NOTE: dest is never freed; each experiment gets a fresh heap block
             // and the sequence of allocations is left exactly as it was.
             dest = malloc( size );
             if( dest == NULL ){
                 perror("malloc");
                 exit(1);
             }

             memcpy(cache1, cache2, 0x4000);        // to eliminate cache effect
             t1 = rdtsc();
             slow_memcpy(dest, src, size);        // byte-to-byte memcpy
             t2 = rdtsc();
             printf("ellapsed CPU cycles for slow_memcpy : %llu\n", t2-t1);

             memcpy(cache1, cache2, 0x4000);        // to eliminate cache effect
             t1 = rdtsc();
             fast_memcpy(dest, src, size);        // block-to-block memcpy
             t2 = rdtsc();
             printf("ellapsed CPU cycles for fast_memcpy : %llu\n", t2-t1);
             printf("\n");
         }

         printf("thanks for helping my experiment!\n");
         printf("flag : ----- erased in this source code -----\n");
         return 0;
     }

    分析源码:

        size_t sizes[10];
        int i=0;
    
        // setup experiment parameters
        for(e=4; e<14; e++){    // 2^13 = 8K
            low = pow(2,e-1);
            high = pow(2,e);
            printf("specify the memcpy amount between %d ~ %d : ", low, high);
            scanf("%d", &size);
            if( size < low || size > high ){
                printf("don't mess with the experiment.
    ");
                exit(0);
            }
            sizes[i++] = size;
        }

    从上面的代码可以看出:循环共执行 10 次(e 从 4 到 13),每次需要输入一个介于 2^(e-1) 和 2^e 之间(含边界)的数,否则程序直接退出。

    // run experiment
        for(i=0; i<10; i++){
            size = sizes[i];
            printf("experiment %d : memcpy with buffer size %d
    ", i+1, size);
            dest = malloc( size );

    这段代码分析得到,输入size后malloc分配空间,分配的空间大小就是我们输入的size大小。

    memcpy(cache1, cache2, 0x4000);        // to eliminate cache effect
            t1 = rdtsc();
            slow_memcpy(dest, src, size);        // byte-to-byte memcpy
            t2 = rdtsc();
            printf("ellapsed CPU cycles for slow_memcpy : %llu
    ", t2-t1);
    
            memcpy(cache1, cache2, 0x4000);        // to eliminate cache effect
            t1 = rdtsc();
            fast_memcpy(dest, src, size);        // block-to-block memcpy
            t2 = rdtsc();
            printf("ellapsed CPU cycles for fast_memcpy : %llu
    ", t2-t1);
            printf("
    ");
        }

    分配空间后,分别用slow_memcpy和fast_memcpy两种方式,把src(mmap分配的内存)中的数据拷贝到刚分配的堆块dest中,并比较二者的耗时。那么分析一下slow_memcpy和fast_memcpy:

    /*
     * Copy len bytes from src to dest one byte at a time.
     *
     * dest and src must not overlap. Returns dest.
     */
    char* slow_memcpy(char* dest, const char* src, size_t len){
        size_t i;   /* same type as len: no signed/unsigned comparison, no int overflow */
        for (i=0; i<len; i++) {
            dest[i] = src[i];
        }
        return dest;
    }
    char* fast_memcpy(char* dest, const char* src, size_t len){
        size_t i;
        // 64-byte block fast copy
        if(len >= 64){
            i = len / 64;
            len &= (64-1);
            
            while(i-- > 0){
                __asm__ __volatile__ (
                "movdqa (%0), %%xmm0
    "
                "movdqa 16(%0), %%xmm1
    "
                "movdqa 32(%0), %%xmm2
    "
                "movdqa 48(%0), %%xmm3
    "
                "movntps %%xmm0, (%1)
    "
                "movntps %%xmm1, 16(%1)
    "
                "movntps %%xmm2, 32(%1)
    "
                "movntps %%xmm3, 48(%1)
    "
                ::"r"(src),"r"(dest):"memory");
                dest += 64;
                src += 64;
            }
        }
     

    slow_memcpy是逐字节循环赋值;fast_memcpy在长度不小于64字节时,用内联汇编每次拷贝64字节:先用movdqa把数据读入xmm寄存器,再用movntps写入目标地址,剩余不足64字节的部分仍交给slow_memcpy处理。全部拷贝结束后程序输出flag。

    根据提示生成可执行程序,然后执行程序看一下:

     那么我们运行程序来看一下:

    随便输入发现出错了:

    我们用gdb来看,发现了出错的位置:

    出错的位置,也就是movntps的执行出了问题,百度了一下movntps的用法:

    movntps m128,XMM
    m128 <== XMM 直接把XMM中的值送入m128,不经过cache,必须对齐16字节。再参考别人的wp:
    malloc分配的堆块大小是以8字节对齐的。

    假设用户申请的大小是a,在32位glibc下,malloc(a)实际分配的堆块大小为 8*ceil((a+4)/8),即把 a+4 向上取整到8的倍数(且不小于16)。

    因此,假设第一个malloc返回的地址是16字节对齐的,那么只要每次申请后实际分配的堆块大小都是16的倍数,后面每个堆块的地址也都保持16字节对齐,程序即可成功运行到结束。可以用脚本来算一下:

    # coding=utf-8
    # Python 2 helper (uses raw_input and the print statement).
    # Reads one integer per line from stdin, forever, and reports whether
    # (a + 4) % 16 is either 0 or at least 9.
    # NOTE(review): per the surrounding write-up this is meant to pick request
    # sizes whose malloc chunk is a multiple of 16 bytes — confirm against the
    # target libc.
    while(1):
        a = raw_input()
        a = int(a)
        # a + 4 rounded up to a multiple of 8 is a multiple of 16 exactly when
        # the remainder mod 16 is 0 or lies in 9..15
        if ((a+4)%16>=9) or ((a+4)%16==0):
            print a," is true"
        else:
            print a," is false"

    根据脚本算出来的数,我们输入得到flag:

    memcpy@ubuntu:~$ ls
    memcpy.c  readme
    memcpy@ubuntu:~$ cat readme
    the compiled binary of "memcpy.c" source code (with real flag) will be executed under memcpy_pwn privilege if you connect to port 9022.
    execute the binary by connecting to daemon(nc 0 9022).
    
    memcpy@ubuntu:~$ nc o 9022
    nc: getaddrinfo: Name or service not known
    memcpy@ubuntu:~$ nc 0 9022
    Hey, I have a boring assignment for CS class.. :(
    The assignment is simple.
    -----------------------------------------------------
    - What is the best implementation of memcpy?        -
    - 1. implement your own slow/fast version of memcpy -
    - 2. compare them with various size of data         -
    - 3. conclude your experiment and submit report     -
    -----------------------------------------------------
    This time, just help me out with my experiment and get flag
    No fancy hacking, I promise :D
    specify the memcpy amount between 8 ~ 16 : 9
    specify the memcpy amount between 16 ~ 32 : 21
    specify the memcpy amount between 32 ~ 64 : 40
    specify the memcpy amount between 64 ~ 128 : 70
    specify the memcpy amount between 128 ~ 256 : 135
    specify the memcpy amount between 256 ~ 512 : 265
    specify the memcpy amount between 512 ~ 1024 : 520
    specify the memcpy amount between 1024 ~ 2048 : 1030
    specify the memcpy amount between 2048 ~ 4096 : 2055
    specify the memcpy amount between 4096 ~ 8192 : 5210
    ok, lets run the experiment with your configuration
    experiment 1 : memcpy with buffer size 9
    ellapsed CPU cycles for slow_memcpy : 1497
    ellapsed CPU cycles for fast_memcpy : 438
    
    experiment 2 : memcpy with buffer size 21
    ellapsed CPU cycles for slow_memcpy : 384
    ellapsed CPU cycles for fast_memcpy : 411
    
    experiment 3 : memcpy with buffer size 40
    ellapsed CPU cycles for slow_memcpy : 636
    ellapsed CPU cycles for fast_memcpy : 672
    
    experiment 4 : memcpy with buffer size 70
    ellapsed CPU cycles for slow_memcpy : 1134
    ellapsed CPU cycles for fast_memcpy : 288
    
    experiment 5 : memcpy with buffer size 135
    ellapsed CPU cycles for slow_memcpy : 1938
    ellapsed CPU cycles for fast_memcpy : 237
    
    experiment 6 : memcpy with buffer size 265
    ellapsed CPU cycles for slow_memcpy : 3633
    ellapsed CPU cycles for fast_memcpy : 291
    
    experiment 7 : memcpy with buffer size 520
    ellapsed CPU cycles for slow_memcpy : 7287
    ellapsed CPU cycles for fast_memcpy : 342
    
    experiment 8 : memcpy with buffer size 1030
    ellapsed CPU cycles for slow_memcpy : 13860
    ellapsed CPU cycles for fast_memcpy : 441
    
    experiment 9 : memcpy with buffer size 2055
    ellapsed CPU cycles for slow_memcpy : 27561
    ellapsed CPU cycles for fast_memcpy : 984
    
    experiment 10 : memcpy with buffer size 5210
    ellapsed CPU cycles for slow_memcpy : 72930
    ellapsed CPU cycles for fast_memcpy : 2628
    
    thanks for helping my experiment!
    flag : 1_w4nn4_br34K_th3_m3m0ry_4lignm3nt
  • 相关阅读:
    关于学习netty的两个完整服务器客户端范例
    android-betterpickers
    ValueBar
    CircleDisplay
    JellyViewPager
    十天学习PHP之第二天
    android-測试so动态库(九)
    实习题
    android 编程小技巧(持续中)
    Codeforces Round #253 (Div. 2)——Borya and Hanabi
  • 原文地址:https://www.cnblogs.com/liuyimin/p/7348062.html
Copyright © 2011-2022 走看看