zoukankan      html  css  js  c++  java
  • 计组复习题集_第三章

    首先说一点我自己容易搞混的知识点

    leal (%edx), %eax
    movl %edx, %eax
    

    效果虽然一样,就是%eax中存储的内容变成了%edx中存储的内容

    但是实现过程有点不一样:

    书上写的是:

    movl 从指定的位置读入数据

    leal 将有效地址写入目的操作数

    说通俗点就是:

    以内存操作数 (%edx) 为例:movl (%edx), %eax 类似于 a = *p;(访问内存,把地址 p 处存放的数据取出来)

    leal (%edx), %eax 类似于 a = p;(只计算出地址 p 本身,不访问内存,所以效果和 movl %edx, %eax 一样)

    3.18

    Starting with C code of the form

    int test(int x, int y) {
        int val = ____;
        if (____) {
            if (____)
                val = ____;
            else
                val = ____;
        } else if (____)
            val = ____;
        return val;
    }
    

    GCC generates the following assembly code:

    ; x at %ebp+8, y at %ebp+12
      movl    8(%ebp), %eax
      movl    12(%ebp), %edx
      cmpl    $-3, %eax
      jge     .L2
      cmpl    %edx, %eax
      jle     .L3
      imull   %edx, %eax
      jmp     .L4
    .L3:
      leal    (%edx,%eax), %eax
      jmp     .L4
    .L2:
      cmpl    $2, %eax
      jg      .L5
      xorl    %edx, %eax
      jmp     .L4
    .L5:
      subl    %edx, %eax
    .L4:
    

    Fill in the missing expressions in the C code. To make the code fit into the
    C code template, you will need to undo some of the reordering of computations
    done by GCC.

    ANSWER

    /**
     * GCC reordered the computation: the initializer `val = x ^ y` from the
     * first line of the C code shows up in the assembly only on the path that
     * falls through after .L2, i.e. it is evaluated only when that value is
     * the one actually returned.
     * For an `if`, the generated code falls straight through into the "then"
     * part and jumps only to reach the "else" part, so `jge` / `jle` usually
     * encode the negation of the C condition.
     */
    int test(int x, int y) {
        int val = x ^ y;        /* xorl %edx, %eax */
        if (x < -3) {           /* cmpl $-3, %eax ; jge .L2 jumps when x >= -3 */
            if (x > y)          /* cmpl %edx, %eax ; jle .L3 jumps when x <= y */
                val = x * y;    /* imull %edx, %eax */
            else                /* .L3 */
                val = y + x;    /* leal (%edx,%eax), %eax */
        } else if (x > 2)       /* .L2: cmpl $2, %eax ; jg .L5 */
            /* Here the jump is NOT the negated condition: `jg .L5` jumps to the
             * body of the else-if, so the C test is x > 2 (not x <= 2). The
             * fall-through path is the one that keeps the initial x ^ y. */
            val = x - y;        /* .L5: subl %edx, %eax */
        return val;
    }
    

    3.22

    A function, fun_a, has the following overall structure:

    int fun_a(unsigned x) {
      int val = 0;
      while (_____) {
        _____;
      }
      return _____;
    }
    

    The GCC C compiler generates the following assembly code:

    ;x at %ebp+8
      movl    8(%ebp), %edx
      movl    $0, %eax
      testl   %edx, %edx
      je      .L7
    .L10:
      xorl    %edx, %eax
      shrl    %edx ;Shift right by 1
      jne     .L10
    .L7:
      andl    $1, %eax
    

    Reverse engineer the operation of this code and then do the following:

    A. Use the assembly-code version to fill in the missing parts of the C code.

    B. Describe in English what this function computes

    ANSWER

    /*A*/
    /* Folds every right-shifted copy of x into val with XOR and returns the
     * lowest bit of the result, i.e. the parity of the 1-bits in x. */
    int fun_a(unsigned x) {
      int val = 0;              /* movl $0, %eax */
      while (x) {               /* testl %edx, %edx / jne .L10 */
        val = val ^ x;          /* xorl %edx, %eax */
        x = x >> 1;             /* shrl %edx: logical shift, x is unsigned */
      }
      return val & 0x1;         /* andl $1, %eax */
    }
    
    /*B*/
    /**
     * 由于返回值 val & 1 从意义上来说是只取了val最低位的信息,于是我们只讨论val最低位表达的信息即可
     * 循环的作用就是计算val = (x >> 0) ^ (x >> 1) ^ (x >> 2) ^ ... ^ (x >> k-1) [假设x有k位有效数字]
     * 比如令 x = 1011
     * 那么:
     *   1011
     *   0101
     *   0010
     * ^ 0001
     * ------
     *   1101
     * 可以看出循环结束后val的最低位就是x的从最低位到最高位的每一位取异或,举个例子,假设x=10111,那么val的最低位=1^0^1^1^1。
     * 当有偶数个1时,先把所有的1两两异或转成0,式子中就只剩下0了,式子的结果是0,例如:1^0^0^1 = (1^1)^0^0 = 0^0^0 = 0;
     * 当有奇数个1时,尽可能地把1两两异或转成0,最后式子中只剩下一个1,表达式的值就是1,例如:1^0^1^1 = (1^1)^0^1 = 0^0^1 = 1;
     * 由此val的最低位的最终值反映了x中1的数量的奇偶性,这也是这个函数的作用:计算参数x中1的数量的奇偶性,当x中有奇数个1,返回1;当x中有偶数个1,返回0。
     */
    

    3.29

    For a C function switcher with the general structure

    int switcher(int a, int b, int c)
    {
      int answer;
      switch(a) {
      case _____:         /* Case A */
        c = _____;
        /* Fall through */
      case _____:         /* Case B */
        answer = _____;
        break;
      case _____:         /* Case C */
      case _____:         /* Case D */
        answer = _____;
        break;
      case _____:         /* Case E */
        answer = _____;
        break;
      default:
        answer = _____;
      }
      return answer;
    }
    

    GCC generates the assembly code and jump table shown in Figure 3.20.

    Fill in the missing parts of the C code. Except for the ordering of case labels C and D, there is only one way to fit the different cases into the template.

    ;a at %ebp+8, b at %ebp+12, c at %ebp+16
      movl    8(%ebp), %eax
      cmpl    $7, %eax
      ja      .L2
      jmp     *.L7(,%eax,4)
    .L2:
      movl    12(%ebp), %eax
      jmp     .L8
    .L5:
      movl    $4, %eax
      jmp     .L8
    .L6:
      movl    12(%ebp), %eax
      xorl    $15, %eax
      movl    %eax, 16(%ebp)
    .L3:
      movl    16(%ebp), %eax
      addl    $112, %eax
      jmp     .L8
    .L4:
      movl    16(%ebp), %eax
      addl    12(%ebp), %eax
      sall    $2, %eax
    .L8:
    
    .L7:
      .long   .L3
      .long   .L2
      .long   .L4
      .long   .L2
      .long   .L5
      .long   .L6
      .long   .L2
      .long   .L4
    

    ANSWER

    /**
     * The `default` label is the target of the `ja` instruction (.L2). The
     * jump-table slots that hold .L2 (indices 1, 3 and 6) therefore have no
     * case label of their own, and any a that is above 7 in the unsigned
     * comparison also lands there.
     */
    int switcher(int a, int b, int c)
    {
      int answer;
      switch(a) {
      case 5:               /* Case A: table[5] = .L6 */
        c = b ^ 0xF;        /* xorl $15, %eax ; movl %eax, 16(%ebp) */
        /* Fall through */
      case 0:               /* Case B: table[0] = .L3 */
        answer = 112 + c;   /* addl $112, %eax */
        break;
      case 2:               /* Case C: table[2] = .L4 */
      case 7:               /* Case D: table[7] = .L4 */
        answer = (c + b) << 2;  /* addl 12(%ebp), %eax ; sall $2, %eax */
        break;
      case 4:               /* Case E: table[4] = .L5 */
        /* movl $4, %eax. Writing `answer = a` gives the same result because
         * a is known to be 4 in this case; the assembly uses the constant. */
        answer = 4;
        break;
      default:              /* .L2 */
        answer = b;         /* movl 12(%ebp), %eax */
      }
      return answer;
    }
    

    3.34

    For a C function having the general structure

    int rfun(unsigned x) {
      if (_____)
        return _____;
      unsigned nx = _____;
      int rv = rfun(nx);
      return _____;
    }
    

    GCC generates the following assembly code (with the setup and completion code omitted):

      movl    8(%ebp), %ebx
      movl    $0, %eax
      testl   %ebx, %ebx
      je      .L3
      movl    %ebx, %eax
      shrl    %eax            ;Shift right by 1
      movl    %eax, (%esp)
      call    rfun
      movl    %ebx, %edx
      andl    $1, %edx
      leal    (%edx,%eax), %eax
    .L3:
    

    A. What value does rfun store in the callee-save register %ebx?

    B. Fill in the missing expressions in the C code shown above.

    C. Describe in English what function this code computes.

    ANSWER

    /*A*/
    /**
     * %ebx 存储的是x的值
     */
    
    /*B*/
    /* Recursively adds the lowest bit of x to the result for x >> 1, which
     * yields the number of 1-bits in x. */
    int rfun(unsigned x) {
      if (x == 0)                   /* testl %ebx, %ebx ; je .L3 */
        return 0;                   /* movl $0, %eax (x is 0 here) */
      unsigned nx = x >> 1;         /* shrl %eax */
      int rv = rfun(nx);            /* call rfun */
      return (x & 1) + rv;          /* andl $1, %edx ; leal (%edx,%eax), %eax */
    }
    
    /*C*/
    /**
     * 计算x中位的和:递归地计算除了最低位之外的所有其他位的和,然后加上最低位得到结果
     */
    

    3.37

    Consider the following source code, where M and N are constants declared with #define:

    /* Two global int matrices with transposed dimensions: mat1 has M rows of
     * N ints, mat2 has N rows of M ints. M and N come from #define. */
    int mat1[M][N];
    int mat2[N][M];
    
    /* Returns mat1[i][j] + mat2[j][i]. The row length of each array (N for
     * mat1, M for mat2) is what scales the first index in the address. */
    int sum_element(int i, int j) {
      return mat1[i][j] + mat2[j][i];
    }
    

    In compiling this program, GCC generates the following assembly code:

    ;i at %ebp+8, j at %ebp+12
    movl    8(%ebp), %ecx
    movl    12(%ebp), %edx
    leal    0(,%ecx,8), %eax
    subl    %ecx, %eax
    addl    %edx, %eax          ;%eax = 7 * i + j
    leal    (%edx,%edx,4), %edx
    addl    %ecx, %edx          ;%edx = 5 * j + i
    movl    mat1(,%eax,4), %eax
    addl    mat2(,%edx,4), %eax
    

    Use your reverse engineering skills to determine the values of M and N based on this assembly code.

    ANSWER

     
    %eax = 7 * i + j
    %edx = 5 * j + i
    而 &mat1[i][j] = mat1 + (i * N + j) * 4
    mat1(,%eax,4) = mat1(,7 * i + j,4) = mat1 + (i * 7 + j) * 4
    所以 N = 7
    同理 &mat2[j][i] = mat2 + (j * M + i) * 4,而 mat2(,%edx,4) = mat2 + (j * 5 + i) * 4,所以 M = 5
    
    

    3.39

    Consider the following structure declaration:

    /* A structure with another structure embedded in it. */
    struct prob {
      int *p;               /* pointer to an int */
      struct {              /* embedded structure holding two ints */
        int x;
        int y;
      } s;
      struct prob *next;    /* pointer to another struct prob */
    };
    

    This declaration illustrates that one structure can be embedded within another, just as arrays can be embedded within structures, and arrays can be embedded within arrays.
    The following procedure (with some expressions omitted) operates on this structure:

    void sp_init(struct prob *sp)
    {
      sp->s.x = _____;
      sp->p = _____;
      sp->next = _____;
    }
    

    A. What are the offsets (in bytes) of the following fields?

         p: _____
       s.x: _____
       s.y: _____
      next: _____
    

    B. How many total bytes does the structure require?

    C.The compiler generates the following assembly code for the body of sp_init:

    ;sp at %ebp+8
      movl 8(%ebp), %eax  ;%eax = sp (type of sp: pointer)
      movl 8(%eax), %edx  ;%edx = sp->s.y   (offset 8)
      movl %edx, 4(%eax)  ;sp->s.x = sp->s.y (offset 4)
      leal 4(%eax), %edx  ;%edx = &(sp->s.x)
      movl %edx, (%eax)   ;sp->p = &(sp->s.x) (offset 0)
      movl %eax, 12(%eax) ;sp->next = sp     (offset 12)
    

    On the basis of this information, fill in the missing expressions in the code for sp_init.

    ANSWER

    /*A*/
         p: 0   (占用 0~3)
       s.x: 4   (占用 4~7)
       s.y: 8   (占用 8~11)
      next: 12  (占用 12~15)
    
    /*B*/
      16 bytes in total.(题目给的是 IA32 汇编,指针和 int 都是 4 字节;sp_init 的汇编中用到的偏移 4、8、12 也印证了这一点)
    
    /*C*/
    /*
     * Reconstructed from the assembly for the body of sp_init, where %eax = sp
     * and the fields are addressed at offsets p = 0, s.x = 4, s.y = 8, next = 12:
     *   movl 8(%eax), %edx ; movl %edx, 4(%eax)   ->  sp->s.x  = sp->s.y
     *   leal 4(%eax), %edx ; movl %edx, (%eax)    ->  sp->p    = &sp->s.x
     *   movl %eax, 12(%eax)                       ->  sp->next = sp
     */
    void sp_init(struct prob *sp)
    {
      sp->s.x = sp->s.y;
      sp->p = &(sp->s.x);
      sp->next = sp;
    }
    

    3.56

    Consider the following assembly code:

    ;x at %ebp+8, n at %ebp+12
      movl    8(%ebp), %esi
      movl    12(%ebp), %ebx
      movl    $-1, %edi
      movl    $1, %edx
    .L2:
      movl    %edx, %eax
      andl    %esi, %eax
      xorl    %eax, %edi
      movl    %ebx, %ecx
      sall    %cl, %edx
      testl   %edx, %edx
      jne     .L2
      movl    %edi, %eax
    

    The preceding code was generated by compiling C code that had the following overall form:

    int loop(int x, int n)
    {
      int result = _____;
      int mask;
      for (mask = _____; mask _____; mask = _____) {
        result ^= _____;
      }
      return result;
    }
    

    Your task is to fill in the missing parts of the C code to get a program equivalent to the generated assembly code. Recall that the result of the function is returned in register %eax. You will find it helpful to examine the assembly code before, during, and after the loop to form a consistent mapping between the registers and the program variables.

    A. Which registers hold program values x, n, result, and mask?

    B. What are the initial values of result and mask?

    C. What is the test condition for mask?

    D. How does mask get updated?

    E. How does result get updated?

    F. Fill in all the missing parts of the C code.

    ANSWER

    /*A*/
           x : %esi
           n : %ebx
        mask : %edx
      result : %edi
    
    /*B*/
        mask : 1
      result : -1
    
    /*C*/
      mask != 0;
    
    /*D*/
      mask <<= n;
    
    /*E*/
      result ^= x & mask;
    
    /*F*/
    /*
     * XORs into result (initially -1) every bit of x selected by mask, where
     * mask starts at 1 and is shifted left by n each iteration until it is 0.
     *
     * `sall %cl, %edx` takes its count from %cl only because that is where the
     * instruction expects a variable shift count; in C this is simply
     * `mask << n`, with no narrowing cast on n.
     * n is expected to be in 1..31: with n == 0 mask never changes, and a shift
     * count of 32 or more is undefined in C. The last shift pushes the 1 out of
     * an int, which the assembly relies on to terminate the loop.
     */
    int loop(int x, int n)
    {
      int result = -1;                              /* movl $-1, %edi */
      int mask;
      for (mask = 1; mask != 0; mask = mask << n) { /* movl $1, %edx ; sall %cl, %edx ; testl/jne */
        result ^= x & mask;                         /* andl %esi, %eax ; xorl %eax, %edi */
      }
      return result;
    }
    

    3.59

    This problem will give you a chance to reverse engineer a switch statement from machine code. In the following procedure, the body of the switch statement has been removed:

    int switch_prob(int x, int n)
    {
      int result = x;
      switch(n) {
        /* Fill in code here */
      }
      return result;
    }
    

    Figure 3.44 shows the disassembled machine code for the procedure. We can see in lines 4 and 5 that parameters x and n are loaded into registers %eax and %edx, respectively.

    The jump table resides in a different area of memory. We can see from the indirect jump on line 9 that the jump table begins at address 0x80485d0. Using the GDB debugger, we can examine the six 4-byte words of memory comprising the jump table with the command x/6w 0x80485d0. GDB prints the following:

    (GDB) x/6w 0x80485d0
    0x80485d0: 0x08048438 0x08048448 0x08048438 0x0804843d
    0x80485e0: 0x08048442 0x08048445
    

    Fill in the body of the switch statement with C code that will have the same behavior as the machine code.

    08048420 <switch_prob>:
    8048420: 55                     push %ebp
    8048421: 89 e5                  mov %esp,%ebp
    8048423: 8b 45 08               mov 0x8(%ebp),%eax
    8048426: 8b 55 0c               mov 0xc(%ebp),%edx
    8048429: 83 ea 32               sub $0x32,%edx
    804842c: 83 fa 05               cmp $0x5,%edx
    804842f: 77 17                  ja 8048448 <switch_prob+0x28>
    8048431: ff 24 95 d0 85 04 08   jmp *0x80485d0(,%edx,4)
    8048438: c1 e0 02               shl $0x2,%eax
    804843b: eb 0e                  jmp 804844b <switch_prob+0x2b>
    804843d: c1 f8 02               sar $0x2,%eax
    8048440: eb 09                  jmp 804844b <switch_prob+0x2b>
    8048442: 8d 04 40               lea (%eax,%eax,2),%eax
    8048445: 0f af c0               imul %eax,%eax
    8048448: 83 c0 0a               add $0xa,%eax
    804844b: 5d                     pop %ebp
    804844c: c3                     ret
    

    ANSWER

    /**
     * Instructions 8048429 (sub $0x32,%edx) and 804842c (cmp $0x5,%edx) show
     * that the jump-table index is n - 0x32. 0x32 is hexadecimal, i.e. 50 in
     * decimal, so table entry k corresponds to `case 50 + k` for k = 0..5.
     * Entry 1 (0x08048448) is the same address the `ja` jumps to, so n == 51
     * gets no label of its own and is handled by `default`.
     */
    int switch_prob(int x, int n)
    {
      int result = x;
      switch(n) {
        case 50:              /* table[0] = 0x08048438 */
        case 52:              /* table[2] = 0x08048438 */
          result <<= 2;       /* shl $0x2,%eax */
          break;
        case 53:              /* table[3] = 0x0804843d */
          result >>= 2;       /* sar $0x2,%eax */
          break;
        case 54:              /* table[4] = 0x08048442 */
          result *= 3;        /* lea (%eax,%eax,2),%eax */
          /* Fall through */
        case 55:              /* table[5] = 0x08048445 */
          result *= result;   /* imul %eax,%eax */
          /* Fall through */
        default:              /* 0x08048448: ja target and table[1] */
          result += 0xa;      /* add $0xa,%eax */
      }
      return result;
    }
    
    

    3.64

    For this exercise, we will examine the code generated by GCC for functions that have structures as arguments and return values, and from this see how these language features are typically implemented.
    The following C code has a function word_sum having structures as argument and return values, and a function prod that calls word_sum:

    /* Argument type of word_sum: an int together with a pointer to an int. */
    typedef struct {
      int a;
      int *p;
    } str1;
    
    /* Return type of word_sum: a sum field followed by a difference field. */
    typedef struct {
      int sum;
      int diff;
    } str2;
    
    /* Takes a str1 by value and returns a str2 by value holding
     * s1.a + *s1.p in sum and s1.a - *s1.p in diff. */
    str2 word_sum(str1 s1) {
      str2 result;
      result.sum = s1.a + *s1.p;
      result.diff = s1.a - *s1.p;
      return result;
    }
    
    /* Packs x and the address of y into a str1, passes it by value to
     * word_sum, and returns the product of the two fields that come back,
     * i.e. (x + y) * (x - y). */
    int prod(int x, int y)
    {
      str1 s1;
      str2 s2;
      s1.a = x;
      s1.p = &y;            /* word_sum reads y through this pointer */
      s2 = word_sum(s1);
      return s2.sum * s2.diff;
    }
    

    GCC generates the following code for these two functions:

    word_sum:
      pushl %ebp
      movl %esp, %ebp
      pushl %ebx
      movl 8(%ebp), %eax
      movl 12(%ebp), %ebx
      movl 16(%ebp), %edx
      movl (%edx), %edx
      movl %ebx, %ecx
      subl %edx, %ecx
      movl %ecx, 4(%eax)
      addl %ebx, %edx
      movl %edx, (%eax)
      popl %ebx
      popl %ebp
      ret $4
    
    prod:
      pushl %ebp
      movl %esp, %ebp
      subl $20, %esp
      leal 12(%ebp), %edx
      leal -8(%ebp), %ecx
      movl 8(%ebp), %eax
      movl %eax, 4(%esp)
      movl %edx, 8(%esp)
      movl %ecx, (%esp)
      call word_sum
      subl $4, %esp
      movl -4(%ebp), %eax
      imull -8(%ebp), %eax
      leave
      ret
    

    The instruction ret $4 is like a normal return instruction, but it increments the stack pointer by 8 (4 for the return address plus 4 additional), rather than 4.

    A. We can see in lines 5–7 of the code for word_sum that it appears as if three
    values are being retrieved from the stack, even though the function has only a single argument. Describe what these three values are.

    B. We can see in line 4 of the code for prod that 20 bytes are allocated in the stack frame. These get used as five fields of 4 bytes each. Describe how each of these fields gets used.

    C. How would you describe the general strategy for passing structures as arguments to a function?

    D. How would you describe the general strategy for handling a structure as a return value from a function?

    ANSWER

    /* A */
    /**
     * movl (%edx), %edx  ; 间接引用(解引用)操作,对应 *s1.p。被解引用的 %edx 来自 16(%ebp),可知 16(%ebp) 对应 s1.p;根据 str1 的成员顺序,我们可以知道 12(%ebp) 对应 s1.a
     * movl %ebx, %ecx    ; 
     * subl %edx, %ecx    ; - 操作,对应 s1.a - *s1.p;
     * movl %ecx, 4(%eax) ; 
     * addl %ebx, %edx    ; + 操作,对应 s1.a + *s1.p;
     * movl %edx, (%eax)  ; 根据这几个赋值操作,可以推断出 %eax, 也就是 8(%ebp) 存的是 result 的地址, (%eax) = result.sum, 4(%eax) = result.diff
     */
      line 5 ~ line 7 的几个值:
      8(%ebp)  = %eax : &result(调用者传入的、用来存放返回结构体的地址)
      12(%ebp) = %ebx : s1.a
      16(%ebp) = %edx : s1.p
    
    /* B */
    /* 不严谨地说,movl就是揭示这个地址对应的意义的操作 */
    /**
     * prod:
     *   pushl %ebp
     *   movl %esp, %ebp
     *   subl $20, %esp
     *   leal 12(%ebp), %edx  ; 12(%ebp)是第二个参数 y;leal 只计算地址、不访问内存,把地址 %ebp+12 写入寄存器 %edx,可推断出是 &y
     *   leal -8(%ebp), %ecx  ; 
     *   movl 8(%ebp), %eax   ; 8(%ebp)是第一个参数 x;movl 读取 8(%ebp) 处内存中的值写入寄存器 %eax,可推断出是 x
     *   movl %eax, 4(%esp)   ; 赋值操作,将x赋值给某个变量,可知是赋给s1.a(对应 s1.a = x),故得知 s1.a 存在 %esp+4 = %ebp-16
     *   movl %edx, 8(%esp)   ; 同理,s1.p(= &y)存在 %esp+8 = -12(%ebp)
     *   movl %ecx, (%esp)    ; 取了 %ebp-8 这个地址存到了(%esp)里,这个地址目前还不知道什么意义。但是当我们看完了代码之后,能知道%ebp-8是指向s2的,那么(%esp) = -20(%ebp) 里存的就是&s2
     *   call word_sum        ;
     *   subl $4, %esp        ;
     *   movl -4(%ebp), %eax  ;
     *   imull -8(%ebp), %eax ; 由上行和这行可以推出s2在-8(%ebp), 且s2.diff 在 %ebp-4, s2.sum 在 %ebp-8
     *   leave
     *   ret
     */
       -4(%ebp) : s2.diff
       -8(%ebp) : s2.sum
      -12(%ebp) : s1.p
      -16(%ebp) : s1.a
      -20(%ebp) : &s2
    
    /* C */
    传入结构体参数时的传入方法:
    从word_sum的汇编代码我们可以看到,向函数传入结构体参数的通用策略是:将结构体参数的成员的值分别作为参数传入函数。
    通俗地说,就是将结构体参数拆开再传入。
    
    /* D */
    函数怎么将一个结构体作为返回值返回:
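    首先,ret 指令本身并不返回值;按照调用约定,函数的返回值放在寄存器 %eax 中。
    然后,看 prod 和 word_sum 的汇编代码:调用者 prod 先在自己的栈帧里为 s2 留出空间,并把 &s2 作为一个隐藏的参数放在 (%esp)(也就是 word_sum 里的 8(%ebp))传入;word_sum 把这个地址读入 %eax,再通过 movl %edx,(%eax) 和 movl %ecx,4(%eax) 直接把结果写进这块空间。
    所以,结构体返回值是由被调用者写入调用者提供的内存的,函数最后在 %eax 中返回的是这个结构体的地址。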
    

    3.65

    In the following code, A and B are constants defined with #define:

    /* A-by-B array of shorts followed by an int; the offset of y depends on
     * A * B and on the alignment of int. */
    typedef struct {
      short x[A][B]; /* Unknown constants A and B */
      int y;
    } str1;
    
    /* char and short arrays of length B, each followed by an int; the offsets
     * of t and u depend on B and on the alignment of int. */
    typedef struct {
      char array[B];
      int t;
      short s[B];
      int u;
    } str2;
    
    /* Stores q->t + q->u into p->y. */
    void setVal(str1 *p, str2 *q) {
      int v1 = q->t;
      int v2 = q->u;
      p->y = v1+v2;
    }
    

    GCC generates the following code for the body of setVal:

    movl    12(%ebp), %eax
    movl    36(%eax), %edx
    addl    12(%eax), %edx
    movl    8(%ebp), %eax
    movl    %edx, 92(%eax)
    

    What are the values of A and B? (The solution is unique.)

    ANSWER

    movl    12(%ebp), %eax  ; gets q
    movl    36(%eax), %edx  ; 结合下一行,由于数据对齐的原因,只能推出 16+2*B 在 [33,36]范围内都是可能的, 不能直接说 16+2*B = 36。 从此句得知 B ∈ [8.5, 10]
    addl    12(%eax), %edx  ; B ∈ [9,12], 结合上一句的推理结果,B = 9 或 10。
    movl    8(%ebp), %eax   ; gets p
    movl    %edx, 92(%eax)  ; A*B*2 ∈ [89 , 92], A * B ∈ [44.5 , 46]
    
    ; 所以 B = 9 或 10, 且需满足 A * B ∈ [44.5 , 46] ∧ A ∈ N+(正整数集)
    ; 设 B = 9, 则 A ∈ [44.5/9, 46/9] ≈ [4.94, 5.11]。由于 [44.5/9, 46/9] ∩ N+ = {5},所以 A = 5 是有效的数字。
    ; 设 B = 10, 则 A ∈ [4.45, 4.6], 这个区间内并没有整数,[4.45, 4.6] ∩ N+ = φ,没有能满足条件的值,因此 B 不能取10。
    ; 综上所述,B = 9 且 A = 5
    
    A = 5, B = 9
    ; 这道题中文版和英文版数据不一样,所以结果也不一样
    ; 中文版的按照上面的流程走一遍,得到答案是: A = 3, B = 7
    
  • 相关阅读:
    【2020NOI.AC省选模拟#5】C. 光滑序列
    【2020NOI.AC省选模拟#2】A. 旋转
    【2020NOI.AC省选模拟#1】B. Trie
    Redis 配置
    Redis 删除策略
    Redis 事务
    Redis 持久化
    Redis 通用指令
    Redis 数据类型实战案例
    Redis sorted_set
  • 原文地址:https://www.cnblogs.com/khunkin/p/10206676.html
Copyright © 2011-2022 走看看