zoukankan      html  css  js  c++  java
  • fstrict-aliasing

    诚如“optimization blocks”文中所述,由于不同的指针可能指向相关的内存区,因此编译器将不做过分的优化……

    特意搜了下编译器在不同的优化等级下都有哪些默认优化,因此有了此记录(比较长,尽管只讲述了fstrict-aliasing  ……)

    下述内容引述自:http://dbp-consulting.com/tutorials/StrictAliasing.html

                               http://cellperformance.beyond3d.com/articles/2006/06/understanding-strict-aliasing.html

                               http://stackoverflow.com/questions/98650/what-is-the-strict-aliasing-rule

    gcc -v
            gcc version 4.4.5 (Ubuntu/Linaro 4.4.4-14ubuntu5)
    arm-none-linux-gnueabi-gcc -v
            gcc version 4.3.2 (Sourcery G++ Lite 2008q3-72)
    首先看下在不同优化等级下gcc的默认优化参数(此处重点关注fstrict-aliasing  在O0(默认优化等级),O2(一般使用时的优化等级)
    下的开启情况)
    gcc -Q -O0 --help=optimizers
            -fstrict-aliasing [disabled]
    arm-gcc -Q -O0 --help=optimizers
            -fstrict-aliasing [disabled]
    gcc -Q -O2 --help=optimizers
            -fstrict-aliasing [enabled]
    arm-gcc -Q -O2 --help=optimizers
            -fstrict-aliasing [enabled]
    在O0等级下,无论是否显式的添加fstrict-aliasing,fstrict-aliasing都被禁用
    在O2等级下,默认开启fstrict-aliasing,可以显式的添加fno-strict-aliasing以禁用fstrict-aliasing

    先看下程序和运行结果(分arm与x86)

    示例file1.c、
    /*
     * file1.c: swap the two 16-bit halves of `arg` by accessing it through an
     * `unsigned short int*`.
     *
     * This is the article's deliberate strict-aliasing violation: the object is
     * an `unsigned int`, but it is read and written through lvalues of type
     * `unsigned short int`. The compiler listings that follow show the returned
     * value changing at -O2 unless -fno-strict-aliasing is given.
     *
     * NOTE(review): presumes a 32-bit `unsigned int` and a 16-bit
     * `unsigned short int`, as on the two targets used in the article.
     */
    unsigned int swap_words(unsigned int arg)
    {
        unsigned short int* const sp = (unsigned short int*)&arg; /* view arg as two halfwords */
        unsigned short int lo = sp[0];  /* halfword at the lower address */
        unsigned short int hi = sp[1];  /* halfword at the higher address */
    
        sp[0] = hi;                     /* write the halves back in swapped positions */
        sp[1] = lo;
        return arg;                     /* read back as unsigned int: the aliased access */
    }
    
    #include <stdio.h>  /* printf */

    /*
     * Driver for file1.c: passes 0x12345678 through swap_words() and prints the
     * result as eight hex digits followed by a newline. The article compares
     * this printed value across optimization levels.
     */
    int main(void)
    {
        int x = 0x12345678;
        x = swap_words(x);
        /* The format string must contain the escape "\n", not a raw line break. */
        printf("%08x\n", x);
        return 0;
    }
    gcc -m32 file1.c -o mytest : 56781234
    swap_words:
        pushl   %ebp
        movl    %esp,      %ebp
        subl    $16,       %esp
        leal    8(%ebp),   %eax
        movl    %eax,      -8(%ebp)   ;sp
        movl    -8(%ebp),  %eax
        movzwl  (%eax),    %eax       ;arg    
        movw    %ax,       -2(%ebp)   ;arg(l) 
        movl    -8(%ebp),  %eax
        addl    $2,        %eax
        movzwl  (%eax),    %eax
        movw    %ax,       -4(%ebp)   ;arg(h) 
        movl    -8(%ebp),  %eax
        movzwl  -4(%ebp),  %edx
        movw    %dx,       (%eax)     ;arg: arg(h)arg(h)
        movl    -8(%ebp),  %eax
        leal    2(%eax),   %edx
        movzwl  -2(%ebp),  %eax      
        movw    %ax,       (%edx)     ;arg: arg(l)arg(h)
        movl    8(%ebp),   %eax
        leave   
        ret
    
    gcc -m32 -O2 file1.c -o mytest: 12345678
    swap_words:
        pushl   %ebp
        movl    %esp,      %ebp
        movzwl  8(%ebp),   %eax      ;arg    
        movzwl  10(%ebp),  %edx      ;arg(h) 
        movw    %ax,       10(%ebp)  ;arg(l)arg(l)
        movl    8(%ebp),   %eax      ;arg(l)arg(l)
        movw    %dx,       8(%ebp)   ;arg(l)arg(h)
        popl    %ebp    
        ret
    尽管在swap_words中正确修改了调用者的栈上的值,但是在main中根本就不调用swap_words,
    eax的值与我们的原意也不同
    
    gcc -m32 -fno-strict-aliasing -O2 file1.c -o mytest :56781234
    swap_words:
        pushl   %ebp
        movl    %esp,      %ebp
        movzwl  8(%ebp),   %eax
        movzwl  10(%ebp),  %edx
        movw    %ax,       10(%ebp)
        movw    %dx,       8(%ebp) 
        movl    8(%ebp),   %eax
        popl    %ebp    
        ret
    正确计算了返回值
    
    arm-gcc file1.c -o mytest: 56781234
    swap_words:
        str     fp, [sp, #-4]!
        add     fp, sp, #0
        sub     sp, sp, #20 
        str     r0, [fp, #-16]      ;arg
        sub     r3, fp, #16        
        str     r3, [fp, #-12]      ;sp  
        ldr     r3, [fp, #-12]
        ldrh    r3, [r3, #0]        
        strh    r3, [fp, #-8]       ;arg[l]
        ldr     r3, [fp, #-12]
        add     r3, r3, #2
        ldrh    r3, [r3, #0]     
        strh    r3, [fp, #-6]       ;arg[h]
        ldr     r2, [fp, #-12]
        ldrh    r3, [fp, #-6]    
        strh    r3, [r2, #0]        ;arg:arg(h)arg(h)
        ldr     r3, [fp, #-12]
        add     r2, r3, #2
        ldrh    r3, [fp, #-8]    
        strh    r3, [r2, #0]        ;arg:arg(l)arg(h)
        ldr     r3, [fp, #-16]
        mov     r0, r3  
        add     sp, fp, #0
        ldmfd   sp!, {fp}
        bx      lr 
    
    arm-gcc file1.c -O2 -o mytest: 12345678
    swap_words:
        sub sp, sp, #8
        str r0, [sp, #4]
        add sp, sp, #8
        bx  lr  
    因为实施了fstrict-aliasing优化,因此,swap_words中做了非常大的优化,以至于不做任何计算,直接返回r0
    
    arm-gcc file1.c -O2 -fno-strict-aliasing -o mytest: 56781234
    swap_words:
        sub sp, sp, #8
        str r0, [sp, #4]
        ldrh    r3, [sp, #4]
        ldrh    r2, [sp, #6]
        strh    r3, [sp, #6]    @ movhi   
        strh    r2, [sp, #4]    @ movhi
        ldr r0, [sp, #4]
        add sp, sp, #8
        bx  lr  
    当我们加上fno-strict-aliasing参数时,swap_words正确计算了返回值,尽管file1.c违背了strict-aliasing规则

    先以如上实例阐述一些概念

    通过以上的对比分析,我们看到当fstrict-aliasing生效时,编译器做了大量的优化。由于我们的程序违背了strict-aliasing规则,优化后的结果与原意不符,但这是由我们程序员自身造成的,与编译器本身的优化无关。那么,如果我们既想让编译器做出大量的优化,同时又希望它对违背strict-aliasing规则的程序发出警告,该怎么办呢?加入-Wstrict-aliasing编译参数。
    例如:
    arm-none-linux-gnueabi-gcc -O2 -Wstrict-aliasing file1.c -o mytest,会有如下输出:
    warning: likely type-punning may break strict-aliasing rules
    当我们遇到这类输出时一定要注意(要么我们就使用fno-strict-aliasing参数,例如内核,使用fno-strict-aliasing后只是在某些部分没有做优化,但不会导致编译出的程序与我们的原意相反)


    关于c/c++中strict aliasing记录(重点是c)
    aliasing is when more than one lvalue refers to the same memory location.

    strict aliasing is an assumption, made by the C (or C++) compiler, that dereferencing pointers to objects of different types will never refer to the same memory location. Enable this option in GCC with -fstrict-aliasing flag. Be sure that all code can safely run with this rule enabled. Enable strict aliasing related warnings with -Wstrict-aliasing, but do not expect to be warned in all cases. In order to discover aliasing problems as quickly as possible, -fstrict-aliasing should always be included in the compilation flags for GCC. Otherwise problems may only be visible at the highest optimization levels where it is the most difficult to debug(如果我们使用参数-fno-strict-aliasing,有些优化将不被实施,而且对于一些潜在的转换错误,编译器不会报出)

    the compiler writers know what the strict aliasing rules are for. They are written to let compiler writers know when they can safely assume that a change made through one variable won't affect the value of another variable.
    Conversely, the compiler must act very conservatively when accessing memory if it cannot assume that two objects are not aliased.

    在编译选项中加入-fstrict-aliasing的优势在于向编译器说明不同类型的lvalue将指向不相关的内存区域,编译器可以做大量的优化。在编译内核的编译选项CFLAGS中,加入了-fno-strict-aliasing,向编译器表明不同类型的lvalue可能指向相关的内存区域,因此编译器不会做出一些极端的优化而造成不安全(内核编译中优化选项为-O2, -O2优化时默认是-fstrict-aliasing,因此需要显
    式的指出编译参数是-fno-strict-aliasing)

    1、编译器设计者知道一些左值量会指向相关的内存区域,因此不会做出一些不安全的优化(即使编译选项显式的指出为-fstrict-aliasing,也不会做极端的优化,因为这些类型的左值量有可能指向相关的内存的区域);
    2、当我们给编译器加入-fstrict-aliasing选项时,就意味着使用编译器的程序员清晰地向编译器表明:不同类型的左值量不会指向相关的内存区域,编译器可以做出大量的优化,由此产生的不良后果由程序员负责,而非编译器设计者负责;
       当使用-fstrict-aliasing时,有如下基本示例表明不同类型的左值将不会指向相关的内存区域:
       (1)pointers to different built in types do not alias
       (2)pointers to aggregate or union types with differing tags do not alias
       (3)pointers to aggregate or union types which differ only by name may alias

    在 1 中提到:一些左值量可能指向相关的内存区域,那么都有哪些可能呢?,如下:                                         
        An object shall have its stored value accessed only by an lvalue expression that has one of the following types:
        (1)— a type compatible with the effective type of the object,
        (2)— a qualified version of a type compatible with the effective type of the object,
        (3)— a type that is the signed or unsigned type corresponding to the effective type of the object,
        (4)— a type that is the signed or unsigned type corresponding to a qualified version of the effective type of the object,
        (5)— an aggregate or union type that includes one of the aforementioned types among its members (including, recursively, a member of a subaggregate or contained union), or
        (6)— a character type.
    即:
        (1)Things that are compatible types or differ only by the addition of any combination of signed, unsigned, or volatile. For most purposes compatible type just means the same type. If you want more details you can read the specs. (Example: If you get a pointer to long, and a pointer to const unsigned long they could point to the same thing.)
        (2)An aggregate (struct or class) or union type can alias types contained inside them. (Example: If a function gets passed a pointer to an int, and a pointer to a struct or union containing an int, or possibly containing another struct or union containing an int, or containing...ad infinitum, it's possible that the int* points to an int contained inside the struct or union pointed at by the other pointer.)
        (3)A character type. A char*, signed char*, or unsigned char* is specifically allowed by the specs to point to anything. That means it can alias anything in memory.
        (4)For C++ only, a possibly CV (const and/or volatile) qualified base class type of a dynamic type can alias the child type. (Example: if class dog has class animal for a base class, pointers or references to class dog and class animal can alias.)
        (5)Any lvalue has to be assumed to possibly alias to another lvalue if these rules say that they can alias. An aliasing issue is just as likely to come up with values passed by reference as it is with values passed as pointer to values. Additionally any combination of pointers and references have a possibility of aliasing.

    看完上面的内容,我们来测试下:

    示例file2.c, 此实例验证了上述的第二点(2)
    
    /* Three-word record; only field `b` is read by test() below. */
    typedef struct
    {
        unsigned int a;
        unsigned int b;
        unsigned int c;
    }Sample;
    
    /*
     * file2.c: add uniform->b to each of the first `count` elements of `values`.
     *
     * values  - array of at least `count` unsigned ints, updated in place
     * uniform - record whose field `b` is added to every element
     * count   - number of elements to update; 0 leaves `values` untouched
     *
     * `values` and `uniform->b` both have type unsigned int, so a store to
     * values[i] may legally modify uniform->b. That is why every listing in the
     * article reloads uniform->b inside the loop, even at -O2.
     *
     * The statement is `+=`: all three compiler listings load values[i], add
     * uniform->b and store the sum back, which a plain assignment would not
     * produce.
     */
    void test(unsigned int* values, 
              Sample* uniform, 
              unsigned int count)
    {
        unsigned int i = 0;
        for(i = 0; i < count; i++)
        {
            values[i] += (unsigned int)uniform->b;
        }
    }
    
    gcc file2.c -o mytest
    
    test:
        pushl   %ebp
        movl    %esp,         %ebp
        subl    $16,          %esp
        movl    $0,           -4(%ebp)       ;i      
        movl    $0,           -4(%ebp)
        jmp .L2 
    .L3:
        movl    -4(%ebp),     %eax     ;i
        sall    $2,           %eax
        addl    8(%ebp),      %eax     ;values + 4 * i 
        movl    -4(%ebp),     %edx     ;i
        sall    $2,           %edx
        addl    8(%ebp),      %edx     ;values + 4 * i 
        movl    (%edx),       %ecx
        movl    12(%ebp),     %edx
        movl    4(%edx),      %edx     ;每次重新取得uniform->b
        leal    (%ecx,%edx),  %edx    
        movl    %edx,         (%eax)
        addl    $1,           -4(%ebp)
    .L2:
        movl    -4(%ebp),     %eax     ;i      
        cmpl    16(%ebp),     %eax     ;i-count
        jb  .L3 
        leave   
        ret
    
    
    gcc file2.c -O2 -o mytest
    test:
        pushl   %ebp
        movl    %esp,        %ebp
        pushl   %esi
        movl    8(%ebp),     %edx
        pushl   %ebx
        movl    16(%ebp),    %ebx
        movl    12(%ebp),    %esi
        testl   %ebx,        %ebx
        je  .L4 
        xorl    %eax,        %eax
    .L3:
        movl    4(%esi),     %ecx   ;每次重新取得uniform->b
        addl    %ecx, (%edx,%eax,4)
        addl    $1,          %eax
        cmpl    %eax,        %ebx
        ja  .L3 
    .L4:
        popl    %ebx    
        popl    %esi    
        popl    %ebp    
        ret
    
    
    arm-gcc file2.c -O2 -o mytest:
    test:
        stmfd   sp!, {r4, r5}  
        subs    r5, r2, #0
        mov     r4, r1
        beq     .L4 
        mov     r1, #0
        mov     ip, r1
    .L3:
        ldr     r3, [r0, ip]
        ldr     r2, [r4, #4]   ;每次重新取得uniform->b
        add     r1, r1, #1
        add     r3, r3, r2
        cmp     r5, r1
        str     r3, [r0, ip]
        add     ip, ip, #4
        bhi     .L3 
    .L4:
        ldmfd   sp!, {r4, r5}
        bx      lr        

    =========================================================================

    The most commonly accepted method of converting one type of object to another is by
    using a union type;
    实例file3.c, 
    /*
     * Overlay of one unsigned int with an array of two unsigned short ints, so
     * the same storage can be accessed either as a whole word or as halfwords.
     * NOTE(review): presumes unsigned int is exactly twice the size of
     * unsigned short int, as on the article's targets.
     */
    typedef union
    {
            unsigned int u32;           /* the value as a single word */
            unsigned short int u16[2];  /* the same storage as two halfwords */
    }U32;
    
    /*
     * file3.c: swap the two halfwords of `arg` using a local U32 union.
     *
     * The word is stored into the union, its two u16 elements are exchanged,
     * and the word is read back. Every access goes through a member of the same
     * union object; the article presents this as the accepted way to convert
     * between types.
     */
    unsigned int swap_words(unsigned int arg)
    {
        U32 in;                 /* local union holding a copy of arg */
        unsigned short int lo;  /* in.u16[0] before the swap */
        unsigned short int hi;  /* in.u16[1] before the swap */
    
        in.u32 = arg;
        lo = in.u16[0];
        hi = in.u16[1];
    
        in.u16[0] = hi;         /* exchange the two halfwords */
        in.u16[1] = lo;
    
        return in.u32;
    }
    
    gcc file3.c -S -o file3.s
    swap_words:
    pushl   %ebp
    movl    %esp,      %ebp
    subl    $16,       %esp
    movl    8(%ebp),   %eax       ;arg
    movl    %eax,      -8(%ebp)
    movzwl  -8(%ebp),  %eax       ;arg
    movw    %ax,       -2(%ebp)   ;arg(l) 
    movzwl  -6(%ebp),  %eax       ;arg(h) 
    movw    %ax,       -4(%ebp)   ;arg(h) 
    movzwl  -4(%ebp),  %eax
    movw    %ax,       -8(%ebp)   ;arg(h)arg(h)
    movzwl  -2(%ebp),  %eax
    movw    %ax,       -6(%ebp)   ;arg(l)arg(h)
    movl    -8(%ebp),  %eax
    leave   
    ret
    
    
    gcc file3.c -O2 -S -o file3.s
    既做了优化,又没有错误,验证上述第二点
    swap_words:
        pushl   %ebp
        movl    %esp,    %ebp
        movl    8(%ebp), %eax
        popl    %ebp    
        roll    $16,     %eax
        ret
    
    arm-gcc file3.c -S -o file3.s
    swap_words:
        str     fp, [sp, #-4]!
        add     fp, sp, #0
        sub     sp, sp, #20 
        str     r0, [fp, #-16]      ;arg
        ldr     r3, [fp, #-16]      ;arg
        str     r3, [fp, #-12]      ;arg
        ldrh    r3, [fp, #-12]      ;arg(l)
        strh    r3, [fp, #-8]       ;arg(l)
        ldrh    r3, [fp, #-10]      ;arg(h)
        strh    r3, [fp, #-6]       ;arg(h)
        ldrh    r3, [fp, #-6]       ;arg(h)
        strh    r3, [fp, #-12]      ;arg(h)arg(h)
        ldrh    r3, [fp, #-8]       ;arg(l)
        strh    r3, [fp, #-10]      ;arg(l)arg(h)
        ldr     r3, [fp, #-12]      ;arg(l)arg(h)
        mov     r0, r3
        add     sp, fp, #0
        ldmfd   sp!, {fp}
        bx      lr  
    
    arm-gcc file3.c -O2 -S -o file3.s
    //直接用了循环右移来实现,优化啊!!!
    swap_words:
        mov r0, r0, ror #16 
        bx  lr 

    =============================================================================

    Casting proper may be done between a pointer to a type and a pointer to an aggregate or union type which contains a member of a compatible type;
    
    
    实例file4.c
    /*
     * file4.c: swap the two halfwords of `arg` by casting its address to U32*
     * and exchanging the u16 elements through that pointer.
     *
     * NOTE(review): the article treats this cast as valid because U32 contains
     * an unsigned int member. GCC's -fstrict-aliasing documentation, however,
     * describes taking an address, casting it to a union pointer and
     * dereferencing it as undefined - confirm before relying on this pattern.
     */
    unsigned int swap_words(unsigned int arg)
    {
        U32* in = (U32*)&arg;                   /* view the parameter's storage as a U32 */
        unsigned short int lo = in->u16[0];
        unsigned short int hi = in->u16[1];
    
        in->u16[0] = hi;                        /* exchange the two halfwords in place */
        in->u16[1] = lo;
        return (in->u32);                       /* read the word back through the union */
    }
    
    gcc -S -o file4.s
    swap_words:
        pushl   %ebp
        movl    %esp,     %ebp    
        subl    $16,      %esp    
        leal    8(%ebp),  %eax       
        movl    %eax,     -8(%ebp)   ;in
        movl    -8(%ebp), %eax
        movzwl  (%eax),   %eax       ;arg    
        movw    %ax,      -2(%ebp)   ;arg(l)
        movl    -8(%ebp), %eax
        movzwl  2(%eax),  %eax       ;arg(h) 
        movw    %ax,      -4(%ebp)   ;arg(h) 
        movl    -8(%ebp), %eax       ;in     
        movzwl  -4(%ebp), %edx       ;arg(h) 
        movw    %dx,      (%eax)     ;arg(h)arg(h)
        movl    -8(%ebp), %eax       ;in     
        movzwl  -2(%ebp), %edx       ;arg(l) 
        movw    %dx,      2(%eax)    ;arg(l)arg(h)
        movl    -8(%ebp), %eax
        movl    (%eax),   %eax       ;arg(l)arg(h)
        leave   
        ret
    
    gcc -S -O2 -o file4.s
    swap_words:
        pushl   %ebp
        movl    %esp,     %ebp    
        movzwl  8(%ebp),  %eax
        movzwl  10(%ebp), %edx
        movw    %ax,      10(%ebp)
        movw    %dx,      8(%ebp)
        movl    8(%ebp),  %eax
        popl    %ebp    
        ret
    
    
    arm-gcc file4.c -S -o file4.s
    swap_words:
        str     fp, [sp, #-4]!
        add     fp, sp, #0
        sub     sp, sp, #20 
        str     r0, [fp, #-16]  ;arg
        sub     r3, fp, #16 
        str     r3, [fp, #-12]  ;in
        ldr     r3, [fp, #-12]
        ldrh    r3, [r3, #0]    ;arg(l)
        strh    r3, [fp, #-8]   ;arg(l)
        ldr     r3, [fp, #-12]   
        ldrh    r3, [r3, #2]    ;arg(h)
        strh    r3, [fp, #-6]   ;arg(h)
        ldr     r2, [fp, #-12]
        ldrh    r3, [fp, #-6]   ;arg(h)
        strh    r3, [r2, #0]    ;arg(h)arg(h)
        ldr     r2, [fp, #-12]
        ldrh    r3, [fp, #-8]   ;arg(l)
        strh    r3, [r2, #2]    ;arg(l)arg(h)
        ldr     r3, [fp, #-12]
        ldr     r3, [r3, #0]    ;arg(l)arg(h)
        mov     r0, r3  
        add     sp, fp, #0
        ldmfd   sp!, {fp}
        bx      lr      
    
    arm-gcc file4.c -O2 -S -o file4.s
    swap_words:
        sub     sp, sp, #8
        add     r3, sp, #8
        str     r0, [r3, #-4]!  ;arg
        ldrh    r2, [sp, #4]    ;arg(l)
        ldrh    r1, [r3, #2]    ;arg(h)
        strh    r1, [sp, #4]    ;arg(h)arg(h)
        strh    r2, [r3, #2]    ;arg(l)arg(h)
        ldr     r0, [sp, #4]    ;arg(l)arg(h)
        add     sp, sp, #8
        bx      lr 

    ============================================================================

    实例file5.c(因为对于部分结构来说空间较大,用指针传递)
    /*
     * file5.c: swap the upper and lower 16 bits of *arg in place.
     *
     * The value is read once through a U32* view of `arg`, recombined with
     * shifts, and written back once; no halfword accesses are made.
     * NOTE(review): the shift by 16 presumes a 32-bit unsigned int - confirm
     * for other targets.
     */
    void swap_words(unsigned int *arg)
    {
        U32* combined = (U32*)arg;              /* U32 view of the caller's word */
        unsigned int start = combined->u32;     /* original value */
        unsigned int lo = start >> 16;          /* original upper bits, moved down */
        unsigned int hi = start << 16;          /* original lower bits, moved up */
    
        unsigned int final = lo | hi;
    
        combined->u32 = final;                  /* single store back to the caller's word */
    }
    
    gcc file5.c -S -o file5.s
    swap_words:
        pushl   %ebp
        movl    %esp,       %ebp    
        subl    $32,        %esp    
        movl    8(%ebp),    %eax    
        movl    %eax,       -4(%ebp)  ;combined 
        movl    -4(%ebp),   %eax
        movl    (%eax),     %eax    
        movl    %eax,       -8(%ebp)  ;start  
        movl    -8(%ebp),   %eax
        shrl    $16,        %eax    
        movl    %eax,       -12(%ebp) ;lo
        movl    -8(%ebp),   %eax
        sall    $16,        %eax    
        movl    %eax,       -16(%ebp) ;hi
        movl    -16(%ebp),  %eax      ;hi     
        movl    -12(%ebp),  %edx      ;lo     
        orl     %edx,       %eax      ;hi | lo
        movl    %eax,       -20(%ebp) ;final
        movl    -4(%ebp),   %eax
        movl    -20(%ebp),  %edx
        movl    %edx,       (%eax)    ;combined->u32 = final
        leave   
        ret
    
    gcc file5.c -O2 -S -o file5.s  
    swap_words:
        pushl   %ebp
        movl    %esp,    %ebp
        movl    8(%ebp), %eax
        roll    $16,     (%eax)
        popl    %ebp    
        ret
    
    arm-gcc file5.c -S -o file5.s 
    swap_words:
        str fp, [sp, #-4]!
        add fp, sp, #0
        sub sp, sp, #36 
        str r0, [fp, #-32]
        ldr r3, [fp, #-32]   
        str r3, [fp, #-24]   ;combined
        ldr r3, [fp, #-24]
        ldr r3, [r3, #0]     
        str r3, [fp, #-20]   ;start
        ldr r3, [fp, #-20]
        mov r3, r3, lsr #16 
        str r3, [fp, #-16]   ;lo
        ldr r3, [fp, #-20]
        mov r3, r3, asl #16 
        str r3, [fp, #-12]   ;hi
        ldr r2, [fp, #-16]
        ldr r3, [fp, #-12]
        orr r3, r2, r3
        str r3, [fp, #-8]    ;final
        ldr r2, [fp, #-24]
        ldr r3, [fp, #-8]
        str r3, [r2, #0]     ;combined->u32 = final
        add sp, fp, #0
        ldmfd   sp!, {fp}
        bx  lr  
    
    arm-gcc file5.c -O2 -S -o file5.s
    //注意与file4.c中使用同样编译选项的编译结果的区别(file5.c比较高效,
    对于小结构体而言,file3.c同样的编译选项更高效)  
    swap_words:
        ldr r3, [r0, #0]
        mov r3, r3, ror #16
        str r3, [r0, #0]
        bx  lr 

    ==============================================================================

    实例file6.c
    Occasionally a programmer may encounter the following INVALID method for creating an alias with
    a pointer of a different type: (注意上述(2)的适用范围不包括如下情形)
    typedef union
    {
        unsigned short int* sp;
        unsigned int* wp;
    }U32P;

    unsigned int swap_words(unsigned int arg)
    {
        U32P in = {.wp = &arg};
        const unsigned int hi = in.sp[1];
        const unsigned int lo = in.sp[0];

        in.sp[0] = hi;
        in.sp[1] = lo;
        return arg;
    }

    int main(void)
    {
        int x = 0x12345678;
        int y = swap_words(x);
        printf("x:%08x y:%08x\n", x, y);
        return 0;
    }

    gcc file6.c -o mytest: 12345678 56781234
    swap_words:
        pushl   %ebp
        movl    %esp,      %ebp
        subl    $16,       %esp
        movl    $0,        -4(%ebp)
        leal    8(%ebp),   %eax
        movl    %eax,      -4(%ebp)   ;wp
        movl    -4(%ebp),  %eax       ;sp
        addl    $2,        %eax       ;sp + 2
        movzwl  (%eax),    %eax
        movzwl  %ax,       %eax
        movl    %eax,      -8(%ebp)   ;hi
        movl    -4(%ebp),  %eax
        movzwl  (%eax),    %eax
        movzwl  %ax,       %eax
        movl    %eax,      -12(%ebp)  ;lo
        movl    -4(%ebp),  %eax
        movl    -8(%ebp),  %edx       ;hi
        movw    %dx,       (%eax)     ;sp[0] = hi
        movl    -4(%ebp),  %eax
        leal    2(%eax),   %edx
        movl    -12(%ebp), %eax
        movw    %ax,       (%edx)     ;sp[1] = lo
        movl    8(%ebp),   %eax       ;lo:hi
        leave
        ret

    gcc file6.c -O2 -o mytest: 12345678 12345678
    swap_words:
        pushl   %ebp
        movl    %esp,      %ebp
        movzwl  8(%ebp),   %eax       ;arg(l)
        movzwl  10(%ebp),  %edx       ;arg(h)
        movw    %ax,       10(%ebp)   ;arg(l)arg(l)
        movl    8(%ebp),   %eax       ;arg(l)arg(l)
        movw    %dx,       8(%ebp)    ;arg(l)arg(h)
        popl    %ebp
        ret
    尽管在swap_words中正确修改了调用者的栈上的值,但是在main中根本就不调用swap_words,
    eax的值与我们的原意也不同

    arm-gcc -o mytest : 12345678 56781234
    swap_words:
        str     fp, [sp, #-4]!
        add     fp, sp, #0
        sub     sp, sp, #28
        str     r0, [fp, #-24]      ;arg
        mov     r3, #0
        str     r3, [fp, #-16]
        sub     r3, fp, #24
        str     r3, [fp, #-16]      ;wp
        ldr     r3, [fp, #-16]      ;sp
        add     r3, r3, #2
        ldrh    r3, [r3, #0]
        str     r3, [fp, #-12]      ;hi
        ldr     r3, [fp, #-16]      ;sp
        ldrh    r3, [r3, #0]
        str     r3, [fp, #-8]       ;lo
        ldr     r2, [fp, #-16]      ;sp
        ldr     r3, [fp, #-12]      ;hi
        mov     r3, r3, asl #16
        mov     r3, r3, lsr #16
        strh    r3, [r2, #0]        ;arg(h)arg(h)
        ldr     r3, [fp, #-16]
        add     r2, r3, #2
        ldr     r3, [fp, #-8]
        mov     r3, r3, asl #16
        mov     r3, r3, lsr #16
        strh    r3, [r2, #0]        ;arg(l)arg(h)
        ldr     r3, [fp, #-24]      ;arg(l)arg(h)
        mov     r0, r3
        add     sp, fp, #0
        ldmfd   sp!, {fp}
        bx      lr

    arm-gcc -O2 -o mytest : 12345678 12345678
    swap_words:
        sub sp, sp, #8
        str r0, [sp, #4]
        add sp, sp, #8
        bx  lr
    因为实施了fstrict-aliasing优化,因此,swap_words中做了非常大的优化,以至于不做任何计算,直接返回r0

    The problem with this method is although U32P does in fact say that sp is an alias for wp,
    it does not say anything about the relationship between the values pointed to by sp and wp.
    This differs in a critical way from the experiment in "file3.c & file4.c" which both define
    aliases for the values being pointed to, not the pointers themselves.

    =========================================================================

    对于上述情形(3)的说明:
    It is always presumed that a char* may refer to an alias of any object. It is therefore quite safe, if perhaps a bit unoptimal (for architecture with wide loads and stores) to cast any pointer of any type to a char* type. (即由于char*可能与任何类型的左值相同,因此编译器不会做过分的优化,无论是否指定fstrict-aliasing参数选项都在安全范围内做优化,使用char*带来的问题是可能存在多次load/store,可能会降低性能)

    如下代码,使用-O0, 与-O2编译出的程序执行效果相同,都可以交换字内两个半字(16位)的顺序(注意情形(3))
    /*
     * Swap the two 16-bit halves of `arg` one byte at a time through a char*.
     *
     * A character-type lvalue may access any object, so this version does not
     * depend on -fno-strict-aliasing. Bytes 0/1 trade places with bytes 2/3,
     * which exchanges the halfwords on either byte order.
     *
     * The pointer must be the ADDRESS of `arg`. Casting the value itself,
     * `(char*)arg`, would treat the integer as a memory address and
     * dereference an arbitrary location.
     *
     * NOTE(review): presumes sizeof(unsigned int) == 4.
     */
    unsigned int swap_words(unsigned int arg)
    {
        char* const cp = (char*)&arg;
        const char c0 = cp[0];
        const char c1 = cp[1];
        const char c2 = cp[2];
        const char c3 = cp[3];

        cp[0] = c2;
        cp[1] = c3;
        cp[2] = c0;
        cp[3] = c1;

        return arg;
    }

    对于情形(3),相反的情况则不一定成立,即:
    The converse is not true. Casting a char* to a pointer of any type other than a char* and dereferencing it is usually in violation of the strict aliasing rule.

    大叔来了,回去洗澡,有点乱,再看看

  • 相关阅读:
    艾伟:[WCF中的Binding模型]之六(完结篇):从绑定元素认识系统预定义绑定 狼人:
    艾伟:.NET框架4.0中都有些什么? 狼人:
    艾伟:WM有约(三):下一次是什么时候? 狼人:
    艾伟:为什么微软要推 ADO.NET Data Services Framework 狼人:
    艾伟:WM有约(二):配置信息 狼人:
    艾伟:F4何去何从 大视野观察Framework 4.0 狼人:
    艾伟:[WCF的Binding模型]之三:信道监听器(Channel Listener) 狼人:
    艾伟:.NET : 如何保护内存中的敏感数据? 狼人:
    艾伟:Silverlight 2.0 之旋转木马 狼人:
    艾伟:.NET和J2EE该相互学习什么 狼人:
  • 原文地址:https://www.cnblogs.com/openix/p/3201649.html
Copyright © 2011-2022 走看看