zoukankan      html  css  js  c++  java
  • ARM Memory Copy

      1         MODULE  ARM_MEMORY
      2 
      3         PUBLIC  ARM_MEMCPY
      4         PUBLIC  ARM_MEMSET
      5         PUBLIC  ARM_MEMSET8
      6         PUBLIC  ARM_MEMSET16
      7         PUBLIC  ARM_MEMSET32
      8 
      9         SECTION .text:CODE:NOROOT(2)
     10         CODE32
     11 
     12 ;-------------------------------------------------------------------------------
     13 ; void ARM_MEMCPY(void* pDest, void* pSrc, U32 NumBytes)
     14 ;
     15 ; Function description
     16 ;   Copy NumBytes bytes from pSrc to pDest, ascending from the lowest address.
     17 ;
     18 ; Register usage:
     19 ;
     20 ;   R0    pDest    (post-incremented by every store)
     21 ;   R1    pSrc     (post-incremented by every load)
     22 ;   R2    NumBytes (running count; the tail handling only looks at its low bits)
     23 ;
     24 ;   R3    Used for data transfers
     25 ;   R4    Used for data transfers (bulk path only, saved on the stack)
     26 ;   R12   Used for data transfers
     27 ;   R14   Used for data transfers (bulk path only, saved on the stack)
     28 ;
     29 ;   R13   SP
     30 ;   R14   LR (contains return address)
     31 ;   R15   PC
     32 ;
     33 ;-------------------------------------------------------------------------------
     34 ARM_MEMCPY:
     35 ;-------------------------------------------------------------------------------
     36         cmp         R2, #+3                           ; R2 = NumBytes
     37         bls         ARM_MEMCPY_HandleTrailingBytes    ; Fewer than 4 bytes in total? -> single byte transfers only
     38 
     39         ands        R12, R0, #+3                      ; R12 = mis-alignment of the destination address (R0 & 3)
     40         beq         ARM_MEMCPY_DestIsDWordAligned     ; Is destination address already word aligned ?
     41 
     42 ;-------------------------------------------------------------------------------
     43 ; Copy as many bytes as necessary (1..3) to word-align the destination address.
     44 ; The flags set by the CMP below stay live until the last conditional store.
     45         ldrb        R3, [R1], #+1                     ; At least one byte is needed to reach the next word boundary, so read it unconditionally
     46         cmp         R12, #+2                          ; LS: mis-alignment is 1 or 2, CC: mis-alignment is 1
     47         add         R2, R2, R12                       ; NumBytes += mis-alignment (R12 is reused as data register below)
     48         ldrbls      R12, [R1], #+1                    ; Lower or same (LS)? -> a second byte is needed
     49         strb        R3, [R0], #+1
     50         ldrbcc      R3, [R1], #+1                     ; Carry clear (CC)? -> a third byte is needed
     51         strbls      R12, [R0], #+1
     52         sub         R2, R2, #+4                       ; NumBytes -= 4, net effect: NumBytes -= (4 - mis-alignment)
     53         strbcc      R3, [R0], #+1                     ; Destination address is word aligned from here on
     54 
     55 ;-------------------------------------------------------------------------------
     56 ; Choose best way to transfer data
     57 ;
     58 ARM_MEMCPY_DestIsDWordAligned:
     59         ands        R3, R1, #+3                       ; R3 = mis-alignment of the source address (R1 & 3)
     60         beq         ARM_MEMCPY_HandleBulkWordData     ; If source and destination are aligned, use bulk word transfer
     61 
     62         subs        R2, R2, #+4                       ; Pre-decrement: the shift loops test the borrow of NumBytes - 4
     63         bcc         ARM_MEMCPY_HandleTrailingBytes    ; Less than one complete word left -> single byte transfer (bits 1:0 of R2 are unchanged)
     64 
     65         ldr         R12, [R1, -R3]!                   ; Word-align source address (write-back) and read the word holding the first source bytes
     66         cmp         R3, #+2
     67         beq         ARM_MEMCPY_Loop16BitShift
     68 
     69         bhi         ARM_MEMCPY_Loop24BitShift
     70 
     71 ;-------------------------------------------------------------------------------
     72 ; Source is not word aligned: copy in units of one word
     73 ;
     74 ; Each pass reads the next aligned source word, combines the still unused upper
     75 ; bytes of the previous word (LSR) with the lower bytes of the new one (LSL)
     76 ; and stores the result as one aligned destination word.
     77 ; NOTE(review): the shift directions presume little-endian byte order - confirm.
     78 ARM_MEMCPY_Loop8BitShift:
     79         mov         R3, R12, LSR #+8           ; Keep the 3 unused bytes of the previous word
     80         ldr         R12, [R1, #+4]!            ; Load next aligned source word
     81         subs        R2, R2, #+4                ; Decrement NumBytes (C stays set while no borrow occurs)
     82         orr         R3, R3, R12, LSL #+24      ; Add the missing byte from the new word to build a full data word
     83         str         R3, [R0], #+4              ; Store complete word
     84         bcs         ARM_MEMCPY_Loop8BitShift
     85 
     86         add         R1, R1, #+1                ; Undo the alignment: R1 = address of the next unread source byte
     87         b           ARM_MEMCPY_HandleTrailingBytes         ; Handle trailing bytes
     88 
     89 ARM_MEMCPY_Loop16BitShift:
     90         mov         R3, R12, LSR #+16          ; Keep the 2 unused bytes of the previous word
     91         ldr         R12, [R1, #+4]!            ; Load next aligned source word
     92         subs        R2, R2, #+4                ; Decrement NumBytes (C stays set while no borrow occurs)
     93         orr         R3, R3, R12, LSL #+16      ; Add the 2 missing bytes from the new word to build a full data word
     94         str         R3, [R0], #+4              ; Store complete word
     95         bcs         ARM_MEMCPY_Loop16BitShift
     96 
     97         add         R1, R1, #+2                ; Undo the alignment: R1 = address of the next unread source byte
     98         b           ARM_MEMCPY_HandleTrailingBytes         ; Handle trailing bytes
     99 
    100 ARM_MEMCPY_Loop24BitShift:
    101         mov         R3, R12, LSR #+24          ; Keep the 1 unused byte of the previous word
    102         ldr         R12, [R1, #+4]!            ; Load next aligned source word
    103         subs        R2, R2, #+4                ; Decrement NumBytes (C stays set while no borrow occurs)
    104         orr         R3, R3, R12, LSL #+8       ; Add the 3 missing bytes from the new word to build a full data word
    105         str         R3, [R0], #+4              ; Store complete word
    106         bcs         ARM_MEMCPY_Loop24BitShift
    107 
    108         add         R1, R1, #+3                ; Undo the alignment: R1 = address of the next unread source byte
    109         b           ARM_MEMCPY_HandleTrailingBytes         ; Handle trailing bytes
    110 
    111 ;-------------------------------------------------------------------------------
    112 ; Source and destination are word aligned: copy blocks of 8 words (32 bytes)
    113 ;
    114 ARM_MEMCPY_HandleBulkWordData:
    115         subs        R2, R2, #+0x20             ; NumBytes -= 32, borrow (C clear) if fewer than 32 bytes are left
    116         stmdb       SP!, {R4, LR}              ; Save R4 and LR, both are used as data registers below
    117         bcc         ARM_MEMCPY_HandleTrailingWords
    118 
    119 ARM_MEMCPY_LoopHandleBulkWord:
    120         ldm         R1!, {R3, R4, R12, LR}     ; Transfer 16 bytes at once
    121         stm         R0!, {R3, R4, R12, LR}
    122         ldm         R1!, {R3, R4, R12, LR}     ; Transfer 16 bytes at once
    123         stm         R0!, {R3, R4, R12, LR}
    124         subs        R2, R2, #+0x20
    125         bcs         ARM_MEMCPY_LoopHandleBulkWord
    126 
    127 ;-------------------------------------------------------------------------------
    128 ; Handle up to 7 trailing words (R2 is NumBytes - 32 here, its low 5 bits are still valid)
    129 ;
    130 ARM_MEMCPY_HandleTrailingWords:
    131         movs        R12, R2, LSL #28           ; Shift NumBytes left so that C = bit 4 and N = bit 3 of NumBytes
    132 
    133         ldmcs       R1!, {R3, R4, R12, LR}     ; C flag contains bit 4 of NumBytes (transfer 16 bytes if it is set)
    134         stmcs       R0!, {R3, R4, R12, LR}
    135         ldmmi       R1!, {R3, R4}              ; N flag contains bit 3 of NumBytes (transfer 8 bytes if it is set)
    136         stmmi       R0!, {R3, R4}
    137 
    138         movs        R12, R2, LSL #+30          ; Shift NumBytes left so that C = bit 2, N = bit 1, Z = (bits 1:0 == 0)
    139 
    140         ldmia       SP!, {R4, LR}              ; Restore R4 and the return address
    141         ldrcs       R3, [R1], #+4              ; C flag contains bit 2 of NumBytes (transfer 4 bytes if it is set)
    142         strcs       R3, [R0], #+4
    143         bxeq        LR                         ; Z flag set: no trailing bytes left, done
    144 
    145 ;-------------------------------------------------------------------------------
    146 ; Handle up to 3 trailing bytes
    147 ;
    148 ; CPSR layout: N Z C V Q ... I F T M4 M3 M2 M1 M0
    149 ; MOVS Rd, Rm, LSL #31 sets:
    150 ;   N = bit 31 of the result  = bit 0 of NumBytes
    151 ;   C = last bit shifted out  = bit 1 of NumBytes
    152 ; (for comparison: ADD/CMN set C on carry, SUB/CMP set C when there is no borrow)
    153 ; NumBytes = ...10 : N=0, C=1 -> two bytes
    154 ; NumBytes = ...01 : N=1, C=0 -> one byte
    155 ; MI : N=1
    156 ; CS : C=1
    157 ARM_MEMCPY_HandleTrailingBytes:
    158         movs        R2, R2, LSL #+31           ; Shift NumBytes left to use C and N flag of CPSR for conditional load/store
    159 
    160         ldrbmi      R2, [R1], #+1              ; N flag contains bit 0 of NumBytes (transfer 1 byte if it is set); R2 is free now
    161         ldrbcs      R3, [R1], #+1              ; C flag contains bit 1 of NumBytes (transfer 2 bytes if it is set)
    162         ldrbcs      R12, [R1], #+1
    163         strbmi      R2, [R0], #+1
    164         strbcs      R3, [R0], #+1
    165         strbcs      R12, [R0], #+1
    166         bx          LR
    167 
    168 
    169 ;-------------------------------------------------------------------------------
    170 ; void ARM_MEMSET(void* pDest, U32 c, U32 NumBytes)
    171 ;
    172 ; Function description
    173 ;   Fill NumBytes bytes starting at pDest with the byte value (c & 0xFF).
    174 ;
    175 ; Register usage:
    176 ;
    177 ;   R0    pDest    (post-incremented by every store)
    178 ;   R1    c        (reduced to a byte and replicated into all 4 byte lanes)
    179 ;   R2    NumBytes (running count; the tail handling only looks at its low bits)
    180 ;
    181 ;   R3    Copy of the fill pattern
    182 ;   R4    Copy of the fill pattern (saved on the stack)
    183 ;   R5    Copy of the fill pattern (saved on the stack)
    184 ;   R6    Scratch destination of the flag-setting shifts (saved on the stack)
    185 ;
    186 ;   R13   SP
    187 ;   R14   LR (contains return address)
    188 ;   R15   PC
    189 ;
    190 ;-------------------------------------------------------------------------------
    191 ARM_MEMSET:
    192 ;-------------------------------------------------------------------------------
    193         and         R1, R1, #+0xFF                    ; Only the low byte of c is the fill value (e.g. a sign-extended char)
    194         orr         R1, R1, R1, LSL #+8               ; Replicate it to 16 bits ...
    195         orr         R1, R1, R1, LSL #+16              ; ... and to 32 bits, so word stores match the byte stores
    196         cmp         R2, #+3                           ; R2 = NumBytes
    197         bls         ARM_MEMSET_HandleTrailingBytes    ; Fewer than 4 bytes in total? -> single byte transfers only
    198 
    199         ands        R3, R0, #+3                       ; R3 = mis-alignment of the destination address (R0 & 3)
    200         beq         ARM_MEMSET_DestIsAligned          ; Is destination address already word aligned ?
    201 
    202 ; Write as many bytes as necessary (1..3) to word-align the destination address
    203 
    204         strb        R1, [R0], #+1              ; At least one byte is needed to reach the next word boundary, so write it unconditionally
    205         cmp         R3, #+2                    ; LS: mis-alignment is 1 or 2, CC: mis-alignment is 1
    206         add         R2, R2, R3                 ; NumBytes += mis-alignment
    207         strbls      R1, [R0], #+1              ; Lower or same (LS)? -> a second byte is needed
    208         sub         R2, R2, #+4                ; NumBytes -= 4, net effect: NumBytes -= (4 - mis-alignment)
    209         strbcc      R1, [R0], #+1              ; Carry clear (CC)? -> a third byte is needed
    210 
    211 ; Destination is word aligned from here on
    212 
    213 ARM_MEMSET_DestIsAligned:                      ; destination is aligned, use bulk word transfer
    214 
    215 ; Handle large bulk data in blocks of 8 words (32 bytes)
    216 
    217 ARM_MEMSET_HandleBulkWordData:
    218         stmdb       SP!, {R4, R5, R6}
    219 
    220         mov         R3, R1                     ; Four registers with the pattern = 16 bytes per STM
    221         mov         R4, R1
    222         mov         R5, R1
    223 
    224         subs        R2, R2, #+0x20             ; NumBytes -= 32, borrow (C clear) if fewer than 32 bytes are left
    225         bcc         ARM_MEMSET_HandleTrailingWords
    226 
    227 ARM_MEMSET_LoopHandleBulkWord:
    228         stm         R0!, {R1, R3, R4, R5}
    229         stm         R0!, {R1, R3, R4, R5}
    230         subs        R2, R2, #+0x20
    231         bcs         ARM_MEMSET_LoopHandleBulkWord
    232 
    233 
    234 ; Handle up to 7 trailing words (R2 is NumBytes - 32 here, its low 5 bits are still valid)
    235 
    236 ARM_MEMSET_HandleTrailingWords:
    237         movs        R6, R2, LSL #28            ; Shift NumBytes left so that C = bit 4 and N = bit 3 of NumBytes
    238         stmcs       R0!, {R1, R3, R4, R5}      ; C flag contains bit 4 of NumBytes (store 16 bytes if it is set)
    239         stmmi       R0!, {R1, R3}              ; N flag contains bit 3 of NumBytes (store 8 bytes if it is set)
    240 
    241         movs        R6, R2, LSL #+30           ; Shift NumBytes left so that C = bit 2, N = bit 1, Z = (bits 1:0 == 0)
    242         strcs       R1, [R0], #+4              ; C flag contains bit 2 of NumBytes (store 4 bytes if it is set)
    243 
    244         ldmia       SP!, {R4, R5, R6}
    245         bxeq        LR                         ; Z flag set: no trailing bytes left, done
    246 
    247 
    248 ; Handle up to 3 trailing bytes
    249 
    250 ARM_MEMSET_HandleTrailingBytes:
    251         movs        R2, R2, LSL #+31           ; Shift NumBytes left so that N = bit 0 and C = bit 1 of NumBytes
    252         strbmi      R1, [R0], #+1              ; N flag contains bit 0 of NumBytes (store 1 byte if it is set)
    253         strbcs      R1, [R0], #+1              ; C flag contains bit 1 of NumBytes (store 2 bytes if it is set)
    254         strbcs      R1, [R0], #+1
    255         bx          LR
    256 
    257 ; Fill NumBytes bytes at pDest with the byte value (c & 0xFF).  R0 = pDest, R1 = c, R2 = NumBytes.
    258 ;      int ARM_MEMSET8(void* pDest, U32 c, U32 NumBytes);
    259 ;-------------------------------------------------------------------------------
    260 ARM_MEMSET8:
    261 ;-------------------------------------------------------------------------------
    262         stmdb       SP!, {R4, R5}
    263         cmp         R2, #4                    ; Signed compare: counts >= 0x80000000 are treated as negative
    264         blt         ARM_MEMSET8_loop3         ; Fewer than 4 bytes -> byte stores only (STRB uses the low byte of R1)
    265         and         R1, R1, #0xFF             ; Only the low byte of c is the fill value; required before replicating it
    266         ; Alignment is unknown
    267         tst         R0, #1
    268         strneb      R1, [R0], #1              ; Odd address -> one byte brings us to 16-bit alignment
    269         subne       R2, R2, #1
    270 
    271         ; Now we are 16-bit aligned (need to upgrade 'c' to 16-bit)
    272         orr         R1, R1, R1, LSL #8
    273         tst         R0, #2
    274         strneh      R1, [R0], #2              ; One halfword brings us to 32-bit alignment
    275         subne       R2, R2, #2
    276 
    277         ; Now we are 32-bit aligned (need to upgrade 'c' to 32-bit)
    278         orr         R1, R1, R1, LSL #16
    279         mov         R3, R1
    280         cmp         R2, #16
    281         blt         ARM_MEMSET8_loop2         ; Fewer than 16 bytes left -> skip the 128-bit alignment and the bulk loop
    282         tst         R0, #4
    283         strne       R1, [R0], #4              ; One word brings us to 64-bit alignment
    284         subne       R2, R2, #4
    285         tst         R0, #8
    286         stmneia     R0!, {R1, R3}             ; Two words bring us to 128-bit alignment
    287         subne       R2, R2, #8
    288 
    289         ; Now we are 128-bit aligned
    290         mov         R4, R1
    291         mov         R5, R1
    292 ARM_MEMSET8_loop1:
    293         ; Store 4 32-bit values per loop iteration
    294         subs        R2, R2, #16
    295         stmgeia     R0!, {R1, R3, R4, R5}
    296         bge         ARM_MEMSET8_loop1
    297         add         R2, R2, #16               ; Undo the last subtraction: R2 = 0..15 bytes left
    298 
    299 ARM_MEMSET8_loop2:
    300         ; Store up to 3 remaining 32-bit values
    301         tst         R2, #8
    302         stmneia     R0!, {R1, R3}
    303         tst         R2, #4
    304         strne       R1, [R0], #4
    305         and         R2, R2, #3                ; R2 = number of remaining bytes (0..3)
    306 
    307 ARM_MEMSET8_loop3:
    308         ; Store up to 3 remaining bytes
    309         subs        R2, R2, #1
    310         strgeb      R1, [R0], #1
    311         subs        R2, R2, #1
    312         strgeb      R1, [R0], #1
    313         subs        R2, R2, #1
    314         strgeb      R1, [R0], #1
    315         ldmia       SP!, {R4, R5}
    316         bx          LR                        ; No dedicated return value is set up; R0 = address following the last byte written
    317 
    318 ; int ARM_MEMSET16(void* pDest, U32 c, U32 NumHalfWords);
    319 ;-------------------------------------------------------------------------------
    320 ARM_MEMSET16:
    321 ; Fill NumHalfWords 16-bit units at pDest (R0, not checked for 16-bit alignment) with (c & 0xFFFF).
    322         stmdb       SP!, {R4, R5}
    323 
    324         cmp         R2, #2                    ; Signed compare: counts >= 0x80000000 are treated as negative
    325         blt         ARM_MEMSET16_HandleTrailingHalfWord    ; 1 or 0
    326 
    327         ; Alignment is known to be at least 16-bit
    328         tst         R0, #2
    329         strneh      R1, [R0], #2              ; xxxx-xx10 --->
    330         subne       R2, R2, #1                ; xxxx-xx00
    331 
    332         ; Now we are 32-bit aligned (need to upgrade 'c' to 32-bit )
    333         mov         R1, R1, LSL #16           ; Drop the upper 16 bits of c ...
    334         orr         R1, R1, R1, LSR #16       ; ... and replicate the low halfword into both halves
    335         mov         R4, R1
    336         cmp         R2, #8
    337         blt         ARM_MEMSET16_HandleTrailingWords       ; 7, 6, ... 0
    338 
    339         tst         R0, #4
    340         strne       R1, [R0], #4              ; xxxx-x100 --->
    341         subne       R2, R2, #2                ; xxxx-x000 --->
    342 
    343         ; Now we are 64-bit aligned
    344         tst         R0, #8
    345         stmneia     R0!, {R1, R4}             ; xxxx-1000 --->
    346         subne       R2, R2, #4                ; xxxx-0000 --->
    347 
    348 ARM_MEMSET16_HandleBulkWordData:
    349         ; Now we are 128-bit aligned
    350         mov         R5, R1
    351         mov         R3, R1
    352 
    353 ARM_MEMSET16_LoopHandleBulkWord:
    354         ; Store 4 32-bit values (8 halfwords) per loop iteration
    355         subs        R2, R2, #8
    356         stmgeia     R0!, {R1, R3, R4, R5}
    357         bge         ARM_MEMSET16_LoopHandleBulkWord
    358         add         R2, R2, #8                ; Undo the last subtraction: R2 = 0..7 halfwords left
    359 
    360 ARM_MEMSET16_HandleTrailingWords:
    361         ; Store up to 3 remaining 32-bit values
    362         tst         R2, #4                    ; 4 halfwords = 2 words
    363         stmneia     R0!, {R1, R4}
    364 
    365         tst         R2, #2                    ; 2 halfwords = 1 word
    366         strne       R1, [R0], #4
    367 
    368         and         R2, R2, #1
    369 
    370 ARM_MEMSET16_HandleTrailingHalfWord:
    371         ; Store up to 1 remaining 16-bit value
    372         subs        R2, R2, #1
    373         strgeh      R1, [R0], #2
    374 
    375         ldmia       SP!, {R4, R5}
    376         bx          LR                        ; No dedicated return value is set up; R0 = address following the last halfword written
    377 
    378 ; Fill NumWords 32-bit words at pDest with c.  R0 = pDest, R1 = c, R2 = NumWords.
    379 ; int ARM_MEMSET32(void* pDest, U32 c, U32 NumWords);
    380 ;-------------------------------------------------------------------------------
    381 ARM_MEMSET32:
    382 ;-------------------------------------------------------------------------------
    383         stmdb       SP!, {R4, R5}
    384 
    385         cmp         R2, #4                    ; Signed compare: counts >= 0x80000000 are treated as negative
    386         blt         ARM_MEMSET32_loop2        ; Fewer than 4 words -> single word stores only
    387 
    388         ; pDest is taken to be at least 32-bit aligned (address bits 1:0 are not checked)
    389         mov         R3, R1
    390 
    391         tst         R0, #4
    392         strne       R1, [R0], #4              ; One word brings us to 64-bit alignment
    393         subne       R2, R2, #1
    394 
    395         ; Now we are 64-bit aligned
    396         tst         R0, #8
    397         stmneia     R0!, {R1, R3}             ; Two words bring us to 128-bit alignment
    398         subne       R2, R2, #2
    399 
    400         ; Now we are 128-bit aligned
    401         mov         R4, R1
    402         mov         R5, R1
    403 ARM_MEMSET32_loop1:
    404         ; Store 4 32-bit values per loop iteration
    405         subs        R2, R2, #4
    406         stmgeia     R0!, {R1, R3, R4, R5}
    407         bge         ARM_MEMSET32_loop1
    408         add         R2, R2, #4                ; Undo the last subtraction: R2 = 0..3 words left
    409 
    410 ARM_MEMSET32_loop2:
    411         ; Store up to 3 remaining 32-bit values; each SUBS decides about the store that follows it
    412         subs        R2, R2, #1
    413         strge       R1, [R0], #4
    414         subs        R2, R2, #1
    415         strge       R1, [R0], #4
    416         subs        R2, R2, #1
    417         strge       R1, [R0], #4
    418 
    419         ldmia       SP!, {R4, R5}
    420         bx          LR                        ; No dedicated return value is set up; R0 = address following the last word written
    421 
    422 ; __arm void arm_memxor(void* pDest, U32 c, U32 NumBytes);
    423 ;   R0 = pDest, R1 = c, R2 = NumBytes.  XORs each of the NumBytes bytes at pDest with (c & 0xFF), in place.
    424 ;-------------------------------------------------------------------------------
    425 arm_memxor:
    426 ; NOTE(review): no PUBLIC directive for this symbol is visible in this listing - confirm whether it should be exported.
    427         and         R1, R1, #+0xFF              ; Only the low byte of c is the XOR value
    428         orr         R1, R1, R1, LSL #+8         ; Replicate it to 16 bits ...
    429         orr         R1, R1, R1, LSL #+16        ; ... and to 32 bits, so word accesses match the byte accesses
    430         cmp         R2, #+3                     ; R2 = NumBytes
    431         bls         arm_memxor_HandleTrailingBytes        ; Fewer than 4 bytes in total? -> single byte transfers only
    432 
    433         ands        R3, R0, #+3                 ; R3 = mis-alignment of the destination address (R0 & 3)
    434         beq         arm_memxor_DestIsAligned              ; Is destination address already word aligned ?
    435 
    436 ;-
    437 ; Process as many bytes as necessary (1..3) to word-align the destination address
    438 ;-
    439         ldrb        R12, [R0]                   ; At least one byte is needed to reach the next word boundary
    440         eor         R12, R12, R1
    441         strb        R12, [R0], #+1
    442 
    443         cmp         R3, #+2                    ; LS: mis-alignment is 1 or 2, CC: mis-alignment is 1
    444         add         R2, R2, R3                 ; NumBytes += mis-alignment (R3 is reused as data register below)
    445 
    446         ldrbls      R3, [R0]                   ; Lower or same (LS)? -> a second byte is needed
    447         eorls       R3, R3, R1
    448         strbls      R3, [R0], #+1
    449 
    450         sub         R2, R2, #+4                ; NumBytes -= 4, net effect: NumBytes -= (4 - mis-alignment)
    451 
    452         ldrbcc      R3, [R0]                   ; Carry clear (CC)? -> a third byte is needed
    453         eorcc       R3, R3, R1
    454         strbcc      R3, [R0], #+1
    455 
    456 ;-
    457 ; Destination is word aligned from here on
    458 ;-
    459 arm_memxor_DestIsAligned:                                  ; destination is aligned, use bulk word transfer
    460 ;-
    461 ; Handle large bulk data in blocks of 8 words (32 bytes)
    462 ;-
    463 arm_memxor_HandleBulkWordData:
    464         stmdb       SP!, {R4, R5, R6, R7}
    465 
    466         subs        R2, R2, #+0x20             ; NumBytes -= 32, borrow (C clear) if fewer than 32 bytes are left
    467         bcc         arm_memxor_HandleTrailingWords
    468 
    469 arm_memxor_LoopHandleBulkWord:
    470         ldm         R0,  {R3, R4, R5, R6}      ; Read 16 bytes without write-back ...
    471         eor         R3, R3, R1
    472         eor         R4, R4, R1
    473         eor         R5, R5, R1
    474         eor         R6, R6, R1
    475         stm         R0!, {R3, R4, R5, R6}      ; ... and advance R0 with the store
    476 
    477         ldm         R0,  {R3, R4, R5, R6}
    478         eor         R3, R3, R1
    479         eor         R4, R4, R1
    480         eor         R5, R5, R1
    481         eor         R6, R6, R1
    482         stm         R0!, {R3, R4, R5, R6}
    483 
    484         subs        R2, R2, #+0x20
    485         bcs         arm_memxor_LoopHandleBulkWord
    486 
    487 ;-
    488 ; Handle up to 7 trailing words (R2 is NumBytes - 32 here, its low 5 bits are still valid)
    489 ;-
    490 arm_memxor_HandleTrailingWords:
    491         movs        R7, R2, LSL #28             ; Shift NumBytes left so that C = bit 4 and N = bit 3 of NumBytes
    492 
    493         ldmcs       R0,  {R3, R4, R5, R6}
    494         eorcs       R3, R3, R1
    495         eorcs       R4, R4, R1
    496         eorcs       R5, R5, R1
    497         eorcs       R6, R6, R1
    498         stmcs       R0!, {R3, R4, R5, R6}       ; C flag contains bit 4 of NumBytes (process 16 bytes if it is set)
    499 
    500         ldmmi       R0,  {R3, R4}
    501         eormi       R3, R3, R1
    502         eormi       R4, R4, R1
    503         stmmi       R0!, {R3, R4}                ; N flag contains bit 3 of NumBytes (process 8 bytes if it is set)
    504 
    505         movs        R7, R2, LSL #+30            ; Shift NumBytes left so that C = bit 2, N = bit 1, Z = (bits 1:0 == 0)
    506 
    507         ldrcs       R3, [R0]
    508         eorcs       R3, R3, R1
    509         strcs       R3, [R0], #+4              ; C flag contains bit 2 of NumBytes (process 4 bytes if it is set)
    510 
    511         ldmia       SP!, {R4, R5, R6, R7}
    512         bxeq        LR                          ; Z flag set: no trailing bytes left, done
    513 
    514 ;-
    515 ; Handle up to 3 trailing bytes (byte loads only: R0 may be unaligned and a word load would read past the buffer)
    516 ;-
    517 arm_memxor_HandleTrailingBytes:
    518         movs        R2, R2, LSL #+31           ; Shift NumBytes left so that N = bit 0 and C = bit 1 of NumBytes
    519 
    520         ldrbmi      R2, [R0]                   ; R2 is free now, the flags carry the remaining count
    521         eormi       R2, R2, R1
    522         strbmi      R2, [R0], #+1              ; N flag contains bit 0 of NumBytes (process 1 byte if it is set)
    523 
    524         ldrbcs      R2, [R0]
    525         eorcs       R2, R2, R1
    526         strbcs      R2, [R0], #+1              ; C flag contains bit 1 of NumBytes (process 2 bytes if it is set)
    527 
    528         ldrbcs      R2, [R0]
    529         eorcs       R2, R2, R1
    530         strbcs      R2, [R0], #+1              ; second byte of the C flag pair
    531 
    532         bx          LR
    533 
    534 ; __arm int arm_memxor8(void* pDest, U32 c, U32 NumBytes);
    535 ;   R0 = pDest, R1 = c, R2 = NumBytes.  XORs each of the NumBytes bytes at pDest with (c & 0xFF), in place.
    536 ;-------------------------------------------------------------------------------
    537 arm_memxor8:
    538 ;-------------------------------------------------------------------------------
    539         stmdb       SP!, {R4, R5, R6}
    540 
    541         and         R1, R1, #+0xFF            ; Only the low byte of c is the XOR value
    542         orr         R1, R1, R1, LSL #+8       ; Replicate it to 16 bits ...
    543         orr         R1, R1, R1, LSL #+16      ; ... and to 32 bits, so word accesses match the byte accesses
    544         cmp         R2, #4                    ; Signed compare: counts >= 0x80000000 are treated as negative
    545         blt         arm_memxor8_loop3         ; Fewer than 4 bytes -> byte accesses only
    546 
    547         ; Alignment is unknown
    548         tst         R0, #1
    549 
    550         ldrneb      R6, [R0]                  ; Odd address -> one byte brings us to 16-bit alignment
    551         eorne       R6, R6, R1
    552         strneb      R6, [R0], #1
    553 
    554         subne       R2, R2, #1
    555 
    556         ; Now we are 16-bit aligned
    557         tst         R0, #2
    558 
    559         ldrneh      R6, [R0]                  ; One halfword brings us to 32-bit alignment
    560         eorne       R6, R6, R1
    561         strneh      R6, [R0], #2
    562 
    563         subne       R2, R2, #2
    564 
    565         ; Now we are 32-bit aligned
    566         cmp         R2, #16
    567         blt         arm_memxor8_loop2         ; Fewer than 16 bytes left -> skip the 128-bit alignment and the bulk loop
    568         tst         R0, #4
    569 
    570         ldrne       R6, [R0]                  ; One word brings us to 64-bit alignment
    571         eorne       R6, R6, R1
    572         strne       R6, [R0], #4
    573         ; Now we are 64-bit aligned
    574         subne       R2, R2, #4
    575         tst         R0, #8
    576 
    577         ldmneia     R0, {R3, R6}              ; Two words bring us to 128-bit alignment
    578         eorne       R3, R3, R1
    579         eorne       R6, R6, R1
    580         stmneia     R0!, {R3, R6}
    581 
    582         subne       R2, R2, #8
    583 
    584         ; Now we are 128-bit aligned
    585         ; (R3-R6 need no preload: every use below is preceded by a load from memory)
    586 
    587 arm_memxor8_loop1:
    588         ; XOR 4 32-bit values per loop iteration
    589         subs        R2, R2, #16
    590 
    591         ldmgeia     R0,  {R3, R4, R5, R6}
    592         eorge       R3, R3, R1
    593         eorge       R4, R4, R1
    594         eorge       R5, R5, R1
    595         eorge       R6, R6, R1
    596         stmgeia     R0!, {R3, R4, R5, R6}
    597 
    598         bge         arm_memxor8_loop1
    599         add         R2, R2, #16               ; Undo the last subtraction: R2 = 0..15 bytes left
    600 
    601 arm_memxor8_loop2:
    602         ; XOR up to 3 remaining 32-bit values
    603         tst         R2, #8
    604 
    605         ldmneia     R0, {R3, R4}
    606         eorne       R3, R3, R1
    607         eorne       R4, R4, R1
    608         stmneia     R0!, {R3, R4}
    609 
    610         tst         R2, #4
    611 
    612         ldrne       R3, [R0]
    613         eorne       R3, R3, R1
    614         strne       R3, [R0], #4
    615 
    616         and         R2, R2, #3                ; R2 = number of remaining bytes (0..3)
    617 
    618 arm_memxor8_loop3:
    619         ; XOR up to 3 remaining bytes; each SUBS decides about the group that follows it
    620         subs        R2, R2, #1
    621 
    622         ldrgeb      R3, [R0]
    623         eorge       R3, R3, R1
    624         strgeb      R3, [R0], #1
    625 
    626         subs        R2, R2, #1
    627 
    628         ldrgeb      R3, [R0]
    629         eorge       R3, R3, R1
    630         strgeb      R3, [R0], #1              ; Store the XOR result (R3), not the pattern (R1)
    631 
    632         subs        R2, R2, #1
    633 
    634         ldrgeb      R3, [R0]
    635         eorge       R3, R3, R1
    636         strgeb      R3, [R0], #1              ; Store the XOR result (R3), not the pattern (R1)
    637 
    638         ldmia       SP!, {R4, R5, R6}
    639         bx          LR                        ; No dedicated return value is set up; R0 = address following the last byte processed
    640 
    641 ; __arm int arm_memxor16(void* pDest, U32 c, U32 NumHalfWords);
    642 ;   R0 = pDest, R1 = c, R2 = NumHalfWords.  XORs each 16-bit unit at pDest with (c & 0xFFFF), in place.
    643 ;-------------------------------------------------------------------------------
    644 arm_memxor16:
    645 ;-------------------------------------------------------------------------------
    646         stmdb       SP!, {R4, R5, R6}
    647         mov         R1, R1, LSL #+16          ; Drop the upper 16 bits of c ...
    648         orr         R1, R1, R1, LSR #+16      ; ... and replicate the low halfword into both halves
    649         cmp         R2, #2                    ; Signed compare: counts >= 0x80000000 are treated as negative
    650         blt         arm_memxor16_HandleTrailingHalfWord    ; 1 or 0
    651 
    652         ; Alignment is taken to be at least 16-bit (address bit 0 is not checked)
    653         tst         R0, #2
    654 
    655         ldrneh      R6, [R0]
    656         eorne       R6, R6, R1
    657         strneh      R6, [R0], #2              ; xxxx-xx10 --->
    658 
    659         subne       R2, R2, #1                ; xxxx-xx00
    660 
    661         ; Now we are 32-bit aligned
    662         cmp         R2, #8
    663         blt         arm_memxor16_HandleTrailingWords       ; 7, 6, ... 0
    664 
    665         tst         R0, #4
    666 
    667         ldrne       R3, [R0]
    668         eorne       R3, R3, R1
    669         strne       R3, [R0], #4              ; xxxx-x100 --->
    670 
    671 
    672         subne       R2, R2, #2                ; xxxx-x000 --->
    673 
    674         ; Now we are 64-bit aligned
    675         tst         R0, #8
    676 
    677         ldmneia     R0, {R3, R4}
    678         eorne       R3, R3, R1
    679         eorne       R4, R4, R1
    680         stmneia     R0!, {R3, R4}             ; xxxx-1000 --->
    681 
    682         subne       R2, R2, #4                ; xxxx-0000 --->
    683 
    684 arm_memxor16_HandleBulkWordData:
    685         ; Now we are 128-bit aligned
    686         ; (R3-R6 need no preload: every use below is preceded by a load from memory)
    687 
    688 
    689 arm_memxor16_LoopHandleBulkWord:
    690         ; XOR 4 32-bit values (8 halfwords) per loop iteration
    691         subs        R2, R2, #8
    692 
    693         ldmgeia     R0,  {R3, R4, R5, R6}
    694         eorge       R3, R3, R1
    695         eorge       R4, R4, R1
    696         eorge       R5, R5, R1
    697         eorge       R6, R6, R1
    698         stmgeia     R0!, {R3, R4, R5, R6}
    699 
    700         bge         arm_memxor16_LoopHandleBulkWord
    701         add         R2, R2, #8                ; Undo the last subtraction: R2 = 0..7 halfwords left
    702 
    703 arm_memxor16_HandleTrailingWords:
    704         ; XOR up to 3 remaining 32-bit values
    705         tst         R2, #4                    ; 4 halfwords = 2 words
    706 
    707         ldmneia     R0, {R3, R4}
    708         eorne       R3, R3, R1
    709         eorne       R4, R4, R1
    710         stmneia     R0!, {R3, R4}
    711 
    712         tst         R2, #2                    ; 2 halfwords = 1 word
    713 
    714         ldrne       R3, [R0]
    715         eorne       R3, R3, R1
    716         strne       R3, [R0], #4
    717 
    718         and         R2, R2, #1
    719 
    720 arm_memxor16_HandleTrailingHalfWord:
    721         ; XOR up to 1 remaining 16-bit value
    722         subs        R2, R2, #1
    723 
    724         ldrgeh      R3, [R0]
    725         eorge       R3, R3, R1
    726         strgeh      R3, [R0], #2
    727 
    728         ldmia       SP!, {R4, R5, R6}
    729         bx          LR                        ; No dedicated return value is set up; R0 = address following the last halfword processed
    730 
    731 
    732 ; __arm int arm_memxor32(void* pDest, U32 c, U32 NumWords);
    733 ;   R0 = pDest, R1 = c, R2 = NumWords.  XORs each 32-bit word at pDest with c, in place.
    734 ;-------------------------------------------------------------------------------
    735 arm_memxor32:
    736 ;-------------------------------------------------------------------------------
    737         stmdb       SP!, {R4, R5, R6}
    738 
    739         cmp         R2, #4                    ; Signed compare: counts >= 0x80000000 are treated as negative
    740         blt         arm_memxor32_loop2        ; Fewer than 4 words -> single word accesses only
    741 
    742         ; Alignment is taken to be at least 32-bit (address bits 1:0 are not checked), is it 64-bit aligned ?
    743         tst         R0, #4
    744         ; No, it is 32-bit aligned: one word brings us to 64-bit alignment
    745         ldrne       R3, [R0]
    746         eorne       R3, r3, R1
    747         strne       R3, [R0], #4
    748         subne       R2, R2, #1
    749 
    750         ; Now we are 64-bit aligned, is it 128-bit aligned ?
    751         tst         R0, #8
    752         ; No, it is 64-bit aligned: two words bring us to 128-bit alignment
    753         ldmneia     R0, {R3, R4}
    754         eorne       r3, r3, r1
    755         eorne       r4, r4, r1
    756         stmneia     R0!, {R3, R4}             ; xxxx-1000 --->
    757         subne       R2, R2, #2
    758 
    759         ; Now we are 128-bit aligned
    760         mov         R4, R1                    ; NOTE(review): appears redundant, R4 is not read again in this routine before being reloaded or restored
    761         mov         R5, R1                    ; NOTE(review): appears redundant, R5 is not read again in this routine before being reloaded or restored
    762 arm_memxor32_loop1:
    763         ; XOR 4 32-bit values per loop iteration
    764         subs        R2, R2, #4
    765 
    766         ldmgeia     R0,  {R3, R4, R5, R6}     ; Read without write-back ...
    767         eorge       r3, r3, r1
    768         eorge       r4, r4, r1
    769         eorge       r5, r5, r1
    770         eorge       r6, r6, r1
    771         stmgeia     R0!, {R3, R4, R5, R6}     ; ... and advance R0 with the store
    772 
    773         bge         arm_memxor32_loop1
    774         add         R2, R2, #4                ; Undo the last subtraction: R2 = 0..3 words left
    775 
    776 arm_memxor32_loop2:
    777         ; XOR up to 3 remaining 32-bit values; each SUBS decides about the group that follows it
    778 
    779         subs        R2, R2, #1
    780         ldrge       R3, [R0]
    781         eorge       r3, r3, r1
    782         strge       R3, [R0], #4
    783 
    784         subs        R2, R2, #1
    785         ldrge       R3, [R0]
    786         eorge       r3, r3, r1
    787         strge       R3, [R0], #4
    788 
    789         subs        R2, R2, #1
    790         ldrge       R3, [R0]
    791         eorge       r3, r3, r1
    792         strge       R3, [R0], #4
    793 
    794         ldmia       SP!, {R4, R5, R6}
    795         bx          LR                        ; No dedicated return value is set up; R0 = address following the last word processed
    796 
    797 
    798         END
  • 相关阅读:
    自动化测试-02-Unittest-单元测试框架介绍
    自动化测试-01-cookie认识
    Java面试汇总
    多线程总结一:基本概念
    用Java编写你自己的简单HTTP服务器
    Spring高级事务管理难点剖析
    Http协议详解
    Java 互联网工程师要具备哪些技能或技术?
    架构师都要懂哪些知识
    java学习路线
  • 原文地址:https://www.cnblogs.com/shangdawei/p/4651343.html
Copyright © 2011-2022 走看看