由于 Delphi 里动态数组的初始值并不总是为 0，对于一维数组，每次使用前先 SetLength，然后一般的做法是使用 FillChar 来清零；但如果是超过几十 MB 的数组，用 FillChar 来清零效率就很低。为此我专门写了一些清空数组或者内存的优化代码。
1.使用MMX指令优化的清零内存代码:
procedure ZeroMemoryMMX(Destination: Pointer; aSize: DWORD);
// Fills exactly aSize bytes starting at Destination with zero.
//
//   Destination - first byte to clear
//   aSize       - number of bytes to clear (0 is allowed and writes nothing)
//
// Whole 64-byte blocks are written with non-temporal MMX stores (MOVNTQ),
// which bypass the cache; the remaining 0..63 bytes are written with
// REP STOSB. MMX is only touched when there is at least one whole block,
// so small requests never store beyond the end of the buffer.
//
// All general-purpose registers are saved and restored (PUSHAD/POPAD).
// The byte tail relies on the direction flag being clear, as REP STOSB
// must run upwards.
asm
        pushad
        mov     edi, Destination        // EDI = write pointer
        mov     ecx, aSize              // ECX = total byte count
        mov     ebx, ecx                // keep the full count for the tail
        shr     ecx, 6                  // ECX = number of whole 64-byte blocks
        jz      @tail                   // fewer than 64 bytes: skip MMX entirely
        pxor    mm0, mm0                // MM0 = 0, the value stored everywhere
@blockloop:
        movntq  qword ptr [edi],    mm0
        movntq  qword ptr [edi+8],  mm0
        movntq  qword ptr [edi+16], mm0
        movntq  qword ptr [edi+24], mm0
        movntq  qword ptr [edi+32], mm0
        movntq  qword ptr [edi+40], mm0
        movntq  qword ptr [edi+48], mm0
        movntq  qword ptr [edi+56], mm0
        add     edi, 64
        dec     ecx
        jnz     @blockloop
        sfence                          // make the non-temporal stores globally visible
        emms                            // leave MMX state so x87 code works afterwards
@tail:
        mov     ecx, ebx
        and     ecx, 63                 // ECX = bytes left after the whole blocks
        xor     eax, eax                // AL = 0, the fill byte for STOSB
        rep     stosb                   // no-op when ECX = 0
        popad
end;
2.使用rep指令优化的清零内存代码:
procedure ZeroMemoryRep(Destination: Pointer; Length: DWORD); stdcall;
// Clears Length bytes starting at Destination by storing zero bytes
// with REP STOSB. Every general-purpose register is saved on entry and
// restored on exit.
asm
        pushad                          // save all general-purpose registers
        mov     edi, Destination        // EDI = address of the first byte to clear
        mov     ecx, Length             // ECX = number of bytes to store
        xor     eax, eax                // AL = 0, the byte value written by STOSB
        rep     stosb                   // store AL at [EDI], ECX times
        popad                           // restore the saved registers
end;
以上两种代码经过测试发现：50MB 以上的数据使用 MMX 的效率要高于使用 rep，50MB 以下推荐使用 rep。
procedure ZeroMemoryMMX(Destination: Pointer; aSize: DWORD);
// Fills exactly aSize bytes starting at Destination with zero.
//
//   Destination - first byte to clear
//   aSize       - number of bytes to clear (0 is allowed and writes nothing)
//
// Whole 64-byte blocks are written with non-temporal MMX stores (MOVNTQ),
// which bypass the cache; the remaining 0..63 bytes are written with
// REP STOSB. MMX is only touched when there is at least one whole block,
// so small requests never store beyond the end of the buffer.
//
// All general-purpose registers are saved and restored (PUSHAD/POPAD).
// The byte tail relies on the direction flag being clear, as REP STOSB
// must run upwards.
asm
        pushad
        mov     edi, Destination        // EDI = write pointer
        mov     ecx, aSize              // ECX = total byte count
        mov     ebx, ecx                // keep the full count for the tail
        shr     ecx, 6                  // ECX = number of whole 64-byte blocks
        jz      @tail                   // fewer than 64 bytes: skip MMX entirely
        pxor    mm0, mm0                // MM0 = 0, the value stored everywhere
@blockloop:
        movntq  qword ptr [edi],    mm0
        movntq  qword ptr [edi+8],  mm0
        movntq  qword ptr [edi+16], mm0
        movntq  qword ptr [edi+24], mm0
        movntq  qword ptr [edi+32], mm0
        movntq  qword ptr [edi+40], mm0
        movntq  qword ptr [edi+48], mm0
        movntq  qword ptr [edi+56], mm0
        add     edi, 64
        dec     ecx
        jnz     @blockloop
        sfence                          // make the non-temporal stores globally visible
        emms                            // leave MMX state so x87 code works afterwards
@tail:
        mov     ecx, ebx
        and     ecx, 63                 // ECX = bytes left after the whole blocks
        xor     eax, eax                // AL = 0, the fill byte for STOSB
        rep     stosb                   // no-op when ECX = 0
        popad
end;
2.使用rep指令优化的清零内存代码:
procedure ZeroMemoryRep(Destination: Pointer; Length: DWORD); stdcall;
// Clears Length bytes starting at Destination by storing zero bytes
// with REP STOSB. Every general-purpose register is saved on entry and
// restored on exit.
asm
        pushad                          // save all general-purpose registers
        mov     edi, Destination        // EDI = address of the first byte to clear
        mov     ecx, Length             // ECX = number of bytes to store
        xor     eax, eax                // AL = 0, the byte value written by STOSB
        rep     stosb                   // store AL at [EDI], ECX times
        popad                           // restore the saved registers
end;