均值模糊的原理很简单,这里就不再解释了。
我测了一下,使用汇编的代码处理时间为1-2ms左右。
cpp代码处理时间为3-4ms左右。
这里只给出关键代码,其余代码见这里。
void asmMeanFilter(BitmapData *data) { UINT Height=data->Height; UINT s_Height=Height-1; UINT Width=data->Width; UINT s_Width=Width-1; Pix* p=(Pix*)data->Scan0; UINT h,w,i,j; __asm { push esi; push edi; pxor mm7,mm7; mov esi,[p]; mov edi,[p]; mov h,1; h_loop: mov w,1; w_loop: pxor mm1,mm1; mov ecx,h; dec ecx; mov i,ecx; i_loop: mov edx,w; dec edx; mov j,edx; j_loop: mov eax,i; mov ebx,Width; mul ebx; add eax,j; mov ebx,4; //每个像素占4字节 mul ebx; movd mm0,[esi+eax]; punpcklbw mm0,mm7; paddw mm1,mm0; inc j; mov edx,w; add edx,2; cmp edx,j; jnz j_loop; //end j loop inc i; mov ecx,h; add ecx,2; cmp ecx,i; jnz i_loop; //end i loop movq mm2,mm1; movq mm3,mm1; psrlw mm1,4; //寄存器中每个word右移四位 psrlw mm2,5; psrlw mm3,6; paddw mm1,mm2; paddw mm1,mm3; //这里本来是mm1每个word除9的,我用1/16+1/32+1/64模拟了 packuswb mm1,mm7; //将字缩并为字节 mov eax,h; mov ebx,Width; mul ebx; add eax,w; mov ebx,4; mul ebx; movd [edi+eax],mm1; mov [edi+eax+3],0xff; //图像alpha通道置255 inc w; mov ebx,w; cmp ebx,s_Width; jnz w_loop; //end w loop inc h; mov eax,h; cmp eax,s_Height; jnz h_loop; //end h loop pop edi; pop esi; emms; } } VOID cppMeanFilter(BitmapData *data) { Pix* p=(Pix*)data->Scan0; for(UINT h = 1; h < data->Height-1; ++h) { for(UINT w =1; w < data->Width-1; ++w) { unsigned int r,g,b; r=0;g=0;b=0; for (UINT i=h-1;i<=h+1;i++) { for (UINT j=w-1;j<=w+1;j++) { r=p[j+i*data->Width].Red+r; g=p[j+i*data->Width].Green+g; b=p[j+i*data->Width].Blue+b; } } p[w+h*data->Width].Red=r/9; p[w+h*data->Width].Green=g/9; p[w+h*data->Width].Blue=b/9;; } } }
处理效果: