一、来源
来源:《PC平台新技术MMX(上册):开发编程指南》第8章 MMX编码技术
书籍信息——
http://e.360buy.com/30027396.html
PC平台新技术MMX(上册):开发编程指南
作 者: 吴乐南 编
出 版 社: 东南大学出版社
ISBN:9787810502528
出版时间:1997-10-01
页 数:149
字 数:237000
所属分类:
电子书 > 计算机与互联网 > 编程语言与程序设计
电子书 > 计算机与互联网 > 计算机工具书
二、整理后的代码
代码——
#include <Windows.h>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <conio.h>
#include <assert.h>

// MMX, SSE, SSE2
#include <emmintrin.h>

// Notation used in the comments below: a 64-bit MMX value is written
// most-significant element first, e.g. [w3,w2,w1,w0] for four packed words.

// Unpack packed unsigned words into two groups of packed unsigned dwords.
// Book section: 8.1 Unpacking data / 8.1.1 Unpacking unsigned numbers
//
// result:      two zero-extended 32-bit dwords taken from the two low words of the source.
// mm1_dst_hi:  (out) two zero-extended 32-bit dwords taken from the two high words of the source.
// mm0_src:     source value (packed 16-bit unsigned numbers).
inline __m64 md_unpack_mud4muw(__m64& mm1_dst_hi, const __m64 mm0_src)
{
    // Interleaving with zero puts a zero word above each source word,
    // which is exactly a zero extension to 32 bits.
    __m64 muwZero = _mm_setzero_si64();
    mm1_dst_hi = _mm_unpackhi_pi16(mm0_src, muwZero);   // high two words -> two dwords
    return _mm_unpacklo_pi16(mm0_src, muwZero);         // low two words  -> two dwords
}

// Unpack packed signed words into two groups of packed signed dwords.
// Book section: 8.1 Unpacking data / 8.1.2 Unpacking signed numbers
//
// result:      two sign-extended 32-bit dwords taken from the two low words of the source.
// mm1_dst_hi:  (out) two sign-extended 32-bit dwords taken from the two high words of the source.
//              It is write-only: its incoming value is never read.
// mm0_src:     source value (packed 16-bit signed numbers).
inline __m64 md_unpack_mid4miw(__m64& mm1_dst_hi, const __m64 mm0_src)
{
    // Interleave the source with itself so that each source word lands in the
    // upper 16 bits of a dword; the arithmetic right shift by 16 then moves it
    // down and replicates its sign bit. The lower 16 bits of each dword are
    // shifted out, so their content is irrelevant - using the source itself
    // avoids reading the (possibly uninitialised) output parameter.
    mm1_dst_hi = _mm_srai_pi32(_mm_unpackhi_pi16(mm0_src, mm0_src), 16);
    return _mm_srai_pi32(_mm_unpacklo_pi16(mm0_src, mm0_src), 16);
}

// Interleaved pack with signed saturation: two groups of packed signed dwords
// -> packed signed words.
// Book section: 8.2 Packing data / 8.2.1 Interleaved pack with saturation
// Example: {[B1,B0], [A1,A0]} becomes [B1',A1',B0',A0'].
// Note: a plain pack (_mm_packs_pi32) would produce [B1',B0',A1',A0'] instead.
//
// result:  packed 16-bit signed numbers. Words 0 and 2 are the saturated dwords
//          of mm0_lo, words 1 and 3 are the saturated dwords of mm1_hi.
// mm0_lo:  low source value (A).
// mm1_hi:  high source value (B).
inline __m64 md_pack_s_cross_miw4mid(__m64 mm0_lo, __m64 mm1_hi)
{
    mm1_hi = _mm_packs_pi32(mm1_hi, mm1_hi);    // saturate and pack -> [B1',B0',B1',B0']
    mm0_lo = _mm_packs_pi32(mm0_lo, mm0_lo);    // saturate and pack -> [A1',A0',A1',A0']
    return _mm_unpacklo_pi16(mm0_lo, mm1_hi);   // interleave the low words of both operands
}

// Interleaved pack without saturation (wrap-around): two groups of packed
// unsigned dwords -> packed unsigned words.
// Book section: 8.2 Packing data / 8.2.2 Interleaved pack without saturation
// Example: {[B1,B0], [A1,A0]} becomes [B1',A1',B0',A0'].
//
// result:  packed 16-bit unsigned numbers. Words 0 and 2 are the low 16 bits of
//          the dwords of mm0_lo, words 1 and 3 are the low 16 bits of the dwords of mm1_hi.
// mm0_lo:  low source value (A).
// mm1_hi:  high source value (B).
inline __m64 md_pack_w_cross_muw4mud(__m64 mm0_lo, __m64 mm1_hi)
{
    mm1_hi = _mm_slli_pi32(mm1_hi, 16);         // move the low 16 bits of each dword to the high 16 bits
    mm0_lo = _mm_and_si64(mm0_lo, _mm_set_pi16(0, (short)0xFFFF, 0, (short)0xFFFF));   // clear the high 16 bits of each dword
    return _mm_or_si64(mm0_lo, mm1_hi);         // merge the two operands
}

// Transpose a 2x2 matrix of packed dwords in place.
// Book section: 8.3 Non-interleaved unpack
//
//  [A1 A0]    [B0 A0]
//  [B1 B0] -> [B1 A1]
//  msb<-lsb
//
// mm0_row0:  (in/out) row 0 of the matrix (A).
// mm1_row1:  (in/out) row 1 of the matrix (B).
inline void md_matrix_transpose_2x2_mmd(__m64& mm0_row0, __m64& mm1_row1)
{
    __m64 tmp = mm0_row0;                                   // keep row 0, it is overwritten next
    mm0_row0 = _mm_unpacklo_pi32(mm0_row0, mm1_row1);       // [B0,A0]: low dwords of both rows
    mm1_row1 = _mm_unpackhi_pi32(tmp     , mm1_row1);       // [B1,A1]: high dwords of both rows
}

// Multiply a complex number by a complex constant (packed words -> packed dwords).
// Book section: 8.4 Multiplying a complex number by a constant
//
// result:   the product; the high 32 bits hold the real part, the low 32 bits the imaginary part.
// mm0_src:  multiplicand, laid out as [?,?,Dr,Di] (the two high words are ignored).
// mm1_c:    constant multiplier, pre-arranged as [Cr,-Ci,Ci,Cr].
inline __m64 md_complex_mul_c_mid4miw(__m64 mm0_src, const __m64 mm1_c)
{
    mm0_src = _mm_unpacklo_pi32(mm0_src, mm0_src);  // duplicate the low dword -> [Dr,Di,Dr,Di]
    return _mm_madd_pi16(mm0_src, mm1_c);           // [(Dr*Cr - Di*Ci), (Dr*Ci + Di*Cr)]
}

// Absolute difference of packed unsigned bytes.
// Book section: 8.5 Absolute difference / 8.5.1 Unsigned numbers
//
// result:  per byte, result[i] = abs(mm0[i] - mm1[i]).
// mm0:     source operand A.
// mm1:     source operand B.
inline __m64 md_absolute_deviation_mub(const __m64 mm0, const __m64 mm1)
{
    // With unsigned saturation, A-B is zero wherever A<=B and B-A is zero
    // wherever B<=A, so at most one of the two differences is non-zero per
    // byte and OR-ing them yields the absolute difference.
    return _mm_or_si64(_mm_subs_pu8(mm0, mm1), _mm_subs_pu8(mm1, mm0));
}

// Absolute difference of packed signed words.
// Book section: 8.5 Absolute difference / 8.5.2 Signed numbers
//
// result:  per word, result[i] = abs(mm0[i] - mm1[i]), computed as max - min
//          with a wrap-around 16-bit subtraction.
// mm0:     source operand A.
// mm1:     source operand B.
inline __m64 md_absolute_deviation_miw(const __m64 mm0, const __m64 mm1)
{
    __m64 miwMaskGt = _mm_cmpgt_pi16(mm0, mm1);                         // all-ones in every word where A > B
    __m64 miwXor    = _mm_and_si64(_mm_xor_si64(mm0, mm1), miwMaskGt);  // swap mask: A^B where A > B, else 0
    __m64 miwMin    = _mm_xor_si64(mm0, miwXor);                        // A, or B where A > B  -> minimum
    __m64 miwMax    = _mm_xor_si64(mm1, miwXor);                        // B, or A where A > B  -> maximum
    return _mm_sub_pi16(miwMax, miwMin);                                // absolute difference = max - min
}

// Absolute value of packed signed words.
// Book section: 8.6 Absolute value
//
// result:  per word, result[i] = abs(mm0[i]). Because the final subtraction
//          saturates, an input of -32768 yields 32767.
// mm0:     source operand.
inline __m64 md_abs_miw(const __m64 mm0)
{
    // Turn each sign bit into a mask: all zeros for non-negative words,
    // all ones (i.e. -1) for negative words.
    __m64 miwSign = _mm_srai_pi16(mm0, 15);
    // Two's-complement negation is "invert, then add one". XOR with the mask
    // inverts only the negative words and subtracting the mask (-1) adds one
    // to only those words; non-negative words pass through unchanged.
    return _mm_subs_pi16(_mm_xor_si64(mm0, miwSign), miwSign);
}

// Clamp packed signed words to the range [iLow, iHigh].
// Book section: 8.7 Clipping / 8.7.1 Clipping signed numbers to an arbitrary signed range
//
// result:  per word, result[i] = (mm0[i] < iLow) ? iLow : ((mm0[i] > iHigh) ? iHigh : mm0[i]).
// mm0:     source operand.
// iLow:    lower bound (inclusive).
// iHigh:   upper bound (inclusive).
inline __m64 md_clamp_miw(const __m64 mm0, short iLow, short iHigh)
{
    const __m64 miwMinInt16 = _mm_set1_pi16((short)0x8000);     // smallest signed 16-bit value
    // Wrap-around add of 0x8000 maps the signed range onto the unsigned range
    // while preserving order, so unsigned saturation can do the clamping.
    __m64 tmp = _mm_add_pi16(mm0, miwMinInt16);
    tmp = _mm_adds_pu16(tmp, _mm_set1_pi16( (short)(0xFFFF-(iHigh+0x8000)) ));                  // saturate at the top: clamps to the upper bound
    tmp = _mm_subs_pu16(tmp, _mm_set1_pi16( (short)(0xFFFF-(iHigh+0x8000)+(iLow+0x8000)) ));    // saturate at zero: clamps to the lower bound
    return _mm_add_pi16(tmp, _mm_set1_pi16( iLow ));            // undo the offsets
}

// Clamp packed unsigned words to the range [uLow, uHigh].
// Book section: 8.7 Clipping / 8.7.2 Clipping unsigned numbers to an arbitrary unsigned range
//
// result:  per word (unsigned), result[i] = (mm0[i] < uLow) ? uLow : ((mm0[i] > uHigh) ? uHigh : mm0[i]).
// mm0:     source operand.
// uLow:    lower bound (inclusive).
// uHigh:   upper bound (inclusive).
inline __m64 md_clamp_muw(const __m64 mm0, unsigned short uLow, unsigned short uHigh)
{
    __m64 tmp = _mm_adds_pu16(mm0, _mm_set1_pi16( (short)(0xFFFFU-uHigh) ));    // saturate at the top: clamps to the upper bound
    tmp = _mm_subs_pu16(tmp, _mm_set1_pi16( (short)(0xFFFFU-uHigh+uLow) ));     // saturate at zero: clamps to the lower bound
    return _mm_add_pi16(tmp, _mm_set1_pi16( (short)uLow ));                     // undo the offset
}

// Return the constant 0.
// Book section: 8.8 Generating constants / [0] produce a zero register
inline __m64 md_setzero_mmq()
{
    // The book's idiom is "x XOR x" on an arbitrary register. In C++ the
    // operand must have a defined value first, hence the initialisation;
    // _mm_setzero_si64() alone would give the same result.
    __m64 tmp=_mm_setzero_si64();
    return _mm_xor_si64(tmp, tmp);
}

// Return the constant with all bits set (which is -1 in every packed data type).
// Book section: 8.8 Generating constants / [1] set a register to all ones
inline __m64 md_setfull_mmq()
{
    // Comparing a value for equality with itself sets every bit.
    __m64 tmp=_mm_setzero_si64();
    return _mm_cmpeq_pi8(tmp, tmp);
}

// Return the constant with every packed byte equal to 1.
// Book section: 8.8 Generating constants / [2] produce the constant 1 in each packed element
inline __m64 md_set_1_mib()
{
    __m64 mibZero = _mm_setzero_si64();
    __m64 mibNegativeOne = _mm_cmpeq_pi8(mibZero, mibZero);     // every byte = -1
    return _mm_sub_pi8(mibZero, mibNegativeOne);                // 0 - (-1) = 1 per byte
}

// Return the constant with every packed word equal to pow(2,n)-1.
// Book section: 8.8 Generating constants / [3] produce pow(2,n)-1 in each packed word
//
// n:  number of low bits to set; must satisfy 1 <= n <= 16 (checked by assert).
inline __m64 md_set_pow2n_sub1_miw(int n)
{
    assert((n>=1) && (n<=16));
    __m64 mibZero = _mm_setzero_si64();
    __m64 mibFull = _mm_cmpeq_pi8(mibZero, mibZero);    // all ones
    return _mm_srli_pi16(mibFull, 16-n);                // logical shift leaves the low n bits set in each word
}

// Return the constant with every packed word equal to -pow(2,n).
// Book section: 8.8 Generating constants / [4] produce -pow(2,n) in each packed word
//
// n:  shift count; must satisfy 0 <= n <= 15 (checked by assert).
inline __m64 md_set_neg_pow2n_miw(int n)
{
    assert((n>=0) && (n<=15));
    __m64 mibZero = _mm_setzero_si64();
    __m64 mibFull = _mm_cmpeq_pi8(mibZero, mibZero);    // all ones, i.e. -1 per word
    return _mm_slli_pi16(mibFull, n);                   // -1 << n = -pow(2,n) per word
}

// Exercise every helper above and print inputs and results as 64-bit hex.
//
// cnt:  how many times each helper is invoked inside its loop. Results are
//       only meaningful for cnt >= 1; the locals are zero-initialised so that
//       a non-positive cnt prints zeros instead of indeterminate values.
void doTest(int cnt)
{
    __m64 t0 = _mm_setzero_si64();
    __m64 t1 = _mm_setzero_si64();
    __m64 t2 = _mm_setzero_si64();
    int i;

    // Unpack packed unsigned words into two groups of packed unsigned dwords
    printf("md_unpack_mud4muw:\n");
    t0 = _mm_set_pi32(0x01234567, 0x89ABCDEF);
    printf("[%.8X%.8X] -> ", t0.m64_u32[1], t0.m64_u32[0]);
    for(i=0; i<cnt; ++i)
    {
        t2 = md_unpack_mud4muw(t1, t0);
    }
    printf("[%.8X%.8X],[%.8X%.8X]\n", t1.m64_u32[1], t1.m64_u32[0], t2.m64_u32[1], t2.m64_u32[0]);
    printf("\n");

    // Unpack packed signed words into two groups of packed signed dwords
    printf("md_unpack_mid4miw:\n");
    t0 = _mm_set_pi32(0x01234567, 0x89ABCDEF);
    printf("[%.8X%.8X] -> ", t0.m64_u32[1], t0.m64_u32[0]);
    for(i=0; i<cnt; ++i)
    {
        t2 = md_unpack_mid4miw(t1, t0);
    }
    printf("[%.8X%.8X],[%.8X%.8X]\n", t1.m64_u32[1], t1.m64_u32[0], t2.m64_u32[1], t2.m64_u32[0]);
    printf("\n");

    // Interleaved saturating pack of signed dwords into signed words
    printf("md_pack_s_cross_miw4mid:\n");
    t1 = _mm_set_pi32(0x00001111, 0x000F2222);
    t2 = _mm_set_pi32(0xFFFFCCCC, 0xFFFFDDDD);
    printf("[%.8X%.8X],[%.8X%.8X] -> ", t1.m64_u32[1], t1.m64_u32[0], t2.m64_u32[1], t2.m64_u32[0]);
    for(i=0; i<cnt; ++i)
    {
        t0 = md_pack_s_cross_miw4mid(t2, t1);
    }
    printf("[%.8X%.8X]\n", t0.m64_u32[1], t0.m64_u32[0]);
    printf("\n");

    // Interleaved wrap-around pack of unsigned dwords into unsigned words
    printf("md_pack_w_cross_muw4mud:\n");
    t1 = _mm_set_pi32(0x00001111, 0x000F2222);
    t2 = _mm_set_pi32(0xFFFFCCCC, 0xFFFFDDDD);
    printf("[%.8X%.8X],[%.8X%.8X] -> ", t1.m64_u32[1], t1.m64_u32[0], t2.m64_u32[1], t2.m64_u32[0]);
    for(i=0; i<cnt; ++i)
    {
        t0 = md_pack_w_cross_muw4mud(t2, t1);
    }
    printf("[%.8X%.8X]\n", t0.m64_u32[1], t0.m64_u32[0]);
    printf("\n");

    // 2x2 matrix transpose of packed dwords (applied cnt times in place)
    printf("md_matrix_transpose_2x2_mmd:\n");
    t1 = _mm_set_pi32(0x00001111, 0x000F2222);
    t2 = _mm_set_pi32(0xFFFFCCCC, 0xFFFFDDDD);
    printf("[%.8X%.8X],[%.8X%.8X] -> ", t1.m64_u32[1], t1.m64_u32[0], t2.m64_u32[1], t2.m64_u32[0]);
    for(i=0; i<cnt; ++i)
    {
        md_matrix_transpose_2x2_mmd(t1, t2);
    }
    printf("[%.8X%.8X],[%.8X%.8X]\n", t1.m64_u32[1], t1.m64_u32[0], t2.m64_u32[1], t2.m64_u32[0]);
    printf("\n");

    // Complex number times complex constant (packed words -> packed dwords)
    printf("md_complex_mul_c_mid4miw:\n");
    t1 = _mm_set_pi16(0,0, 1, 1);   // 1+i
    t2 = _mm_set_pi16(3,-2, 2,3);   // 3+2i. (1+i)*(3+2i) = 1+5i
    printf("[%.8X%.8X],[%.8X%.8X] -> ", t1.m64_u32[1], t1.m64_u32[0], t2.m64_u32[1], t2.m64_u32[0]);
    for(i=0; i<cnt; ++i)
    {
        t0 = md_complex_mul_c_mid4miw(t1, t2);
    }
    printf("[%.8X%.8X]\n", t0.m64_u32[1], t0.m64_u32[0]);
    printf("\n");

    // Absolute difference of packed unsigned bytes
    printf("md_absolute_deviation_mub:\n");
    t1 = _mm_set_pi8(1,2,3,4,5,6,7,8);
    t2 = _mm_set_pi8(8,7,6,5,4,3,2,1);
    printf("[%.8X%.8X],[%.8X%.8X] -> ", t1.m64_u32[1], t1.m64_u32[0], t2.m64_u32[1], t2.m64_u32[0]);
    for(i=0; i<cnt; ++i)
    {
        t0 = md_absolute_deviation_mub(t1, t2);
    }
    printf("[%.8X%.8X]\n", t0.m64_u32[1], t0.m64_u32[0]);
    printf("\n");

    // Absolute difference of packed signed words
    printf("md_absolute_deviation_miw:\n");
    t1 = _mm_set_pi16(-1, 1, 3, 5);
    t2 = _mm_set_pi16( 2, 2, 2, 2);
    printf("[%.8X%.8X],[%.8X%.8X] -> ", t1.m64_u32[1], t1.m64_u32[0], t2.m64_u32[1], t2.m64_u32[0]);
    for(i=0; i<cnt; ++i)
    {
        t0 = md_absolute_deviation_miw(t1, t2);
    }
    printf("[%.8X%.8X]\n", t0.m64_u32[1], t0.m64_u32[0]);
    printf("\n");

    // Absolute value of packed signed words
    printf("md_abs_miw4miw:\n");
    t0 = _mm_set_pi16(-1, 1, 3, -5);
    printf("[%.8X%.8X] -> ", t0.m64_u32[1], t0.m64_u32[0]);
    for(i=0; i<cnt; ++i)
    {
        t1 = md_abs_miw(t0);
    }
    printf("[%.8X%.8X]\n", t1.m64_u32[1], t1.m64_u32[0]);
    printf("\n");

    // Clamp packed signed words to [iLow, iHigh]
    printf("md_clamp_miw:\n");
    t0 = _mm_set_pi16(-15, 1, 254, 257);
    printf("[%.8X%.8X] -> ", t0.m64_u32[1], t0.m64_u32[0]);
    for(i=0; i<cnt; ++i)
    {
        t1 = md_clamp_miw(t0, -1, 255);
    }
    printf("[%.8X%.8X]\n", t1.m64_u32[1], t1.m64_u32[0]);
    printf("\n");

    // Clamp packed unsigned words to [uLow, uHigh]
    printf("md_clamp_muw:\n");
    t0 = _mm_set_pi16(1, 254, 257, 32769U);
    printf("[%.8X%.8X] -> ", t0.m64_u32[1], t0.m64_u32[0]);
    for(i=0; i<cnt; ++i)
    {
        t1 = md_clamp_muw(t0, 16, 255);
    }
    printf("[%.8X%.8X]\n", t1.m64_u32[1], t1.m64_u32[0]);
    printf("\n");

    // Constant: 0
    printf("md_setzero_mmq:\t");
    t0 = md_setzero_mmq();
    printf("[%.8X%.8X]\n", t0.m64_u32[1], t0.m64_u32[0]);

    // Constant: all ones
    printf("md_setfull_mmq:\t");
    t0 = md_setfull_mmq();
    printf("[%.8X%.8X]\n", t0.m64_u32[1], t0.m64_u32[0]);

    // Constant: every packed byte is 1
    printf("md_set_1_mib:\t");
    t0 = md_set_1_mib();
    printf("[%.8X%.8X]\n", t0.m64_u32[1], t0.m64_u32[0]);

    // Constant: every packed word is pow(2,n)-1
    printf("md_set_pow2n_sub1_miw:\t");
    t0 = md_set_pow2n_sub1_miw(8);
    printf("[%.8X%.8X]\n", t0.m64_u32[1], t0.m64_u32[0]);

    // Constant: every packed word is -pow(2,n)
    printf("md_set_neg_pow2n_miw:\t");
    t0 = md_set_neg_pow2n_miw(15);
    printf("[%.8X%.8X]\n", t0.m64_u32[1], t0.m64_u32[0]);

    // MMX registers alias the x87 floating-point register stack; clear the
    // MMX state (EMMS) so that any floating-point code run by the caller
    // afterwards starts from a clean state.
    _mm_empty();
}

// Program entry point: runs the self-test once or twice.
int main(int argc, char* argv[])
{
    // The loop count comes from rand() so that it is not a compile-time
    // constant, which keeps the compiler from optimising the test loops away.
    // No srand() call is made here, so the count is the same on every run.
    doTest((rand()&1) + 1);
    return 0;
}
三、编译器生成的汇编代码
VC6编译器生成的汇编代码——
; Listing generated by Microsoft (R) Optimizing Compiler Version 12.00.9044.0 TITLE E:\zylKanbox\Doc\Program\ASM\x86\SIMD\my\md\md01_mmxguide_ch08\md01_mmxguide_ch08.cpp .386P include listing.inc if @Version gt 510 .model FLAT else _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS CONST SEGMENT DWORD USE32 PUBLIC 'CONST' CONST ENDS _BSS SEGMENT DWORD USE32 PUBLIC 'BSS' _BSS ENDS $$SYMBOLS SEGMENT BYTE USE32 'DEBSYM' $$SYMBOLS ENDS _TLS SEGMENT DWORD USE32 PUBLIC 'TLS' _TLS ENDS ; COMDAT ??_C@_0BE@EFJN@md_unpack_mud4muw?3?6?$AA@ _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS ; COMDAT ??_C@_0P@GKNG@?$FL?$CF?48X?$CF?48X?$FN?5?9?$DO?5?$AA@ _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS ; COMDAT ??_C@_0BH@CPNE@?$FL?$CF?48X?$CF?48X?$FN?0?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS ; COMDAT ??_C@_01BJG@?6?$AA@ _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS ; COMDAT ??_C@_0BE@EMED@md_unpack_mid4miw?3?6?$AA@ _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS ; COMDAT ??_C@_0BK@MCMC@md_pack_s_cross_miw4mid?3?6?$AA@ _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS ; COMDAT ??_C@_0BK@OAF@?$FL?$CF?48X?$CF?48X?$FN?0?$FL?$CF?48X?$CF?48X?$FN?5?9?$DO?5?$AA@ _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS ; COMDAT ??_C@_0M@GLHH@?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS ; COMDAT ??_C@_0BK@BLJL@md_pack_w_cross_muw4mud?3?6?$AA@ _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS ; COMDAT ??_C@_0BO@GMLJ@md_matrix_transpose_2x2_mmd?3?6?$AA@ _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS ; COMDAT ??_C@_0BL@FJHD@md_complex_mul_c_mid4miw?3?6?$AA@ _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS ; COMDAT ??_C@_0BM@HKHJ@md_absolute_deviation_mub?3?6?$AA@ _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS ; COMDAT ??_C@_0BM@KLKG@md_absolute_deviation_miw?3?6?$AA@ _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS ; COMDAT 
??_C@_0BB@KOFH@md_abs_miw4miw?3?6?$AA@ _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS ; COMDAT ??_C@_0P@DEEP@md_clamp_miw?3?6?$AA@ _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS ; COMDAT ??_C@_0P@NOLG@md_clamp_muw?3?6?$AA@ _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS ; COMDAT ??_C@_0BB@BLNI@md_setzero_mmq?3?7?$AA@ _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS ; COMDAT ??_C@_0BB@ICKB@md_setfull_mmq?3?7?$AA@ _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS ; COMDAT ??_C@_0P@NKIN@md_set_1_mib?3?7?$AA@ _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS ; COMDAT ??_C@_0BI@DPAE@md_set_pow2n_sub1_miw?3?7?$AA@ _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS ; COMDAT ??_C@_0BH@NLNM@md_set_neg_pow2n_miw?3?7?$AA@ _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' _DATA ENDS ; COMDAT ??8@YAHABU_GUID@@0@Z _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS ; COMDAT __mm_cvtpi16_ps _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS ; COMDAT __mm_cvtpu16_ps _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS ; COMDAT __mm_cvtps_pi16 _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS ; COMDAT ?md_unpack_mud4muw@@YA?AT__m64@@AAT1@T1@@Z _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS ; COMDAT ?md_unpack_mid4miw@@YA?AT__m64@@AAT1@T1@@Z _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS ; COMDAT ?md_pack_s_cross_miw4mid@@YA?AT__m64@@T1@0@Z _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS ; COMDAT ?md_pack_w_cross_muw4mud@@YA?AT__m64@@T1@0@Z _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS ; COMDAT ?md_matrix_transpose_2x2_mmd@@YAXAAT__m64@@0@Z _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS ; COMDAT ?md_complex_mul_c_mid4miw@@YA?AT__m64@@T1@T1@@Z _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS ; COMDAT ?md_absolute_deviation_mub@@YA?AT__m64@@T1@0@Z _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS ; COMDAT ?md_absolute_deviation_miw@@YA?AT__m64@@T1@0@Z _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS ; COMDAT ?md_abs_miw@@YA?AT__m64@@T1@@Z _TEXT SEGMENT PARA USE32 
PUBLIC 'CODE' _TEXT ENDS ; COMDAT ?md_clamp_miw@@YA?AT__m64@@T1@FF@Z _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS ; COMDAT ?md_clamp_muw@@YA?AT__m64@@T1@GG@Z _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS ; COMDAT ?md_setzero_mmq@@YA?AT__m64@@XZ _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS ; COMDAT ?md_setfull_mmq@@YA?AT__m64@@XZ _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS ; COMDAT ?md_set_1_mib@@YA?AT__m64@@XZ _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS ; COMDAT ?md_set_pow2n_sub1_miw@@YA?AT__m64@@H@Z _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS ; COMDAT ?md_set_neg_pow2n_miw@@YA?AT__m64@@H@Z _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS ; COMDAT ?doTest@@YAXH@Z _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS ; COMDAT _main _TEXT SEGMENT PARA USE32 PUBLIC 'CODE' _TEXT ENDS FLAT GROUP _DATA, CONST, _BSS ASSUME CS: FLAT, DS: FLAT, SS: FLAT endif INCLUDELIB LIBC INCLUDELIB OLDNAMES PUBLIC ?doTest@@YAXH@Z ; doTest PUBLIC ??_C@_0BE@EFJN@md_unpack_mud4muw?3?6?$AA@ ; `string' PUBLIC ??_C@_0P@GKNG@?$FL?$CF?48X?$CF?48X?$FN?5?9?$DO?5?$AA@ ; `string' PUBLIC ??_C@_0BH@CPNE@?$FL?$CF?48X?$CF?48X?$FN?0?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ ; `string' PUBLIC ??_C@_01BJG@?6?$AA@ ; `string' PUBLIC ??_C@_0BE@EMED@md_unpack_mid4miw?3?6?$AA@ ; `string' PUBLIC ??_C@_0BK@MCMC@md_pack_s_cross_miw4mid?3?6?$AA@ ; `string' PUBLIC ??_C@_0BK@OAF@?$FL?$CF?48X?$CF?48X?$FN?0?$FL?$CF?48X?$CF?48X?$FN?5?9?$DO?5?$AA@ ; `string' PUBLIC ??_C@_0M@GLHH@?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ ; `string' PUBLIC ??_C@_0BK@BLJL@md_pack_w_cross_muw4mud?3?6?$AA@ ; `string' PUBLIC ??_C@_0BO@GMLJ@md_matrix_transpose_2x2_mmd?3?6?$AA@ ; `string' PUBLIC ??_C@_0BL@FJHD@md_complex_mul_c_mid4miw?3?6?$AA@ ; `string' PUBLIC ??_C@_0BM@HKHJ@md_absolute_deviation_mub?3?6?$AA@ ; `string' PUBLIC ??_C@_0BM@KLKG@md_absolute_deviation_miw?3?6?$AA@ ; `string' PUBLIC ??_C@_0BB@KOFH@md_abs_miw4miw?3?6?$AA@ ; `string' PUBLIC ??_C@_0P@DEEP@md_clamp_miw?3?6?$AA@ ; `string' PUBLIC 
??_C@_0P@NOLG@md_clamp_muw?3?6?$AA@ ; `string' PUBLIC ??_C@_0BB@BLNI@md_setzero_mmq?3?7?$AA@ ; `string' PUBLIC ??_C@_0BB@ICKB@md_setfull_mmq?3?7?$AA@ ; `string' PUBLIC ??_C@_0P@NKIN@md_set_1_mib?3?7?$AA@ ; `string' PUBLIC ??_C@_0BI@DPAE@md_set_pow2n_sub1_miw?3?7?$AA@ ; `string' PUBLIC ??_C@_0BH@NLNM@md_set_neg_pow2n_miw?3?7?$AA@ ; `string' EXTRN _printf:NEAR ; COMDAT ??_C@_0BE@EFJN@md_unpack_mud4muw?3?6?$AA@ ; File E:\zylKanbox\Doc\Program\ASM\x86\SIMD\my\md\md01_mmxguide_ch08\md01_mmxguide_ch08.cpp _DATA SEGMENT ??_C@_0BE@EFJN@md_unpack_mud4muw?3?6?$AA@ DB 'md_unpack_mud4muw:', 0aH, 00H ; `string' _DATA ENDS ; COMDAT ??_C@_0P@GKNG@?$FL?$CF?48X?$CF?48X?$FN?5?9?$DO?5?$AA@ _DATA SEGMENT ??_C@_0P@GKNG@?$FL?$CF?48X?$CF?48X?$FN?5?9?$DO?5?$AA@ DB '[%.8X%.8X] -> ', 00H ; `string' _DATA ENDS ; COMDAT ??_C@_0BH@CPNE@?$FL?$CF?48X?$CF?48X?$FN?0?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ _DATA SEGMENT ??_C@_0BH@CPNE@?$FL?$CF?48X?$CF?48X?$FN?0?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ DB '[' DB '%.8X%.8X],[%.8X%.8X]', 0aH, 00H ; `string' _DATA ENDS ; COMDAT ??_C@_01BJG@?6?$AA@ _DATA SEGMENT ??_C@_01BJG@?6?$AA@ DB 0aH, 00H ; `string' _DATA ENDS ; COMDAT ??_C@_0BE@EMED@md_unpack_mid4miw?3?6?$AA@ _DATA SEGMENT ??_C@_0BE@EMED@md_unpack_mid4miw?3?6?$AA@ DB 'md_unpack_mid4miw:', 0aH, 00H ; `string' _DATA ENDS ; COMDAT ??_C@_0BK@MCMC@md_pack_s_cross_miw4mid?3?6?$AA@ _DATA SEGMENT ??_C@_0BK@MCMC@md_pack_s_cross_miw4mid?3?6?$AA@ DB 'md_pack_s_cross_miw4m' DB 'id:', 0aH, 00H ; `string' _DATA ENDS ; COMDAT ??_C@_0BK@OAF@?$FL?$CF?48X?$CF?48X?$FN?0?$FL?$CF?48X?$CF?48X?$FN?5?9?$DO?5?$AA@ _DATA SEGMENT ??_C@_0BK@OAF@?$FL?$CF?48X?$CF?48X?$FN?0?$FL?$CF?48X?$CF?48X?$FN?5?9?$DO?5?$AA@ DB '[' DB '%.8X%.8X],[%.8X%.8X] -> ', 00H ; `string' _DATA ENDS ; COMDAT ??_C@_0M@GLHH@?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ _DATA SEGMENT ??_C@_0M@GLHH@?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ DB '[%.8X%.8X]', 0aH, 00H ; `string' _DATA ENDS ; COMDAT ??_C@_0BK@BLJL@md_pack_w_cross_muw4mud?3?6?$AA@ _DATA SEGMENT 
??_C@_0BK@BLJL@md_pack_w_cross_muw4mud?3?6?$AA@ DB 'md_pack_w_cross_muw4m' DB 'ud:', 0aH, 00H ; `string' _DATA ENDS ; COMDAT ??_C@_0BO@GMLJ@md_matrix_transpose_2x2_mmd?3?6?$AA@ _DATA SEGMENT ??_C@_0BO@GMLJ@md_matrix_transpose_2x2_mmd?3?6?$AA@ DB 'md_matrix_transpo' DB 'se_2x2_mmd:', 0aH, 00H ; `string' _DATA ENDS ; COMDAT ??_C@_0BL@FJHD@md_complex_mul_c_mid4miw?3?6?$AA@ _DATA SEGMENT ??_C@_0BL@FJHD@md_complex_mul_c_mid4miw?3?6?$AA@ DB 'md_complex_mul_c_mid' DB '4miw:', 0aH, 00H ; `string' _DATA ENDS ; COMDAT ??_C@_0BM@HKHJ@md_absolute_deviation_mub?3?6?$AA@ _DATA SEGMENT ??_C@_0BM@HKHJ@md_absolute_deviation_mub?3?6?$AA@ DB 'md_absolute_deviati' DB 'on_mub:', 0aH, 00H ; `string' _DATA ENDS ; COMDAT ??_C@_0BM@KLKG@md_absolute_deviation_miw?3?6?$AA@ _DATA SEGMENT ??_C@_0BM@KLKG@md_absolute_deviation_miw?3?6?$AA@ DB 'md_absolute_deviati' DB 'on_miw:', 0aH, 00H ; `string' _DATA ENDS ; COMDAT ??_C@_0BB@KOFH@md_abs_miw4miw?3?6?$AA@ _DATA SEGMENT ??_C@_0BB@KOFH@md_abs_miw4miw?3?6?$AA@ DB 'md_abs_miw4miw:', 0aH, 00H ; `string' _DATA ENDS ; COMDAT ??_C@_0P@DEEP@md_clamp_miw?3?6?$AA@ _DATA SEGMENT ??_C@_0P@DEEP@md_clamp_miw?3?6?$AA@ DB 'md_clamp_miw:', 0aH, 00H ; `string' _DATA ENDS ; COMDAT ??_C@_0P@NOLG@md_clamp_muw?3?6?$AA@ _DATA SEGMENT ??_C@_0P@NOLG@md_clamp_muw?3?6?$AA@ DB 'md_clamp_muw:', 0aH, 00H ; `string' _DATA ENDS ; COMDAT ??_C@_0BB@BLNI@md_setzero_mmq?3?7?$AA@ _DATA SEGMENT ??_C@_0BB@BLNI@md_setzero_mmq?3?7?$AA@ DB 'md_setzero_mmq:', 09H, 00H ; `string' _DATA ENDS ; COMDAT ??_C@_0BB@ICKB@md_setfull_mmq?3?7?$AA@ _DATA SEGMENT ??_C@_0BB@ICKB@md_setfull_mmq?3?7?$AA@ DB 'md_setfull_mmq:', 09H, 00H ; `string' _DATA ENDS ; COMDAT ??_C@_0P@NKIN@md_set_1_mib?3?7?$AA@ _DATA SEGMENT ??_C@_0P@NKIN@md_set_1_mib?3?7?$AA@ DB 'md_set_1_mib:', 09H, 00H ; `string' _DATA ENDS ; COMDAT ??_C@_0BI@DPAE@md_set_pow2n_sub1_miw?3?7?$AA@ _DATA SEGMENT ??_C@_0BI@DPAE@md_set_pow2n_sub1_miw?3?7?$AA@ DB 'md_set_pow2n_sub1_miw:', 09H DB 00H ; `string' _DATA ENDS ; COMDAT 
??_C@_0BH@NLNM@md_set_neg_pow2n_miw?3?7?$AA@ _DATA SEGMENT ??_C@_0BH@NLNM@md_set_neg_pow2n_miw?3?7?$AA@ DB 'md_set_neg_pow2n_miw:', 09H DB 00H ; `string' ; Function compile flags: /Ogty _DATA ENDS ; COMDAT ?doTest@@YAXH@Z _TEXT SEGMENT _cnt$ = 8 _t0$ = -40 _t1$ = -32 _t2$ = -24 ?doTest@@YAXH@Z PROC NEAR ; doTest, COMDAT ; 232 : { push ebp mov ebp, esp and esp, -8 ; fffffff8H sub esp, 40 ; 00000028H push esi push edi ; 233 : __m64 t0,t1,t2; ; 234 : int i; ; 235 : ; 236 : // 紧缩无符号字 解包为 两组紧缩无符号双字 ; 237 : printf("md_unpack_mud4muw:\n"); push OFFSET FLAT:??_C@_0BE@EFJN@md_unpack_mud4muw?3?6?$AA@ ; `string' call _printf ; 238 : t0 = _mm_set_pi32(0x01234567, 0x89ABCDEF); mov DWORD PTR -24+[esp+52], -1985229329 ; 89abcdefH mov DWORD PTR -24+[esp+56], 19088743 ; 01234567H movq mm0, MMWORD PTR -24+[esp+52] movq MMWORD PTR -8+[esp+52], mm0 movq MMWORD PTR _t0$[esp+52], mm0 ; 239 : printf("[%.8X%.8X] -> ", t0.m64_u32[1], t0.m64_u32[0]); mov eax, DWORD PTR _t0$[esp+52] mov ecx, DWORD PTR _t0$[esp+56] push eax push ecx push OFFSET FLAT:??_C@_0P@GKNG@?$FL?$CF?48X?$CF?48X?$FN?5?9?$DO?5?$AA@ ; `string' call _printf ; 240 : for(i=0; i<cnt; ++i) mov esi, DWORD PTR _cnt$[ebp] xor edi, edi add esp, 16 ; 00000010H cmp esi, edi jle SHORT $L43808 ; 241 : { ; 242 : t2 = md_unpack_mud4muw(t1, t0); movq mm1, MMWORD PTR _t0$[esp+48] pxor mm0, mm0 movq mm2, mm0 movq mm3, mm1 punpckhwd mm3, mm2 movq MMWORD PTR _t1$[esp+48], mm3 punpcklwd mm1, mm0 movq MMWORD PTR _t2$[esp+48], mm1 $L43808: ; 243 : } ; 244 : printf("[%.8X%.8X],[%.8X%.8X]\n", t1.m64_u32[1], t1.m64_u32[0], t2.m64_u32[1], t2.m64_u32[0]); mov edx, DWORD PTR _t2$[esp+48] mov eax, DWORD PTR _t2$[esp+52] mov ecx, DWORD PTR _t1$[esp+48] push edx mov edx, DWORD PTR _t1$[esp+56] push eax push ecx push edx push OFFSET FLAT:??_C@_0BH@CPNE@?$FL?$CF?48X?$CF?48X?$FN?0?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ ; `string' call _printf ; 245 : printf("\n"); push OFFSET FLAT:??_C@_01BJG@?6?$AA@ ; `string' call _printf ; 246 : ; 247 : // 紧缩带符号字 解包为 两组紧缩带符号双字 ; 
248 : printf("md_unpack_mid4miw:\n"); push OFFSET FLAT:??_C@_0BE@EMED@md_unpack_mid4miw?3?6?$AA@ ; `string' call _printf ; 249 : t0 = _mm_set_pi32(0x01234567, 0x89ABCDEF); movq mm0, MMWORD PTR -8+[esp+76] movq MMWORD PTR _t0$[esp+76], mm0 ; 250 : printf("[%.8X%.8X] -> ", t0.m64_u32[1], t0.m64_u32[0]); mov eax, DWORD PTR _t0$[esp+76] mov ecx, DWORD PTR _t0$[esp+80] push eax push ecx push OFFSET FLAT:??_C@_0P@GKNG@?$FL?$CF?48X?$CF?48X?$FN?5?9?$DO?5?$AA@ ; `string' call _printf add esp, 40 ; 00000028H ; 251 : for(i=0; i<cnt; ++i) cmp esi, edi jle SHORT $L43818 ; 252 : { ; 253 : t2 = md_unpack_mid4miw(t1, t0); movq mm0, MMWORD PTR _t0$[esp+48] movq mm1, mm0 movq mm2, mm0 mov eax, esi punpcklwd mm2, mm1 psrad mm2, 16 ; 00000010H movq MMWORD PTR _t2$[esp+48], mm2 $L43816: movq mm1, mm0 dec eax movq mm2, MMWORD PTR _t1$[esp+48] punpckhwd mm2, mm1 psrad mm2, 16 ; 00000010H movq MMWORD PTR _t1$[esp+48], mm2 jne SHORT $L43816 $L43818: ; 254 : } ; 255 : printf("[%.8X%.8X],[%.8X%.8X]\n", t1.m64_u32[1], t1.m64_u32[0], t2.m64_u32[1], t2.m64_u32[0]); mov edx, DWORD PTR _t2$[esp+48] mov eax, DWORD PTR _t2$[esp+52] mov ecx, DWORD PTR _t1$[esp+48] push edx mov edx, DWORD PTR _t1$[esp+56] push eax push ecx push edx push OFFSET FLAT:??_C@_0BH@CPNE@?$FL?$CF?48X?$CF?48X?$FN?0?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ ; `string' call _printf ; 256 : printf("\n"); push OFFSET FLAT:??_C@_01BJG@?6?$AA@ ; `string' call _printf ; 257 : ; 258 : // 两组紧缩带符号双字 交叉饱和紧缩为 紧缩带符号字 ; 259 : printf("md_pack_s_cross_miw4mid:\n"); push OFFSET FLAT:??_C@_0BK@MCMC@md_pack_s_cross_miw4mid?3?6?$AA@ ; `string' call _printf ; 260 : t1 = _mm_set_pi32(0x00001111, 0x000F2222); mov DWORD PTR -24+[esp+76], 991778 ; 000f2222H mov DWORD PTR -24+[esp+80], 4369 ; 00001111H movq mm0, MMWORD PTR -24+[esp+76] ; 261 : t2 = _mm_set_pi32(0xFFFFCCCC, 0xFFFFDDDD); mov DWORD PTR -24+[esp+76], -8739 ; ffffddddH movq MMWORD PTR _t1$[esp+76], mm0 mov DWORD PTR -24+[esp+80], -13108 ; ffffccccH ; 262 : printf("[%.8X%.8X],[%.8X%.8X] -> ", 
t1.m64_u32[1], t1.m64_u32[0], t2.m64_u32[1], t2.m64_u32[0]); mov edx, DWORD PTR _t1$[esp+76] movq MMWORD PTR -16+[esp+76], mm0 movq mm0, MMWORD PTR -24+[esp+76] movq MMWORD PTR -8+[esp+76], mm0 movq MMWORD PTR _t2$[esp+76], mm0 mov eax, DWORD PTR _t2$[esp+76] mov ecx, DWORD PTR _t2$[esp+80] push eax mov eax, DWORD PTR _t1$[esp+84] push ecx push edx push eax push OFFSET FLAT:??_C@_0BK@OAF@?$FL?$CF?48X?$CF?48X?$FN?0?$FL?$CF?48X?$CF?48X?$FN?5?9?$DO?5?$AA@ ; `string' call _printf add esp, 48 ; 00000030H ; 263 : for(i=0; i<cnt; ++i) cmp esi, edi jle SHORT $L43824 ; 264 : { ; 265 : t0 = md_pack_s_cross_miw4mid(t2, t1); movq mm0, MMWORD PTR _t1$[esp+48] movq mm1, mm0 packssdw mm0, mm1 movq mm1, mm0 movq mm0, MMWORD PTR _t2$[esp+48] movq mm2, mm0 packssdw mm0, mm2 punpcklwd mm0, mm1 movq MMWORD PTR _t0$[esp+48], mm0 $L43824: ; 266 : } ; 267 : printf("[%.8X%.8X]\n", t0.m64_u32[1], t0.m64_u32[0]); mov ecx, DWORD PTR _t0$[esp+48] mov edx, DWORD PTR _t0$[esp+52] push ecx push edx push OFFSET FLAT:??_C@_0M@GLHH@?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ ; `string' call _printf ; 268 : printf("\n"); push OFFSET FLAT:??_C@_01BJG@?6?$AA@ ; `string' call _printf ; 269 : ; 270 : // 两组紧缩无符号双字 交叉环绕紧缩为 紧缩无符号字 ; 271 : printf("md_pack_w_cross_muw4mud:\n"); push OFFSET FLAT:??_C@_0BK@BLJL@md_pack_w_cross_muw4mud?3?6?$AA@ ; `string' call _printf ; 272 : t1 = _mm_set_pi32(0x00001111, 0x000F2222); movq mm0, MMWORD PTR -16+[esp+68] movq MMWORD PTR _t1$[esp+68], mm0 ; 273 : t2 = _mm_set_pi32(0xFFFFCCCC, 0xFFFFDDDD); movq mm0, MMWORD PTR -8+[esp+68] ; 274 : printf("[%.8X%.8X],[%.8X%.8X] -> ", t1.m64_u32[1], t1.m64_u32[0], t2.m64_u32[1], t2.m64_u32[0]); mov edx, DWORD PTR _t1$[esp+68] movq MMWORD PTR _t2$[esp+68], mm0 mov eax, DWORD PTR _t2$[esp+68] mov ecx, DWORD PTR _t2$[esp+72] push eax mov eax, DWORD PTR _t1$[esp+76] push ecx push edx push eax push OFFSET FLAT:??_C@_0BK@OAF@?$FL?$CF?48X?$CF?48X?$FN?0?$FL?$CF?48X?$CF?48X?$FN?5?9?$DO?5?$AA@ ; `string' call _printf add esp, 40 ; 00000028H ; 275 : for(i=0; 
i<cnt; ++i) cmp esi, edi jle SHORT $L43832 ; 276 : { ; 277 : t0 = md_pack_w_cross_muw4mud(t2, t1); movq mm0, MMWORD PTR _t1$[esp+48] or eax, -1 pslld mm0, 16 ; 00000010H mov WORD PTR -32+[esp+48], ax mov WORD PTR -32+[esp+50], di mov WORD PTR -32+[esp+52], ax mov WORD PTR -32+[esp+54], di movq mm1, MMWORD PTR -32+[esp+48] movq mm2, MMWORD PTR _t2$[esp+48] pand mm2, mm1 por mm2, mm0 movq MMWORD PTR _t0$[esp+48], mm2 $L43832: ; 278 : } ; 279 : printf("[%.8X%.8X]\n", t0.m64_u32[1], t0.m64_u32[0]); mov ecx, DWORD PTR _t0$[esp+48] mov edx, DWORD PTR _t0$[esp+52] push ecx push edx push OFFSET FLAT:??_C@_0M@GLHH@?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ ; `string' call _printf ; 280 : printf("\n"); push OFFSET FLAT:??_C@_01BJG@?6?$AA@ ; `string' call _printf ; 281 : ; 282 : // 2x2矩阵转置.紧缩双字 ; 283 : printf("md_matrix_transpose_2x2_mmd:\n"); push OFFSET FLAT:??_C@_0BO@GMLJ@md_matrix_transpose_2x2_mmd?3?6?$AA@ ; `string' call _printf ; 284 : t1 = _mm_set_pi32(0x00001111, 0x000F2222); movq mm0, MMWORD PTR -16+[esp+68] movq MMWORD PTR _t1$[esp+68], mm0 ; 285 : t2 = _mm_set_pi32(0xFFFFCCCC, 0xFFFFDDDD); movq mm0, MMWORD PTR -8+[esp+68] ; 286 : printf("[%.8X%.8X],[%.8X%.8X] -> ", t1.m64_u32[1], t1.m64_u32[0], t2.m64_u32[1], t2.m64_u32[0]); mov edx, DWORD PTR _t1$[esp+68] movq MMWORD PTR _t2$[esp+68], mm0 mov eax, DWORD PTR _t2$[esp+68] mov ecx, DWORD PTR _t2$[esp+72] push eax mov eax, DWORD PTR _t1$[esp+76] push ecx push edx push eax push OFFSET FLAT:??_C@_0BK@OAF@?$FL?$CF?48X?$CF?48X?$FN?0?$FL?$CF?48X?$CF?48X?$FN?5?9?$DO?5?$AA@ ; `string' call _printf add esp, 40 ; 00000028H ; 287 : for(i=0; i<cnt; ++i) cmp esi, edi jle SHORT $L43841 ; 278 : } ; 279 : printf("[%.8X%.8X]\n", t0.m64_u32[1], t0.m64_u32[0]); mov eax, esi $L43839: ; 288 : { ; 289 : md_matrix_transpose_2x2_mmd(t1, t2); movq mm1, MMWORD PTR _t2$[esp+48] movq mm0, MMWORD PTR _t1$[esp+48] movq mm2, mm1 dec eax movq mm3, mm0 punpckldq mm3, mm2 movq MMWORD PTR _t1$[esp+48], mm3 punpckhdq mm0, mm1 movq MMWORD PTR _t2$[esp+48], mm0 
jne SHORT $L43839 $L43841: ; 290 : } ; 291 : printf("[%.8X%.8X],[%.8X%.8X]\n", t1.m64_u32[1], t1.m64_u32[0], t2.m64_u32[1], t2.m64_u32[0]); mov ecx, DWORD PTR _t2$[esp+48] mov edx, DWORD PTR _t2$[esp+52] mov eax, DWORD PTR _t1$[esp+48] push ecx mov ecx, DWORD PTR _t1$[esp+56] push edx push eax push ecx push OFFSET FLAT:??_C@_0BH@CPNE@?$FL?$CF?48X?$CF?48X?$FN?0?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ ; `string' call _printf ; 292 : printf("\n"); push OFFSET FLAT:??_C@_01BJG@?6?$AA@ ; `string' call _printf ; 293 : ; 294 : // 复数与常量相乘(紧缩字->紧缩双字) ; 295 : printf("md_complex_mul_c_mid4miw:\n"); push OFFSET FLAT:??_C@_0BL@FJHD@md_complex_mul_c_mid4miw?3?6?$AA@ ; `string' call _printf ; 296 : t1 = _mm_set_pi16(0,0, 1, 1); // 1+i mov eax, 1 mov WORD PTR -24+[esp+80], di mov WORD PTR -24+[esp+76], ax mov WORD PTR -24+[esp+78], ax mov WORD PTR -24+[esp+82], di ; 297 : t2 = _mm_set_pi16(3,-2, 2,3); // 3+2i. (1+i)*(3+2i) = 1+5i mov eax, 3 movq mm0, MMWORD PTR -24+[esp+76] mov WORD PTR -24+[esp+76], ax movq MMWORD PTR _t1$[esp+76], mm0 mov WORD PTR -24+[esp+78], 2 mov WORD PTR -24+[esp+80], -2 ; fffffffeH mov WORD PTR -24+[esp+82], ax movq mm0, MMWORD PTR -24+[esp+76] ; 298 : printf("[%.8X%.8X],[%.8X%.8X] -> ", t1.m64_u32[1], t1.m64_u32[0], t2.m64_u32[1], t2.m64_u32[0]); mov ecx, DWORD PTR _t1$[esp+76] movq MMWORD PTR _t2$[esp+76], mm0 mov edx, DWORD PTR _t2$[esp+76] mov eax, DWORD PTR _t2$[esp+80] push edx mov edx, DWORD PTR _t1$[esp+84] push eax push ecx push edx push OFFSET FLAT:??_C@_0BK@OAF@?$FL?$CF?48X?$CF?48X?$FN?0?$FL?$CF?48X?$CF?48X?$FN?5?9?$DO?5?$AA@ ; `string' call _printf add esp, 48 ; 00000030H ; 299 : for(i=0; i<cnt; ++i) cmp esi, edi jle SHORT $L43845 ; 300 : { ; 301 : t0 = md_complex_mul_c_mid4miw(t1, t2); movq mm1, MMWORD PTR _t2$[esp+48] movq mm0, MMWORD PTR _t1$[esp+48] movq mm2, mm0 punpckldq mm0, mm2 pmaddwd mm0, mm1 movq MMWORD PTR _t0$[esp+48], mm0 $L43845: ; 302 : } ; 303 : printf("[%.8X%.8X]\n", t0.m64_u32[1], t0.m64_u32[0]); mov eax, DWORD PTR _t0$[esp+48] mov 
ecx, DWORD PTR _t0$[esp+52] push eax push ecx push OFFSET FLAT:??_C@_0M@GLHH@?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ ; `string' call _printf ; 304 : printf("\n"); push OFFSET FLAT:??_C@_01BJG@?6?$AA@ ; `string' call _printf ; 305 : ; 306 : // 无符号紧缩字节的绝对差 ; 307 : printf("md_absolute_deviation_mub:\n"); push OFFSET FLAT:??_C@_0BM@HKHJ@md_absolute_deviation_mub?3?6?$AA@ ; `string' call _printf ; 308 : t1 = _mm_set_pi8(1,2,3,4,5,6,7,8); mov al, 5 mov cl, 3 mov dl, 2 mov BYTE PTR -24+[esp+68], 8 mov BYTE PTR -24+[esp+69], 7 mov BYTE PTR -24+[esp+70], 6 mov BYTE PTR -24+[esp+71], al mov BYTE PTR -24+[esp+72], 4 mov BYTE PTR -24+[esp+73], cl mov BYTE PTR -24+[esp+74], dl mov BYTE PTR -24+[esp+75], 1 movq mm0, MMWORD PTR -24+[esp+68] ; 309 : t2 = _mm_set_pi8(8,7,6,5,4,3,2,1); mov BYTE PTR -24+[esp+68], 1 movq MMWORD PTR _t1$[esp+68], mm0 mov BYTE PTR -24+[esp+69], dl mov BYTE PTR -24+[esp+70], cl mov BYTE PTR -24+[esp+71], 4 mov BYTE PTR -24+[esp+72], al mov BYTE PTR -24+[esp+73], 6 mov BYTE PTR -24+[esp+74], 7 mov BYTE PTR -24+[esp+75], 8 movq mm0, MMWORD PTR -24+[esp+68] ; 310 : printf("[%.8X%.8X],[%.8X%.8X] -> ", t1.m64_u32[1], t1.m64_u32[0], t2.m64_u32[1], t2.m64_u32[0]); mov ecx, DWORD PTR _t1$[esp+68] movq MMWORD PTR _t2$[esp+68], mm0 mov edx, DWORD PTR _t2$[esp+68] mov eax, DWORD PTR _t2$[esp+72] push edx mov edx, DWORD PTR _t1$[esp+76] push eax push ecx push edx push OFFSET FLAT:??_C@_0BK@OAF@?$FL?$CF?48X?$CF?48X?$FN?0?$FL?$CF?48X?$CF?48X?$FN?5?9?$DO?5?$AA@ ; `string' call _printf add esp, 40 ; 00000028H ; 311 : for(i=0; i<cnt; ++i) cmp esi, edi jle SHORT $L43852 ; 312 : { ; 313 : t0 = md_absolute_deviation_mub(t1, t2); movq mm0, MMWORD PTR _t1$[esp+48] movq mm1, MMWORD PTR _t2$[esp+48] movq mm2, mm0 movq mm3, mm1 psubusb mm3, mm2 psubusb mm0, mm1 por mm0, mm3 movq MMWORD PTR _t0$[esp+48], mm0 $L43852: ; 314 : } ; 315 : printf("[%.8X%.8X]\n", t0.m64_u32[1], t0.m64_u32[0]); mov eax, DWORD PTR _t0$[esp+48] mov ecx, DWORD PTR _t0$[esp+52] push eax push ecx push OFFSET 
FLAT:??_C@_0M@GLHH@?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ ; `string' call _printf ; 316 : printf("\n"); push OFFSET FLAT:??_C@_01BJG@?6?$AA@ ; `string' call _printf ; 317 : ; 318 : // 带符号紧缩字的绝对差 ; 319 : printf("md_absolute_deviation_miw:\n"); push OFFSET FLAT:??_C@_0BM@KLKG@md_absolute_deviation_miw?3?6?$AA@ ; `string' call _printf ; 320 : t1 = _mm_set_pi16(-1, 1, 3, 5); mov WORD PTR -24+[esp+68], 5 mov WORD PTR -24+[esp+70], 3 mov WORD PTR -24+[esp+72], 1 mov WORD PTR -24+[esp+74], -1 movq mm0, MMWORD PTR -24+[esp+68] ; 321 : t2 = _mm_set_pi16( 2, 2, 2, 2); mov eax, 2 movq MMWORD PTR _t1$[esp+68], mm0 mov WORD PTR -24+[esp+68], ax mov WORD PTR -24+[esp+70], ax mov WORD PTR -24+[esp+72], ax mov WORD PTR -24+[esp+74], ax ; 322 : printf("[%.8X%.8X],[%.8X%.8X] -> ", t1.m64_u32[1], t1.m64_u32[0], t2.m64_u32[1], t2.m64_u32[0]); mov ecx, DWORD PTR _t1$[esp+68] movq mm0, MMWORD PTR -24+[esp+68] movq MMWORD PTR _t2$[esp+68], mm0 mov edx, DWORD PTR _t2$[esp+68] mov eax, DWORD PTR _t2$[esp+72] push edx mov edx, DWORD PTR _t1$[esp+76] push eax push ecx push edx push OFFSET FLAT:??_C@_0BK@OAF@?$FL?$CF?48X?$CF?48X?$FN?0?$FL?$CF?48X?$CF?48X?$FN?5?9?$DO?5?$AA@ ; `string' call _printf add esp, 40 ; 00000028H ; 323 : for(i=0; i<cnt; ++i) cmp esi, edi jle SHORT $L43859 ; 324 : { ; 325 : t0 = md_absolute_deviation_miw(t1, t2); movq mm2, MMWORD PTR _t2$[esp+48] movq mm1, MMWORD PTR _t1$[esp+48] movq mm0, mm2 movq mm3, mm1 pcmpgtw mm3, mm0 movq mm4, mm1 pxor mm4, mm0 movq mm0, mm4 pand mm0, mm3 movq mm3, mm0 pxor mm1, mm3 pxor mm2, mm0 psubw mm2, mm1 movq MMWORD PTR _t0$[esp+48], mm2 $L43859: ; 326 : } ; 327 : printf("[%.8X%.8X]\n", t0.m64_u32[1], t0.m64_u32[0]); mov eax, DWORD PTR _t0$[esp+48] mov ecx, DWORD PTR _t0$[esp+52] push eax push ecx push OFFSET FLAT:??_C@_0M@GLHH@?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ ; `string' call _printf ; 328 : printf("\n"); push OFFSET FLAT:??_C@_01BJG@?6?$AA@ ; `string' call _printf ; 329 : ; 330 : // 带符号紧缩字的绝对值 ; 331 : printf("md_abs_miw4miw:\n"); push OFFSET 
FLAT:??_C@_0BB@KOFH@md_abs_miw4miw?3?6?$AA@ ; `string' call _printf ; 332 : t0 = _mm_set_pi16(-1, 1, 3, -5); mov WORD PTR -24+[esp+68], -5 ; fffffffbH mov WORD PTR -24+[esp+70], 3 mov WORD PTR -24+[esp+72], 1 mov WORD PTR -24+[esp+74], -1 movq mm0, MMWORD PTR -24+[esp+68] movq MMWORD PTR _t0$[esp+68], mm0 ; 333 : printf("[%.8X%.8X] -> ", t0.m64_u32[1], t0.m64_u32[0]); mov edx, DWORD PTR _t0$[esp+68] mov eax, DWORD PTR _t0$[esp+72] push edx push eax push OFFSET FLAT:??_C@_0P@GKNG@?$FL?$CF?48X?$CF?48X?$FN?5?9?$DO?5?$AA@ ; `string' call _printf add esp, 32 ; 00000020H ; 334 : for(i=0; i<cnt; ++i) cmp esi, edi jle SHORT $L43865 ; 335 : { ; 336 : t1 = md_abs_miw(t0); movq mm1, MMWORD PTR _t0$[esp+48] movq mm0, mm1 psraw mm0, 15 ; 0000000fH movq mm2, mm0 pxor mm1, mm0 psubsw mm1, mm2 movq MMWORD PTR _t1$[esp+48], mm1 $L43865: ; 337 : } ; 338 : printf("[%.8X%.8X]\n", t1.m64_u32[1], t1.m64_u32[0]); mov ecx, DWORD PTR _t1$[esp+48] mov edx, DWORD PTR _t1$[esp+52] push ecx push edx push OFFSET FLAT:??_C@_0M@GLHH@?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ ; `string' call _printf ; 339 : printf("\n"); push OFFSET FLAT:??_C@_01BJG@?6?$AA@ ; `string' call _printf ; 340 : ; 341 : // 将带符号紧缩字限制在[iLow,iHigh]区间 ; 342 : printf("md_clamp_miw:\n"); push OFFSET FLAT:??_C@_0P@DEEP@md_clamp_miw?3?6?$AA@ ; `string' call _printf ; 343 : t0 = _mm_set_pi16(-15, 1, 254, 257); mov edi, 254 ; 000000feH mov WORD PTR -24+[esp+68], 257 ; 00000101H mov WORD PTR -24+[esp+70], di mov WORD PTR -24+[esp+72], 1 mov WORD PTR -24+[esp+74], -15 ; fffffff1H movq mm0, MMWORD PTR -24+[esp+68] movq MMWORD PTR _t0$[esp+68], mm0 ; 344 : printf("[%.8X%.8X] -> ", t0.m64_u32[1], t0.m64_u32[0]); mov eax, DWORD PTR _t0$[esp+68] mov ecx, DWORD PTR _t0$[esp+72] push eax push ecx push OFFSET FLAT:??_C@_0P@GKNG@?$FL?$CF?48X?$CF?48X?$FN?5?9?$DO?5?$AA@ ; `string' call _printf add esp, 32 ; 00000020H ; 345 : for(i=0; i<cnt; ++i) test esi, esi jle SHORT $L43871 ; 346 : { ; 347 : t1 = md_clamp_miw(t0, -1, 255); or dx, -1 mov ax, -257 ; 
fffffeffH movd mm0, dx mov cx, 32512 ; 00007f00H movq mm1, mm0 mov dx, -32768 ; ffff8000H punpcklwd mm1, mm0 movq mm0, mm1 punpcklwd mm1, mm0 movd mm0, ax movq mm2, mm0 punpcklwd mm2, mm0 movq mm0, mm2 punpcklwd mm2, mm0 movd mm0, cx movq mm3, mm0 punpcklwd mm3, mm0 movq mm0, mm3 punpcklwd mm3, mm0 movd mm0, dx movq mm4, mm0 punpcklwd mm4, mm0 movq mm0, mm4 punpcklwd mm4, mm0 movq mm0, MMWORD PTR _t0$[esp+48] paddw mm0, mm4 paddusw mm0, mm3 psubusw mm0, mm2 paddw mm0, mm1 movq MMWORD PTR _t1$[esp+48], mm0 $L43871: ; 348 : } ; 349 : printf("[%.8X%.8X]\n", t1.m64_u32[1], t1.m64_u32[0]); mov eax, DWORD PTR _t1$[esp+48] mov ecx, DWORD PTR _t1$[esp+52] push eax push ecx push OFFSET FLAT:??_C@_0M@GLHH@?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ ; `string' call _printf ; 350 : printf("\n"); push OFFSET FLAT:??_C@_01BJG@?6?$AA@ ; `string' call _printf ; 351 : ; 352 : // 将无符号紧缩字限制在[uLow,uHigh]区间 ; 353 : printf("md_clamp_muw:\n"); push OFFSET FLAT:??_C@_0P@NOLG@md_clamp_muw?3?6?$AA@ ; `string' call _printf ; 354 : t0 = _mm_set_pi16(1, 254, 257, 32769U); mov WORD PTR -24+[esp+68], -32767 ; ffff8001H mov WORD PTR -24+[esp+70], 257 ; 00000101H mov WORD PTR -24+[esp+72], di mov WORD PTR -24+[esp+74], 1 movq mm0, MMWORD PTR -24+[esp+68] movq MMWORD PTR _t0$[esp+68], mm0 ; 355 : printf("[%.8X%.8X] -> ", t0.m64_u32[1], t0.m64_u32[0]); mov edx, DWORD PTR _t0$[esp+68] mov eax, DWORD PTR _t0$[esp+72] push edx push eax push OFFSET FLAT:??_C@_0P@GKNG@?$FL?$CF?48X?$CF?48X?$FN?5?9?$DO?5?$AA@ ; `string' call _printf add esp, 32 ; 00000020H ; 356 : for(i=0; i<cnt; ++i) test esi, esi jle SHORT $L43877 ; 357 : { ; 358 : t1 = md_clamp_muw(t0, 16, 255); mov cx, 16 ; 00000010H mov dx, -240 ; ffffff10H movd mm0, cx mov ax, -256 ; ffffff00H movq mm1, mm0 punpcklwd mm1, mm0 movq mm0, mm1 punpcklwd mm1, mm0 movd mm0, dx movq mm2, mm0 punpcklwd mm2, mm0 movq mm0, mm2 punpcklwd mm2, mm0 movd mm0, ax movq mm3, mm0 punpcklwd mm3, mm0 movq mm0, mm3 punpcklwd mm3, mm0 movq mm0, MMWORD PTR _t0$[esp+48] paddusw mm0, 
mm3 psubusw mm0, mm2 paddw mm0, mm1 movq MMWORD PTR _t1$[esp+48], mm0 $L43877: ; 359 : } ; 360 : printf("[%.8X%.8X]\n", t1.m64_u32[1], t1.m64_u32[0]); mov ecx, DWORD PTR _t1$[esp+48] mov edx, DWORD PTR _t1$[esp+52] push ecx push edx push OFFSET FLAT:??_C@_0M@GLHH@?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ ; `string' call _printf ; 361 : printf("\n"); push OFFSET FLAT:??_C@_01BJG@?6?$AA@ ; `string' call _printf ; 362 : ; 363 : // 返回常数:0 ; 364 : printf("md_setzero_mmq:\t"); push OFFSET FLAT:??_C@_0BB@BLNI@md_setzero_mmq?3?7?$AA@ ; `string' call _printf ; 365 : t0 = md_setzero_mmq(); pxor mm0, mm0 movq MMWORD PTR -24+[esp+68], mm0 movq mm1, mm0 pxor mm0, mm1 movq MMWORD PTR _t0$[esp+68], mm0 ; 366 : printf("[%.8X%.8X]\n", t0.m64_u32[1], t0.m64_u32[0]); mov eax, DWORD PTR _t0$[esp+68] mov ecx, DWORD PTR _t0$[esp+72] push eax push ecx push OFFSET FLAT:??_C@_0M@GLHH@?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ ; `string' call _printf ; 367 : ; 368 : // 返回常数:全1 ; 369 : printf("md_setfull_mmq:\t"); push OFFSET FLAT:??_C@_0BB@ICKB@md_setfull_mmq?3?7?$AA@ ; `string' call _printf ; 370 : t0 = md_setfull_mmq(); movq mm0, MMWORD PTR -24+[esp+84] movq mm1, mm0 pcmpeqb mm0, mm1 movq MMWORD PTR _t0$[esp+84], mm0 ; 371 : printf("[%.8X%.8X]\n", t0.m64_u32[1], t0.m64_u32[0]); mov edx, DWORD PTR _t0$[esp+84] mov eax, DWORD PTR _t0$[esp+88] push edx push eax push OFFSET FLAT:??_C@_0M@GLHH@?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ ; `string' call _printf ; 372 : ; 373 : // 返回常数:每个紧缩字节为1 ; 374 : printf("md_set_1_mib:\t"); push OFFSET FLAT:??_C@_0P@NKIN@md_set_1_mib?3?7?$AA@ ; `string' call _printf ; 375 : t0 = md_set_1_mib(); movq mm0, MMWORD PTR -24+[esp+100] movq mm1, mm0 movq mm2, mm0 pcmpeqb mm2, mm1 psubb mm0, mm2 movq MMWORD PTR _t0$[esp+100], mm0 ; 376 : printf("[%.8X%.8X]\n", t0.m64_u32[1], t0.m64_u32[0]); mov ecx, DWORD PTR _t0$[esp+100] mov edx, DWORD PTR _t0$[esp+104] push ecx push edx push OFFSET FLAT:??_C@_0M@GLHH@?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ ; `string' call _printf add esp, 64 ; 00000040H ; 377 : ; 378 : 
// 返回常数:每个紧缩字为pow(2,n)-1 ; 379 : printf("md_set_pow2n_sub1_miw:\t"); push OFFSET FLAT:??_C@_0BI@DPAE@md_set_pow2n_sub1_miw?3?7?$AA@ ; `string' call _printf ; 380 : t0 = md_set_pow2n_sub1_miw(8); movq mm0, MMWORD PTR -24+[esp+52] movq mm1, mm0 pcmpeqb mm0, mm1 psrlw mm0, 8 movq MMWORD PTR _t0$[esp+52], mm0 ; 381 : printf("[%.8X%.8X]\n", t0.m64_u32[1], t0.m64_u32[0]); mov eax, DWORD PTR _t0$[esp+52] mov ecx, DWORD PTR _t0$[esp+56] push eax push ecx push OFFSET FLAT:??_C@_0M@GLHH@?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ ; `string' call _printf ; 382 : ; 383 : // 返回常数:每个紧缩字为-pow(2,n) ; 384 : printf("md_set_neg_pow2n_miw:\t"); push OFFSET FLAT:??_C@_0BH@NLNM@md_set_neg_pow2n_miw?3?7?$AA@ ; `string' call _printf ; 385 : t0 = md_set_neg_pow2n_miw(15); movq mm0, MMWORD PTR -24+[esp+68] movq mm1, mm0 pcmpeqb mm0, mm1 psllw mm0, 15 ; 0000000fH movq MMWORD PTR _t0$[esp+68], mm0 ; 386 : printf("[%.8X%.8X]\n", t0.m64_u32[1], t0.m64_u32[0]); mov edx, DWORD PTR _t0$[esp+68] mov eax, DWORD PTR _t0$[esp+72] push edx push eax push OFFSET FLAT:??_C@_0M@GLHH@?$FL?$CF?48X?$CF?48X?$FN?6?$AA@ ; `string' call _printf add esp, 32 ; 00000020H ; 387 : ; 388 : } pop edi pop esi mov esp, ebp pop ebp ret 0 ?doTest@@YAXH@Z ENDP ; doTest _TEXT ENDS PUBLIC _main EXTRN _rand:NEAR ; Function compile flags: /Ogty ; COMDAT _main _TEXT SEGMENT _main PROC NEAR ; COMDAT ; 392 : doTest((rand()&1) + 1); // 用一个随机数作为循环次数,避免编译器优化循环 call _rand and eax, 1 inc eax push eax call ?doTest@@YAXH@Z ; doTest add esp, 4 ; 393 : return 0; xor eax, eax ; 394 : } ret 0 _main ENDP _TEXT ENDS END