关于 NEON 的 vsub 方法的溢出问题,测试结果如下:
- vsub 会产生溢出(无符号回绕):根据数据的 bit 表示规律,溢出结果与理论正确结果相差 2^8 = 256(即两者模 256 同余),比如 249 - (-7) = 256
- 先用 vsubl_u8 做加宽减法,再使用 vreinterpretq_s16_u16 这类方法把结果重新解释为有符号 16 位整数,可以得到正确的结果
/*
 * Demo: subtract two 8-lane u8 vectors three ways and log each result.
 *
 *   1. vsub_u8                 - result stays 8-bit; lanes where the
 *                                minuend is smaller wrap (0 - 7 logs 249).
 *   2. vsubl_u8 + reinterpret  - result widened to 16 bits and viewed as
 *                                signed, so the same lane logs -7.
 *   3. vsubl_u8                - same widened bits viewed as unsigned,
 *                                so the same lane logs 65529.
 */
void neontest()
{
    uchar minuend[]    = {0,1,2,3,4,5,6,7};
    uchar subtrahend[] = {7,6,5,4,3, 2, 1, 7};

    uint8x8_t lhs = vld1_u8(minuend);
    uint8x8_t rhs = vld1_u8(subtrahend);

    /* 1) Same-width subtraction: each lane keeps only the low 8 bits. */
    uchar narrow[8];
    vst1_u8(narrow, vsub_u8(lhs, rhs));
    LOGD("%d %d %d %d %d %d %d %d", narrow[0], narrow[1], narrow[2], narrow[3], narrow[4], narrow[5], narrow[6], narrow[7]);

    /* 2) Widening subtraction, bits reinterpreted as signed 16-bit lanes. */
    short wide_signed[8];
    vst1q_s16(wide_signed, vreinterpretq_s16_u16(vsubl_u8(lhs, rhs)));
    LOGD("%d %d %d %d %d %d %d %d", wide_signed[0], wide_signed[1], wide_signed[2], wide_signed[3], wide_signed[4], wide_signed[5], wide_signed[6], wide_signed[7]);

    /* 3) Widening subtraction, stored as unsigned 16-bit lanes. */
    unsigned short wide_unsigned[8];
    vst1q_u16(wide_unsigned, vsubl_u8(lhs, rhs));
    LOGD("%d %d %d %d %d %d %d %d", wide_unsigned[0], wide_unsigned[1], wide_unsigned[2], wide_unsigned[3], wide_unsigned[4], wide_unsigned[5], wide_unsigned[6], wide_unsigned[7]);
}
/////////////// output
D/LOG:==========main start================
D/LOG:249 251 253 255 1 3 5 0
D/LOG:-7 -5 -3 -1 1 3 5 0
D/LOG:65529 65531 65533 65535 1 3 5 0
D/LOG:==========main end================