This is a NEON intrinsics implementation of sign3, which can be used for sample adaptive offset (SAO) in HEVC.
#include <stddef.h>
#include <stdint.h>

/* ACLE compilers define __ARM_NEON when Advanced SIMD is available; older GCC
 * releases spell it __ARM_NEON__. Without either, only the scalar path is built. */
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>
#define SIGN3_USE_NEON 1
#else
#define SIGN3_USE_NEON 0
#endif

/**
 * Element-wise three-way comparison of two 8-bit planes.
 *
 * For every position (x, y) with 0 <= x < width and 0 <= y < height, stores
 *   -1 if src1 < src2,  0 if src1 == src2,  +1 if src1 > src2
 * into dst. Nothing is written when width or height is not positive, and
 * bytes of a dst row beyond `width` are left untouched.
 *
 * @param src1        first input plane (unsigned 8-bit samples)
 * @param src_stride1 distance between consecutive rows of src1, in elements
 * @param src2        second input plane (unsigned 8-bit samples)
 * @param src_stride2 distance between consecutive rows of src2, in elements
 * @param dst         output plane (signed 8-bit results)
 * @param dst_stride  distance between consecutive rows of dst, in elements
 * @param width       number of elements processed per row
 * @param height      number of rows processed
 */
void sign3_neon(const uint8_t *src1, const int src_stride1, const uint8_t *src2, const int src_stride2, int8_t* dst, const int dst_stride, const int width, const int height)
{
    for (int row = 0; row < height; row++) {
        /* Widen before multiplying so row * stride cannot overflow int on
         * very large planes. */
        const uint8_t *a = src1 + (ptrdiff_t)row * src_stride1;
        const uint8_t *b = src2 + (ptrdiff_t)row * src_stride2;
        int8_t *out = dst + (ptrdiff_t)row * dst_stride;
        int col = 0;

#if SIGN3_USE_NEON
        /* Vector path: 16 elements per iteration. */
        for (; col < width - 15; col += 16) {
            const uint8x16_t va = vld1q_u8(a);
            const uint8x16_t vb = vld1q_u8(b);

            /* Compare lanes are 0xFF (true) or 0x00 (false). */
            const uint8x16_t less = vcltq_u8(va, vb);
            const uint8x16_t greater = vcgtq_u8(va, vb);

            /* 0xFF >> 7 == 0x01 gives +1; 0xFF reinterpreted as int8 is -1.
             * The two masks are never both set, so OR merges them safely. */
            const uint8x16_t plus_one = vshrq_n_u8(greater, 7);
            const uint8x16_t sign = vorrq_u8(plus_one, less);

            vst1q_s8(out, vreinterpretq_s8_u8(sign));

            a += 16;
            b += 16;
            out += 16;
        }
#endif

        /* Scalar path: the leftover (< 16) elements after the vector loop,
         * or the entire row when NEON is not available. */
        for (; col < width; col++) {
            const uint8_t x = *a++;
            const uint8_t y = *b++;
            int8_t s = 0;

            if (x < y) {
                s = -1;
            }
            if (x > y) {
                s = 1;
            }
            *out++ = s;
        }
    }
}
Enjoy it!