void makeOffsets(int pixel[25], int rowStride, int patternSize) { static const int offsets16[][2] = { {0, 3}, { 1, 3}, { 2, 2}, { 3, 1}, { 3, 0}, { 3, -1}, { 2, -2}, { 1, -3}, {0, -3}, {-1, -3}, {-2, -2}, {-3, -1}, {-3, 0}, {-3, 1}, {-2, 2}, {-1, 3} }; static const int offsets12[][2] = { {0, 2}, { 1, 2}, { 2, 1}, { 2, 0}, { 2, -1}, { 1, -2}, {0, -2}, {-1, -2}, {-2, -1}, {-2, 0}, {-2, 1}, {-1, 2} }; static const int offsets8[][2] = { {0, 1}, { 1, 1}, { 1, 0}, { 1, -1}, {0, -1}, {-1, -1}, {-1, 0}, {-1, 1} }; const int(*offsets)[2] = patternSize == 16 ? offsets16 : patternSize == 12 ? offsets12 : patternSize == 8 ? offsets8 : 0; CV_Assert(pixel && offsets); int k = 0; for (; k < patternSize; k++) pixel[k] = offsets[k][0] + offsets[k][1] * rowStride; for (; k < 25; k++) pixel[k] = pixel[k - patternSize]; } int cornerScore(const uchar* ptr, const int pixel[], int threshold) { const int K = 8, N = K * 3 + 1; int k, v = ptr[0]; short d[N]; for (k = 0; k < N; k++) d[k] = (short)(v - ptr[pixel[k]]); #if CV_SIMD128 if (true) { v_int16x8 q0 = v_setall_s16(-1000), q1 = v_setall_s16(1000); for (k = 0; k < 16; k += 8) { v_int16x8 v0 = v_load(d + k + 1); v_int16x8 v1 = v_load(d + k + 2); v_int16x8 a = v_min(v0, v1); v_int16x8 b = v_max(v0, v1); v0 = v_load(d + k + 3); a = v_min(a, v0); b = v_max(b, v0); v0 = v_load(d + k + 4); a = v_min(a, v0); b = v_max(b, v0); v0 = v_load(d + k + 5); a = v_min(a, v0); b = v_max(b, v0); v0 = v_load(d + k + 6); a = v_min(a, v0); b = v_max(b, v0); v0 = v_load(d + k + 7); a = v_min(a, v0); b = v_max(b, v0); v0 = v_load(d + k + 8); a = v_min(a, v0); b = v_max(b, v0); v0 = v_load(d + k); q0 = v_max(q0, v_min(a, v0)); q1 = v_min(q1, v_max(b, v0)); v0 = v_load(d + k + 9); q0 = v_max(q0, v_min(a, v0)); q1 = v_min(q1, v_max(b, v0)); } q0 = v_max(q0, v_setzero_s16() - q1); threshold = v_reduce_max(q0) - 1; } else #endif { int a0 = threshold; for (k = 0; k < 16; k += 2) { int a = MIN((int)d[k + 1], (int)d[k + 2]); a = MIN(a, (int)d[k + 3]); if (a <= a0) continue; a = MIN(a, (int)d[k + 4]); a = MIN(a, (int)d[k + 5]); a = MIN(a, (int)d[k + 6]); a = MIN(a, (int)d[k + 7]); a = MIN(a, (int)d[k + 8]); a0 = MAX(a0, MIN(a, (int)d[k])); a0 = MAX(a0, MIN(a, (int)d[k + 9])); } int b0 = -a0; for (k = 0; k < 16; k += 2) { int b = MAX((int)d[k + 1], (int)d[k + 2]); b = MAX(b, (int)d[k + 3]); b = MAX(b, (int)d[k + 4]); b = MAX(b, (int)d[k + 5]); if (b >= b0) continue; b = MAX(b, (int)d[k + 6]); b = MAX(b, (int)d[k + 7]); b = MAX(b, (int)d[k + 8]); b0 = MIN(b0, MAX(b, (int)d[k])); b0 = MIN(b0, MAX(b, (int)d[k + 9])); } threshold = -b0 - 1; } #if VERIFY_CORNERS testCorner(ptr, pixel, K, N, threshold); #endif return threshold; } //角点检测 template<int patternSize> void FAST_t(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression) { Mat img = _img.getMat(); const int K = patternSize / 2, N = patternSize + K + 1; int i, j, k, pixel[25]; makeOffsets(pixel, (int)img.step, patternSize); #if CV_SIMD128 const int quarterPatternSize = patternSize / 4; v_uint8x16 delta = v_setall_u8(0x80), t = v_setall_u8((char)threshold), K16 = v_setall_u8((char)K); #if CV_TRY_AVX2 Ptr<opt_AVX2::FAST_t_patternSize16_AVX2> fast_t_impl_avx2; if (CV_CPU_HAS_SUPPORT_AVX2) fast_t_impl_avx2 = opt_AVX2::FAST_t_patternSize16_AVX2::getImpl(img.cols, threshold, nonmax_suppression, pixel); #endif #endif keypoints.clear(); threshold = MIN(MAX(threshold, 0), 255); uchar threshold_tab[512]; for (i = -255; i <= 255; i++) threshold_tab[i + 255] = (uchar)(i < -threshold ? 1 : i > threshold ? 2 : 0); AutoBuffer<uchar> _buf((img.cols + 16) * 3 * (sizeof(int) + sizeof(uchar)) + 128); uchar* buf[3]; buf[0] = _buf.data(); buf[1] = buf[0] + img.cols; buf[2] = buf[1] + img.cols; int* cpbuf[3]; cpbuf[0] = (int*)alignPtr(buf[2] + img.cols, sizeof(int)) + 1; cpbuf[1] = cpbuf[0] + img.cols + 1; cpbuf[2] = cpbuf[1] + img.cols + 1; memset(buf[0], 0, img.cols * 3); for (i = 3; i < img.rows - 2; i++) { const uchar* ptr = img.ptr<uchar>(i) + 3; uchar* curr = buf[(i - 3) % 3]; int* cornerpos = cpbuf[(i - 3) % 3]; memset(curr, 0, img.cols); int ncorners = 0; if (i < img.rows - 3) { j = 3; #if CV_SIMD128 { if (patternSize == 16) { #if CV_TRY_AVX2 if (fast_t_impl_avx2) fast_t_impl_avx2->process(j, ptr, curr, cornerpos, ncorners); #endif //vz if (j <= (img.cols - 27)) //it doesn't make sense using vectors for less than 8 elements { for (; j < img.cols - 16 - 3; j += 16, ptr += 16) { v_uint8x16 v = v_load(ptr); v_int8x16 v0 = v_reinterpret_as_s8((v + t) ^ delta); v_int8x16 v1 = v_reinterpret_as_s8((v - t) ^ delta); v_int8x16 x0 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[0]), delta)); v_int8x16 x1 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[quarterPatternSize]), delta)); v_int8x16 x2 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[2 * quarterPatternSize]), delta)); v_int8x16 x3 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[3 * quarterPatternSize]), delta)); v_int8x16 m0, m1; m0 = (v0 < x0) & (v0 < x1); m1 = (x0 < v1) & (x1 < v1); m0 = m0 | ((v0 < x1) & (v0 < x2)); m1 = m1 | ((x1 < v1) & (x2 < v1)); m0 = m0 | ((v0 < x2) & (v0 < x3)); m1 = m1 | ((x2 < v1) & (x3 < v1)); m0 = m0 | ((v0 < x3) & (v0 < x0)); m1 = m1 | ((x3 < v1) & (x0 < v1)); m0 = m0 | m1; if (!v_check_any(m0)) continue; if (!v_check_any(v_combine_low(m0, m0))) { j -= 8; ptr -= 8; continue; } v_int8x16 c0 = v_setzero_s8(); v_int8x16 c1 = v_setzero_s8(); v_uint8x16 max0 = v_setzero_u8(); v_uint8x16 max1 = v_setzero_u8(); for (k = 0; k < N; k++) { v_int8x16 x = v_reinterpret_as_s8(v_load((ptr + pixel[k])) ^ delta); m0 = v0 < x; m1 = x < v1; c0 = v_sub_wrap(c0, m0) & m0; c1 = v_sub_wrap(c1, m1) & m1; max0 = v_max(max0, v_reinterpret_as_u8(c0)); max1 = v_max(max1, v_reinterpret_as_u8(c1)); } max0 = K16 < v_max(max0, max1); unsigned int m = v_signmask(v_reinterpret_as_s8(max0)); for (k = 0; m > 0 && k < 16; k++, m >>= 1) { if (m & 1) { cornerpos[ncorners++] = j + k; if (nonmax_suppression) { short d[25]; for (int _k = 0; _k < 25; _k++) d[_k] = (short)(ptr[k] - ptr[k + pixel[_k]]); v_int16x8 a0, b0, a1, b1; a0 = b0 = a1 = b1 = v_load(d + 8); for (int shift = 0; shift < 8; ++shift) { v_int16x8 v_nms = v_load(d + shift); a0 = v_min(a0, v_nms); b0 = v_max(b0, v_nms); v_nms = v_load(d + 9 + shift); a1 = v_min(a1, v_nms); b1 = v_max(b1, v_nms); } curr[j + k] = (uchar)(v_reduce_max(v_max(v_max(a0, a1), v_setzero_s16() - v_min(b0, b1))) - 1); } } } } } } } #endif for (; j < img.cols - 3; j++, ptr++) { int v = ptr[0]; const uchar* tab = &threshold_tab[0] - v + 255; int d = tab[ptr[pixel[0]]] | tab[ptr[pixel[8]]]; if (d == 0) continue; d &= tab[ptr[pixel[2]]] | tab[ptr[pixel[10]]]; d &= tab[ptr[pixel[4]]] | tab[ptr[pixel[12]]]; d &= tab[ptr[pixel[6]]] | tab[ptr[pixel[14]]]; if (d == 0) continue; d &= tab[ptr[pixel[1]]] | tab[ptr[pixel[9]]]; d &= tab[ptr[pixel[3]]] | tab[ptr[pixel[11]]]; d &= tab[ptr[pixel[5]]] | tab[ptr[pixel[13]]]; d &= tab[ptr[pixel[7]]] | tab[ptr[pixel[15]]]; if (d & 1) { int vt = v - threshold, count = 0; for (k = 0; k < N; k++) { int x = ptr[pixel[k]]; if (x < vt) { if (++count > K) { cornerpos[ncorners++] = j; if (nonmax_suppression) curr[j] = (uchar)cornerScore(ptr, pixel, threshold); break; } } else count = 0; } } if (d & 2) { int vt = v + threshold, count = 0; for (k = 0; k < N; k++) { int x = ptr[pixel[k]]; if (x > vt) { if (++count > K) { cornerpos[ncorners++] = j; if (nonmax_suppression) curr[j] = (uchar)cornerScore(ptr, pixel, threshold); break; } } else count = 0; } } } } cornerpos[-1] = ncorners; if (i == 3) continue; const uchar* prev = buf[(i - 4 + 3) % 3]; const uchar* pprev = buf[(i - 5 + 3) % 3]; cornerpos = cpbuf[(i - 4 + 3) % 3]; ncorners = cornerpos[-1]; for (k = 0; k < ncorners; k++) { j = cornerpos[k]; int score = prev[j]; if (!nonmax_suppression || (score > prev[j + 1] && score > prev[j - 1] && score > pprev[j - 1] && score > pprev[j] && score > pprev[j + 1] && score > curr[j - 1] && score > curr[j] && score > curr[j + 1])) { keypoints.push_back(KeyPoint((float)j, (float)(i - 1), 7.f, -1, (float)score)); } } } }