很早之前写过《双边滤波算法的简易实现bilateralFilter》。
当时学习参考的代码来自cuda的样例。
相关代码可以参阅:
https://github.com/johng12/cudaSamples/tree/master/cudaSamples/3_Imaging/bilateralFilter
由于算法逻辑非常清晰,就不多解释了。
需要补课的,请移步《o(1)复杂度之双边滤波算法的原理、流程、实现及效果。》
代码见:bilateralFilter_cpu.cpp 文件。
#include <math.h> #include <string.h> //////////////////////////////////////////////////////////////////////////////// // export C interface #define EPSILON 1e-3 extern "C" void updateGaussianGold(float delta, int radius); extern "C" void bilateralFilterGold(unsigned int *pSrc, unsigned int *pDest, float e_d, int w, int h, int r); //variables float gaussian[50]; struct float4 { float x; float y; float z; float w; float4() {}; float4(float value) { x = y = z = w = value; } }; void updateGaussianGold(float delta, int radius) { for (int i = 0; i < 2 * radius + 1; i++) { int x = i - radius; gaussian[i] = exp(-(x * x) / (2 * delta * delta)); } } float heuclideanLen(float4 a, float4 b, float d) { float mod = (b.x - a.x) * (b.x - a.x) + (b.y - a.y) * (b.y - a.y) + (b.z - a.z) * (b.z - a.z) + (b.w - a.w) * (b.w - a.w); return exp(-mod / (2 * d * d)); } unsigned int hrgbaFloatToInt(float4 rgba) { unsigned int w = (((unsigned int)(fabs(rgba.w) * 255.0f)) & 0xff) << 24; unsigned int z = (((unsigned int)(fabs(rgba.z) * 255.0f)) & 0xff) << 16; unsigned int y = (((unsigned int)(fabs(rgba.y) * 255.0f)) & 0xff) << 8; unsigned int x = ((unsigned int)(fabs(rgba.x) * 255.0f)) & 0xff; return (w | z | y | x); } float4 hrgbaIntToFloat(unsigned int c) { float4 rgba; rgba.x = (c & 0xff) * 0.003921568627f; // /255.0f; rgba.y = ((c>>8) & 0xff) * 0.003921568627f; // /255.0f; rgba.z = ((c>>16) & 0xff) * 0.003921568627f; // /255.0f; rgba.w = ((c>>24) & 0xff) * 0.003921568627f; // /255.0f; return rgba; } float4 mul(float a, float4 b) { float4 ans; ans.x = a * b.x; ans.y = a * b.y; ans.z = a * b.z; ans.w = a * b.w; return ans; } float4 add4(float4 a, float4 b) { float4 ans; ans.x = a.x + b.x; ans.y = a.y + b.y; ans.z = a.z + b.z; ans.w = a.w + b.w; return ans; } void bilateralFilterGold(unsigned int *pSrc, unsigned int *pDest, float e_d, int w, int h, int r) { float4 *hImage = new float4[w * h]; float domainDist, colorDist, factor; for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { hImage[y * w + x] = hrgbaIntToFloat(pSrc[y * w + x]); } } for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { float4 t(0.0f); float sum = 0.0f; for (int i = -r; i <= r; i++) { int neighborY = y + i; //clamp the neighbor pixel, prevent overflow if (neighborY < 0) { neighborY = 0; } else if (neighborY >= h) { neighborY = h - 1; } for (int j = -r; j <= r; j++) { domainDist = gaussian[r + i] * gaussian[r + j]; //clamp the neighbor pixel, prevent overflow int neighborX = x + j; if (neighborX < 0) { neighborX = 0; } else if (neighborX >= w) { neighborX = w - 1; } colorDist = heuclideanLen(hImage[neighborY * w + neighborX], hImage[y * w + x], e_d); factor = domainDist * colorDist; sum += factor; t = add4(t, mul(factor, hImage[neighborY * w + neighborX])); } } pDest[y * w + x] = hrgbaFloatToInt(mul(1 / sum, t)); } } delete[] hImage; }
在网上很多人都采用双边滤波作为磨皮算法,包括比较知名的gpuimage库。
用的也是小半径的双边,而双边滤波有很多快速算法的变种。
详情可参阅:http://people.csail.mit.edu/sparis/bf/
而就在几个月前,github上有个朋友放出来了一份快速双边滤波的tiny版本代码。
https://github.com/ufoym/RecursiveBF/
#ifndef INCLUDE_RBF #define INCLUDE_RBF #include <math.h> #include <string.h> #define QX_DEF_CHAR_MAX 255 /* ====================================================================== RecursiveBF: A lightweight library for recursive bilateral filtering. ------------------------------------------------------------------------- Intro: Recursive bilateral filtering (developed by Qingxiong Yang) is pretty fast compared with most edge-preserving filtering methods. - computational complexity is linear in both input size and dimensionality - takes about 43 ms to process a one mega-pixel color image (i7 1.8GHz & 4GB memory) - about 18x faster than Fast high-dimensional filtering using the permutohedral lattice - about 86x faster than Gaussian kd-trees for fast high- dimensional filtering Usage: // ---------------------------------------------------------- // Basic Usage // ---------------------------------------------------------- unsigned char * img = ...; // input image unsigned char * img_out = 0; // output image int width = ..., height = ..., channel = ...; // image size recursive_bf(img, img_out, sigma_spatial, sigma_range, width, height, channel); // ---------------------------------------------------------- // Advanced: using external buffer for better performance // ---------------------------------------------------------- unsigned char * img = ...; // input image unsigned char * img_out = 0; // output image int width = ..., height = ..., channel = ...; // image size float * buffer = new float[ // external buf ( width * height* channel + width * height + width * channel + width) * 2]; recursive_bf(img, img_out, sigma_spatial, sigma_range, width, height, channel, buffer); delete[] buffer; Notice: Large sigma_spatial/sigma_range parameter may results in visible artifact which can be removed by an additional filter with small sigma_spatial/sigma_range parameter. ------------------------------------------------------------------------- Reference: Qingxiong Yang, Recursive Bilateral Filtering, European Conference on Computer Vision (ECCV) 2012, 399-413. ====================================================================== */ inline void recursive_bf( unsigned char * img_in, unsigned char *& img_out, float sigma_spatial, float sigma_range, int width, int height, int channel, float * buffer /*= 0*/); // ---------------------------------------------------------------------- inline void _recursive_bf( unsigned char * img, float sigma_spatial, float sigma_range, int width, int height, int channel, float * buffer = 0) { const int width_height = width * height; const int width_channel = width * channel; const int width_height_channel = width * height * channel; bool is_buffer_internal = (buffer == 0); if (is_buffer_internal) buffer = new float[(width_height_channel + width_height + width_channel + width) * 2]; float * img_out_f = buffer; float * img_temp = &img_out_f[width_height_channel]; float * map_factor_a = &img_temp[width_height_channel]; float * map_factor_b = &map_factor_a[width_height]; float * slice_factor_a = &map_factor_b[width_height]; float * slice_factor_b = &slice_factor_a[width_channel]; float * line_factor_a = &slice_factor_b[width_channel]; float * line_factor_b = &line_factor_a[width]; //compute a lookup table float range_table[QX_DEF_CHAR_MAX + 1]; float inv_sigma_range = 1.0f / (sigma_range * QX_DEF_CHAR_MAX); for (int i = 0; i <= QX_DEF_CHAR_MAX; i++) range_table[i] = static_cast<float>(exp(-i * inv_sigma_range)); float alpha = static_cast<float>(exp(-sqrt(2.0) / (sigma_spatial * width))); float ypr, ypg, ypb, ycr, ycg, ycb; float fp, fc; float inv_alpha_ = 1 - alpha; for (int y = 0; y < height; y++) { float * temp_x = &img_temp[y * width_channel]; unsigned char * in_x = &img[y * width_channel]; unsigned char * texture_x = &img[y * width_channel]; *temp_x++ = ypr = *in_x++; *temp_x++ = ypg = *in_x++; *temp_x++ = ypb = *in_x++; unsigned char tpr = *texture_x++; unsigned char tpg = *texture_x++; unsigned char tpb = *texture_x++; float * temp_factor_x = &map_factor_a[y * width]; *temp_factor_x++ = fp = 1; // from left to right for (int x = 1; x < width; x++) { unsigned char tcr = *texture_x++; unsigned char tcg = *texture_x++; unsigned char tcb = *texture_x++; unsigned char dr = abs(tcr - tpr); unsigned char dg = abs(tcg - tpg); unsigned char db = abs(tcb - tpb); int range_dist = (((dr << 1) + dg + db) >> 2); float weight = range_table[range_dist]; float alpha_ = weight*alpha; *temp_x++ = ycr = inv_alpha_*(*in_x++) + alpha_*ypr; *temp_x++ = ycg = inv_alpha_*(*in_x++) + alpha_*ypg; *temp_x++ = ycb = inv_alpha_*(*in_x++) + alpha_*ypb; tpr = tcr; tpg = tcg; tpb = tcb; ypr = ycr; ypg = ycg; ypb = ycb; *temp_factor_x++ = fc = inv_alpha_ + alpha_*fp; fp = fc; } *--temp_x; *temp_x = 0.5f*((*temp_x) + (*--in_x)); *--temp_x; *temp_x = 0.5f*((*temp_x) + (*--in_x)); *--temp_x; *temp_x = 0.5f*((*temp_x) + (*--in_x)); tpr = *--texture_x; tpg = *--texture_x; tpb = *--texture_x; ypr = *in_x; ypg = *in_x; ypb = *in_x; *--temp_factor_x; *temp_factor_x = 0.5f*((*temp_factor_x) + 1); fp = 1; // from right to left for (int x = width - 2; x >= 0; x--) { unsigned char tcr = *--texture_x; unsigned char tcg = *--texture_x; unsigned char tcb = *--texture_x; unsigned char dr = abs(tcr - tpr); unsigned char dg = abs(tcg - tpg); unsigned char db = abs(tcb - tpb); int range_dist = (((dr << 1) + dg + db) >> 2); float weight = range_table[range_dist]; float alpha_ = weight * alpha; ycr = inv_alpha_ * (*--in_x) + alpha_ * ypr; ycg = inv_alpha_ * (*--in_x) + alpha_ * ypg; ycb = inv_alpha_ * (*--in_x) + alpha_ * ypb; *--temp_x; *temp_x = 0.5f*((*temp_x) + ycr); *--temp_x; *temp_x = 0.5f*((*temp_x) + ycg); *--temp_x; *temp_x = 0.5f*((*temp_x) + ycb); tpr = tcr; tpg = tcg; tpb = tcb; ypr = ycr; ypg = ycg; ypb = ycb; fc = inv_alpha_ + alpha_*fp; *--temp_factor_x; *temp_factor_x = 0.5f*((*temp_factor_x) + fc); fp = fc; } } alpha = static_cast<float>(exp(-sqrt(2.0) / (sigma_spatial * height))); inv_alpha_ = 1 - alpha; float * ycy, * ypy, * xcy; unsigned char * tcy, * tpy; memcpy(img_out_f, img_temp, sizeof(float)* width_channel); float * in_factor = map_factor_a; float*ycf, *ypf, *xcf; memcpy(map_factor_b, in_factor, sizeof(float) * width); for (int y = 1; y < height; y++) { tpy = &img[(y - 1) * width_channel]; tcy = &img[y * width_channel]; xcy = &img_temp[y * width_channel]; ypy = &img_out_f[(y - 1) * width_channel]; ycy = &img_out_f[y * width_channel]; xcf = &in_factor[y * width]; ypf = &map_factor_b[(y - 1) * width]; ycf = &map_factor_b[y * width]; for (int x = 0; x < width; x++) { unsigned char dr = abs((*tcy++) - (*tpy++)); unsigned char dg = abs((*tcy++) - (*tpy++)); unsigned char db = abs((*tcy++) - (*tpy++)); int range_dist = (((dr << 1) + dg + db) >> 2); float weight = range_table[range_dist]; float alpha_ = weight*alpha; for (int c = 0; c < channel; c++) *ycy++ = inv_alpha_*(*xcy++) + alpha_*(*ypy++); *ycf++ = inv_alpha_*(*xcf++) + alpha_*(*ypf++); } } int h1 = height - 1; ycf = line_factor_a; ypf = line_factor_b; memcpy(ypf, &in_factor[h1 * width], sizeof(float) * width); for (int x = 0; x < width; x++) map_factor_b[h1 * width + x] = 0.5f*(map_factor_b[h1 * width + x] + ypf[x]); ycy = slice_factor_a; ypy = slice_factor_b; memcpy(ypy, &img_temp[h1 * width_channel], sizeof(float)* width_channel); int k = 0; for (int x = 0; x < width; x++) { for (int c = 0; c < channel; c++) { int idx = (h1 * width + x) * channel + c; img_out_f[idx] = 0.5f*(img_out_f[idx] + ypy[k++]) / map_factor_b[h1 * width + x]; } } for (int y = h1 - 1; y >= 0; y--) { tpy = &img[(y + 1) * width_channel]; tcy = &img[y * width_channel]; xcy = &img_temp[y * width_channel]; float*ycy_ = ycy; float*ypy_ = ypy; float*out_ = &img_out_f[y * width_channel]; xcf = &in_factor[y * width]; float*ycf_ = ycf; float*ypf_ = ypf; float*factor_ = &map_factor_b[y * width]; for (int x = 0; x < width; x++) { unsigned char dr = abs((*tcy++) - (*tpy++)); unsigned char dg = abs((*tcy++) - (*tpy++)); unsigned char db = abs((*tcy++) - (*tpy++)); int range_dist = (((dr << 1) + dg + db) >> 2); float weight = range_table[range_dist]; float alpha_ = weight*alpha; float fcc = inv_alpha_*(*xcf++) + alpha_*(*ypf_++); *ycf_++ = fcc; *factor_ = 0.5f * (*factor_ + fcc); for (int c = 0; c < channel; c++) { float ycc = inv_alpha_*(*xcy++) + alpha_*(*ypy_++); *ycy_++ = ycc; *out_ = 0.5f * (*out_ + ycc) / (*factor_); *out_++; } *factor_++; } memcpy(ypy, ycy, sizeof(float) * width_channel); memcpy(ypf, ycf, sizeof(float) * width); } for (int i = 0; i < width_height_channel; ++i) img[i] = static_cast<unsigned char>(img_out_f[i]); if (is_buffer_internal) delete[] buffer; } inline void recursive_bf( unsigned char * img_in, unsigned char *& img_out, float sigma_spatial, float sigma_range, int width, int height, int channel, float * buffer = 0) { if (img_out == 0) img_out = new unsigned char[width * height * channel]; for (int i = 0; i < width * height * channel; ++i) img_out[i] = img_in[i]; _recursive_bf(img_out, sigma_spatial, sigma_range, width, height, channel, buffer); } #endif // INCLUDE_RBF
而这份代码的作者,真的很粗心,代码中有多处逻辑写错,代码风格也有待加强。
当然也有,有心者,在此基础上做了修正。
https://github.com/Fig1024/OP_RBF
这个作者还在这个基础上做了simd指令集优化。
优化代码暂时不看。
关注下基础实现的代码。即RBFilterPlain.cpp:
#include "stdafx.h" #include "RBFilterPlain.h" #include "stdafx.h" #include <algorithm> using namespace std; #define QX_DEF_CHAR_MAX 255 CRBFilterPlain::CRBFilterPlain() { } CRBFilterPlain::~CRBFilterPlain() { releaseMemory(); } // assumes 3/4 channel images, 1 byte per channel void CRBFilterPlain::reserveMemory(int max_width, int max_height, int channels) { // basic sanity check _ASSERT(max_width >= 10 && max_width < 10000); _ASSERT(max_height >= 10 && max_height < 10000); _ASSERT(channels >= 1 && channels <= 4); releaseMemory(); m_reserve_width = max_width; m_reserve_height = max_height; m_reserve_channels = channels; int width_height = m_reserve_width * m_reserve_height; int width_height_channel = width_height * m_reserve_channels; m_left_pass_color = new float[width_height_channel]; m_left_pass_factor = new float[width_height]; m_right_pass_color = new float[width_height_channel]; m_right_pass_factor = new float[width_height]; m_down_pass_color = new float[width_height_channel]; m_down_pass_factor = new float[width_height]; m_up_pass_color = new float[width_height_channel]; m_up_pass_factor = new float[width_height]; } void CRBFilterPlain::releaseMemory() { m_reserve_width = 0; m_reserve_height = 0; m_reserve_channels = 0; if (m_left_pass_color) { delete[] m_left_pass_color; m_left_pass_color = nullptr; } if (m_left_pass_factor) { delete[] m_left_pass_factor; m_left_pass_factor = nullptr; } if (m_right_pass_color) { delete[] m_right_pass_color; m_right_pass_color = nullptr; } if (m_right_pass_factor) { delete[] m_right_pass_factor; m_right_pass_factor = nullptr; } if (m_down_pass_color) { delete[] m_down_pass_color; m_down_pass_color = nullptr; } if (m_down_pass_factor) { delete[] m_down_pass_factor; m_down_pass_factor = nullptr; } if (m_up_pass_color) { delete[] m_up_pass_color; m_up_pass_color = nullptr; } if (m_up_pass_factor) { delete[] m_up_pass_factor; m_up_pass_factor = nullptr; } } int CRBFilterPlain::getDiffFactor(const unsigned char* color1, const unsigned char* color2) const { int final_diff; int component_diff[4]; // find absolute difference between each component for (int i = 0; i < m_reserve_channels; i++) { component_diff[i] = abs(color1[i] - color2[i]); } // based on number of components, produce a single difference value in the 0-255 range switch (m_reserve_channels) { case 1: final_diff = component_diff[0]; break; case 2: final_diff = ((component_diff[0] + component_diff[1]) >> 1); break; case 3: final_diff = ((component_diff[0] + component_diff[2]) >> 2) + (component_diff[1] >> 1); break; case 4: final_diff = ((component_diff[0] + component_diff[1] + component_diff[2] + component_diff[3]) >> 2); break; default: final_diff = 0; } _ASSERT(final_diff >= 0 && final_diff <= 255); return final_diff; } // memory must be reserved before calling image filter // this implementation of filter uses plain C++, single threaded // channel count must be 3 or 4 (alpha not used) void CRBFilterPlain::filter(unsigned char* img_src, unsigned char* img_dst, float sigma_spatial, float sigma_range, int width, int height, int channel) { _ASSERT(img_src); _ASSERT(img_dst); _ASSERT(m_reserve_channels == channel); _ASSERT(m_reserve_width >= width); _ASSERT(m_reserve_height >= height); // compute a lookup table float alpha_f = static_cast<float>(exp(-sqrt(2.0) / (sigma_spatial * 255))); float inv_alpha_f = 1.f - alpha_f; float range_table_f[QX_DEF_CHAR_MAX + 1]; float inv_sigma_range = 1.0f / (sigma_range * QX_DEF_CHAR_MAX); { float ii = 0.f; for (int i = 0; i <= QX_DEF_CHAR_MAX; i++, ii -= 1.f) { range_table_f[i] = alpha_f * exp(ii * inv_sigma_range); } } /////////////// // Left pass { const unsigned char* src_color = img_src; float* left_pass_color = m_left_pass_color; float* left_pass_factor = m_left_pass_factor; for (int y = 0; y < height; y++) { const unsigned char* src_prev = src_color; const float* prev_factor = left_pass_factor; const float* prev_color = left_pass_color; // process 1st pixel separately since it has no previous *left_pass_factor++ = 1.f; for (int c = 0; c < channel; c++) { *left_pass_color++ = *src_color++; } // handle other pixels for (int x = 1; x < width; x++) { // determine difference in pixel color between current and previous // calculation is different depending on number of channels int diff = getDiffFactor(src_color, src_prev); src_prev = src_color; float alpha_f = range_table_f[diff]; *left_pass_factor++ = inv_alpha_f + alpha_f * (*prev_factor++); for (int c = 0; c < channel; c++) { *left_pass_color++ = inv_alpha_f * (*src_color++) + alpha_f * (*prev_color++); } } } } /////////////// // Right pass { // start from end and then go up to begining int last_index = width * height * channel - 1; const unsigned char* src_color = img_src + last_index; float* right_pass_color = m_right_pass_color + last_index; float* right_pass_factor = m_right_pass_factor + width * height - 1; for (int y = 0; y < height; y++) { const unsigned char* src_prev = src_color; const float* prev_factor = right_pass_factor; const float* prev_color = right_pass_color; // process 1st pixel separately since it has no previous *right_pass_factor-- = 1.f; for (int c = 0; c < channel; c++) { *right_pass_color-- = *src_color--; } // handle other pixels for (int x = 1; x < width; x++) { // determine difference in pixel color between current and previous // calculation is different depending on number of channels int diff = getDiffFactor(src_color, src_color - 3); // src_prev = src_color; float alpha_f = range_table_f[diff]; *right_pass_factor-- = inv_alpha_f + alpha_f * (*prev_factor--); for (int c = 0; c < channel; c++) { *right_pass_color-- = inv_alpha_f * (*src_color--) + alpha_f * (*prev_color--); } } } } // vertical pass will be applied on top on horizontal pass, while using pixel differences from original image // result color stored in 'm_left_pass_color' and vertical pass will use it as source color { float* img_out = m_left_pass_color; // use as temporary buffer const float* left_pass_color = m_left_pass_color; const float* left_pass_factor = m_left_pass_factor; const float* right_pass_color = m_right_pass_color; const float* right_pass_factor = m_right_pass_factor; int width_height = width * height; for (int i = 0; i < width_height; i++) { // average color divided by average factor float factor = 1.f / ((*left_pass_factor++) + (*right_pass_factor++)); for (int c = 0; c < channel; c++) { *img_out++ = (factor * ((*left_pass_color++) + (*right_pass_color++))); } } } /////////////// // Down pass { const float* src_color_hor = m_left_pass_color; // result of horizontal pass filter const unsigned char* src_color = img_src; float* down_pass_color = m_down_pass_color; float* down_pass_factor = m_down_pass_factor; const unsigned char* src_prev = src_color; const float* prev_color = down_pass_color; const float* prev_factor = down_pass_factor; // 1st line done separately because no previous line for (int x = 0; x < width; x++) { *down_pass_factor++ = 1.f; for (int c = 0; c < channel; c++) { *down_pass_color++ = *src_color_hor++; } src_color += channel; } // handle other lines for (int y = 1; y < height; y++) { for (int x = 0; x < width; x++) { // determine difference in pixel color between current and previous // calculation is different depending on number of channels int diff = getDiffFactor(src_color, src_prev); src_prev += channel; src_color += channel; float alpha_f = range_table_f[diff]; *down_pass_factor++ = inv_alpha_f + alpha_f * (*prev_factor++); for (int c = 0; c < channel; c++) { *down_pass_color++ = inv_alpha_f * (*src_color_hor++) + alpha_f * (*prev_color++); } } } } /////////////// // Up pass { // start from end and then go up to begining int last_index = width * height * channel - 1; const unsigned char* src_color = img_src + last_index; const float* src_color_hor = m_left_pass_color + last_index; // result of horizontal pass filter float* up_pass_color = m_up_pass_color + last_index; float* up_pass_factor = m_up_pass_factor + (width * height - 1); // const unsigned char* src_prev = src_color; const float* prev_color = up_pass_color; const float* prev_factor = up_pass_factor; // 1st line done separately because no previous line for (int x = 0; x < width; x++) { *up_pass_factor-- = 1.f; for (int c = 0; c < channel; c++) { *up_pass_color-- = *src_color_hor--; } src_color -= channel; } // handle other lines for (int y = 1; y < height; y++) { for (int x = 0; x < width; x++) { // determine difference in pixel color between current and previous // calculation is different depending on number of channels src_color -= channel; int diff = getDiffFactor(src_color, src_color + width * channel); float alpha_f = range_table_f[diff]; *up_pass_factor-- = inv_alpha_f + alpha_f * (*prev_factor--); for (int c = 0; c < channel; c++) { *up_pass_color-- = inv_alpha_f * (*src_color_hor--) + alpha_f * (*prev_color--); } } } } /////////////// // average result of vertical pass is written to output buffer { const float* down_pass_color = m_down_pass_color; const float* down_pass_factor = m_down_pass_factor; const float* up_pass_color = m_up_pass_color; const float* up_pass_factor = m_up_pass_factor; int width_height = width * height; for (int i = 0; i < width_height; i++) { // average color divided by average factor float factor = 1.f / ((*up_pass_factor++) + (*down_pass_factor++)); for (int c = 0; c < channel; c++) { *img_dst++ = (unsigned char)(factor * ((*up_pass_color++) + (*down_pass_color++))); } } } }
这份代码把算法逻辑梳理得非常清晰。
一开始我也觉得这个代码效果和速度都很不错了。
但是没有仔细看,经过朋友的提醒,说是这个算法有严重的bug,会有线条瑕疵。
把对应的参数值调大之后,的确。特别明显的条纹效应,直接毁图了。
后来我经过一段时间的思考和研究,明确了算法的问题所在。
针对这个算法的修复,已经有明确思路了。
在这里,暂时先放出我经过稍加整理了OP_RBF的完整C代码版本。
做了一些代码逻辑上的优化,降低内存的使用。
int getDiffFactor(const unsigned char* color1, const unsigned char* color2, const int & channels) { int final_diff; int component_diff[4]; // find absolute difference between each component for (int i = 0; i < channels; i++) { component_diff[i] = abs(color1[i] - color2[i]); } // based on number of components, produce a single difference value in the 0-255 range switch (channels) { case 1: final_diff = component_diff[0]; break; case 2: final_diff = ((component_diff[0] + component_diff[1]) >> 1); break; case 3: final_diff = ((component_diff[0] + component_diff[2]) >> 2) + (component_diff[1] >> 1); break; case 4: final_diff = ((component_diff[0] + component_diff[1] + component_diff[2] + component_diff[3]) >> 2); break; default: final_diff = 0; } _ASSERT(final_diff >= 0 && final_diff <= 255); return final_diff; } void CRB_HorizontalFilter(unsigned char* Input, unsigned char* Output, int Width, int Height, int Channels, float * range_table_f, float inv_alpha_f, float* left_Color_Buffer, float* left_Factor_Buffer, float* right_Color_Buffer, float* right_Factor_Buffer) { // Left pass and Right pass int Stride = Width * Channels; const unsigned char* src_left_color = Input; float* left_Color = left_Color_Buffer; float* left_Factor = left_Factor_Buffer; int last_index = Stride * Height - 1; const unsigned char* src_right_color = Input + last_index; float* right_Color = right_Color_Buffer + last_index; float* right_Factor = right_Factor_Buffer + Width * Height - 1; for (int y = 0; y < Height; y++) { const unsigned char* src_left_prev = Input; const float* left_prev_factor = left_Factor; const float* left_prev_color = left_Color; const unsigned char* src_right_prev = src_right_color; const float* right_prev_factor = right_Factor; const float* right_prev_color = right_Color; // process 1st pixel separately since it has no previous { //if x = 0 *left_Factor++ = 1.f; *right_Factor-- = 1.f; for (int c = 0; c < Channels; c++) { *left_Color++ = *src_left_color++; *right_Color-- = *src_right_color--; } } // handle other pixels for (int x = 1; x < Width; x++) { // determine difference in pixel color between current and previous // calculation is different depending on number of channels int left_diff = getDiffFactor(src_left_color, src_left_prev, Channels); src_left_prev = src_left_color; int right_diff = getDiffFactor(src_right_color, src_right_color - Channels, Channels); src_right_prev = src_right_color; float left_alpha_f = range_table_f[left_diff]; float right_alpha_f = range_table_f[right_diff]; *left_Factor++ = inv_alpha_f + left_alpha_f * (*left_prev_factor++); *right_Factor-- = inv_alpha_f + right_alpha_f * (*right_prev_factor--); for (int c = 0; c < Channels; c++) { *left_Color++ = (inv_alpha_f * (*src_left_color++) + left_alpha_f * (*left_prev_color++)); *right_Color-- = (inv_alpha_f * (*src_right_color--) + right_alpha_f * (*right_prev_color--)); } } } // vertical pass will be applied on top on horizontal pass, while using pixel differences from original image // result color stored in 'leftcolor' and vertical pass will use it as source color { unsigned char* dst_color = Output; // use as temporary buffer const float* leftcolor = left_Color_Buffer; const float* leftfactor = left_Factor_Buffer; const float* rightcolor = right_Color_Buffer; const float* rightfactor = right_Factor_Buffer; int width_height = Width * Height; for (int i = 0; i < width_height; i++) { // average color divided by average factor float factor = 1.f / ((*leftfactor++) + (*rightfactor++)); for (int c = 0; c < Channels; c++) { *dst_color++ = (factor * ((*leftcolor++) + (*rightcolor++))); } } } } void CRB_VerticalFilter(unsigned char* Input, unsigned char* Output, int Width, int Height, int Channels, float * range_table_f, float inv_alpha_f, float* down_Color_Buffer, float* down_Factor_Buffer, float* up_Color_Buffer, float* up_Factor_Buffer) { // Down pass and Up pass int Stride = Width * Channels; const unsigned char* src_color_first_hor = Output; // result of horizontal pass filter const unsigned char* src_down_color = Input; float* down_color = down_Color_Buffer; float* down_factor = down_Factor_Buffer; const unsigned char* src_down_prev = src_down_color; const float* down_prev_color = down_color; const float* down_prev_factor = down_factor; int last_index = Stride * Height - 1; const unsigned char* src_up_color = Input + last_index; const unsigned char* src_color_last_hor = Output + last_index; // result of horizontal pass filter float* up_color = up_Color_Buffer + last_index; float* up_factor = up_Factor_Buffer + (Width * Height - 1); const float* up_prev_color = up_color; const float* up_prev_factor = up_factor; // 1st line done separately because no previous line { //if y=0 for (int x = 0; x < Width; x++) { *down_factor++ = 1.f; *up_factor-- = 1.f; for (int c = 0; c < Channels; c++) { *down_color++ = *src_color_first_hor++; *up_color-- = *src_color_last_hor--; } src_down_color += Channels; src_up_color -= Channels; } } // handle other lines for (int y = 1; y < Height; y++) { for (int x = 0; x < Width; x++) { // determine difference in pixel color between current and previous // calculation is different depending on number of channels int down_diff = getDiffFactor(src_down_color, src_down_prev, Channels); src_down_prev += Channels; src_down_color += Channels; src_up_color -= Channels; int up_diff = getDiffFactor(src_up_color, src_up_color + Stride, Channels); float down_alpha_f = range_table_f[down_diff]; float up_alpha_f = range_table_f[up_diff]; *down_factor++ = inv_alpha_f + down_alpha_f * (*down_prev_factor++); *up_factor-- = inv_alpha_f + up_alpha_f * (*up_prev_factor--); for (int c = 0; c < Channels; c++) { *down_color++ = inv_alpha_f * (*src_color_first_hor++) + down_alpha_f * (*down_prev_color++); *up_color-- = inv_alpha_f * (*src_color_last_hor--) + up_alpha_f * (*up_prev_color--); } } } // average result of vertical pass is written to output buffer { unsigned char *dst_color = Output; const float* downcolor = down_Color_Buffer; const float* downfactor = down_Factor_Buffer; const float* upcolor = up_Color_Buffer; const float* upfactor = up_Factor_Buffer; int width_height = Width * Height; for (int i = 0; i < width_height; i++) { // average color divided by average factor float factor = 1.f / ((*upfactor++) + (*downfactor++)); for (int c = 0; c < Channels; c++) { *dst_color++ = (factor * ((*upcolor++) + (*downcolor++))); } } } } // memory must be reserved before calling image filter // this implementation of filter uses plain C++, single threaded // channel count must be 3 or 4 (alpha not used) void CRBFilter(unsigned char* Input, unsigned char* Output, int Width, int Height, int Stride, float sigmaSpatial, float sigmaRange) { int Channels = Stride / Width; int reserveWidth = Width; int reserveHeight = Height; // basic sanity check _ASSERT(Input); _ASSERT(Output); _ASSERT(reserveWidth >= 10 && reserveWidth < 10000); _ASSERT(reserveHeight >= 10 && reserveHeight < 10000); _ASSERT(Channels >= 1 && Channels <= 4); int reservePixels = reserveWidth * reserveHeight; int numberOfPixels = reservePixels * Channels; float* leftColorBuffer = (float*)calloc(sizeof(float)*numberOfPixels, 1); float* leftFactorBuffer = (float*)calloc(sizeof(float)*reservePixels, 1); float* rightColorBuffer = (float*)calloc(sizeof(float)*numberOfPixels, 1); float* rightFactorBuffer = (float*)calloc(sizeof(float)*reservePixels, 1); if (leftColorBuffer == NULL || leftFactorBuffer == NULL || rightColorBuffer == NULL || rightFactorBuffer == NULL) { if (leftColorBuffer) free(leftColorBuffer); if (leftFactorBuffer) free(leftFactorBuffer); if (rightColorBuffer) free(rightColorBuffer); if (rightFactorBuffer) free(rightFactorBuffer); return; } float* downColorBuffer = leftColorBuffer; float* downFactorBuffer = leftFactorBuffer; float* upColorBuffer = rightColorBuffer; float* upFactorBuffer = rightFactorBuffer; // compute a lookup table float alpha_f = static_cast<float>(exp(-sqrt(2.0) / (sigmaSpatial * 255))); float inv_alpha_f = 1.f - alpha_f; float range_table_f[255 + 1]; float inv_sigma_range = 1.0f / (sigmaRange * 255); float ii = 0.f; for (int i = 0; i <= 255; i++, ii -= 1.f) { range_table_f[i] = alpha_f * exp(ii * inv_sigma_range); } CRB_HorizontalFilter(Input, Output, Width, Height, Channels, range_table_f, inv_alpha_f, leftColorBuffer, leftFactorBuffer, rightColorBuffer, rightFactorBuffer); CRB_VerticalFilter(Input, Output, Width, Height, Channels, range_table_f, inv_alpha_f, downColorBuffer, downFactorBuffer, upColorBuffer, upFactorBuffer); if (leftColorBuffer) { free(leftColorBuffer); leftColorBuffer = NULL; } if (leftFactorBuffer) { free(leftFactorBuffer); leftFactorBuffer = NULL; } if (rightColorBuffer) { free(rightColorBuffer); rightColorBuffer = NULL; } if (rightFactorBuffer) { free(rightFactorBuffer); rightFactorBuffer = NULL; } }
void CRBFilter(unsigned char* Input, unsigned char* Output, int Width, int Height, int Stride, float sigmaSpatial, float sigmaRange);
就这样一个简单的接口函数。
代码也比较清晰,不多做解释了。
上个效果演示图。
前后对比,注意看人物的眼袋部分,有明显的条纹效应。
好久没上来发文了,最近有好多网友,询问关于《票据OCR前预处理 (附Demo)》一些算法相关的细节。
说实话,这个代码写得有点久了,逻辑记不太清楚了。
待后面有时间,把代码梳理一下,再写篇博客分享完整逻辑代码。
俺的联系方式如下:
邮箱: gaozhihan@vip.qq.com
联系我时请说明来意,不然一律忽略,谢谢。
若此博文能帮到您,欢迎扫码小额赞助。
微信:
支付宝: