zoukankan      html  css  js  c++  java
  • opencv7-ml之KNN

    准备知识
    在文件“opencv\sources\modules\ml\src\precomp.hpp”中
    有cvPrepareTrainData的函数原型。

    // Internal ML helper that validates a training set and converts it into the
    // plain arrays the models work on.  Returns non-zero on success and 0 on failure.
    int
    cvPrepareTrainData( const char* /*funcname*/,                      // caller name; the parameter is unnamed, so it is unused
                        const CvMat* train_data, int tflag,            // training matrix and its sample layout flag
                        const CvMat* responses, int response_type,     // optional targets and how to treat them (numerical or not)
                        const CvMat* var_idx,                          // optional subset of variables (features) to use
                        const CvMat* sample_idx,                       // optional subset of samples to use
                        bool always_copy_data,                         // forwarded to cvGetTrainSamples
                        const float*** out_train_samples,              // [out, required] array of per-sample float pointers
                        int* _sample_count,                            // [out] number of samples returned
                        int* _var_count,                               // [out] number of variables returned
                        int* _var_all,                                 // [out] number of variables reported by cvCheckTrainData for train_data
                        CvMat** out_responses,                         // [out] preprocessed responses (required when responses is given)
                        CvMat** out_response_map,                      // [out] only passed on for non-numerical (categorical) responses
                        CvMat** out_var_idx,                           // [out] preprocessed variable index
                        CvMat** out_sample_idx=0 )                     // [out] preprocessed sample index; may be omitted

    从函数原型可以看出,各参数的含义为:
    const char* /*funcname*/: 函数的名称(参数名被注释掉,函数内部并未使用)
    const CvMat* train_data, int tflag: 训练集、训练集样本的布局(如CV_ROW_SAMPLE表示每行一个样本)
    const CvMat* responses, int response_type: 训练集标签、标签的类型(数值型或类别型)
    const CvMat* var_idx: 用了哪些特征
    const CvMat* sample_idx: 用了哪些样本
    bool always_copy_data: 是否复制数据集
    const float*** out_train_samples: 输出处理过的训练集
    int* _sample_count: 输出的样本个数
    int* _var_count: 输出的特征个数
    int* _var_all: 训练集中全部特征的个数
    CvMat** out_responses: 输出训练集标签
    CvMat** out_response_map: 输出类别标签的映射表(只在标签为类别型时使用)
    CvMat** out_var_idx: 输出用了哪些特征
    CvMat** out_sample_idx=0: 输出用了哪些样本;默认值0表示调用者不需要该输出

    在文件“opencv\sources\modules\ml\src\inner_functions.cpp”中有cvPrepareTrainData的函数实现:

    // Validates a training set and converts it into the arrays the ML models use.
    //
    // train_data / tflag        : training matrix and its sample layout flag
    // responses / response_type : optional targets; CV_VAR_NUMERICAL selects the ordered
    //                             preprocessing, anything else the categorical one
    // var_idx, sample_idx       : optional selections of variables / samples
    // always_copy_data          : forwarded to cvGetTrainSamples
    // out_train_samples         : [out, required] the prepared samples
    // _sample_count, _var_count, _var_all, out_responses, out_response_map,
    // out_var_idx, out_sample_idx : optional outputs (out_responses is required
    //                             whenever responses is given)
    //
    // Returns 1 on success and 0 on failure.  On success the caller owns every
    // matrix handed back through the out_* pointers.
    int
    cvPrepareTrainData( const char* /*funcname*/,
                        const CvMat* train_data, int tflag,
                        const CvMat* responses, int response_type,
                        const CvMat* var_idx,
                        const CvMat* sample_idx,
                        bool always_copy_data,
                        const float*** out_train_samples,
                        int* _sample_count,
                        int* _var_count,
                        int* _var_all,
                        CvMat** out_responses,
                        CvMat** out_response_map,
                        CvMat** out_var_idx,
                        CvMat** out_sample_idx )
    {
        int ok = 0; // becomes 1 only when every step below has succeeded
        CvMat* _var_idx = 0;    // stays 0 when no variable selection was requested
        CvMat* _sample_idx = 0; // stays 0 when no sample selection was requested
        CvMat* _responses = 0;
        int sample_all = 0, sample_count = 0, var_all = 0, var_count = 0;

        CV_FUNCNAME( "cvPrepareTrainData" );

        // step 0. clear all the output pointers to ensure we do not try
        // to call free() with uninitialized pointers
        if( out_responses )
            *out_responses = 0;

        if( out_response_map )
            *out_response_map = 0;

        if( out_var_idx )
            *out_var_idx = 0;

        if( out_sample_idx )
            *out_sample_idx = 0;

        if( out_train_samples )
            *out_train_samples = 0;

        if( _sample_count )
            *_sample_count = 0;

        if( _var_count )
            *_var_count = 0;

        if( _var_all )
            *_var_all = 0;

        __BEGIN__;

        if( !out_train_samples )
            CV_ERROR( CV_StsBadArg, "output pointer to train samples is NULL" );

        // Validate the training matrix and learn its full dimensions.
        CV_CALL( cvCheckTrainData( train_data, tflag, 0, &var_all, &sample_all ));

        // Preprocess the optional index arrays against those dimensions.
        if( sample_idx )
            CV_CALL( _sample_idx = cvPreprocessIndexArray( sample_idx, sample_all ));
        if( var_idx )
            CV_CALL( _var_idx = cvPreprocessIndexArray( var_idx, var_all ));

        if( responses )
        {
            if( !out_responses )
                CV_ERROR( CV_StsNullPtr, "output response pointer is NULL" );

            if( response_type == CV_VAR_NUMERICAL )
            {
                CV_CALL( _responses = cvPreprocessOrderedResponses( responses,
                                                    _sample_idx, sample_all ));
            }
            else
            {
                CV_CALL( _responses = cvPreprocessCategoricalResponses( responses,
                                    _sample_idx, sample_all, out_response_map, 0 ));
            }
        }

        CV_CALL( *out_train_samples =
                    cvGetTrainSamples( train_data, tflag, _var_idx, _sample_idx,
                                       &var_count, &sample_count, always_copy_data ));

        ok = 1;

        __END__;

        if( ok )
        {
            // Success: hand the temporaries over to the caller.  Each local pointer
            // is zeroed after the transfer so the releases at the bottom skip it;
            // temporaries the caller did not ask for are released there instead.
            if( out_responses )
                *out_responses = _responses, _responses = 0;

            if( out_var_idx )
                *out_var_idx = _var_idx, _var_idx = 0;

            if( out_sample_idx )
                *out_sample_idx = _sample_idx, _sample_idx = 0;

            if( _sample_count )
                *_sample_count = sample_count;

            if( _var_count )
                *_var_count = var_count;

            if( _var_all )
                *_var_all = var_all;
        }
        else
        {
            // Failure: undo whatever was already published through the outputs.
            if( out_response_map )
                cvReleaseMat( out_response_map );
            // out_train_samples may be NULL here (that is one of the error cases
            // above), and cvFree dereferences its argument, so guard the call.
            if( out_train_samples )
                cvFree( out_train_samples );
        }

        // Never release the caller's own response matrix.
        // NOTE(review): presumably a preprocessing helper can return its input unchanged - confirm.
        if( _responses != responses )
            cvReleaseMat( &_responses );
        cvReleaseMat( &_var_idx );
        cvReleaseMat( &_sample_idx );

        return ok;
    }
    

    首先在文件“opencv\sources\modules\ml\include\opencv2\ml\ml.hpp”中有:

    #define CV_TYPE_NAME_ML_KNN     "opencv-ml-knn"
    // One node of a singly linked list of sample batches.
    // CvKNearest::train fills type, count, data.fl and next for every batch it adds.
    struct CvVectors
    {
        int type;           // element type of the vectors; CvKNearest::train stores CV_32F
        int dims, count;    // count: number of vectors in this batch.
                            // dims: presumably the vector length; the KNN code shown never sets it - TODO confirm
        CvVectors* next;    // following batch, or 0 at the end of the list
        union
        {
            uchar** ptr;
            float** fl;
            double** db;
        } data;             // array of per-vector pointers, viewed as bytes, floats or doubles
    };
    
    /*********K-Nearest Neighbour Classifier **********/
    
    // K-nearest-neighbours model.  Most operations exist twice: once for the C
    // structure CvMat and once for cv::Mat.
    class CV_EXPORTS_W CvKNearest : public CvStatModel
    {
    public:
    
        CV_WRAP CvKNearest();// default constructor
        virtual ~CvKNearest();// virtual destructor
    // Two training constructors, one taking CvMat and one taking cv::Mat.
    // trainData:    the training set
    // responses:    target value of every training sample
    // sampleIdx:    selects which training samples to use; 0 (or an empty Mat) means all of them
    // isRegression: true uses KNN for regression, false for classification
    // max_k:        upper bound for K
        CvKNearest( const CvMat* trainData, const CvMat* responses,
                    const CvMat* sampleIdx=0, bool isRegression=false, int max_k=32 );
    
        CV_WRAP CvKNearest( const cv::Mat& trainData, const cv::Mat& responses,
                   const cv::Mat& sampleIdx=cv::Mat(), bool isRegression=false, int max_k=32 );
    // Training, again in a CvMat and a cv::Mat flavour; the cv::Mat one forwards to the CvMat one.
    // updateBase: false trains from scratch (previous samples are discarded);
    //             true adds the new samples to the existing model, in which case the
    //             implementation keeps the stored regression flag and max_k and ignores the new ones.
             virtual bool train( const CvMat* trainData, const CvMat* responses,
                            const CvMat* sampleIdx=0, bool is_regression=false,
                            int maxK=32, bool updateBase=false );
    
        CV_WRAP virtual bool train( const cv::Mat& trainData, const cv::Mat& responses,
                           const cv::Mat& sampleIdx=cv::Mat(), bool isRegression=false,
                           int maxK=32, bool updateBase=false );
    // Finds the neighbours of the input vectors and predicts their responses: three overloads.
       virtual float find_nearest( const CvMat* samples, int k, CV_OUT CvMat* results=0,
            const float** neighbors=0, CV_OUT CvMat* neighborResponses=0, CV_OUT CvMat* dist=0 ) const;
    
        virtual float find_nearest( const cv::Mat& samples, int k, cv::Mat* results=0,
                                    const float** neighbors=0, cv::Mat* neighborResponses=0,
                                    cv::Mat* dist=0 ) const;
    
        CV_WRAP virtual float find_nearest( const cv::Mat& samples, int k, CV_OUT cv::Mat& results,
                                            CV_OUT cv::Mat& neighborResponses, CV_OUT cv::Mat& dists) const;
    // Exhaustive search over all stored samples for rows [start, end) of _samples.
            virtual void find_neighbors_direct( const CvMat* _samples, int k, int start, int end,
            float* neighbor_responses, const float** neighbors, float* dist ) const;
    
        virtual void clear();
        int get_max_k() const;// returns max_k, the upper bound for K
        int get_var_count() const;// returns the feature dimension (number of variables)
        int get_sample_count() const;// returns the number of stored training samples
        bool is_regression() const;// true for regression, false for classification
    // Turns the raw neighbour lists into predictions and copies them to the output matrices.
        virtual float write_results( int k, int k1, int start, int end,
            const float* neighbor_responses, const float* dist, CvMat* _results,
            CvMat* _neighbor_responses, CvMat* _dist, Cv32suf* sort_buf ) const;
    
    
    protected:
    
        int max_k, var_count;// upper bound for K; feature dimension
        int total;// number of training samples over all stored batches
        bool regression;// true when the model is used for regression
        CvVectors* samples;// linked list of training batches (newest first)
    };
    
    typedef CvKNearest KNearest;
    

    该文件的实现与上面的类定义一一对应:为了兼容opencv1.0版本,前半部分使用CvMat的函数都是1.0的形式;后半部分使用Mat的函数则是opencv2.0的形式。

    下面是文件“opencv\sources\modules\ml\src\knearest.cpp”中的实现部分:

    #include "precomp.hpp"
    
    /********K-Nearest Neighbors Classifier ***********/
    //1、opencv1.0形式
    // k Nearest Neighbors
    // Default constructor: builds an empty, untrained model.
    CvKNearest::CvKNearest()
        : samples( 0 )   // clear() walks this list, so it has to be null before the call
    {
        clear();
    }
    
    // Destructor: frees every stored sample batch through clear().
    CvKNearest::~CvKNearest()
    {
        this->clear();
    }
    
    // Training constructor (CvMat flavour).
    // _train_data: training set, one sample per row (train() passes CV_ROW_SAMPLE)
    // _responses:  target value of every training sample
    CvKNearest::CvKNearest( const CvMat* _train_data, const CvMat* _responses,
                            const CvMat* _sample_idx, bool _is_regression, int _max_k )
        : samples( 0 )   // nothing stored yet
    {
        // A freshly constructed model is always trained from scratch.
        const bool update_existing = false;
        train( _train_data, _responses, _sample_idx, _is_regression, _max_k, update_existing );
    }
    
    
    void CvKNearest::clear()
    {
    //类似释放链表一样释放所有的样本
        while( samples )
        {
            CvVectors* next_samples = samples->next;
            cvFree( &samples->data.fl );
            cvFree( &samples );
            samples = next_samples;
        }
        //将对应的类成员置0
        var_count = 0;
        total = 0;
        max_k = 0;
    }
    
    // Upper bound for K stored by train().
    int CvKNearest::get_max_k() const
    {
        return max_k;
    }

    // Feature dimension of the stored samples.
    int CvKNearest::get_var_count() const
    {
        return var_count;
    }

    // True when the model is used for regression, false for classification.
    bool CvKNearest::is_regression() const
    {
        return regression;
    }

    // Number of training samples over all stored batches.
    int CvKNearest::get_sample_count() const
    {
        return total;
    }
    
    //模型训练函数。主要是对训练数据进行排序,然后采用合适的数据结构来存储数据集。
    bool CvKNearest::train( const CvMat* _train_data, const CvMat* _responses,
                            const CvMat* _sample_idx, bool _is_regression,
                            int _max_k, bool _update_base )
    {
        bool ok = false;//用来标记该train函数是否成功执行
        CvMat* responses = 0;
    
        CV_FUNCNAME( "CvKNearest::train" );
    
        __BEGIN__;
    
        CvVectors* _samples = 0;
        float** _data = 0;
        int _count = 0, _dims = 0, _dims_all = 0, _rsize = 0;
    
        if( !_update_base )
            clear();
    
        // Prepare training data and related parameters.
        //准备好数据和对应的参数
        // Treat categorical responses as ordered - to prevent class label compression and
        // to enable entering new classes in the updates
      //让类别的目标有序,这样能够防止类别标签被压缩并且能够可以在更新中加入新的类 。该工作通过调用cvPrepareTrainData函数来完成 
        CV_CALL( cvPrepareTrainData( "CvKNearest::train", _train_data, CV_ROW_SAMPLE,
            _responses, CV_VAR_ORDERED, 0, _sample_idx, true, (const float***)&_data,
            &_count, &_dims, &_dims_all, &responses, 0, 0 ));
    
        if( !responses )
            CV_ERROR( CV_StsNoMem, "Could not allocate memory for responses" );
    
        if( _update_base && _dims != var_count )
            CV_ERROR( CV_StsBadArg, "The newly added data have different dimensionality" );
    
        if( !_update_base )
        {
        //如果用户设定的k小于1,那么报错
            if( _max_k < 1 )
                CV_ERROR( CV_StsOutOfRange, "max_k must be a positive number" );
    
            regression = _is_regression;//是否用作回归
            var_count = _dims;//特征的维度
            max_k = _max_k;//k的上限
        }
    
        _rsize = _count*sizeof(float);
    
        _samples->next = samples;
        _samples->type = CV_32F;
        _samples->data.fl = _data;
        _samples->count = _count;
        total += _count;
    
        samples = _samples;
        memcpy( _samples + 1, responses->data.fl, _rsize );
    
        ok = true;
    
        __END__;
    
        if( responses && responses->data.ptr != _responses->data.ptr )
            cvReleaseMat(&responses);
    
        return ok;
    }
    
    
    
    // Exhaustive nearest-neighbour search for rows [start, end) of _samples.
    //
    // For every query row the k closest stored training vectors are kept in
    // ascending order of squared Euclidean distance:
    //   dist, neighbor_responses : block-local buffers, k entries per query (index i*k)
    //   neighbors                : optional, indexed by the global row ((start + i)*k)
    void CvKNearest::find_neighbors_direct( const CvMat* _samples, int k, int start, int end,
                        float* neighbor_responses, const float** neighbors, float* dist ) const
    {
        // k1: number of list entries that are valid so far (capped at k).
        // k2: number of entries that may be shifted on insertion (capped at k-1).
        // Both are shared by all queries, because every query has seen the same
        // number of training vectors at any point of the loop.
        int i, j, count = end - start, k1 = 0, k2 = 0, d = var_count;
        CvVectors* s = samples;
    
        // every stored batch ...
        for( ; s != 0; s = s->next )
        {
            int n = s->count;
            // ... every training vector of the batch ...
            for( j = 0; j < n; j++ )
            {
                // ... against every query row of the block
                for( i = 0; i < count; i++ )
                {
                    double sum = 0;
                    Cv32suf si;
                    const float* v = s->data.fl[j];  // training vector
                    const float* u = (float*)(_samples->data.ptr + _samples->step*(start + i));  // query vector
                    Cv32suf* dd = (Cv32suf*)(dist + i*k);  // this query's distance list
                    float* nr;
                    const float** nn;
                    int t, ii, ii1;
    
                    // squared Euclidean distance, four components per iteration
                    for( t = 0; t <= d - 4; t += 4 )
                    {
                        double t0 = u[t] - v[t], t1 = u[t+1] - v[t+1];
                        double t2 = u[t+2] - v[t+2], t3 = u[t+3] - v[t+3];
                        sum += t0*t0 + t1*t1 + t2*t2 + t3*t3;
                    }
    
                    // remaining components
                    for( ; t < d; t++ )
                    {
                        double t0 = u[t] - v[t];
                        sum += t0*t0;
                    }
    
                    // The distances are compared through their integer bit patterns
                    // (si.i / dd[].i); for non-negative floats this gives the same order.
                    si.f = (float)sum;
                    // scan from the far end for the last entry that is closer than the new one
                    for( ii = k1-1; ii >= 0; ii-- )
                        if( si.i > dd[ii].i )
                            break;
                    // the list is full and the new vector is not closer than any entry
                    if( ii >= k-1 )
                        continue;
    
                    nr = neighbor_responses + i*k;
                    nn = neighbors ? neighbors + (start + i)*k : 0;
                    // shift the farther entries one slot down to open position ii+1
                    for( ii1 = k2 - 1; ii1 > ii; ii1-- )
                    {
                        dd[ii1+1].i = dd[ii1].i;
                        nr[ii1+1] = nr[ii1];
                        if( nn ) nn[ii1+1] = nn[ii1];
                    }
                    dd[ii+1].i = si.i;
                    // the responses of a batch are stored directly behind its CvVectors header
                    nr[ii+1] = ((float*)(s + 1))[j];
                    if( nn )
                        nn[ii+1] = v;
                }
                // one more training vector has been processed for every query
                k1 = MIN( k1+1, k );
                k2 = MIN( k1, k-1 );
            }
        }
    }
    
    
    // Converts the neighbour lists of query rows [start, end) into predictions and
    // copies the requested details to the output matrices.
    //
    // k                        : length of each per-query list in neighbor_responses / dist
    // k1                       : number of valid entries in each list (k1 <= k)
    // neighbor_responses, dist : block-local lists, k entries per query
    // _results                 : optional, receives one prediction per query
    // _neighbor_responses,
    // _dist                    : optional, receive the k1 valid entries padded with zeros up to k
    // sort_buf                 : scratch space for at least k1 elements
    //
    // Returns the prediction for the query with global index 0 if it belongs to
    // this block, otherwise 0.
    float CvKNearest::write_results( int k, int k1, int start, int end,
        const float* neighbor_responses, const float* dist,
        CvMat* _results, CvMat* _neighbor_responses,
        CvMat* _dist, Cv32suf* sort_buf ) const
    {
        float result = 0.f;
        int i, j, j1, count = end - start;
        double inv_scale = 1./k1;
        // distance between consecutive result elements, in floats, when _results is not continuous
        int rstep = _results && !CV_IS_MAT_CONT(_results->type) ? _results->step/sizeof(result) : 1;
    
        for( i = 0; i < count; i++ )
        {
            const Cv32suf* nr = (const Cv32suf*)(neighbor_responses + i*k);
            float* dst;
            float r;
            // a prediction is only needed for the results matrix or for the return value
            if( _results || start+i == 0 )
            {
                if( regression )
                {
                    // regression: mean of the k1 neighbour responses
                    double s = 0;
                    for( j = 0; j < k1; j++ )
                        s += nr[j].f;
                    r = (float)(s*inv_scale);
                }
                else
                {
                    // classification: majority vote among the k1 neighbour responses
                    int prev_start = 0, best_count = 0, cur_count;
                    Cv32suf best_val;
    
                    // work on the bit patterns so that equal labels compare equal as integers
                    for( j = 0; j < k1; j++ )
                        sort_buf[j].i = nr[j].i;
    
                    // bubble sort with early exit; equal labels end up adjacent
                    for( j = k1-1; j > 0; j-- )
                    {
                        bool swap_fl = false;
                        for( j1 = 0; j1 < j; j1++ )
                            if( sort_buf[j1].i > sort_buf[j1+1].i )
                            {
                                int t;
                                CV_SWAP( sort_buf[j1].i, sort_buf[j1+1].i, t );
                                swap_fl = true;
                            }
                        if( !swap_fl )
                            break;
                    }
    
                    // find the longest run of equal values; on a tie the earlier run
                    // in sorted order wins because the comparison below is strict
                    best_val.i = 0;
                    for( j = 1; j <= k1; j++ )
                        if( j == k1 || sort_buf[j].i != sort_buf[j-1].i )
                        {
                            cur_count = j - prev_start;
                            if( best_count < cur_count )
                            {
                                best_count = cur_count;
                                best_val.i = sort_buf[j-1].i;
                            }
                            prev_start = j;
                        }
                    r = best_val.f;
                }
    
                if( start+i == 0 )
                    result = r;
    
                if( _results )
                    _results->data.fl[(start + i)*rstep] = r;
            }
    
            // copy the neighbour responses of this query, zero-padded to k columns
            if( _neighbor_responses )
            {
                dst = (float*)(_neighbor_responses->data.ptr +
                    (start + i)*_neighbor_responses->step);
                for( j = 0; j < k1; j++ )
                    dst[j] = nr[j].f;
                for( ; j < k; j++ )
                    dst[j] = 0.f;
            }
    
            // copy the neighbour distances of this query, zero-padded to k columns
            if( _dist )
            {
                dst = (float*)(_dist->data.ptr + (start + i)*_dist->step);
                for( j = 0; j < k1; j++ )
                    dst[j] = dist[j + i*k];
                for( ; j < k; j++ )
                    dst[j] = 0.f;
            }
        }
    
        return result;
    }
    
    struct P1 : cv::ParallelLoopBody {
      P1(const CvKNearest* _pointer, int _buf_sz, int _k, const CvMat* __samples, const float** __neighbors,
         int _k1, CvMat* __results, CvMat* __neighbor_responses, CvMat* __dist, float* _result)
      {
        pointer = _pointer;
        k = _k;
        _samples = __samples;
        _neighbors = __neighbors;
        k1 = _k1;
        _results = __results;
        _neighbor_responses = __neighbor_responses;
        _dist = __dist;
        result = _result;
        buf_sz = _buf_sz;
      }
    
      const CvKNearest* pointer;
      int k;
      const CvMat* _samples;
      const float** _neighbors;
      int k1;
      CvMat* _results;
      CvMat* _neighbor_responses;
      CvMat* _dist;
      float* result;
      int buf_sz;
    
      void operator()( const cv::Range& range ) const
      {
        cv::AutoBuffer<float> buf(buf_sz);
        for(int i = range.start; i < range.end; i += 1 )
        {
            float* neighbor_responses = &buf[0];
            float* dist = neighbor_responses + 1*k;
            Cv32suf* sort_buf = (Cv32suf*)(dist + 1*k);
    
            pointer->find_neighbors_direct( _samples, k, i, i + 1,
                        neighbor_responses, _neighbors, dist );
    
            float r = pointer->write_results( k, k1, i, i + 1, neighbor_responses, dist,
                                     _results, _neighbor_responses, _dist, sort_buf );
    
            if( i == 0 )
                *result = r;
        }
      }
    
    };
    // For every input vector (a row of _samples) finds the k nearest neighbours,
    // with k <= get_max_k().
    // For regression the prediction is the mean of the neighbours' responses; for
    // classification the class is decided by voting.
    //
    // Besides the predictions the method can optionally return pointers to the
    // neighbour vectors themselves
    // (neighbors, array of k*_samples->rows pointers),
    // their responses (neighbor_responses, a vector of k*_samples->rows elements),
    // and the distances from the input vectors to the neighbours
    // (dist, also a vector of k*_samples->rows elements).
    //
    // For every input vector the neighbours are sorted by their distance to it.
    //
    // All output matrices are optional; the prediction for the first input vector
    // is returned by the method, which is all that is needed for a single vector.
    float CvKNearest::find_nearest( const CvMat* _samples, int k, CvMat* _results,
        const float** _neighbors, CvMat* _neighbor_responses, CvMat* _dist ) const
    {
        float result = 0.f;
        const int max_blk_count = 128, max_buf_sz = 1 << 12;
    
        // the model must have been trained
        if( !samples )
            CV_Error( CV_StsError, "The search tree must be constructed first using train method" );
    
        // queries: 32-bit float matrix with var_count columns
        if( !CV_IS_MAT(_samples) ||
            CV_MAT_TYPE(_samples->type) != CV_32FC1 ||
            _samples->cols != var_count )
            CV_Error( CV_StsBadArg, "Input samples must be floating-point matrix (<num_samples>x<var_count>)" );
    
        // results: a row or column vector with one element per query
        if( _results && (!CV_IS_MAT(_results) ||
            (_results->cols != 1 && _results->rows != 1) ||
            _results->cols + _results->rows - 1 != _samples->rows) )
            CV_Error( CV_StsBadArg,
            "The results must be 1d vector containing as much elements as the number of samples" );
    
        // results: float, or integer when the model is a classifier
        if( _results && CV_MAT_TYPE(_results->type) != CV_32FC1 &&
            (CV_MAT_TYPE(_results->type) != CV_32SC1 || regression))
            CV_Error( CV_StsUnsupportedFormat,
            "The results must be floating-point or integer (in case of classification) vector" );
    
        if( k < 1 || k > max_k )
            CV_Error( CV_StsOutOfRange, "k must be within 1..max_k range" );
    
        if( _neighbor_responses )
        {
            if( !CV_IS_MAT(_neighbor_responses) || CV_MAT_TYPE(_neighbor_responses->type) != CV_32FC1 ||
                _neighbor_responses->rows != _samples->rows || _neighbor_responses->cols != k )
                CV_Error( CV_StsBadArg,
                "The neighbor responses (if present) must be floating-point matrix of <num_samples> x <k> size" );
        }
    
        if( _dist )
        {
            if( !CV_IS_MAT(_dist) || CV_MAT_TYPE(_dist->type) != CV_32FC1 ||
                _dist->rows != _samples->rows || _dist->cols != k )
                CV_Error( CV_StsBadArg,
                "The distances from the neighbors (if present) must be floating-point matrix of <num_samples> x <k> size" );
        }
    
        // Size of the per-invocation scratch buffer, in floats: k responses and
        // k distances per query of a block (count_scale), plus k elements for sorting.
        int count = _samples->rows;
        int count_scale = k*2;
        int blk_count0 = MIN( count, max_blk_count );
        int buf_sz = MIN( blk_count0 * count_scale, max_buf_sz );
        blk_count0 = MAX( buf_sz/count_scale, 1 );
        blk_count0 += blk_count0 % 2;
        blk_count0 = MIN( blk_count0, count );
        buf_sz = blk_count0 * count_scale + k;
        // no more neighbours can be returned than there are training samples
        int k1 = get_sample_count();
        k1 = MIN( k1, k );
    
        // process the query rows in parallel; P1 stores the prediction of row 0 in result
        cv::parallel_for_(cv::Range(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1,
                                                 _results, _neighbor_responses, _dist, &result)
        );
    
        return result;
    }
    
    /////////////////////////////////////////////////////
    //2、下面是采用opencv2.0的形式写的
    using namespace cv;
    // Training constructor (cv::Mat flavour).
    // _train_data: training set, rows are samples and columns are dimensions
    // _responses:  target value of every training sample
    CvKNearest::CvKNearest( const Mat& _train_data, const Mat& _responses,
                           const Mat& _sample_idx, bool _is_regression, int _max_k )
        : samples( 0 )   // nothing stored yet
    {
        // Delegates to the cv::Mat train() overload; a new model never updates an old one.
        const bool update_existing = false;
        train( _train_data, _responses, _sample_idx, _is_regression, _max_k, update_existing );
    }
    //训练模型
    bool CvKNearest::train( const Mat& _train_data, const Mat& _responses,
                            const Mat& _sample_idx, bool _is_regression,
                            int _max_k, bool _update_base )
    {
    //将训练数据、训练数据的目标值、提取出来。然后调用opencv1。0版本的训练函数来训练。
        CvMat tdata = _train_data, responses = _responses, sidx = _sample_idx;
    
        return train(&tdata, &responses, sidx.data.ptr ? &sidx : 0, _is_regression, _max_k, _update_base );
    }
    
    //寻找最近邻
    float CvKNearest::find_nearest( const Mat& _samples, int k, Mat* _results,
                                    const float** _neighbors, Mat* _neighbor_responses,
                                    Mat* _dist ) const
    {
        CvMat s = _samples, results, *presults = 0, nresponses, *pnresponses = 0, dist, *pdist = 0;
    
        if( _results )
        {
            if(!(_results->data && (_results->type() == CV_32F ||
                (_results->type() == CV_32S && regression)) &&
                 (_results->cols == 1 || _results->rows == 1) &&
                 _results->cols + _results->rows - 1 == _samples.rows) )
                _results->create(_samples.rows, 1, CV_32F);
            presults = &(results = *_results);
        }
    
        if( _neighbor_responses )
        {
            if(!(_neighbor_responses->data && _neighbor_responses->type() == CV_32F &&
                 _neighbor_responses->cols == k && _neighbor_responses->rows == _samples.rows) )
                _neighbor_responses->create(_samples.rows, k, CV_32F);
            pnresponses = &(nresponses = *_neighbor_responses);
        }
    
        if( _dist )
        {
            if(!(_dist->data && _dist->type() == CV_32F &&
                 _dist->cols == k && _dist->rows == _samples.rows) )
                _dist->create(_samples.rows, k, CV_32F);
            pdist = &(dist = *_dist);
        }
    //调用另一个寻找近邻的函数完成剩下的工作(即之前opencv1.0版本的那个函数)
        return find_nearest(&s, k, presults, _neighbors, pnresponses, pdist );
    }
    
    // Nearest-neighbour search (cv::Mat references flavour): all three outputs are
    // requested, pointers to the neighbour vectors are not.
    float CvKNearest::find_nearest( const cv::Mat& _samples, int k, CV_OUT cv::Mat& results,
                                    CV_OUT cv::Mat& neighborResponses, CV_OUT cv::Mat& dists) const
    {
        const float** no_neighbor_ptrs = 0;
        cv::Mat* const results_out = &results;
        cv::Mat* const responses_out = &neighborResponses;
        cv::Mat* const dists_out = &dists;

        // Forward to the pointer-based cv::Mat overload.
        return find_nearest( _samples, k, results_out, no_neighbor_ptrs, responses_out, dists_out );
    }
    
    /* End of file */
    

    例子:

    #include "ml.h"
    #include "highgui.h"
    int main( int argc, char** argv ){
      const int K = 10;
      int i, j, k, accuracy;
      float response;
      int train_sample_count = 100;//100个训练样本
      CvRNG rng_state = cvRNG(-1);//随机数状态
      //建立训练集中的数据和类别标签
      CvMat* trainData = cvCreateMat( train_sample_count,
                                      2, CV_32FC1 );
      CvMat* trainClasses = cvCreateMat( train_sample_count,
                                         1, CV_32FC1 );
        //创建一张画布用来呈现结果                                 
      IplImage* img = cvCreateImage( cvSize( 500, 500 ), 8, 3 );
    
      float _sample[2];
    
      CvMat sample = cvMat( 1, 2, CV_32FC1, _sample );
    
      cvZero( img );//对画布进行清零操作
    
      CvMat trainData1, trainData2, trainClasses1, trainClasses2;
    
    // form the training samples
    //将trainData中上部分提取出来,然后填充随机数
      cvGetRows( trainData, &trainData1, 0,   train_sample_count/2 );  
      cvRandArr( &rng_state, &trainData1, CV_RAND_NORMAL,    cvScalar(200,200), cvScalar(50,50) );
    
      //将trainData下部分提取出来,填充随机数
      cvGetRows(trainData, &trainData2, train_sample_count / 2, train_sample_count);
      cvRandArr( &rng_state, &trainData2, CV_RAND_NORMAL,   cvScalar(300,300), cvScalar(50,50) );
    
    //对于trainClasses的上半部分置为1,表示前50个都是第1类
      cvGetRows( trainClasses, &trainClasses1, 0, train_sample_count/2 );
    cvSet( &trainClasses1, cvScalar(1) );
    
    //对于trainClasses的下半部分置为2,表示后50个都是第2类
      cvGetRows( trainClasses, &trainClasses2, train_sample_count/2, train_sample_count ); 
      cvSet( &trainClasses2, cvScalar(2) );
    
    //训练knn用作分类
      CvKNearest knn( trainData, trainClasses, 0, false, K );
    //建立一个K矩阵,用来存储K个最近邻
       CvMat* nearests = cvCreateMat( 1, K, CV_32FC1);
     //将画布上每个点都进行分类 
      for( i = 0; i < img->height; i++ ){
        for( j = 0; j < img->width; j++ ){
          sample.data.fl[0] = (float)j;
          sample.data.fl[1] = (float)i;
    // estimate the response and get the neighbors’ labels
    //该函数返回该点的类别
          response = knn.find_nearest(&sample,K,0,0,nearests,0);
    // compute the number of neighbors representing the majority
    //计算这K个近邻中有多少是支持第response类的。即准确度
          for( k = 0, accuracy = 0; k < K; k++ ){
             if( nearests->data.fl[k] == response)
               accuracy++;
          }
    // highlight the pixel depending on the accuracy (or confidence)
    //按照不同的类别赋值不同的颜色,并根据准确度的多少赋值颜色的混合区域,即
    //最后的结果的图中中间层就是有争议的部分。
      cvSet2D( img, i, j, response == 1 ?
    (accuracy > 5 ? CV_RGB(180,0,0) : CV_RGB(180,120,0)) :
    (accuracy > 5 ? CV_RGB(0,180,0) : CV_RGB(120,120,0)) );
        }
      }
    // display the original training samples
    //显示原始的100训练样本,用小点标记出来,以区分画布中的其他点
      for( i = 0; i < train_sample_count/2; i++ ){
        CvPoint pt;
        pt.x = cvRound(trainData1.data.fl[i*2]);
        pt.y = cvRound(trainData1.data.fl[i*2+1]);
        cvCircle( img, pt, 2, CV_RGB(255,0,0), CV_FILLED );
        pt.x = cvRound(trainData2.data.fl[i*2]);
        pt.y = cvRound(trainData2.data.fl[i*2+1]);
        cvCircle( img, pt, 2, CV_RGB(0,255,0), CV_FILLED );
      }
      //将结果呈现出来
      cvNamedWindow( "classifier result", 1 );
      cvShowImage( "classifier result", img );
      cvWaitKey(0);
      cvReleaseMat( &trainClasses );
      cvReleaseMat( &trainData );
      return 0;
    }
    

    上面例子的结果:

    对应的使用了opencv2.0的形式写了与上面类似的代码:

    #include "opencv2/ml/ml.hpp"
    #include "opencv2/highgui/highgui.hpp"
    #include<iostream>
    using namespace std;
    using namespace cv;
    int main(int argc, char** argv){
        const int K = 10;
        int  accuracy=0;
        float response;
        int train_sample_count = 100;
        RNG rng;
        Mat trainData = Mat::zeros(100,2,CV_32FC1);
        Mat trainClasses = Mat::zeros(100, 1, CV_32FC1);
    
        Mat Image(Size2i(500, 500), CV_8UC3);
        Image.setTo(0);
        Mat sample(1, 2, CV_32FC1);
    
        Mat trainData1 = trainData(Range(0, train_sample_count / 2), Range::all());
        Mat trainData2 = trainData(Range(train_sample_count / 2, train_sample_count), Range::all());
        rng.fill(trainData1, RNG::UNIFORM, Scalar(50, 50), Scalar(200, 200));
        rng.fill(trainData2, RNG::UNIFORM, Scalar(200, 200), Scalar(300, 300));
    
        Mat trainClasses1 = trainClasses(Range(0, train_sample_count / 2),Range::all());
        Mat trainClasses2 = trainClasses(Range(train_sample_count / 2, train_sample_count), Range::all());
        trainClasses1.setTo(1);
        trainClasses2.setTo(2);
    
        KNearest Knn(trainData, trainClasses, Mat(), false, K);
        Mat nearests(1, K, CV_32FC1);
    
        Mat_<Vec3b> _I = Image;
        for (size_t i = 0; i < Image.rows; ++i){
            for (size_t j = 0; j < Image.cols; ++j) {
                sample.at<float>(0,0) = static_cast<float>(i);
                sample.at<float>(0,1) = static_cast<float>(j);
    
                response = Knn.find_nearest(sample, K);
    
                for (int k = 0, accuracy = 0; k < K; k++){
                        if (nearests.data[k] == response)
                                    accuracy++;
                }
    
                response == 1 ?
                    (accuracy > 5 ? (_I(i, j)[0] = 180,  _I(i, j)[1] = 0,    _I(i, j)[2]=0):
                                    (_I(i, j)[0] = 180,  _I(i, j)[1] = 120,  _I(i, j)[2] = 0 )) :
                    (accuracy > 5 ?  (_I(i, j)[0] = 0,   _I(i, j)[1] = 180,  _I(i, j)[2] = 0 ):
                                     (_I(i, j)[0] = 120, _I(i, j)[1] = 120,  _I(i, j)[2] = 0));
            }
        }
        Image = _I;
    
    
        for (int i = 0; i < train_sample_count / 2; i++){
            Point pt;
    
            pt.x = static_cast<int>(trainData1.at<float>(i, 0));
            pt.y = static_cast<int>(trainData1.at<float>(i, 1));
            circle(Image, pt, 1, Scalar(0, 0, 255),-1,8);
    
            pt.x = static_cast<int>(trainData2.at<float>(i, 0));
            pt.y = static_cast<int>(trainData2.at<float>(i, 1));
            circle(Image, pt, 1, Scalar(255, 0, 0), -1, 8);
        }
    
        namedWindow("result");
        imshow("result", Image);
        waitKey(0);
        cin.get();
        return 0;
    }
    

    生成的结果为:
    这里写图片描述

    上面的源码部分还未完全注释,待后续接着注释。

    2015年09月27日,第0次修改!

  • 相关阅读:
    Java中IO流的总结
    Java常用集合体系以及相互区别
    TreeMap集合特点、排序原理
    HashMap集合
    TreeSet集合
    redis 数据类型详解 以及 redis适用场景场合
    You need tcl 8.5 or newer in order to run the Redis test
    PHP 获取二维数组中某个key的集合
    Linux 定时任务
    phpmailer邮件类
  • 原文地址:https://www.cnblogs.com/shouhuxianjian/p/7375457.html
Copyright © 2011-2022 走看看