zoukankan      html  css  js  c++  java
  • Correlation Filter in Visual Tracking

         涉及两篇论文:Visual Object Tracking using Adaptive Correlation Filters 和Fast Visual Tracking via Dense Spatio-Temporal Context Learning

         可参考这位博主笔记:http://www.cnblogs.com/hanhuili/p/4266990.html

         第一篇我说下自己的理解:训练时的输出都认为是高斯形状,因为这种形状符合PSR。

                                  训练得到模板后开始跟踪,由输出继续按照新的规则更新模板,进行跟踪。

          第二篇主要用到了上下文的信息,通过背景信息来确定目标的位置。可参考这篇博文:http://blog.csdn.net/zouxy09/article/details/16889905,博主还将其用C++实现了,很有启发性。

            STCTracker.h

    // Fast object tracking algorithm  
    // Author : zouxy  
    // Date   : 2013-11-21  
    // HomePage : http://blog.csdn.net/zouxy09  
    // Email  : zouxy09@qq.com  
    // Reference: Kaihua Zhang, et al. Fast Tracking via Spatio-Temporal Context Learning  
    // HomePage : http://www4.comp.polyu.edu.hk/~cskhzhang/  
    // Email: zhkhua@gmail.com   
    #pragma once  
      
    #include <opencv2/opencv.hpp>  
      
    using namespace cv;  
    using namespace std;  
      
    /// Spatio-temporal context (STC) object tracker.
    ///
    /// Typical use: call init() once with the first frame and the target box,
    /// then call tracking() for each following frame.
    class STCTracker
    {
    public:
        STCTracker();
        ~STCTracker();

        /// Sets the parameters, allocates the models for `box` and learns the
        /// first model from `frame`.
        void init(const Mat frame, const Rect box);

        /// Estimates the target position in `frame`, moves `trackBox` onto it
        /// and updates the model with the new frame.
        void tracking(const Mat frame, Rect &trackBox);

    private:
        /// Fills hammingWin (already allocated) with a 2-D Hamming window.
        void createHammingWin();

        /// Element-wise complex product (flag == 0) or quotient (flag != 0)
        /// of two 2-channel matrices, written to dst.
        void complexOperation(const Mat src1, const Mat src2, Mat &dst, int flag = 0);

        /// Recomputes cxtPriorPro and cxtPosteriorPro for the current center
        /// and context region; `image` is the preprocessed context patch.
        void getCxtPriorPosteriorModel(const Mat image);

        /// Learns STModel from `image` and blends it into STCModel.
        void learnSTCModel(const Mat image);

        // ---- scalar parameters (assigned in init) ----
        double sigma;           // scale of the Gaussian weight used for the context prior
        double alpha;           // scale parameter of the confidence function
        double beta;            // shape parameter of the confidence function
        double rho;             // learning rate used when blending STModel into STCModel

        // ---- tracking state ----
        Point center;           // current object position (frame coordinates)
        Rect cxtRegion;         // context region around the object (frame coordinates)

        // ---- models, all sized like cxtRegion ----
        Mat cxtPriorPro;        // context prior probability
        Mat cxtPosteriorPro;    // posterior probability (confidence map around center)
        Mat STModel;            // spatial context model learned from the latest frame
        Mat STCModel;           // spatio-temporal context model accumulated over frames
        Mat hammingWin;         // Hamming window applied to the context patch
    };
    View Code

           STCTracker.cpp

    // Fast object tracking algorithm  
    // Author : zouxy  
    // Date   : 2013-11-21  
    // HomePage : http://blog.csdn.net/zouxy09  
    // Email  : zouxy09@qq.com  
    // Reference: Kaihua Zhang, et al. Fast Tracking via Spatio-Temporal Context Learning  
    // HomePage : http://www4.comp.polyu.edu.hk/~cskhzhang/  
    // Email: zhkhua@gmail.com   
      
    #include "STCTracker.h"  
      
    // Constructs an idle tracker. The scalar parameters are zeroed so that the
    // object never holds indeterminate values; the real values are assigned by
    // init(), which has to be called before tracking().
    STCTracker::STCTracker()
        : sigma(0.0), alpha(0.0), beta(0.0), rho(0.0)
    {
    }
      
    // The body is intentionally empty: every member is destroyed by its own destructor.
    STCTracker::~STCTracker() {}
      
    /************ Create a Hamming window ********************/  
    void STCTracker::createHammingWin()  
    {  
        for (int i = 0; i < hammingWin.rows; i++)  
        {  
            for (int j = 0; j < hammingWin.cols; j++)  
            {  
                hammingWin.at<double>(i, j) = (0.54 - 0.46 * cos( 2 * CV_PI * i / hammingWin.rows ))   
                                            * (0.54 - 0.46 * cos( 2 * CV_PI * j / hammingWin.cols ));  
            }  
        }  
    }  
      
    /************ Define two complex-value operation *****************/  
    void STCTracker::complexOperation(const Mat src1, const Mat src2, Mat &dst, int flag)  
    {  
        CV_Assert(src1.size == src2.size);  
        CV_Assert(src1.channels() == 2);  
      
        Mat A_Real, A_Imag, B_Real, B_Imag, R_Real, R_Imag;  
        vector<Mat> planes;  
        split(src1, planes);  
        planes[0].copyTo(A_Real);  
        planes[1].copyTo(A_Imag);  
          
        split(src2, planes);  
        planes[0].copyTo(B_Real);  
        planes[1].copyTo(B_Imag);  
          
        dst.create(src1.rows, src1.cols, CV_64FC2);  
        split(dst, planes);  
        R_Real = planes[0];  
        R_Imag = planes[1];  
          
        for (int i = 0; i < A_Real.rows; i++)  
        {  
            for (int j = 0; j < A_Real.cols; j++)  
            {  
                double a = A_Real.at<double>(i, j);  
                double b = A_Imag.at<double>(i, j);  
                double c = B_Real.at<double>(i, j);  
                double d = B_Imag.at<double>(i, j);  
      
                if (flag)  
                {  
                    // division: (a+bj) / (c+dj)  
                    R_Real.at<double>(i, j) = (a * c + b * d) / (c * c + d * d + 0.000001);  
                    R_Imag.at<double>(i, j) = (b * c - a * d) / (c * c + d * d + 0.000001);  
                }  
                else  
                {  
                    // multiplication: (a+bj) * (c+dj)  
                    R_Real.at<double>(i, j) = a * c - b * d;  
                    R_Imag.at<double>(i, j) = b * c + a * d;  
                }  
            }  
        }  
        merge(planes, dst);  
    }  
      
    /************ Get context prior and posterior probability ***********/  
    void STCTracker::getCxtPriorPosteriorModel(const Mat image)  
    {  
        CV_Assert(image.size == cxtPriorPro.size);  
      
        double sum_prior(0), sum_post(0);  
        for (int i = 0; i < cxtRegion.height; i++)  
        {  
            for (int j = 0; j < cxtRegion.width; j++)  
            {  
                double x = j + cxtRegion.x;  
                double y = i + cxtRegion.y;  
                double dist = sqrt((center.x - x) * (center.x - x) + (center.y - y) * (center.y - y));  
      
                // equation (5) in the paper  
                cxtPriorPro.at<double>(i, j) = exp(- dist * dist / (2 * sigma * sigma));  
                sum_prior += cxtPriorPro.at<double>(i, j);  
      
                // equation (6) in the paper  
                cxtPosteriorPro.at<double>(i, j) = exp(- pow(dist / sqrt(alpha), beta));  
                sum_post += cxtPosteriorPro.at<double>(i, j);  
            }  
        }  
        cxtPriorPro.convertTo(cxtPriorPro, -1, 1.0/sum_prior);  
        cxtPriorPro = cxtPriorPro.mul(image);  
        cxtPosteriorPro.convertTo(cxtPosteriorPro, -1, 1.0/sum_post);  
    }  
      
    /************ Learn Spatio-Temporal Context Model ***********/  
    void STCTracker::learnSTCModel(const Mat image)  
    {  
        // step 1: Get context prior and posterior probability  
        getCxtPriorPosteriorModel(image);  
          
        // step 2-1: Execute 2D DFT for prior probability  
        Mat priorFourier;  
        Mat planes1[] = {cxtPriorPro, Mat::zeros(cxtPriorPro.size(), CV_64F)};  
        merge(planes1, 2, priorFourier);  
        dft(priorFourier, priorFourier);  
      
        // step 2-2: Execute 2D DFT for posterior probability  
        Mat postFourier;  
        Mat planes2[] = {cxtPosteriorPro, Mat::zeros(cxtPosteriorPro.size(), CV_64F)};  
        merge(planes2, 2, postFourier);  
        dft(postFourier, postFourier);  
      
        // step 3: Calculate the division  
        Mat conditionalFourier;  
        complexOperation(postFourier, priorFourier, conditionalFourier, 1);  
      
        // step 4: Execute 2D inverse DFT for conditional probability and we obtain STModel  
        dft(conditionalFourier, STModel, DFT_INVERSE | DFT_REAL_OUTPUT | DFT_SCALE);  
      
        // step 5: Use the learned spatial context model to update spatio-temporal context model  
        addWeighted(STCModel, 1.0 - rho, STModel, rho, 0.0, STCModel);  
    }  
      
    /************ Initialize the hyper parameters and models ***********/  
    void STCTracker::init(const Mat frame, const Rect box)  
    {  
        // initial some parameters  
        alpha = 2.25;  
        beta = 1;  
        rho = 0.075;  
        sigma = 0.5 * (box.width + box.height);  
      
        // the object position  
        center.x = box.x + 0.5 * box.width;  
        center.y = box.y + 0.5 * box.height;  
      
        // the context region  
        cxtRegion.width = 2 * box.width;  
        cxtRegion.height = 2 * box.height;  
        cxtRegion.x = center.x - cxtRegion.width * 0.5;  
        cxtRegion.y = center.y - cxtRegion.height * 0.5;  
        cxtRegion &= Rect(0, 0, frame.cols, frame.rows);  
      
        // the prior, posterior and conditional probability and spatio-temporal context model  
        cxtPriorPro = Mat::zeros(cxtRegion.height, cxtRegion.width, CV_64FC1);  
        cxtPosteriorPro = Mat::zeros(cxtRegion.height, cxtRegion.width, CV_64FC1);  
        STModel = Mat::zeros(cxtRegion.height, cxtRegion.width, CV_64FC1);  
        STCModel = Mat::zeros(cxtRegion.height, cxtRegion.width, CV_64FC1);  
      
        // create a Hamming window  
        hammingWin = Mat::zeros(cxtRegion.height, cxtRegion.width, CV_64FC1);  
        createHammingWin();  
      
        Mat gray;  
        cvtColor(frame, gray, CV_RGB2GRAY);  
      
        // normalized by subtracting the average intensity of that region  
        Scalar average = mean(gray(cxtRegion));  
        Mat context;  
        gray(cxtRegion).convertTo(context, CV_64FC1, 1.0, - average[0]);  
      
        // multiplies a Hamming window to reduce the frequency effect of image boundary  
        context = context.mul(hammingWin);  
      
        // learn Spatio-Temporal context model from first frame  
        learnSTCModel(context);  
    }  
      
    /******** STCTracker: calculate the confidence map and find the max position *******/  
    void STCTracker::tracking(const Mat frame, Rect &trackBox)  
    {  
        Mat gray;  
        cvtColor(frame, gray, CV_RGB2GRAY);  
      
        // normalized by subtracting the average intensity of that region  
        Scalar average = mean(gray(cxtRegion));  
        Mat context;  
        gray(cxtRegion).convertTo(context, CV_64FC1, 1.0, - average[0]);  
      
        // multiplies a Hamming window to reduce the frequency effect of image boundary  
        context = context.mul(hammingWin);  
      
        // step 1: Get context prior probability  
        getCxtPriorPosteriorModel(context);  
      
        // step 2-1: Execute 2D DFT for prior probability  
        Mat priorFourier;  
        Mat planes1[] = {cxtPriorPro, Mat::zeros(cxtPriorPro.size(), CV_64F)};  
        merge(planes1, 2, priorFourier);  
        dft(priorFourier, priorFourier);  
      
        // step 2-2: Execute 2D DFT for conditional probability  
        Mat STCModelFourier;  
        Mat planes2[] = {STCModel, Mat::zeros(STCModel.size(), CV_64F)};  
        merge(planes2, 2, STCModelFourier);  
        dft(STCModelFourier, STCModelFourier);  
      
        // step 3: Calculate the multiplication  
        Mat postFourier;  
        complexOperation(STCModelFourier, priorFourier, postFourier, 0);  
      
        // step 4: Execute 2D inverse DFT for posterior probability namely confidence map  
        Mat confidenceMap;  
        dft(postFourier, confidenceMap, DFT_INVERSE | DFT_REAL_OUTPUT| DFT_SCALE);  
      
        // step 5: Find the max position  
        Point point;  
        minMaxLoc(confidenceMap, 0, 0, 0, &point);  
      
        // step 6-1: update center, trackBox and context region  
        center.x = cxtRegion.x + point.x;  
        center.y = cxtRegion.y + point.y;  
        trackBox.x = center.x - 0.5 * trackBox.width;  
        trackBox.y = center.y - 0.5 * trackBox.height;  
        trackBox &= Rect(0, 0, frame.cols, frame.rows);  
      
        cxtRegion.x = center.x - cxtRegion.width * 0.5;  
        cxtRegion.y = center.y - cxtRegion.height * 0.5;  
        cxtRegion &= Rect(0, 0, frame.cols, frame.rows);  
      
        // step 7: learn Spatio-Temporal context model from this frame for tracking next frame  
        average = mean(gray(cxtRegion));  
        gray(cxtRegion).convertTo(context, CV_64FC1, 1.0, - average[0]);  
        context = context.mul(hammingWin);  
        learnSTCModel(context);  
    }  
    View Code

         runTracker.cpp

        

    // Fast object tracking algorithm  
    // Author : zouxy  
    // Date   : 2013-11-21  
    // HomePage : http://blog.csdn.net/zouxy09  
    // Email  : zouxy09@qq.com  
    // Reference: Kaihua Zhang, et al. Fast Tracking via Spatio-Temporal Context Learning  
    // HomePage : http://www4.comp.polyu.edu.hk/~cskhzhang/  
    // Email: zhkhua@gmail.com   
      
    #include "STCTracker.h"  
      
    // Global variables shared between the mouse callback and main()
    Rect box;                    // selection rectangle written by mouseHandler, then handed to the tracker
    bool drawing_box = false;    // true between left-button press and release (see mouseHandler)
    bool gotBB = false;          // set by mouseHandler to signal that a selection was made; main() polls it
      
    // bounding box mouse callback
    //
    // Lets the user drag a rectangle with the left mouse button:
    //   press   - starts a new, empty box at the cursor;
    //   move    - while dragging, stretches the box to the cursor;
    //   release - normalises a box dragged up/left so width and height are
    //             positive and, if the box has a non-zero area, sets gotBB.
    // A plain click or a perfectly horizontal/vertical drag produces a box
    // without area; it is ignored so the tracker is never started on an empty box.
    // `flags` and `param` are not used.
    void mouseHandler(int event, int x, int y, int flags, void *param){
      (void)flags;
      (void)param;

      switch( event ){
      case CV_EVENT_MOUSEMOVE:
        if (drawing_box){
            box.width = x-box.x;
            box.height = y-box.y;
        }
        break;
      case CV_EVENT_LBUTTONDOWN:
        drawing_box = true;
        box = Rect( x, y, 0, 0 );
        break;
      case CV_EVENT_LBUTTONUP:
        drawing_box = false;
        if( box.width < 0 ){
            box.x += box.width;
            box.width *= -1;
        }
        if( box.height < 0 ){
            box.y += box.height;
            box.height *= -1;
        }
        // only accept a selection that actually encloses some pixels
        if( box.width > 0 && box.height > 0 ){
            gotBB = true;
        }
        break;
      }
    }
      
    // Demo: opens "handwave.wmv", lets the user draw the target box on the first
    // frame, then tracks the target frame by frame and displays the result.
    //
    // Returns 0 when the user presses ESC while tracking, 1 when ESC is pressed
    // during the selection, and -1 when the video cannot be opened or no
    // (further) frame can be read.
    int main(int argc, char * argv[])
    {
        VideoCapture capture;
        capture.open("handwave.wmv");
        const bool fromfile = true;   // false: keep grabbing new frames while waiting for the selection

        if (!capture.isOpened())
        {
            cout << "capture device failed to open!" << endl;
            return -1;
        }
        //Register mouse callback to draw the bounding box
        cvNamedWindow("Tracker", CV_WINDOW_AUTOSIZE);
        cvSetMouseCallback("Tracker", mouseHandler, NULL );

        Mat frame;
        capture >> frame;
        while(!gotBB)
        {
            if (!fromfile)
                capture >> frame;

            // an opened source can still deliver no image; an empty image
            // must not be passed to imshow()
            if (frame.empty())
            {
                cout << "failed to read a frame!" << endl;
                return -1;
            }

            imshow("Tracker", frame);
            if (cvWaitKey(20) == 27)
                return 1;
        }
        //Remove callback
        cvSetMouseCallback("Tracker", NULL, NULL );

        STCTracker stcTracker;
        stcTracker.init(frame, box);

        int frameCount = 0;
        while (1)
        {
            capture >> frame;
            if (frame.empty())
                return -1;
            double t = (double)cvGetTickCount();
            frameCount++;

            // tracking
            stcTracker.tracking(frame, box);

            // show the result: frame number in the corner, box around the target
            stringstream buf;
            buf << frameCount;
            string num = buf.str();
            putText(frame, num, Point(20, 30), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 0, 255), 3);
            rectangle(frame, box, Scalar(0, 0, 255), 3);
            imshow("Tracker", frame);

            // elapsed ticks divided by (ticks per microsecond * 1000)
            t = (double)cvGetTickCount() - t;
            cout << "cost time: " << t / ((double)cvGetTickFrequency()*1000.) << endl;

            if ( cvWaitKey(1) == 27 )
                break;
        }

        return 0;
    }
    View Code

                  这篇论文的源码作者已经给出,非常简洁,有空再研究。

                   文中还将生成模型和判别模型进行了对比。生成模型一般就是学习一个外表模型来表示目标,然后寻找最匹配的图像区域。

                   判别模型把跟踪作为一个分类问题,评估目标和背景的决策边界。

     

      为了便于理解,我把流程图画了下来,visio用的还不熟,不知道拐弯的箭头咋画,所以那个循环没画出来

      

       

      

     


      

      

      

  • 相关阅读:
    iOS 日期格式
    时间复杂度、空间复杂度
    转载 -- 算法题
    奇奇怪怪的Bug
    iOS12中推送通知新特性
    iOS开发网络篇—Socket编程
    iOS:菜单控制器和菜单项:UIMenuController和UIMenuItem
    关于textField输入光标颜色及cleanButton大小和颜色的设置
    腾讯云开发微信小程序使用体验
    简单理解JavaScript原型链
  • 原文地址:https://www.cnblogs.com/573177885qq/p/4544168.html
Copyright © 2011-2022 走看看