zoukankan      html  css  js  c++  java
  • Hog行人检测







    HOG特征向量归一化

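    对block块内的HOG特征向量进行归一化。对block块内特征向量的归一化主要是为了使特征向量空间对光照、阴影和边缘变化具有鲁棒性。归一化是针对每一个block单独进行的。设 $v$ 为某个block未归一化的特征向量,$\varepsilon$ 为一个很小的常数,一般采用的归一化函数有以下四种:
    (1) L2-norm:$v \leftarrow v / \sqrt{\|v\|_2^2 + \varepsilon^2}$;
    (2) L2-Hys:先做L2-norm,再把 $v$ 的各分量截断到不超过0.2,然后重新做一次L2-norm;
    (3) L1-norm:$v \leftarrow v / (\|v\|_1 + \varepsilon)$;
    (4) L1-sqrt:$v \leftarrow \sqrt{v / (\|v\|_1 + \varepsilon)}$。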


    在人体检测系统中进行HOG计算时一般使用L2-norm,Dalal的文章也验证了对于人体检测系统使用L2-norm的时候效果最好。

    关于计算直方图时用到的三线性插值














    1.第一步的作用在于将图像规范化,通过两个方面GAMMA和COLOUR, GAMMA方面的话,其规范化后图像中的参量可以被直接提取出来,方便后面的操作,颜色的规范化则是去除图像中光强值同时保留颜色值,例如去除阴影或者光强变化的像素。
    在低FPPW下,对各颜色通道做平方根GAMMA压缩能够提高检测表现,而LOG(对数)压缩过强,反而起到了反作用。
    2.梯度的计算方式直接影响识别的表现,不同的梯度计算方法在FPPW上的表现不同。总体而言,较为简单的梯度算子能够获得更好的效果。此外,对于彩色图像,需要对每一个颜色通道独立计算梯度,并取其中范数最大的一个作为该像素的梯度向量。求导不仅能够捕捉人物轮廓信息,也能进一步削弱光强差异的影响。
    3.这个模块的主要目的在于让每个像素按其梯度方向进行加权投票,并在局部空间区域(CELL)内累计投票,投票的权重反映该像素梯度幅值的大小。
    4.针对图像中前景和背景之间光照、对比度的不同,运用归一化使得信息得以统一:把局部空间单元(cell)组合成块(block),并对块内的特征向量进行归一化(最关键部分)。块与块之间是相互重叠的,这样每个单元的信息会出现在多个块的向量中,使得最终的特征向量能够反映更多的图像信息。这可以大幅度提高图像识别能力。
    5.64×128的检测窗口在人物四周各留有约16个像素的边距,这些上下文信息大大降低了识别的错误率;而这一步就是把检测窗口内所有block的HOG特征收集起来,整理成最终的特征向量。
    6.将之前整理的向量送入SVM进行分类,来判断其是否是人物。





    Hog.h

    #pragma once
    #include<vector>
    #include<map>
    // Approximation of pi, rounded to four decimals.
    #define PI 3.1416
    // 8-bit unsigned sample type used for the image buffers.
    typedef unsigned char BYTE;
    // Element type of the gradient buffer (Hog::grad).
    #define GradType BYTE
    // Element type of the orientation buffer (Hog::theta).
    #define MagType BYTE
    // Element type of the per-block HOG feature vectors.
    #define FeaType double
    // Cache of per-block feature buffers keyed by the block's (y, x) offset.
    //
    // The cache owns every buffer stored in it and releases it with delete[],
    // so only pointers obtained from new double[...] may be added.
    // NOTE: the class holds owning raw pointers but defines no copy operations;
    // copying a non-empty instance would make both copies delete the same
    // buffers.
    class BlockManager
    {
    private:
    	typedef std::pair<int, int> Key;
    	typedef std::map<Key, double*> Cache;

    	Cache cache;
    	int level;//value recorded by SetLevel(); not read anywhere in this class

    public:
    	BlockManager() : level(0) {}

    	// Returns true when a buffer is cached for offset (y, x).
    	bool find(int y, int x) const
    	{
    		return cache.find(Key(y, x)) != cache.end();
    	}
    	// Returns the cached buffer for (y, x), or NULL when there is none.
    	// A miss no longer inserts an empty entry into the cache.
    	double*GetBlockData(int y, int x)
    	{
    		Cache::const_iterator it = cache.find(Key(y, x));
    		if (it == cache.end())
    			return NULL;
    		return it->second;
    	}
    	// Stores data under (y, x) and takes ownership of it. When an entry for
    	// (y, x) already exists it is kept and data is NOT stored; in that case
    	// the caller remains responsible for data.
    	void AddBlock(int y, int x, double*data)
    	{
    		cache.insert(Cache::value_type(Key(y, x), data));
    	}
    	// Records the pyramid level the cached blocks belong to.
    	void SetLevel(const int lev)
    	{
    		level = lev;
    	}
    	// Frees and removes the buffer cached for (y, x); no-op when absent.
    	void deleteBlock(int y, int x)
    	{
    		Cache::iterator it = cache.find(Key(y, x));
    		if (it == cache.end())
    			return;
    		delete[]it->second;
    		cache.erase(it);
    	}
    	// Frees every cached buffer and empties the cache.
    	void deleteAllBlocks()
    	{
    		// Release all buffers first, then drop the entries in one go; erasing
    		// while restarting the iteration used to skip (and leak) an entry.
    		for (Cache::iterator it = cache.begin(); it != cache.end(); ++it)
    			delete[]it->second;
    		cache.clear();
    	}
    	~BlockManager()
    	{
    		deleteAllBlocks();
    	}
    };
    class Hog
    {
    public:
    	int img_width;//待检测图片的宽度
    	int img_height;//待检测图片的高度
    	int window_width;//检测窗口的宽度,64
    	int window_height;//检测窗口的高度,128
    	int CellSize;//cell的大小,设为8
    	int blkcell;//block尺寸是cell的几倍,2*2
    	int blocksize;//block的大小
    	int blockSkipStep;//Block在检测窗口中上下移动尺寸为8,与blocksize=16相比,
    	//即overlap=1/2,blockSkipStep减小到4使得overlap增加到3/4后,可使精度增加,但计算量增大
    	int windowSkipStep;//滑动窗口在检测图片中滑动的尺寸为8
    	int m_histBin;//180度分几个区间,设为9,即1个cell的梯度直方图化成9个bin
    	int win_fea_dim;
    	int xblkSkipStepNum;
    	int yblkSkipStepNum;
    private:
    
    	int max_pyramid_height;//图像金字塔高度
    	int current_pyramid_height;//当前图像金字塔高度
    	double ratio;//缩放比例
    	bool isGaussianWeight;//是否使用高斯权重
    	BYTE*RGBdata;
    	BYTE*greydata;
    	GradType*grad;//梯度矩阵
    	MagType*theta;//角度矩阵
    	std::vector < FeaType* > windowHOGFeature;
    	BlockManager blockmanager;
    
    	bool GetImgData();
    
    	void Gamma();
    	void RGB2Grey();
    	double* GetBlkFeature(int offsetX, int offsetY);
    	double GaussianKernel(int x, int y, int cent_x, int cent_y, int Hx, int Hy);
    	void NextPyramid();
    public:
    	GradType*get_grad(){ return grad; }
    	MagType*get_mag(){ return theta; }
    	void ComputeGradient();
    	Hog(const int winW, const int winH, const int CellSize,
    		const int blkcell, const int blockSkipStep, const int windowSkipStep,
    		const int m_histBin, const double rat) :ratio(rat),
    		window_width(winW), window_height(winH),
    		CellSize(CellSize), blkcell(blkcell), blockSkipStep(blockSkipStep),
    		windowSkipStep(windowSkipStep), m_histBin(m_histBin)
    	{
    		blocksize = CellSize*blkcell;
    		//RGBdata = new BYTE[imgw*imgh * 3]; 
    		max_pyramid_height = 0;
    		current_pyramid_height = 1;
    		int ww = img_width = 64;
    		int hh = img_height = 128;
    		while (ww >= window_width&&hh >= window_height)
    		{
    			ww = ww / 2;
    			hh = hh / 2;
    			max_pyramid_height++;
    		}
    		xblkSkipStepNum = floor((window_width - blkcell * CellSize) / blockSkipStep + 1);
    		yblkSkipStepNum = floor((window_height - blkcell * CellSize) / blockSkipStep + 1);
    		win_fea_dim = xblkSkipStepNum*yblkSkipStepNum*blkcell*blkcell*m_histBin;
    		_ASSERTE(max_pyramid_height >= 1);
    	};
    	int getwindow_width(){ return window_width; };
    	int getwindow_height(){ return window_height; };
    	void GetWindowFeature(const int offsetY_againstImg, const int offsetX_againstImg);
    	void L2Normalize(double*vec, int length);
    	void set_img_size(const int h, const int w){ img_height = h; img_width = w; }
    	void SingleScaleDetect();
    	void MultiScaleDetect();
    	void setgreyData(BYTE*src){ this->greydata = src; }
    	GradType*get_grad_data(){ return grad; }
    	void writeHogFea2File();
    	std::vector < FeaType* >getwindowHOGFeature(){ return windowHOGFeature; }
    	~Hog()
    	{
    		if (RGBdata != NULL)
    			delete[]RGBdata;
    		if (greydata != NULL)
    		{
    			delete[]greydata;
    		}
    		for (int i = 0; i < windowHOGFeature.size(); i++)
    			if (windowHOGFeature[i] != NULL)
    				delete[]windowHOGFeature[i];
    		delete[]grad;
    		delete[]theta;
    	};
    };
    

    Hog.cpp

    #include "stdafx.h"
    #include "Hog.h"
    #include <cfloat>
    #include <cmath>
    #include <fstream>
    #include <limits>
    
    
    void Hog::writeHogFea2File()
    {
    	std::ofstream myfile;
    	myfile.open("example.txt");
    	myfile << "Writing HOG Feature to File.
    ";
    	_ASSERTE(windowHOGFeature.size() == 105);
    	for (int z = 0; z < 105; ++z)
    	{
    		for (int i = 0; i < 36; i++)
    			myfile << windowHOGFeature[z][i] << std::endl;
    	}
    	myfile.close();
    }
    void Hog::ComputeGradient()
    {
    	if (grad != NULL)
    		delete[]grad;
    	grad = new GradType[img_height*img_width];
    	if (theta != NULL)
    		delete[]theta;
    	theta = new MagType[img_height*img_width];
    	for (int i = 1; i < img_height; i++)
    		for (int j = 1; j < img_width; j++)
    		{
    			double dx = greydata[i*img_width + j + 1] - greydata[i*img_width + j - 1];
    			double dy = greydata[(i + 1)*img_width + j] - greydata[(i - 1)*img_width + j];
    			if (fabs(dx) <= 1.0e-6 && fabs(dy) <= 1.0e-6) {
    				grad[i*img_width + j] = 0;
    			}
    			else
    				grad[i*img_width + j] = (sqrt(dx*dx + dy*dy));
    			double theta = atan2(dy, dx);
    			if (theta < 0)
    				theta = (theta + PI);   // normalize to [0, PI], CV_PI   
    			if (theta > PI)
    				theta = theta - PI;
    			theta = (theta * 180 / PI);
    			this->theta[i*img_width + j] = theta;
    			std::cout << theta + 0 << std::endl;
    		}
    	// 边界点的梯度取其近邻点的值   
    	int i = 0;
    	for (int j = 0; j < img_width; j++) {
    		grad[i*img_width + j] = grad[(i + 1)*img_width + j];
    		this->theta[i*img_width + j] = this->theta[(i + 1)*img_width + j];
    	}
    	i = img_height - 1;
    	for (int j = 0; j < img_width; j++) {
    		grad[i*img_width + j] = grad[(i - 1)*img_width + j];
    		this->theta[i*img_width + j] = this->theta[(i - 1)*img_width + j];
    	}
    	int j = 0;
    	for (i = 0; i < img_height; i++) {
    		grad[i*img_width + j] = grad[i*img_width + j + 1];
    		this->theta[i*img_width + j] = this->theta[i*img_width + j + 1];
    	}
    	j = img_width - 1;
    	for (i = 0; i < img_height; i++) {
    		grad[i*img_width + j] = grad[i*img_width + j - 1];
    		this->theta[i*img_width + j] = this->theta[i*img_width + j - 1];
    	}
    
    }
    
    // L2 normalisation: scales the first `length` entries of vec in place by
    // 1 / sqrt(sum of squares + FLT_EPSILON).
    void Hog::L2Normalize(double*vec, int length)
    {
    	double squaredNorm = 0;
    	int k = 0;
    	while (k < length)
    	{
    		const double value = vec[k];
    		squaredNorm += value * value;
    		++k;
    	}
    	// The epsilon keeps the divisor non-zero for an all-zero vector.
    	const double scale = (double)1.0 / sqrt(squaredNorm + FLT_EPSILON);
    	for (k = 0; k < length; ++k)
    		vec[k] *= scale;
    }
    
    double* Hog::GetBlkFeature(int offsetY_againstImg, int offsetX_againstImg)
    {
    	double *blkHOG = new double[blkcell*blkcell*m_histBin];
    	int aa = sizeof(char);
    	memset(blkHOG, 0, 36 * sizeof(double));
    	int center_cell_0_X = CellSize / 2;
    	int center_cell_0_Y = CellSize / 2;
    	/*int center_cell_1_X = CellSize / 2+CellSize;
    	int center_cell_1_Y = CellSize / 2;
    	int center_cell_2_X = CellSize / 2;
    	int center_cell_2_Y = CellSize / 2+CellSize;
    	int center_cell_3_X = CellSize / 2+CellSize;
    	int center_cell_3_Y = CellSize / 2+CellSize;*/
    	int regionsize = CellSize;
    	for (int cell_no_y = 0; cell_no_y < blkcell; cell_no_y++) {
    		for (int cell_no_x = 0; cell_no_x < blkcell; cell_no_x++) {
    			// cell index in the blk  
    			int cell_idx = cell_no_y*blkcell + cell_no_x;
    			// start of a cell  
    			int cell_start_y = cell_no_y*CellSize;
    			int cell_start_x = cell_no_x*CellSize;
    			// compute in the cell  
    			for (int y = cell_start_y; y < cell_start_y + CellSize; y++) {
    				for (int x = cell_start_x; x<cell_start_x + CellSize; x++) {
    					double theta = this->theta[(offsetY_againstImg + y)*img_width + offsetX_againstImg + x];
    					double magn = grad[(offsetY_againstImg + y)*img_width + offsetX_againstImg + x];
    					// 如果幅值为0, 没有梯度则不处理  
    					if (magn >= 0.0) {
    						int theta_idx = (int)(theta / (180.0 / m_histBin));
    						//double gaussweight = isGaussianWeight == true ? GaussianKernel(x, y, CellSize, CellSize, CellSize, CellSize) : 1;
    						//magn= magn*gaussweight;//用高斯核函数调制
    
    						double tt = 1.0 - fabs(double(theta) / (180.0 / double(m_histBin)) - (double(theta_idx) + 0.5));
    						double fx0 = 1.0 - fabs(double(x - center_cell_0_X)) / double(regionsize);
    						double fy0 = 1.0 - fabs(double(y - center_cell_0_Y)) / double(regionsize);
    						if (y <= CellSize / 2 && x <= CellSize / 2 || y >= CellSize*blkcell - CellSize / 2 && x <= CellSize / 2
    							|| y >= CellSize*blkcell - CellSize / 2 && x >= CellSize*blkcell - CellSize / 2
    							|| x >= CellSize*blkcell - CellSize / 2 && y <= CellSize / 2)//四个角点不做三线性插值
    						{
    							blkHOG[m_histBin*cell_idx + theta_idx] = blkHOG[m_histBin*cell_idx + theta_idx] + double(magn)*tt;
    							blkHOG[m_histBin*cell_idx + (theta_idx + 1) % m_histBin] += double(magn)*(1.0 - tt);
    						}
    						if (x>CellSize / 2 && x < CellSize && (y < CellSize / 2 || y> CellSize*blkcell - CellSize / 2))
    						{
    							blkHOG[m_histBin*cell_idx + theta_idx] += double(magn)*tt*fx0;
    							blkHOG[m_histBin*(cell_idx + 1) + theta_idx] += double(magn)*tt*(1.0 - fx0);
    							blkHOG[m_histBin*cell_idx + (theta_idx + 1) % m_histBin] += double(magn)*(1.0 - tt)*fx0;
    							blkHOG[m_histBin*(cell_idx + 1) + (theta_idx + 1) % m_histBin] += double(magn)*(1.0 - tt)*(1.0 - fx0);
    						}
    						if (x>CellSize && x < CellSize*blkcell - CellSize / 2 && (y < CellSize / 2 || y> CellSize*blkcell - CellSize / 2))
    						{
    							blkHOG[m_histBin*cell_idx + theta_idx] += double(magn)*tt*fx0;
    							blkHOG[m_histBin*(cell_idx - 1) + theta_idx] += double(magn)*tt*(1.0 - fx0);
    							blkHOG[m_histBin*cell_idx + (theta_idx + 1) % m_histBin] += double(magn)*(1.0 - tt)*fx0;
    							blkHOG[m_histBin*(cell_idx - 1) + (theta_idx + 1) % m_histBin] += double(magn)*(1.0 - tt)*(1.0 - fx0);
    						}
    						if (y>CellSize / 2 && y < CellSize && (x < CellSize / 2 || x> CellSize*blkcell - CellSize / 2))
    						{
    							blkHOG[m_histBin*cell_idx + theta_idx] += double(magn)*tt*fy0;
    							blkHOG[m_histBin*(cell_idx + blkcell) + theta_idx] += double(magn)*tt*(1.0 - fy0);
    							blkHOG[m_histBin*cell_idx + (theta_idx + 1) % m_histBin] += double(magn)*(1.0 - tt)*fy0;
    							blkHOG[m_histBin*(cell_idx + blkcell) + (theta_idx + 1) % m_histBin] += double(magn)*(1.0 - tt)*(1.0 - fy0);
    						}
    						if (y>CellSize && y < CellSize*blkcell - CellSize / 2 && (x < CellSize / 2 || x> CellSize*blkcell - CellSize / 2))
    						{
    							blkHOG[m_histBin*cell_idx + theta_idx] += double(magn)*tt*fy0;
    							blkHOG[m_histBin*(cell_idx - blkcell) + theta_idx] += double(magn)*tt*(1.0 - fy0);
    							blkHOG[m_histBin*cell_idx + (theta_idx + 1) % m_histBin] += double(magn)*(1.0 - tt)*fy0;
    							blkHOG[m_histBin*(cell_idx - blkcell) + (theta_idx + 1) % m_histBin] += double(magn)*(1.0 - tt)*(1.0 - fy0);
    						}
    						else//做三线性插值,将 4 个cell中的直方图串接起来
    						{
    							blkHOG[m_histBin * 0 + theta_idx] += double(magn)*tt*fx0*fy0;
    							blkHOG[m_histBin * 0 + (theta_idx + 1) % m_histBin] += double(magn)*(1.0 - tt)*fx0*fy0;
    							blkHOG[m_histBin * 1 + theta_idx] += double(magn)*tt*(1.0 - fx0)*fy0;
    							blkHOG[m_histBin * 1 + (theta_idx + 1) % m_histBin] += double(magn)*(1.0 - tt)*(1.0 - fx0)*fy0;
    							blkHOG[m_histBin * 2 + theta_idx] += double(magn)*tt*fx0*(1.0 - fy0);
    							blkHOG[m_histBin * 2 + (theta_idx + 1) % m_histBin] += double(magn)*(1.0 - tt)*fx0*(1.0 - fy0);
    							blkHOG[m_histBin * 3 + theta_idx] += double(magn)*tt*(1.0 - fx0)*(1.0 - fy0);
    							blkHOG[m_histBin * 3 + (theta_idx + 1) % m_histBin] += double(magn)*(1.0 - tt)*(1.0 - fx0)*(1.0 - fy0);
    
    						}
    					}
    				} // for(x)  
    			}   // for(y)                 
    		}   // for(cell_no_x)  
    	}   // for(cell_no_y)   
    	/*for (int i = 0; i < 36; i++)
    		std::cout << blkHOG[i] << std::endl;
    		std::cout << std::endl << std::endl;*/
    	L2Normalize(blkHOG, blkcell*blkcell*m_histBin);//以一个block为单位进行归一化
    	/*for (int i = 0; i < 36; i++)
    		std::cout << blkHOG[i] << std::endl;*/
    	blockmanager.AddBlock(offsetY_againstImg, offsetX_againstImg, blkHOG);//存入cache避免重复计算
    	return blkHOG;
    }
    // Kernel weight of pixel (x, y) relative to the centre (cent_x, cent_y) with
    // bandwidths Hx and Hy: 4*t/(2*PI), where
    // t = 1 - ((dx/Hx)^2 + (dy/Hy)^2)/2, and 0 whenever t is not >= 0.
    double Hog::GaussianKernel(int x, int y, int cent_x, int cent_y, int Hx, int Hy)
    {
    	const int offsetX = x - cent_x;
    	const int offsetY = y - cent_y;
    	const double scaledX = (double)(offsetX*offsetX) / (Hx*Hx);
    	const double scaledY = (double)(offsetY*offsetY) / (Hy*Hy);
    	const double falloff = 1 - (scaledX + scaledY) / 2;
    	return (falloff >= 0) ? (double)(4.0 * falloff / (2 * PI)) : 0.0;
    }
    
    // Rebuilds windowHOGFeature for the detection window whose top-left corner
    // is at row offsetY_againstImg / column offsetX_againstImg of the image.
    // One block buffer is appended per block position, row by row; a buffer is
    // taken from blockmanager when already cached there and computed (and
    // cached) by GetBlkFeature() otherwise.
    void Hog::GetWindowFeature(const int offsetY_againstImg, const int offsetX_againstImg)
    {
    	windowHOGFeature.clear();

    	for (int row = 0; row < yblkSkipStepNum; ++row)
    	{
    		const int blockY = offsetY_againstImg + row*blockSkipStep;
    		for (int col = 0; col < xblkSkipStepNum; ++col)
    		{
    			const int blockX = offsetX_againstImg + col*blockSkipStep;
    			double*const feature = blockmanager.find(blockY, blockX)
    				? blockmanager.GetBlockData(blockY, blockX)
    				: GetBlkFeature(blockY, blockX);
    			windowHOGFeature.push_back(feature);
    		}
    	}
    }
    
    void Hog::RGB2Grey()
    {
    	if (greydata == NULL)
    		greydata = new BYTE[img_width*img_height];
    	for (int i = 0; i < img_height; i++)
    		for (int j = 0; j < img_width; j++)
    		{
    			greydata[i*img_width + j] = 0.299*RGBdata[i*img_width * 3 + 3 * j] +
    				0.587*RGBdata[i*img_width * 3 + 3 * j + 1] +
    				0.114*RGBdata[i*img_width * 3 + 3 * j + 2];
    		}
    	delete[]RGBdata;
    }
    
    void Hog::NextPyramid()//双线性插值获得下一层图像
    {
    	int new_img_height = img_height / ratio;
    	int new_img_width = img_width / ratio;
    
    	BYTE*new_greydata = new BYTE[new_img_height*new_img_width];
    	double fw = ratio;//double(nW) / W1;
    	double fh = ratio;//double(nH) / H1;
    	int y1, y2, x1, x2, x0, y0;
    	double fx1, fx2, fy1, fy2;
    
    	for (int i = 0; i < new_img_height; i++)
    	{
    		y0 = i*fh;
    		y1 = int(y0);
    		if (y1 == img_height - 1)    y2 = y1;
    		else y2 = y1 + 1;
    		fy1 = y1 - y0;
    		fy2 = 1.0f - fy1;
    		for (int j = 0; j < new_img_width; j++)
    		{
    			x0 = j*fw;
    			x1 = int(x0);
    			if (x1 == img_width - 1)    x2 = x1;
    			else x2 = x1 + 1;
    			fx1 = y1 - y0;
    			fx2 = 1.0f - fx1;
    			double s1 = fx1*fy1;
    			double s2 = fx2*fy1;
    			double s3 = fx2*fy2;
    			double s4 = fx1*fy2;
    			BYTE c1r, c2r, c3r, c4r;
    			c1r = greydata[y1*img_width + x1];
    			c2r = greydata[y1*img_width + x2];
    			c3r = greydata[y2*img_width + x1];
    			c4r = greydata[y2*img_width + x2];
    			BYTE r;
    			r = (BYTE)(c1r*s3) + (BYTE)(c2r*s4) + (BYTE)(c3r*s2) + (BYTE)(c4r*s1);
    			new_greydata[i*new_img_width + j] = r;
    		}
    	}
    	delete[]greydata;
    	greydata = new_greydata;
    	img_height = new_img_height;
    	img_width = new_img_width;
    	current_pyramid_height++;
    	blockmanager.deleteAllBlocks();
    	blockmanager.SetLevel(current_pyramid_height);
    }
    /*void Hog::SingleScaleDetect()
    {
    int xSkipStepNum = floor((img_width - window_width) / windowSkipStep + 1);
    int ySkipStepNum = floor((img_height - window_width) / windowSkipStep + 1);
    //double*imgHOGFeature = new double[blkcell*blkcell*m_histBin*xSkipStepNum*ySkipStepNum];
    for (int i = 0; i < ySkipStepNum; i++)
    for (int j = 0; j < xSkipStepNum; j++)
    {
    GetWindowFeature(i*windowSkipStep, j*windowSkipStep);
    }
    }
    
    
    void Hog::MultiScaleDetect()
    {
    while (current_pyramid_height < max_pyramid_height)
    {
    SingleScaleDetect();
    NextPyramid();
    }
    }*/
    




    版权声明:

  • 相关阅读:
    简单说说 Java 的 JVM 内存结构
    cpu怎么实现运算的
    triplet
    LCT的一些坑【已经变成坑点集合了233】
    插头DP学习笔记
    一个坑-卡常
    NOIP2017游记
    洛谷P4015 运输问题
    线性基学习笔记
    洛谷P4331[BOI2004] sequence
  • 原文地址:https://www.cnblogs.com/walccott/p/4957075.html
Copyright © 2011-2022 走看看