zoukankan      html  css  js  c++  java
  • discrete adaboost的C++实现

    参考之前的博文,AdaBoost算法学习实现的c++代码

    //adaboost.h
    #ifndef ADABOOST_H
    #define ADABOOST_H
    
    #include<cmath>
    #include<iostream>
    #include<vector>
    #include<assert.h>
    
    
    using namespace std;
    
    #define FEATURETYPE double
    
    
    struct FeaVec
    {
    	unsigned int dim;
    	std::vector<FEATURETYPE>fea;
    	int label;//这里只去两个值,-1,1
    	FeaVec(unsigned int d) :dim(d)
    	{
    
    	}
    };
    
    class weakclassfier;
    
    // Discrete AdaBoost: trains an ensemble of decision-stump weak
    // classifiers on a loaded training set and predicts by weighted vote.
    class adaboost
    {

    public:
    	friend class weakclassfier;
    	adaboost();
    	virtual ~adaboost();
    	// Build the weak-classifier ensemble from the loaded training set.
    	void train();
    	// Predict the label (-1 or +1) of one sample by weighted vote.
    	int classify(FeaVec data);
    	// Copy *data into the internal training set and record the dimensionality.
    	void load_trainset(vector<FeaVec>*data);

    protected:

    private:
    	// NOTE(review): W is owned (freed with delete[] in the dtor) but is only
    	// allocated in train() — it must be set to NULL in the constructor.
    	double*W;
    	int dim;// feature dimensionality
    	std::vector<FeaVec>trainset;
    	std::vector<weakclassfier*>classfier;  // owned weak learners, freed in dtor
    	double aggri_error;  // aggregate training error (currently unused)

    };
    
    #endif // ADABOOST_H


    //adaboost.cpp
    #include "stdafx.h"
    #include "adaboost.h"
    
    // A decision stump: thresholds one feature dimension and predicts -1/+1
    // depending on which side of the threshold the value falls.
    class weakclassfier
    {
    public:
    	friend class adaboost;
    	weakclassfier(adaboost*ada)
    	{
    		this->ada = ada;
    		min_error_rate = 1000000;
    		// Initialize every member: `predicted` in particular is compared
    		// against NULL (and deleted) before its first assignment, so it
    		// must not be left indeterminate.
    		predicted = NULL;
    		greaterthan = true;
    		dim = 0;
    		threshold = 0;
    		alpha = 0;
    	}
    	void build();
    	std::vector<int>* stumpclassify(int const k, double const threshold,
    		vector<FeaVec>& data, bool greatthan);
    	~weakclassfier();
    private:
    	bool greaterthan;//controls the direction of the inequality
    	int dim;//which feature dimension this stump splits on
    	double threshold;
    	double min_error_rate;//weighted error of this stump on the training set
    	std::vector<int>*predicted;//cached predictions on the training set (owned)
    	double alpha;//vote weight inside the strong classifier
    	adaboost* ada;
    };
    // Release the cached prediction vector; delete on a null pointer is a
    // no-op, so no explicit check is required.
    weakclassfier::~weakclassfier()
    {
    	delete predicted;
    }
    void weakclassfier::build()
    {
    	double minerror = 100000;
    	for (int i = 0; i < ada->dim; i++)//外循环次数少
    	{
    		double min = 100000;
    		double max = -100000;
    		for (int j = 0; j<ada->trainset.size(); j++)
    		{
    			if (ada->trainset[j].fea[i]>max)
    				max = ada->trainset[j].fea[i];
    			if (ada->trainset[j].fea[i] < min)
    				min = ada->trainset[j].fea[i];
    		}
    
    		double step = (max - min) / double(10);
    		for (double j = min; j < max;)
    		{
    			j += step;
    			double current_error = 0;
    			bool flag = false;
    			vector<int>*aa = stumpclassify(i, j, ada->trainset, true);
    			for (int k = 0; k < ada->trainset.size(); k++)
    				current_error += ((*aa)[k] != ada->trainset[k].label) ? ada->W[k] : 0;
    			if (current_error < min_error_rate)
    			{
    				min_error_rate = current_error;
    				threshold = j;
    				greaterthan = true;
    				dim = i;
    				if (predicted != NULL)
    					delete predicted;
    				predicted = aa;
    				flag = true;
    			}
    			current_error = 0;
    			aa = stumpclassify(i, j, ada->trainset, false);
    			for (int k = 0; k < ada->trainset.size(); k++)
    				current_error += ((*aa)[k] != ada->trainset[k].label) ? ada->W[k] : 0;
    			//current_error += abs((*aa)[k] -ada->trainset[k].label) *ada->W[k];
    			if (current_error < min_error_rate)
    			{
    				min_error_rate = current_error;
    				threshold = j;
    				greaterthan = false;
    				dim = i;
    				if (predicted != NULL)
    					delete predicted;
    				predicted = aa;
    				flag = true;
    			}
    			if (!flag)//new和delete必须配套使用
    				delete aa;
    		}
    	}
    	assert(min_error_rate < 0.5);
    }
    
    // Classify every sample in `data` with a single decision stump on feature
    // `k`: returns a heap-allocated vector of -1/+1 predictions. The caller
    // owns the returned vector and must delete it.
    std::vector<int>* weakclassfier::stumpclassify(int const k, double const threshold,
    	vector<FeaVec>&data, bool greatthan)
    {
    	// Start with every sample predicted as +1; flip the ones on the
    	// "wrong" side of the threshold below.
    	std::vector<int>*pre = new vector < int >(data.size(), 1);

    	for (int j = 0; j < (int)data.size(); j++)
    	{
    		const bool below = data[j].fea[k] < threshold;
    		const bool above = data[j].fea[k] > threshold;
    		if ((greatthan && below) || (!greatthan && above))
    			(*pre)[j] = -1;
    	}
    	return pre;
    }
    
    
    
    // Default-construct with no training data. Members that the destructor
    // touches must be initialized here, otherwise ~adaboost() performs
    // delete[] on an indeterminate pointer when train() was never called.
    adaboost::adaboost()
    {
    	W = NULL;
    	dim = 0;
    	aggri_error = 0;
    }
    
    // Free every owned weak classifier, then the sample-weight array.
    adaboost::~adaboost()
    {
    	for (size_t i = 0; i < classfier.size(); i++)
    		delete classfier[i];
    	// delete[] on a null pointer is a no-op, so no check is needed.
    	delete[] W;
    }
    
    void adaboost::train()
    {
    	W = new double[trainset.size()];
    	//全部初始化为0,用memset可以,但某一特定值,只能用循环了
    	//memset(W, double(1) / double(trainset.size()), trainset.size()*sizeof(double));
    	for (int i = 0; i < trainset.size(); i++)
    		W[i] = double(1) / double(trainset.size());
    	vector<double> aggrigate;
    	aggrigate.resize(trainset.size());
    
    	while (classfier.size() < 4)
    	{
    		aggri_error = 0;
    		weakclassfier*weak = new weakclassfier(this);
    		weak->build();
    		if (weak->min_error_rate < 0.5)
    		{
    			//弱分类器的准确率越高,其权重也越大
    			weak->alpha = (0.5*log((1.0 - weak->min_error_rate) / (weak->min_error_rate + 1e-16)));
    			classfier.push_back(weak);
    			double sumW = 0;
    			for (int j = 0; j < trainset.size(); j++)
    			{
    				//根据当前弱分类器分类结果将错分样本的权重提升
    				W[j] *= exp(weak->alpha*((*weak->predicted)[j] == trainset[j].label ? -1 : 1));
    				sumW += W[j];
    			}
    			for (int j = 0; j < trainset.size(); j++)
    			{
    				W[j] /= (sumW + 0.00000001);
    				//	aggrigate[j] += weak->alpha*(*weak->predicted)[j];
    				//aggri_error += ((aggrigate[j] > 0) ? 1 : -1) == trainset[j].label ? 0 : 1;
    			}
    			//aggri_error /= double(trainset.size());
    			//	if (aggri_error == 0)
    			//	break;
    		}
    		delete weak->predicted;
    	}
    }
    
    // Ensemble prediction: the sign of the alpha-weighted sum of every
    // stump's output on the sample.
    int adaboost::classify(FeaVec data)
    {
    	vector<FeaVec> sample;
    	sample.push_back(data);

    	double vote = 0;
    	for (int i = 0; i < (int)classfier.size(); i++)
    	{
    		weakclassfier* weak = classfier[i];
    		vector<int>* result = weak->stumpclassify(weak->dim,
    			weak->threshold, sample, weak->greaterthan);
    		vote += (*result)[0] * weak->alpha;
    		delete result;
    	}
    	return vote > 0 ? 1 : -1;
    }
    
    
    // Copy *data into the internal training set and record the feature
    // dimensionality from the last sample. `data` must be non-null and
    // non-empty: back() on an empty vector is undefined behavior.
    void adaboost::load_trainset(vector<FeaVec>*data)
    {
    	assert(data != NULL && !data->empty());
    	trainset = *data;
    	dim = data->back().dim;
    }
    
    







    //main
    #include "stdafx.h"
    #include"adaboost.h"
    
    int _tmain(int argc, _TCHAR* argv[])
    {
    	cout << double(1) / double(5) << endl;
    	FeaVec aa(2), bb(2), cc(2), dd(2),ee(2);
    	aa.fea.push_back(2);
    	aa.fea.push_back(1.1);
    	aa.label = 1;
    	bb.fea.push_back(1.3);
    	bb.fea.push_back(1.0);
    	bb.label = -1;
    	cc.fea.push_back(1.0);
    	cc.fea.push_back(1.0);
    	cc.label = -1;
    	dd.fea.push_back(2);
    	dd.fea.push_back(1.0);
    	dd.label = 1;
    	ee.fea.push_back(1);
    	ee.fea.push_back(2.1);
    	ee.label = 1;
    	vector<FeaVec>pp;
    	pp.push_back(aa);
    	pp.push_back(bb);
    	pp.push_back(cc);
    	pp.push_back(dd);
    	pp.push_back(ee);
    	adaboost ada;
    	ada.load_trainset(&pp);
    	ada.train();
    	FeaVec ff(2);
    	ff.fea.push_back(0.9);
    	ff.fea.push_back(1.1);
    	int a = ada.classify(ff);
    
    	return 0;
    }
    


    版权声明:

  • 相关阅读:
    每日一水 POJ8道水题
    编译和使用 MySQL C++ Connector
    j2ee model1模型完成分页逻辑的实现 详解!
    DB查询分析器访问EXCEL时,要在表名前后加上中括弧或双引号
    指向结构体变量的指针
    EOSS V3.0 企业运营支撑系统(基于RBAC原理的权限管理)
    MybatisGen1.0 Mybatis JavaBean Mapper生成工具
    The table name must be enclosed in double quotation marks or sqare bracket while accessing EXCEL by
    资源-Android:Android
    软件-开发软件:Android Studio
  • 原文地址:https://www.cnblogs.com/walccott/p/4956870.html
Copyright © 2011-2022 走看看