Linear Regression in C++: Least Squares, Batch Gradient Descent, Stochastic Gradient Descent, and Locally Weighted Linear Regression

    We turn next to the task of finding a weight vector w which minimizes the chosen error function E(w).
    Because there is clearly no hope of finding an analytical solution to the equation ∇E(w) = 0, we resort to
    iterative numerical procedures.

    On-line gradient descent, also known as sequential gradient descent or stochastic gradient descent, makes
    an update to the weight vector based on one data point at a time. One advantage of on-line methods compared
    to batch methods is that the former handle redundancy in the data much more efficiently. Another property of
    on-line gradient descent is the possibility of escaping from local minima, since a stationary point with
    respect to the error function for the whole data set will generally not be a stationary point for each data
    point individually. A further advantage of on-line learning is that it requires much less storage than batch
    learning.
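
    In symbols, writing the total error as a sum of per-point contributions, $E(\mathbf{w}) = \sum_n E_n(\mathbf{w})$,
    and letting $\eta$ denote the learning rate, the two schemes update the weight vector as

        batch:    $\mathbf{w}^{(\tau+1)} = \mathbf{w}^{(\tau)} - \eta\,\nabla E(\mathbf{w}^{(\tau)})$
        on-line:  $\mathbf{w}^{(\tau+1)} = \mathbf{w}^{(\tau)} - \eta\,\nabla E_n(\mathbf{w}^{(\tau)})$

    where the on-line step uses only the gradient of a single data point's error, cycling through the data either
    sequentially or by selecting points at random.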

    Obtaining the raw data: the program below reads the x values from ex2x.dat and the corresponding y values from ex2y.dat.

    #include <iostream>
    #include <fstream>
    #include <vector>
    #include <string>
    #include <cfloat>
    #include <cmath>

    /* Gaussian kernel weight used by the locally weighted regression below:
       weight = exp(-(x_train - x_query)^2 / 2), i.e. a Gaussian kernel with
       bandwidth tau = 1; points far from the query contribute little. */
    double dis(const double &train, const double &query) {
      double weight=exp(-0.5*pow(train-query, 2));

      return weight;
    }

    /* Ordinary least squares (closed-form fit) */
    template <typename PairIterator>
    bool GetLinearFit(PairIterator begin_it, PairIterator end_it, double& slope, double& y_intercept) {
        if(begin_it==end_it) {
            return false;
        }
        size_t n=end_it-begin_it;
        double sum_x2=0.0,sum_y=0.0,sum_x=0.0,sum_xy=0.0;

        for(PairIterator it=begin_it;it!=end_it;++it) {
            sum_x2+=(it->first)*(it->first);
            sum_y+=it->second;
            sum_x+=it->first;
            sum_xy+=(it->first)*(it->second);
        }

        slope=(n*sum_xy-sum_x*sum_y)/(n*sum_x2-sum_x*sum_x);
        y_intercept=(sum_x2*sum_y-sum_x*sum_xy)/(n*sum_x2-sum_x*sum_x);

        return true;
    }
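
    /* The expressions above are the closed-form normal-equation solutions for
       simple linear regression on n points (x_i, y_i):
         slope       = (n*Σx_i*y_i - Σx_i*Σy_i) / (n*Σx_i^2 - (Σx_i)^2)
         y_intercept = (Σx_i^2*Σy_i - Σx_i*Σ(x_i*y_i)) / (n*Σx_i^2 - (Σx_i)^2)
       Both denominators vanish when every x_i is identical, in which case the
       fitted line is undefined. */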

    /* Locally weighted linear regression (LWR), fitted by gradient descent */
    template<typename PairIterator>
    bool LWR(PairIterator begin_it, PairIterator end_it, double& slope, double& y_intercept) {
      if(begin_it==end_it) {
        return false;
      }

      /* query is the point x at which the locally weighted fit is evaluated;
         each training point contributes to the cost in proportion to its kernel
         weight dis(x_i, query). Here the query point is fixed at 5.5. */
      double query=5.5;
      size_t n=end_it-begin_it;
      double J=0.0;

      for(PairIterator it=begin_it;it!=end_it;++it) {
        J+=(y_intercept+slope*(it->first)-it->second)*(y_intercept+slope*(it->first)-it->second)*dis(it->first, query);
      }
      J=J*0.5/n;

      while(true) {
        double temp0=0,temp1=0;
        for(PairIterator it=begin_it;it!=end_it;++it) {
          temp0+=(y_intercept+slope*(it->first)-it->second)*dis(it->first, query);
          temp1+=(y_intercept+slope*(it->first)-it->second)*(it->first)*dis(it->first, query);
        }

        temp0=temp0/n;
        temp1=temp1/n;

        /* 0.03 is the learning rate alpha */
        y_intercept=y_intercept-0.03*temp0;
        slope=slope-0.03*temp1;

        double MSE=0.0;
        for(PairIterator it=begin_it;it!=end_it;++it) {
          MSE+=(y_intercept+slope*(it->first)-it->second)*(y_intercept+slope*(it->first)-it->second)*dis(it->first, query);
        }
        MSE=0.5*MSE/n;
        if(std::abs(J-MSE)<0.00000001)
          break;
        J=MSE;
      }
      return true;
    }
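
    /* The loop above performs gradient descent on a weighted cost
         J(theta0, theta1) = (1/(2n)) * Σ_i dis(x_i, query) * (theta0 + theta1*x_i - y_i)^2,
       so the resulting line is only meaningful near the query point (fixed at
       5.5 above); re-running with a different query generally yields a
       different local line. */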

    /* Batch gradient descent (BGD) */
    template<typename PairIterator>
    bool BatchGradientDescent(PairIterator begin_it, PairIterator end_it, double& slope, double& y_intercept) {
        if(begin_it==end_it) {
            return false;
        }
        size_t n=end_it-begin_it;
        double J=0.0;

        /*the initial cost function*/
        for(PairIterator it=begin_it;it!=end_it;++it) {
            J+=(y_intercept+slope*(it->first)-it->second)*(y_intercept+slope*(it->first)-it->second);
        }
        J=J*0.5/n;

        while(true) {
            double temp0=0,temp1=0;
            for(PairIterator it=begin_it;it!=end_it;++it) {
                temp0+=(y_intercept+slope*(it->first)-it->second);
                temp1+=(y_intercept+slope*(it->first)-it->second)*(it->first);
            }
            temp0=temp0/n;
            temp1=temp1/n;

            /* 0.03 is the learning rate alpha */
            y_intercept=y_intercept-0.03*temp0;
            slope=slope-0.03*temp1;

            double MSE=0.0;
            for(PairIterator it=begin_it;it!=end_it;++it) {
                MSE+=(y_intercept+slope*(it->first)-it->second)*(y_intercept+slope*(it->first)-it->second);
            }
            MSE=0.5*MSE/n;
            if(std::abs(J-MSE)<0.00000001)
                break;
            J=MSE;
        }
        return true;
    }
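
    /* The batch update above is gradient descent on the mean-squared-error cost
         J(theta0, theta1) = (1/(2n)) * Σ_i (theta0 + theta1*x_i - y_i)^2
       with partial derivatives
         dJ/dtheta0 = (1/n) * Σ_i (theta0 + theta1*x_i - y_i)            (temp0)
         dJ/dtheta1 = (1/n) * Σ_i (theta0 + theta1*x_i - y_i) * x_i      (temp1)
       and it stops once the cost changes by less than 1e-8 between iterations. */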

    /* Stochastic gradient descent (SGD) */
    template<typename PairIterator>
    bool StochasticGradientDescent(PairIterator begin_it, PairIterator end_it, double& slope, double& y_intercept) {
        if(begin_it==end_it) {
            return false;
        }
        size_t n=end_it-begin_it;
        double J=0.0;

        /*the initial cost function*/
        for(PairIterator it=begin_it;it!=end_it;++it) {
            J+=(y_intercept+slope*(it->first)-it->second)*(y_intercept+slope*(it->first)-it->second);
        }
        J=0.5*J/n;

        /* repeat passes over the data until the cost stops changing; the epoch
           cap is a safeguard, since constant-step SGD may keep oscillating */
        bool converged=false;
        for(int epoch=0;epoch<1000 && !converged;++epoch) {
            for(PairIterator it=begin_it;it!=end_it;++it) {
                /* gradient contribution of a single data point */
                double temp0=(y_intercept+slope*(it->first)-it->second);
                double temp1=(y_intercept+slope*(it->first)-it->second)*(it->first);

                /* 0.03 is the learning rate alpha */
                y_intercept=y_intercept-0.03*temp0;
                slope=slope-0.03*temp1;

                double MSE=0.0;
                for(PairIterator jt=begin_it;jt!=end_it;++jt) {
                    MSE+=(y_intercept+slope*(jt->first)-jt->second)*(y_intercept+slope*(jt->first)-jt->second);
                }
                MSE=0.5*MSE/n;
                if(std::abs(J-MSE)<0.00000001) {
                    converged=true;
                    break;
                }
                J=MSE;
            }
        }

        return true;
    }
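
    /* Unlike the batch version, the stochastic version updates slope and
       y_intercept after every individual data point. Recomputing the full-data
       MSE after each point-wise update (as done above) keeps the same stopping
       test as the batch method, but costs an extra pass over the data per
       update; a cheaper and common alternative is to check convergence once per
       epoch. */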

    int main() {
        std::ifstream in;
        in.open("ex2x.dat");
        if(!in) {
            std::cout<<"open file ex2x.dat failed!"<<std::endl;
            return 1;
        }

        std::vector<double> datax,datay;
        double temp;

        while(in>>temp) {
            datax.push_back(temp);
        }

        in.close();
        in.open("ex2y.dat");
        if(!in) {
            std::cout<<"open file ex2y.dat failed!"<<std::endl;
            return 1;
        }

        while(in>>temp) {
            datay.push_back(temp);
        }

        std::vector<std::pair<double, double> > data;

        for(std::vector<double>::const_iterator iterx=datax.begin(),itery=datay.begin();
            iterx!=datax.end()&&itery!=datay.end();
            ++iterx,++itery) {
            data.push_back(std::pair<double,double>(*iterx,*itery));
        }
        in.close();
        double slope=0.0;
        double y_intercept=0.0;
        GetLinearFit(data.begin(),data.end(),slope,y_intercept);
        std::cout<<"最小二乘法得到的结果:"<<std::endl;
        std::cout<<"slope: "<<slope<<std::endl;
        std::cout<<"y_intercept: "<<y_intercept<<std::endl;

        slope=1.0,y_intercept=1.0;
        BatchGradientDescent(data.begin(),data.end(),slope,y_intercept);
        std::cout<<"批量梯度下降法得到的结果:"<<std::endl;
        std::cout<<"slope: "<<slope<<std::endl;
        std::cout<<"y_intercept: "<<y_intercept<<std::endl;

        slope=1.0,y_intercept=1.0;
        StochasticGradientDescent(data.begin(),data.end(),slope,y_intercept);
        std::cout<<"随机梯度下降法得到的结果:"<<std::endl;
        std::cout<<"slope: "<<slope<<std::endl;
        std::cout<<"y_intercept: "<<y_intercept<<std::endl;

        slope=1.0,y_intercept=1.0;
        LWR(data.begin(),data.end(),slope,y_intercept);
        std::cout<<"Locally weighted linear regression result:"<<std::endl;
        std::cout<<"slope: "<<slope<<std::endl;
        std::cout<<"y_intercept: "<<y_intercept<<std::endl;

        return 0;
    }
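
    The program above expects two plain-text files, ex2x.dat and ex2y.dat, each
    holding one number per line (the x values and the matching y values read by
    main()). If the original exercise files are not at hand, a small synthetic
    dataset is enough for a smoke test. The sketch below is only an assumed
    stand-in: the point count, the x range, the underlying line y = 2x + 1, and
    the noise level are arbitrary choices, not properties of the original data.

    // gen_data.cpp -- write synthetic ex2x.dat / ex2y.dat (one value per line)
    #include <fstream>
    #include <random>

    int main() {
        std::ofstream fx("ex2x.dat"), fy("ex2y.dat");
        std::mt19937 rng(42);                                     // fixed seed for reproducibility
        std::uniform_real_distribution<double> x_dist(0.0, 5.0);  // x values in [0, 5]
        std::normal_distribution<double> noise(0.0, 0.3);         // small Gaussian noise

        // assumed relation y = 2*x + 1 + noise (arbitrary, for testing only)
        for (int i = 0; i < 50; ++i) {
            double x = x_dist(rng);
            fx << x << "\n";
            fy << 2.0 * x + 1.0 + noise(rng) << "\n";
        }
        return 0;
    }

    With data of this kind the routines should report a slope close to 2 and an
    intercept close to 1 (exactly so for the least-squares fit, approximately for
    the gradient-descent variants; the locally weighted fit is biased toward the
    behaviour near the query point x = 5.5).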

Original post: https://www.cnblogs.com/donggongdechen/p/7384568.html