Logistic regression is essentially a linear regression whose output is normalized by the logistic function.
For a binary classification problem, the input is a vector x = (x_1, x_2, ..., x_n), Θ = (θ_0, θ_1, θ_2, ..., θ_n) are the parameters learned by our algorithm, and the class label is 0 or 1. Let

$$z = \theta_0 + \theta_1 x_1 + \theta_2 x_2 + \cdots + \theta_n x_n$$

which can be written compactly (with the convention x_0 = 1) as

$$z = \Theta^{T} x$$

We then map z through a sigmoid function:

$$g(z) = \frac{1}{1 + e^{-z}}$$
If g(z) is close to 0 (below the 0.5 threshold), the input sample is assigned to class 0; otherwise it is assigned to class 1. What remains is to train the parameters Θ = (θ_0, θ_1, θ_2, ..., θ_n), which we do with gradient descent.
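To make the update rule concrete, here is a minimal C++ sketch of one gradient-descent step for binary logistic regression. The names sigmoid, sgd_step, theta, and lr are illustrative and are not part of the implementation that follows later in this post.

    #include <cmath>
    #include <cstdio>

    // Sigmoid: maps z in (-inf, +inf) to (0, 1).
    double sigmoid(double z) { return 1.0 / (1.0 + std::exp(-z)); }

    // One gradient-descent step on a single sample (x, y), y in {0, 1}.
    // theta[0] is the bias theta_0; x is implicitly extended with x_0 = 1.
    void sgd_step(double *theta, const double *x, int y, int n, double lr) {
        double z = theta[0];
        for (int j = 0; j < n; j++) z += theta[j + 1] * x[j];
        double err = y - sigmoid(z);      // gradient of the log-likelihood
        theta[0] += lr * err;             // update the bias
        for (int j = 0; j < n; j++) theta[j + 1] += lr * err * x[j];
    }

    int main() {
        double theta[3] = {0, 0, 0};
        double x0[2] = {0.0, 1.0}, x1[2] = {1.0, 0.0};
        for (int epoch = 0; epoch < 200; epoch++) {
            sgd_step(theta, x0, 0, 2, 0.1);   // a sample of class 0
            sgd_step(theta, x1, 1, 2, 0.1);   // a sample of class 1
        }
        double z = theta[0] + theta[1] * x1[0] + theta[2] * x1[1];
        printf("P(y=1 | x1) = %f\n", sigmoid(z));  // should be well above 0.5
        return 0;
    }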
Logistic regression itself handles only binary classification; softmax regression, derived from it, extends the idea to multi-class problems. In both cases the classes must be linearly separable for the model to fit them well.
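For reference, softmax replaces the sigmoid with the mapping below, which turns K raw scores z_1, ..., z_K into a probability distribution over the K classes (for K = 2 it reduces to the sigmoid applied to z_1 − z_2):

$$\mathrm{softmax}(z)_i = \frac{e^{z_i}}{\sum_{k=1}^{K} e^{z_k}}, \qquad i = 1, \dots, K$$

This is exactly the function implemented by LogisticRegression::softmax in the code below.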
//LogisticRegression.h
class LogisticRegression {
public:
    int N;        // number of training samples
    int n_in;     // number of input features
    int n_out;    // number of output classes
    double **W;   // weight matrix, n_out x n_in
    double *b;    // bias vector, length n_out
    LogisticRegression(int, int, int);
    ~LogisticRegression();
    void train(int*, int*, double);   // one SGD step on a sample (x, y)
    void softmax(double*);            // in-place softmax over n_out values
    void predict(int*, double*);      // class probabilities for input x
};
//LogisticRegression.cpp
#include <iostream>
#include <cmath>
#include "LogisticRegression.h"
using namespace std;

LogisticRegression::LogisticRegression(int size, int in, int out) {
    N = size;
    n_in = in;
    n_out = out;

    // initialize W and b to zero
    W = new double*[n_out];
    for(int i=0; i<n_out; i++) W[i] = new double[n_in];
    b = new double[n_out];

    for(int i=0; i<n_out; i++) {
        for(int j=0; j<n_in; j++) {
            W[i][j] = 0;
        }
        b[i] = 0;
    }
}

LogisticRegression::~LogisticRegression() {
    for(int i=0; i<n_out; i++) delete[] W[i];
    delete[] W;
    delete[] b;
}

// One stochastic gradient-descent step on the sample (x, y),
// where y is a one-hot label vector of length n_out.
void LogisticRegression::train(int *x, int *y, double lr) {
    double *p_y_given_x = new double[n_out];
    double *dy = new double[n_out];

    // forward pass: p_y_given_x = softmax(W x + b)
    for(int i=0; i<n_out; i++) {
        p_y_given_x[i] = 0;
        for(int j=0; j<n_in; j++) {
            p_y_given_x[i] += W[i][j] * x[j];
        }
        p_y_given_x[i] += b[i];
    }
    softmax(p_y_given_x);

    // gradient step: dy = y - p(y|x)
    for(int i=0; i<n_out; i++) {
        dy[i] = y[i] - p_y_given_x[i];
        for(int j=0; j<n_in; j++) {
            W[i][j] += lr * dy[i] * x[j] / N;
        }
        b[i] += lr * dy[i] / N;
    }

    delete[] p_y_given_x;
    delete[] dy;
}

// In-place softmax; subtracting the max avoids overflow in exp().
void LogisticRegression::softmax(double *x) {
    double max = x[0];
    double sum = 0.0;

    for(int i=1; i<n_out; i++) if(max < x[i]) max = x[i];
    for(int i=0; i<n_out; i++) {
        x[i] = exp(x[i] - max);
        sum += x[i];
    }
    for(int i=0; i<n_out; i++) x[i] /= sum;
}

// Compute class probabilities y = softmax(W x + b) for input x.
void LogisticRegression::predict(int *x, double *y) {
    for(int i=0; i<n_out; i++) {
        y[i] = 0;
        for(int j=0; j<n_in; j++) {
            y[i] += W[i][j] * x[j];
        }
        y[i] += b[i];
    }
    softmax(y);
}

void test_lr() {
    double learning_rate = 0.1;
    int n_epochs = 500;

    int train_N = 6;
    int test_N = 2;
    int n_in = 6;
    int n_out = 2;

    // training data: two linearly separable patterns
    int train_X[6][6] = {
        {1, 1, 1, 0, 0, 0},
        {1, 0, 1, 0, 0, 0},
        {1, 1, 1, 0, 0, 0},
        {0, 0, 1, 1, 1, 0},
        {0, 0, 1, 1, 0, 0},
        {0, 0, 1, 1, 1, 0}
    };

    // one-hot labels
    int train_Y[6][2] = {
        {1, 0},
        {1, 0},
        {1, 0},
        {0, 1},
        {0, 1},
        {0, 1}
    };

    // construct LogisticRegression
    LogisticRegression classifier(train_N, n_in, n_out);

    // train online
    for(int epoch=0; epoch<n_epochs; epoch++) {
        for(int i=0; i<train_N; i++) {
            classifier.train(train_X[i], train_Y[i], learning_rate);
        }
        // learning_rate *= 0.95;
    }

    // test data
    int test_X[2][6] = {
        {1, 0, 1, 0, 0, 0},
        {0, 0, 1, 1, 1, 0}
    };
    double test_Y[2][2];

    // test: print the predicted class probabilities
    for(int i=0; i<test_N; i++) {
        classifier.predict(test_X[i], test_Y[i]);
        for(int j=0; j<n_out; j++) {
            cout << test_Y[i][j] << " ";
        }
        cout << endl;
    }
}

int main() {
    test_lr();
    return 0;
}
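Assuming the two files are saved as LogisticRegression.h and LogisticRegression.cpp (as the comments above suggest), the demo can be built and run along these lines:

    g++ -o lr LogisticRegression.cpp
    ./lr

The exact numbers printed depend on the learning rate and number of epochs, but the first output row should place most of its probability mass on the first class and the second row on the second class, matching the two patterns in the training data.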