机器学习之逻辑回归(Logistic Regression)

zoukankan html css js c++ java

机器学习之逻辑回归(Logistic Regression)
1. """逻辑回归中的Sigmoid函数"""
2. import numpy as np
3. import matplotlib.pyplot as plt
def sigmoid(t):
    """Return the logistic sigmoid 1 / (1 + exp(-t)) of ``t``.

    Accepts a scalar or a NumPy array and is evaluated element-wise.  The
    value is derived from ``exp(-|t|)``, which never exceeds 1, so very
    negative inputs do not overflow ``np.exp`` as the direct formula does.
    """
    t = np.asarray(t, dtype=float)
    decay = np.exp(-np.abs(t))
    # Both expressions are finite everywhere; pick the one matching the sign.
    result = np.where(t >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d array back into a scalar for scalar input.
    return result[()]
# Evaluate the sigmoid at 500 evenly spaced points in [-10, 10] and plot it.
x = np.linspace(-10, 10, 500)
y = sigmoid(x)

plt.plot(x, y)
plt.show()
结果：

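逻辑回归损失函数的梯度：

损失函数为 $$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log\hat{y}^{(i)} + \left(1-y^{(i)}\right)\log\left(1-\hat{y}^{(i)}\right)\right],\qquad \hat{y}^{(i)} = \sigma\left(X_b^{(i)}\theta\right)$$

其梯度为 $$\nabla J(\theta) = \frac{1}{m}\,X_b^{T}\left(\sigma(X_b\theta) - y\right)$$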

逻辑回归算法：
1. import numpy as np
2. from metrics import accuracy_score
class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    Attributes set by ``fit``:
        coef_: weights of the features (``_theta[1:]``).
        intercept_: bias term (``_theta[0]``).
        _theta: full parameter vector, intercept first.
    """

    def __init__(self):
        """Create an unfitted model; every parameter starts as ``None``."""
        self.coef_ = None
        self.intercept_ = None
        self._theta = None

    def _sigmoid(self, t):
        """Return 1 / (1 + exp(-t)) element-wise without overflowing."""
        t = np.asarray(t, dtype=float)
        # exp(-|t|) is always in (0, 1], so np.exp cannot overflow.
        decay = np.exp(-np.abs(t))
        result = np.where(t >= 0, 1. / (1. + decay), decay / (1. + decay))
        return result[()]

    def fit(self, X_train, y_train, eta=0.01, n_iters=1e4):
        """Train the logistic regression model with gradient descent.

        Args:
            X_train: 2-D array of training samples, one row per sample.
            y_train: labels, one per row of ``X_train``.
            eta: learning rate (step size of each update).
            n_iters: maximum number of gradient-descent iterations.

        Returns:
            ``self``, with ``coef_``, ``intercept_`` and ``_theta`` set.

        Raises:
            AssertionError: if the two inputs have different lengths.
        """
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"

        def J(theta, X_b, y):
            """Mean cross-entropy loss of ``theta`` on ``(X_b, y)``."""
            y_hat = self._sigmoid(X_b.dot(theta))
            # Keep probabilities strictly inside (0, 1) so log() stays
            # finite; a saturated prediction would otherwise give a nan/inf
            # loss and make the convergence test below meaningless.
            tiny = np.finfo(float).eps
            y_hat = np.clip(y_hat, tiny, 1. - tiny)
            return -np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)) / len(y)

        def dJ(theta, X_b, y):
            """Gradient of the loss: X_b^T (sigmoid(X_b theta) - y) / m."""
            return X_b.T.dot(self._sigmoid(X_b.dot(theta)) - y) / len(X_b)

        def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):
            """Run batch gradient descent and return the final theta.

            Stops after ``n_iters`` updates, or as soon as one update changes
            the loss by less than ``epsilon``.
            """
            theta = initial_theta
            # Remember the previous loss instead of recomputing it each step.
            last_loss = J(theta, X_b, y)
            cur_iter = 0

            while cur_iter < n_iters:
                theta = theta - eta * dJ(theta, X_b, y)
                loss = J(theta, X_b, y)
                if abs(loss - last_loss) < epsilon:
                    break
                last_loss = loss
                cur_iter += 1

            return theta

        # Prepend a column of ones so theta[0] acts as the intercept.
        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        initial_theta = np.zeros(X_b.shape[1])
        self._theta = gradient_descent(X_b, y_train, initial_theta, eta, n_iters)

        self.intercept_ = self._theta[0]
        self.coef_ = self._theta[1:]

        return self

    def predict_proba(self, X_predict):
        """Return the predicted probability of class 1 for each row.

        Raises:
            AssertionError: if the model is unfitted or the number of
                features differs from the training data.
        """
        assert self.intercept_ is not None and self.coef_ is not None, \
            "must fit before predict!"
        assert X_predict.shape[1] == len(self.coef_), \
            "the feature number of X_predict must be equal to X_train"

        X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
        return self._sigmoid(X_b.dot(self._theta))

    def predict(self, X_predict):
        """Return the predicted label (0 or 1) for each row of ``X_predict``.

        A sample is labelled 1 when its predicted probability is >= 0.5.
        Input validation is performed by ``predict_proba``.
        """
        proba = self.predict_proba(X_predict)
        return np.array(proba >= 0.5, dtype='int')

    def score(self, X_test, y_test):
        """Return ``accuracy_score`` of the predictions on the test set."""
        y_predict = self.predict(X_test)
        return accuracy_score(y_test, y_predict)

    def __repr__(self):
        return "LogisticRegression()"
1. """实现逻辑回归"""
2. import numpy as np
3. import matplotlib.pyplot as plt
4. from sklearn import datasets
# Load iris and keep only classes 0 and 1 with the first two features,
# which gives a two-dimensional binary classification problem.
iris = datasets.load_iris()
X = iris.data
y = iris.target

X = X[y < 2, :2]
y = y[y < 2]

# One scatter series per class.
plt.scatter(X[y == 0, 0], X[y == 0, 1], color='red')
plt.scatter(X[y == 1, 0], X[y == 1, 1], color='blue')
plt.show()
17. """使用逻辑回归"""
18. from model_selection import train_test_split
19. from LogisticRegression import LogisticRegression
# Hold out a test split, train the locally imported LogisticRegression and
# print its accuracy followed by the predicted test-set probabilities.
X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
print(log_reg.score(X_test, y_test))
print(log_reg.predict_proba(X_test))
结果：
1. E:\pythonspace\KNN_function\venv\Scripts\python.exe E:/pythonspace/KNN_function/try.py
2. 1.0
3. [0.92972035 0.98664939 0.14852024 0.17601199 0.0369836 0.0186637
4. 0.04936918 0.99669244 0.97993941 0.74524655 0.04473194 0.00339285
5. 0.26131273 0.0369836 0.84192923 0.79892262 0.82890209 0.32358166
6. 0.06535323 0.20735334]
8. Process finished with exit code 0
逻辑回归中的决策边界和添加多项式特征：
1. """在逻辑回归中添加多项式特征"""
2. import numpy as np
3. import matplotlib.pyplot as plt
# 100 points drawn from a 2-D standard normal.  Class 1 is the inside of the
# circle x0^2 + x1^2 < 1.5, so the true boundary is not a straight line.
np.random.seed(666)
X = np.random.normal(0, 1, size=(100, 2))
y = np.array(X[:, 0] ** 2 + X[:, 1] ** 2 < 1.5, dtype='int')
10. """使用逻辑回归"""
11. from LogisticRegression import LogisticRegression
# Fit logistic regression on the two raw features (no polynomial terms).
log_reg = LogisticRegression()
log_reg.fit(X, y)
16. """绘制思路"""
def plot_decision_boundary(model, axis):
    """Shade a rectangular region by the class that ``model`` predicts.

    Args:
        model: object whose ``predict`` accepts an (n, 2) array and returns
            one label per row.
        axis: ``[x0_min, x0_max, x1_min, x1_max]`` bounds of the region.

    The region is sampled at 100 points per unit length along each axis and
    drawn with ``plt.contourf`` on the current figure.
    """
    from matplotlib.colors import ListedColormap

    x0, x1 = np.meshgrid(
        np.linspace(axis[0], axis[1], int((axis[1] - axis[0]) * 100)),
        np.linspace(axis[2], axis[3], int((axis[3] - axis[2]) * 100)),
    )
    # Flatten the grid into (n, 2) samples, predict, then restore the shape.
    X_new = np.c_[x0.ravel(), x1.ravel()]
    y_predict = model.predict(X_new)
    zz = y_predict.reshape(x0.shape)

    custom_cmap = ListedColormap(['#EF9A9A', '#FFF59D', '#90CAF9'])
    # ``linewidth`` is not a contourf option (it was ignored with a
    # UserWarning), so it is no longer passed.
    plt.contourf(x0, x1, zz, cmap=custom_cmap)
# Decision regions of the linear model with the samples drawn on top.
plot_decision_boundary(log_reg, axis=[-4, 4, -4, 4])
plt.scatter(X[y == 0, 0], X[y == 0, 1])
plt.scatter(X[y == 1, 0], X[y == 1, 1])
plt.show()
35. """添加特征值，即升维"""
36. from sklearn.preprocessing import PolynomialFeatures
37. from sklearn.preprocessing import StandardScaler
38. from sklearn.pipeline import Pipeline
def PolynomialLogisticRegression(degree):
    """Build a pipeline: polynomial features -> scaling -> logistic regression.

    Args:
        degree: passed to ``PolynomialFeatures`` as ``degree``.

    Returns:
        An unfitted ``Pipeline`` with steps 'Poly', 'std_scaler', 'Logistic'.
    """
    return Pipeline([
        ('Poly', PolynomialFeatures(degree=degree)),
        ('std_scaler', StandardScaler()),
        ('Logistic', LogisticRegression()),
    ])
# Fit the degree-2 pipeline and draw its decision regions with the samples.
poly_log_reg = PolynomialLogisticRegression(degree=2)
poly_log_reg.fit(X, y)

plot_decision_boundary(poly_log_reg, axis=[-4, 4, -4, 4])
plt.scatter(X[y == 0, 0], X[y == 0, 1])
plt.scatter(X[y == 1, 0], X[y == 1, 1])
plt.show()
结果：
1. """逻辑回归中使用正则化"""
2. import numpy as np
3. import matplotlib.pyplot as plt
4. from sklearn.model_selection import train_test_split
5. from sklearn.linear_model import LogisticRegression
6. from sklearn.preprocessing import StandardScaler
7. from sklearn.pipeline import Pipeline
8. from sklearn.preprocessing import PolynomialFeatures
# 200 points drawn from a 2-D standard normal.  Class 1 lies below the
# parabola x1 < 1.5 - x0^2.
np.random.seed(666)
X = np.random.normal(0, 1, size=(200, 2))
y = np.array(X[:, 0] ** 2 + X[:, 1] < 1.5, dtype='int')
# Label noise: force 20 randomly drawn samples (with replacement) to class 1.
for _ in range(20):
    y[np.random.randint(200)] = 1

plt.scatter(X[y == 0, 0], X[y == 0, 1])
plt.scatter(X[y == 1, 0], X[y == 1, 1])
plt.show()
X_train, X_test, y_train, y_test = train_test_split(X, y)
log_reg = LogisticRegression()
# Fit on the training split only, so that X_test stays unseen by the model.
log_reg.fit(X_train, y_train)
def plot_decision_boundary(model, axis):
    """Shade a rectangular region by the class that ``model`` predicts.

    Args:
        model: object whose ``predict`` accepts an (n, 2) array and returns
            one label per row.
        axis: ``[x0_min, x0_max, x1_min, x1_max]`` bounds of the region.

    The region is sampled at 100 points per unit length along each axis and
    drawn with ``plt.contourf`` on the current figure.
    """
    from matplotlib.colors import ListedColormap

    x0, x1 = np.meshgrid(
        np.linspace(axis[0], axis[1], int((axis[1] - axis[0]) * 100)),
        np.linspace(axis[2], axis[3], int((axis[3] - axis[2]) * 100)),
    )
    # Flatten the grid into (n, 2) samples, predict, then restore the shape.
    X_new = np.c_[x0.ravel(), x1.ravel()]
    y_predict = model.predict(X_new)
    zz = y_predict.reshape(x0.shape)

    custom_cmap = ListedColormap(['#EF9A9A', '#FFF59D', '#90CAF9'])
    # ``linewidth`` is not a contourf option (it was ignored with a
    # UserWarning), so it is no longer passed.
    plt.contourf(x0, x1, zz, cmap=custom_cmap)
def PolynomialLogisticRegression(degree, C=1.0, penalty='l2'):
    """Build a pipeline: polynomial features -> scaling -> logistic regression.

    Args:
        degree: passed to ``PolynomialFeatures`` as ``degree``.
        C: inverse regularization strength passed to ``LogisticRegression``.
        penalty: regularization type passed to ``LogisticRegression``.

    Returns:
        An unfitted ``Pipeline`` with steps 'Poly', 'std_scaler', 'Logistic'.
    """
    logistic_kwargs = {'C': C, 'penalty': penalty}
    if penalty == 'l1':
        # The 'lbfgs' solver (the default in current scikit-learn) rejects an
        # L1 penalty; 'liblinear' supports it.
        logistic_kwargs['solver'] = 'liblinear'
    return Pipeline([
        ('Poly', PolynomialFeatures(degree=degree)),
        ('std_scaler', StandardScaler()),
        ('Logistic', LogisticRegression(**logistic_kwargs)),
    ])
# High-degree features with a strong L1 penalty, fitted on the training split.
poly_log_reg = PolynomialLogisticRegression(degree=20, C=0.1, penalty='l1')
poly_log_reg.fit(X_train, y_train)

plot_decision_boundary(poly_log_reg, axis=[-4, 4, -4, 4])
plt.scatter(X[y == 0, 0], X[y == 0, 1])
plt.scatter(X[y == 1, 0], X[y == 1, 1])
plt.show()
结果

应用OVR和OVO使逻辑回归处理多分类问题
1. """OVR和OVO"""
2. #为了数据可视化方便，我们只使用鸢尾花数据集的前两列特征
3. from sklearn import datasets
4. from sklearn.linear_model import LogisticRegression
5. from sklearn.model_selection import train_test_split
6. import matplotlib.pyplot as plt
7. import numpy as np
iris = datasets.load_iris()
X = iris['data'][:, :2]
y = iris['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

# multi_class selects 'ovr' (one-vs-rest) or 'multinomial'; one-vs-one is not
# an option of LogisticRegression itself.  Accuracy is fairly low here
# because only the first two features are used.
# log_reg = LogisticRegression(multi_class='ovr')
log_reg = LogisticRegression(multi_class='ovr', solver='newton-cg')
log_reg.fit(X_train, y_train)
# The score is computed but not printed; wrap the call in print() to show it.
log_reg.score(X_test, y_test)
def plot_decision_boundary(model, axis):
    """Shade a rectangular region by the class that ``model`` predicts.

    Args:
        model: object whose ``predict`` accepts an (n, 2) array and returns
            one label per row.
        axis: ``[x0_min, x0_max, x1_min, x1_max]`` bounds of the region.

    The region is sampled at 100 points per unit length along each axis and
    drawn with ``plt.contourf`` on the current figure.
    """
    from matplotlib.colors import ListedColormap

    x0, x1 = np.meshgrid(
        np.linspace(axis[0], axis[1], int((axis[1] - axis[0]) * 100)),
        np.linspace(axis[2], axis[3], int((axis[3] - axis[2]) * 100)),
    )
    # Flatten the grid into (n, 2) samples, predict, then restore the shape.
    X_new = np.c_[x0.ravel(), x1.ravel()]
    y_predict = model.predict(X_new)
    zz = y_predict.reshape(x0.shape)

    custom_cmap = ListedColormap(['#EF9A9A', '#FFF59D', '#90CAF9'])
    # ``linewidth`` is not a contourf option (it was ignored with a
    # UserWarning), so it is no longer passed.
    plt.contourf(x0, x1, zz, cmap=custom_cmap)
# Three-class decision regions with one scatter series per iris class.
plot_decision_boundary(log_reg, axis=[4, 8.5, 1.5, 4.5])
plt.scatter(X[y == 0, 0], X[y == 0, 1])
plt.scatter(X[y == 1, 0], X[y == 1, 1])
plt.scatter(X[y == 2, 0], X[y == 2, 1])
plt.show()
39. """使用全部数据 OVR and OVO"""
40. from sklearn.multiclass import OneVsOneClassifier
41. from sklearn.multiclass import OneVsRestClassifier
43. from sklearn import datasets
44. from sklearn.linear_model import LogisticRegression
45. from sklearn.model_selection import train_test_split
# Use all four iris features this time.
iris = datasets.load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

# Both wrappers take a binary classifier as their argument.
ovr = OneVsRestClassifier(log_reg)
ovr.fit(X_train, y_train)
print(ovr.score(X_test, y_test))

ovo = OneVsOneClassifier(log_reg)
ovo.fit(X_train, y_train)
print(ovo.score(X_test, y_test))
结果：
1. E:\pythonspace\KNN_function\venv\Scripts\python.exe E:/pythonspace/KNN_function/try.py
2. E:\pythonspace\KNN_function\venv\lib\site-packages\matplotlib\contour.py:960: UserWarning: The following kwargs were not used by contour: 'linewidth'
3. s)
4. 0.9736842105263158
5. 1.0
7. Process finished with exit code 0
查看全文

相关阅读:
韩寒首度回应小三事件：望女友妻子和平相处_陕西频道_凤凰网
 冒泡排序　oj Google 搜索
 on call是什么意思_on call的翻译_音标_读音_用法_例句必应 Bing 词典
 分享：创业失败后如何找工作
 美国出台高科技人才移民优惠政策_the United States 美国_cnBeta.COM
分别用数组和链表实现堆栈(C语言版) ahljjun的专栏博客频道 CSDN.NET
分享：void及void指针深层次探索
 浅谈基础算法之堆栈（五）川山甲博客园
 分享：【原创】Stringification 在二级宏定义中的使用
 System Engineer / Backend Engineer

原文地址：https://www.cnblogs.com/kekexuanxaun/p/9459365.html