Logistic regression is used for binary classification. Unlike other regression models, it outputs a probability for each sample; by convention the class with fewer samples is treated as the positive class, and the predicted probability refers to that class.
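As a minimal sketch of that idea (the weights, bias, and feature values below are made up for illustration), the model passes a linear combination of the features through the sigmoid function to get the probability of the positive class:

import numpy as np

def sigmoid(z):
    # Map any real-valued score to a probability in (0, 1)
    return 1.0 / (1.0 + np.exp(-z))

# Hypothetical weights, bias, and one sample's features, for illustration only
w = np.array([0.8, -0.3, 1.2])
b = -0.5
x = np.array([1.0, 2.0, 0.5])

p_positive = sigmoid(np.dot(w, x) + b)  # probability of the positive class
print(p_positive)                       # about 0.57 here; predict positive if p >= 0.5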
The loss function of logistic regression is called the log-likelihood loss. Unlike linear regression, it has no closed-form solution, so the weights are found iteratively, for example with gradient descent.
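A minimal sketch of that loss: with labels y in {0, 1} and predicted probabilities p, the per-sample loss is -[y*log(p) + (1-y)*log(1-p)], averaged over the samples. The labels and probabilities below are made up just to show the behavior:

import numpy as np

def log_likelihood_loss(y_true, y_prob):
    # Average of -[y*log(p) + (1-y)*log(1-p)] over all samples
    y_true = np.asarray(y_true, dtype=float)
    y_prob = np.asarray(y_prob, dtype=float)
    return -np.mean(y_true * np.log(y_prob) + (1 - y_true) * np.log(1 - y_prob))

# Made-up labels and predicted probabilities, for illustration only
print(log_likelihood_loss([1, 0, 1, 0], [0.9, 0.2, 0.7, 0.4]))  # small loss: predictions mostly agree with the labels
print(log_likelihood_loss([1, 0, 1, 0], [0.2, 0.9, 0.3, 0.8]))  # large loss: predictions mostly contradict the labels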
The sklearn API for logistic regression: from sklearn.linear_model import LogisticRegression
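A minimal usage sketch of that API, assuming a toy dataset built with sklearn's make_classification helper; the parameter values here (C, penalty, solver) are just illustrative choices:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

# Toy data: 200 samples, 4 features, 2 classes (for illustration only)
X, y = make_classification(n_samples=200, n_features=4, random_state=0)

# C is the inverse regularization strength; smaller C means stronger regularization
clf = LogisticRegression(C=1.0, penalty="l2", solver="lbfgs")
clf.fit(X, y)

print(clf.classes_)              # class labels, in the order used by predict_proba
print(clf.predict(X[:3]))        # hard class predictions for the first three samples
print(clf.predict_proba(X[:3]))  # per-class probabilities for the first three samples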
from sklearn.linear_model import LogisticRegression
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report


# Data source: http://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/
def logistic():
    """Binary classification with logistic regression for cancer prediction."""
    column = ['sample code number', 'clump thickness', 'uniformity of cell size', 'uniformity of cell shape', 'marginal adhesion', 'single epithelial cell size', 'bare nuclei', 'bland chromatin', 'normal nucleoli', 'mitoses', 'class']
    # Adjust the path to wherever the downloaded file is saved locally
    data = pd.read_csv(r"E:\360Downloads\Software\breast-cancer-wisconsin.data", names=column)
    # print(data)

    # Handle missing values: the raw file marks them with "?"
    data = data.replace(to_replace="?", value=np.nan)
    data = data.dropna(axis=0, how='any')

    # Split the data: x holds the feature columns, y the target column
    x_train, x_test, y_train, y_test = train_test_split(data[column[1:10]], data[column[10]], test_size=0.25)

    # Standardize the features; fit the scaler on the training set only, then apply it to the test set
    x_std = StandardScaler()
    x_train = x_std.fit_transform(x_train)
    x_test = x_std.transform(x_test)

    # Fit the logistic regression model; C is the regularization strength and can be tuned with a grid search
    lg = LogisticRegression(C=1.0)
    lg.fit(x_train, y_train)
    y_predict = lg.predict(x_test)
    print(lg.coef_)
    print("Accuracy:", lg.score(x_test, y_test))
    # classification_report shows precision, recall, and F1 for each class (2 = benign, 4 = malignant)
    print("Recall:", classification_report(y_test, y_predict, labels=[2, 4], target_names=["benign", "malignant"]))


if __name__ == "__main__":
    logistic()
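The comment on C above suggests tuning it with a grid search. A minimal sketch of how that could look with GridSearchCV, using sklearn's built-in breast-cancer dataset for self-containment (a different variant of the Wisconsin data than the UCI file above); the candidate values of C are arbitrary choices for illustration:

from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler

# sklearn's built-in breast-cancer dataset (not the same file as the UCI download above)
X, y = load_breast_cancer(return_X_y=True)
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Search over a few candidate values of C with 5-fold cross-validation
param_grid = {"C": [0.01, 0.1, 1.0, 10.0]}
search = GridSearchCV(LogisticRegression(max_iter=1000), param_grid, cv=5)
search.fit(x_train, y_train)

print("Best C:", search.best_params_["C"])
print("Test accuracy:", search.score(x_test, y_test))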