zoukankan      html  css  js  c++  java
  • python——回归案例

    # Third-party dependencies used throughout the regression walkthrough.
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import r2_score, mean_squared_error
    from sklearn.model_selection import train_test_split

    # Select the sans-serif font used by matplotlib; presumably chosen so the
    # Chinese column names render in plot labels (TODO confirm the font is
    # installed on the target machine).
    # NOTE: this assignment was fused onto the same line as the following
    # import in the scraped original, which is a SyntaxError.
    plt.rcParams['font.sans-serif'] = ['stxiHei']
    
    
    
    
    # Load the car-sales statistics from the CSV file.
    data = pd.read_csv(r'Statistics汽车销售数据.csv', encoding='utf-8')

    # Keep only rows whose road-mileage value is present (the original note
    # says just one row is missing it, so that row is simply dropped) and, in
    # the same selection, restrict the frame to the target plus the candidate
    # predictor columns.
    data = data.loc[
        data['公路里程数'].notna(),
        [
            '传统汽车销量',
            '国内生产总值当季值(亿元)x1',
            '汽油价格(元/吨)x2',
            '人民币贷款基准利率%x3',
            '汽车总产量(万辆)x4',
            '公路里程数',
            '汽车整车股票指数',
            '消费者信心指数',
        ],
    ]
    data.head()
    
    
    
    # Correlations between the columns of the data.
    cormatrix = data.corr()
    # Multiply element-wise by a 0/1 matrix that is 1 strictly above the
    # diagonal, so the diagonal and the lower triangle are zeroed and each
    # pair of columns appears only once.
    cormatrix *= np.triu(np.ones(cormatrix.shape), k=1)
    cormatrix
    
    
    
    # Compute the correlation coefficients between all columns.
    corr_all = data.corr()

    # Boolean mask that is True on the upper triangle (diagonal included);
    # seaborn leaves masked cells blank, so each pair is drawn only once.
    # The builtin `bool` is used because the `np.bool` alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    mask = np.zeros_like(corr_all, dtype=bool)
    mask[np.triu_indices_from(mask)] = True

    # Create the figure and draw the heatmap.
    g, ax = plt.subplots(figsize=(9, 7))
    sns.heatmap(corr_all, mask=mask, square=True, linewidths=.5, ax=ax, cmap='BuPu')
    plt.title('Correlation of Feactures')
    plt.show()
    
    
    # Feature selection: candidate predictors and the regression target.
    X = data[['国内生产总值当季值(亿元)x1', '汽油价格(元/吨)x2', '人民币贷款基准利率%x3','公路里程数', '汽车整车股票指数', '消费者信心指数']]
    y = data['传统汽车销量']


    # Scatter every candidate feature against the target, taking one colour
    # per feature from the tab10 colormap.
    # NOTE: `plt.figure()` and the `for` header were fused onto one line in the
    # scraped original (a SyntaxError); they are separate statements here.
    plt.figure()
    for i, column in enumerate(X.columns):
        plt.scatter(X[column],
                    y,
                    color=np.array(plt.cm.tab10(i / len(X.columns))),
                    label=column)
        # Legend and show stay inside the loop, as in the original, so every
        # feature is displayed as it is plotted.
        plt.legend()
        plt.show()
    
    
    # Predictors used for modelling (the loan-rate column selected earlier is
    # no longer included) and the target column.
    X = data.loc[:, [
        '国内生产总值当季值(亿元)x1',
        '汽油价格(元/吨)x2',
        '公路里程数',
        '汽车整车股票指数',
        '消费者信心指数',
    ]]
    y = data.loc[:, '传统汽车销量']
    X.head()


    # Split into training and test sets; the random seed is fixed at
    # random_state = 666 so that later runs are reproducible.
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)
    
    
    
    # Model: ordinary least-squares linear regression fitted on the training
    # split, then evaluated on the held-out test split.
    lin_reg0 = LinearRegression()
    lin_reg0.fit(X_train, y_train)
    y_predict = lin_reg0.predict(X_test)
    # NOTE: the prediction and both print calls were fused onto a single line
    # in the scraped original (a SyntaxError); they are separate statements here.
    print(f'r2_score:{r2_score(y_test, y_predict)}')
    print(f'MSE:{mean_squared_error(y_test, y_predict)}')
    
    # Helper that draws a residual plot for a fitted model.
    def plot_risiduals(model, X_train, X_test, y_train, y_test):
        """Scatter-plot the residuals of ``model`` on the training and test data.

        Residuals are computed as ``model.predict(X) - y`` (prediction minus
        true value) and plotted against the true values: training points in
        red, test points in black, with a dashed horizontal line at zero.

        Args:
            model: Object exposing ``predict(X)``.
            X_train: Features of the training set.
            X_test: Features of the test set.
            y_train: True target values for ``X_train``.
            y_test: True target values for ``X_test``.

        Returns:
            None. A new 7x4 figure is created; it is not shown or saved here.
        """
        fig, ax = plt.subplots(figsize=(7, 4))

        for features, target, label, colour in (
            (X_train, y_train, 'Train', 'r'),
            (X_test, y_test, 'Test', 'k'),
        ):
            ax.scatter(target, model.predict(features) - target,
                       label=label, color=colour)

        ax.axhline(y=0.0, c="b", ls="--", lw=2)
        ax.set_title('Risiduals')
        ax.set_xlabel('True')
        ax.set_ylabel('Risiduals')
        # The scatter labels are only visible once a legend is drawn; without
        # it the Train/Test series cannot be told apart except by colour.
        ax.legend()
    # Residual plot for the first model, lin_reg0.
    plot_risiduals(lin_reg0, X_train, X_test, y_train, y_test)

    # Adjust the number of features: keep three columns and refit.
    selected = ['国内生产总值当季值(亿元)x1', '汽车整车股票指数', '消费者信心指数']
    X_train = X_train[selected]
    X_test = X_test[selected]
    lin_reg1 = LinearRegression()
    lin_reg1.fit(X_train, y_train)
    y_predict = lin_reg1.predict(X_test)
    # NOTE: the prediction and both print calls were fused onto one line in the
    # scraped original (a SyntaxError); they are separate statements here.
    print(f'r2_score:{r2_score(y_test, y_predict)}')
    print(f'MSE:{mean_squared_error(y_test, y_predict)}')
    # Output recorded in the original post (it was pasted into the code as bare
    # text, which is not valid Python):
    #   r2_score:0.9167941097031658
    #   MSE:3878.5666590026112
    plot_risiduals(lin_reg1, X_train, X_test, y_train, y_test)
    
    
    # Prediction: tabulate the true test values next to the latest predictions
    # and their difference (true minus predicted).
    True_Predict = {
        'True': y_test,
        'Predict': y_predict,
        'Risiduals': y_test - y_predict,
    }
    pd.DataFrame(data=True_Predict)

    转自:https://mp.weixin.qq.com/s/o3TIX_7t2nsc6z-J5Lo-XQ

  • 相关阅读:
    jenkins+docker+rancher+zikui 部署
    利用jenkins直接构件docker镜像并发布到docker服务器
    docker+Rancher+K3S
    windows使用VSCode进行Shell开发
    v-drag 弹框拖拽的实现
    vue3兄弟组件传值
    vue3 组件传值
    Azure Computer Vision 之 Smart Crop 智能裁剪图片
    ASP.NET Core 单元测试
    ASP.NET Core Static Files
  • 原文地址:https://www.cnblogs.com/zym-yc/p/12287550.html
Copyright © 2011-2022 走看看