zoukankan      html  css  js  c++  java
  • 14.多元线性回归

    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn import datasets

    获取数据(注意:load_boston 已在 scikit-learn 1.2 中被移除,以下代码需在 1.2 之前的版本中运行)

    boston = datasets.load_boston()
    X = boston.data
    y = boston.target

    数据处理

    X = X[y < 50.0]
    y = y[y < 50.0]

    数据分割

    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

    多元线性回归方程θ参数求解

    from sklearn.linear_model import LinearRegression
    lin_reg = LinearRegression()
    lin_reg.fit(X_train, y_train)

    θ系数(各特征对应的回归系数 coef_)

    lin_reg.coef_
    array([-1.15625837e-01,  3.13179564e-02, -4.35662825e-02, -9.73281610e-02,
           -1.09500653e+01,  3.49898935e+00, -1.41780625e-02, -1.06249020e+00,
            2.46031503e-01, -1.23291876e-02, -8.79440522e-01,  8.31653623e-03,
           -3.98593455e-01])

    截距 θ₀(intercept_)

    lin_reg.intercept_
    32.59756158869991

    在测试集上评估预测结果(R² 得分)

    lin_reg.score(X_test, y_test)
    0.8009390227581037

    kNN Regressor(k 近邻回归)

    from sklearn.neighbors import KNeighborsRegressor
    
    knn_reg = KNeighborsRegressor()
    knn_reg.fit(X_train, y_train)
    knn_reg.score(X_test, y_test)
    0.602674505080953

    网格搜索超参数

    from sklearn.model_selection import GridSearchCV
    
    param_grid = [
        {
            "weights":["uniform"],
            "n_neighbors":[i for i in range(1, 11)]
        },
        {
            "weights":["distance"],
            "n_neighbors":[i for i in range(1, 11)],
            "p":[i for i in range(1, 6)]
        }
    ]
    
    knn_reg = KNeighborsRegressor()
    grid_search = GridSearchCV(knn_reg, param_grid, n_jobs=-1, verbose=1)
    grid_search.fit(X_train, y_train)
    Fitting 5 folds for each of 60 candidates, totalling 300 fits
    
     
    [Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
    [Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.2s
    [Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:    1.4s finished
    
    Out[34]:
    GridSearchCV(cv=None, error_score=nan,
                 estimator=KNeighborsRegressor(algorithm='auto', leaf_size=30,
                                               metric='minkowski',
                                               metric_params=None, n_jobs=None,
                                               n_neighbors=5, p=2,
                                               weights='uniform'),
                 iid='deprecated', n_jobs=-1,
                 param_grid=[{'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                              'weights': ['uniform']},
                             {'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                              'p': [1, 2, 3, 4, 5], 'weights': ['distance']}],
                 pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
                 scoring=None, verbose=1)
    grid_search.best_params_
    {'n_neighbors': 6, 'p': 1, 'weights': 'distance'}
    grid_search.best_score_
    0.6243135119018297
    grid_search.best_estimator_.score(X_test, y_test)
    0.7353138117643773
  • 相关阅读:
    used内存较大,实际top查看系统进程中并没有占用这么多内存
    查看LINUX进程内存占用情况
    关于ConcurrentHashMap的key和value不能为null的深层次原因
    Linux修改用户所在组方法
    原因可能是托管的PInvoke签名与非托管的目标签名不匹配
    vs2019 实现C#调用c++的dll两种方法
    java jvm 参数 -Xms -Xmx -Xmn -Xss 调优总结
    java 读取文件的几种方式和通过url获取文件
    Idea中Maven的默认配置 (非常好)
    去哪儿网models数据更新
  • 原文地址:https://www.cnblogs.com/waterr/p/14039611.html
Copyright © 2011-2022 走看看