  • Polynomial regression with sklearn

    import numpy as np
    import matplotlib.pyplot as plt
    x=np.random.uniform(-3,3,size=100)
    X=x.reshape(-1,1)
    np.random.seed(666)
    y=0.5*x**2+x+2+np.random.normal(0,1,size=100)
    plt.scatter(x,y)
    plt.show()
    #Polynomial regression is still linear regression; it just adds new feature columns derived from the original data
    x2=np.hstack([X,X**2])
    from sklearn.linear_model import LinearRegression
    l=LinearRegression()
    l.fit(x2,y)
    y_pre=l.predict(x2)
    plt.scatter(x,y)
    plt.plot(np.sort(x),y_pre[np.argsort(x)],color="r")
    plt.show()
    print(l.coef_)
    print(l.intercept_)
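    #Sanity check (not in the original post): since the data was generated from y=0.5*x**2+x+2 plus noise,
    #the fitted coefficients should land near the true values; the columns of x2 are [x, x**2]
    print(l.coef_[0])    #coefficient on x, expected to be roughly 1
    print(l.coef_[1])    #coefficient on x**2, expected to be roughly 0.5
    print(l.intercept_)  #intercept, expected to be roughly 2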

    #Polynomial regression in sklearn, plus Pipeline
    #Polynomial regression on one-dimensional data (a single feature)
    import numpy as np
    import matplotlib.pyplot as plt
    x=np.random.uniform(-3,3,size=100)
    X=x.reshape(-1,1)
    np.random.seed(666)
    y=0.5*x**2+x+2+np.random.normal(0,1,size=100)
    plt.scatter(x,y)
    plt.show()
    from sklearn.preprocessing import PolynomialFeatures
    poly=PolynomialFeatures(degree=2) #set the polynomial degree to 2
    poly.fit(X)
    x2=poly.transform(X) #expand the feature matrix: the new columns are 1, x and x**2
    print(x2.shape)
    print(x2[:5,:])
    from sklearn.linear_model import LinearRegression
    l=LinearRegression()
    l.fit(x2,y)
    y_pre=l.predict(x2)
    plt.scatter(x,y)
    plt.plot(np.sort(x),y_pre[np.argsort(x)],color="r")
    plt.show()
    print(l.coef_)
    print(l.intercept_)
    #Polynomial regression on two-dimensional (two-feature) data
    #A degree-2 expansion of two features x, y produces a 6-column matrix: 1, x, y, x^2, xy, y^2
    x=np.arange(1,11).reshape(-1,2)
    print(x.shape)
    poly=PolynomialFeatures(degree=2)
    poly.fit(x)
    x2=poly.transform(x)
    print(x2.shape)
    print(x)
    print(x2)
    #A degree-3 expansion produces a 10-column matrix: 1, x, y, x^2, xy, y^2, x^3, x^2*y, x*y^2, y^3
    poly=PolynomialFeatures(degree=3)
    poly.fit(x)
    x2=poly.transform(x)
    print(x2.shape)
    print(x)
    print(x2)
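    #Optional check (not in the original post): PolynomialFeatures can report which column is which term;
    #get_feature_names_out exists in sklearn 1.0+, older versions expose get_feature_names instead
    print(poly.get_feature_names_out()) #e.g. ['1', 'x0', 'x1', 'x0^2', 'x0 x1', 'x1^2', ...]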

    #Pipeline chains three steps: 1) polynomial feature expansion, 2) feature scaling, 3) linear regression
    import numpy as np
    import matplotlib.pyplot as plt
    x=np.random.uniform(-3,3,size=100)
    np.random.seed(666)
    y=0.5*x**2+x+2+np.random.normal(0,1,size=100)
    X=x.reshape(-1,1)
    plt.scatter(x,y)
    plt.show()
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    #Build a reusable polynomial-regression estimator
    poly_reg=Pipeline([
        ("poly",PolynomialFeatures(degree=2)),   #1 polynomial feature expansion
        ("std_scaler",StandardScaler()),         #2 feature scaling
        ("lin_reg",LinearRegression())           #3 linear regression
    ])
    poly_reg.fit(X,y)
    y_pre=poly_reg.predict(X)
    plt.scatter(x,y)
    plt.plot(np.sort(x),y_pre[np.argsort(x)],color="r")
    plt.show()
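    #Optional check (not in the original post): the fitted steps stay accessible via named_steps;
    #because of the StandardScaler step these coefficients refer to the scaled features, so they will not equal 0.5/1/2
    lin_step=poly_reg.named_steps["lin_reg"]
    print(lin_step.coef_)
    print(lin_step.intercept_)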

    #Overfitting vs. underfitting: model generalization
    import numpy as np
    import matplotlib.pyplot as plt
    x=np.random.uniform(-3,3,size=100)
    np.random.seed(666)
    y=0.5*x**2+x+2+np.random.normal(0,1,size=100)
    X=x.reshape(-1,1)
    plt.scatter(x,y)
    plt.show()
    #A plain linear regression underfits this quadratic data
    from sklearn.linear_model import LinearRegression
    l=LinearRegression()
    l.fit(X,y)
    print(l.score(X,y))
    y_pre=l.predict(X)
    plt.scatter(x,y)
    plt.plot(np.sort(x),y_pre[np.argsort(x)],color="r")
    plt.show()
    from sklearn.metrics import mean_squared_error
    print(mean_squared_error(y,y_pre))
    #Polynomial regression: wrap the pipeline construction in a helper function
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.preprocessing import StandardScaler
    def PolynomailRegression(degree):
        return Pipeline([
            ("poly",PolynomialFeatures(degree=degree)),   #1 polynomial feature expansion
            ("std_scaler",StandardScaler()),              #2 feature scaling
            ("lin_reg",LinearRegression())                #3 linear regression
        ])
    poly=PolynomailRegression(degree=2)
    poly.fit(X,y)
    print(poly.score(X,y))
    y_pre=poly.predict(X)
    plt.scatter(x,y)
    plt.plot(np.sort(x),y_pre[np.argsort(x)],color="r")
    plt.show()
    from sklearn.metrics import mean_squared_error
    print(mean_squared_error(y,y_pre))
    #Overfitting: raising the degree to 100 overfits badly; the training MSE keeps shrinking as the degree grows, but the model becomes far too complex to be useful
    poly=PolynomailRegression(degree=100)
    poly.fit(X,y)
    print(poly.score(X,y))
    y_pre=poly.predict(X)
    plt.scatter(x,y)
    plt.plot(np.sort(x),y_pre[np.argsort(x)],color="r")
    plt.show()
    from sklearn.metrics import mean_squared_error
    print(mean_squared_error(y,y_pre))
    #The fitted curve makes the overfitting obvious
    x1=np.linspace(-3,3,100).reshape(100,1)
    y1=poly.predict(x1)
    plt.scatter(x,y)
    plt.plot(x1[:,0],y1,color="r")
    plt.axis([-3,3,-1,10])
    plt.show()
    #Check generalization with train_test_split: fit on the training set, score on the held-out test set
    from sklearn.model_selection import train_test_split
    x_train,x_test,y_train,y_test=train_test_split(X,y,random_state=666)
    l=LinearRegression()
    l.fit(x_train,y_train)
    y_pre1=l.predict(x_test)
    print(mean_squared_error(y_test,y_pre1))
    P1=PolynomailRegression(degree=2)
    P1.fit(x_train,y_train)
    y_pre2=P1.predict(x_test)
    print(mean_squared_error(y_test,y_pre2))
    P2=PolynomailRegression(degree=10)
    P2.fit(x_train,y_train)
    y_pre3=P2.predict(x_test)
    print(mean_squared_error(y_test,y_pre3))
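    #Optional sweep (not in the original post): repeat the comparison over several degrees to watch the
    #test error fall and then rise as the model goes from underfitting to overfitting
    for d in [1,2,5,10,20]:
        p_reg=PolynomailRegression(degree=d)
        p_reg.fit(x_train,y_train)
        print("degree",d,"test MSE:",mean_squared_error(y_test,p_reg.predict(x_test)))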
    #Wrap the learning-curve plot in a function
    #Learning curves visualize how a model underfits or overfits as the training set grows
    from sklearn.model_selection import train_test_split
    x_train,x_test,y_train,y_test=train_test_split(X,y,random_state=666)
    def plot_learning_curve(algo,x_train,x_test,y_train,y_test):
        train_score = []
        test_score = []
        for i in range(1, len(x_train)):
            algo.fit(x_train[:i], y_train[:i])
            y_train_pre = algo.predict(x_train[:i])
            y_test_pre = algo.predict(x_test)
            train_score.append(mean_squared_error(y_train[:i], y_train_pre))
            test_score.append(mean_squared_error(y_test, y_test_pre))
        plt.figure()
        plt.plot([i for i in range(1, len(x_train))], np.sqrt(train_score), "g", label="train_error")
        plt.plot([i for i in range(1, len(x_train))], np.sqrt(test_score), "r", label="test_error")
        plt.legend()
        plt.axis([0,len(x_train)+1,0,4])
        plt.show()
    #Plot learning curves for the linear and polynomial models
    plot_learning_curve(LinearRegression(),x_train,x_test,y_train,y_test) #plain linear regression
    plot_learning_curve(PolynomailRegression(degree=1),x_train,x_test,y_train,y_test) #underfitting
    plot_learning_curve(PolynomailRegression(degree=2),x_train,x_test,y_train,y_test) #a good fit
    plot_learning_curve(PolynomailRegression(degree=20),x_train,x_test,y_train,y_test) #overfitting
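    #Alternative (not in the original post): sklearn's learning_curve helper computes similar curves with
    #cross-validation instead of a single split; scoring="neg_mean_squared_error" returns negated MSE
    from sklearn.model_selection import learning_curve
    sizes,train_scores,test_scores=learning_curve(PolynomailRegression(degree=2),X,y,cv=5,
                                                  train_sizes=np.linspace(0.1,1.0,10),
                                                  scoring="neg_mean_squared_error")
    plt.plot(sizes,np.sqrt(-train_scores.mean(axis=1)),"g",label="train_error")
    plt.plot(sizes,np.sqrt(-test_scores.mean(axis=1)),"r",label="test_error")
    plt.legend()
    plt.show()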
    #Cross-validation, illustrated with the kNN algorithm
    #Cross-validation in code:
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn import datasets
    digits=datasets.load_digits()
    x=digits.data
    y=digits.target
    from sklearn.model_selection import train_test_split
    x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.4,random_state=666)
    #1-1 Hyperparameter search with a plain train/test split
    from sklearn.neighbors import KNeighborsClassifier
    best_score=0
    best_p=0
    best_k=0
    for k in range(2,11):
        for p in range(1,6):
            knn1=KNeighborsClassifier(weights="distance",p=p,n_neighbors=k)
            knn1.fit(x_train,y_train)
            score=knn1.score(x_test,y_test)
            if score>best_score:
                best_p=p
                best_k=k
                best_score=score
    print("best_score:",best_score)
    print("best_k:",best_k)
    print("best_p:",best_p)
    #1-2 Hyperparameter search with cross-validation
    from sklearn.model_selection import cross_val_score
    best_p=0
    best_k=0
    best_score=0
    for k in range(2,11):
        for p in range(1,6):
            knn2=KNeighborsClassifier(weights="distance",p=p,n_neighbors=k)
            knn2.fit(x_train,y_train)
            scores=cross_val_score(knn2,x_train,y_train,cv=5) #cv is the number of folds (older sklearn versions defaulted to 3, recent ones to 5)
            score=np.mean(scores)
            if score>best_score:
                best_p=p
                best_k=k
                best_score=score
    print("best_score:",best_score)
    print("best_k:",best_k)
    print("best_p:",best_p)
    knn=KNeighborsClassifier(weights="distance",p=2,n_neighbors=2) #refit with the selected hyperparameters and evaluate on the test set
    knn.fit(x_train,y_train)
    print(knn.score(x_test,y_test))
    #1-3 Grid search: cross-validate over the training set to find the best hyperparameter combination
    from sklearn.model_selection import GridSearchCV
    knn3=KNeighborsClassifier()
    param=[
        {
            "weights":["distance"],
            "n_neighbors":[i for i in range(2,11)],
            "p":[k for k in range(1,6)]
        }
    ]
    grid1=GridSearchCV(knn3,param,verbose=1,cv=5) #cv is the number of folds (older sklearn versions defaulted to 3, recent ones to 5)
    grid1.fit(x_train,y_train)
    print(grid1.best_score_)
    print(grid1.best_params_)
    kn2=grid1.best_estimator_
    print(kn2.score(x_test,y_test))
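    #Optional variation (not in the original post): cv also accepts an explicit splitter object, which makes
    #the folds reproducible, and n_jobs=-1 runs the search in parallel
    from sklearn.model_selection import StratifiedKFold
    cv_splitter=StratifiedKFold(n_splits=5,shuffle=True,random_state=666)
    grid2=GridSearchCV(KNeighborsClassifier(),param,cv=cv_splitter,n_jobs=-1)
    grid2.fit(x_train,y_train)
    print(grid2.best_score_)
    print(grid2.best_params_)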
    #Model regularization: constrain the size of the coefficients to reduce variance
    import numpy as np
    import matplotlib.pyplot as plt
    x=np.random.uniform(-3,3,size=100)
    np.random.seed(666)
    y=0.5*x**2+x+2+np.random.normal(0,1,size=100)
    X=x.reshape(-1,1)
    plt.scatter(x,y)
    plt.show()
    lin=LinearRegression() #keep a reference to the regressor so its coefficients can be inspected after fitting
    def PolynomailRegression(degree):
        return Pipeline([
            ("poly",PolynomialFeatures(degree=degree)),   #1 polynomial feature expansion
            ("std_scaler",StandardScaler()),              #2 feature scaling
            ("lin_reg",lin)                               #3 linear regression
        ])
    poly=PolynomailRegression(degree=100)
    poly.fit(X,y)
    print(poly.score(X,y))
    y_pre=poly.predict(X)
    plt.scatter(x,y)
    plt.plot(np.sort(x),y_pre[np.argsort(x)],color="r")
    plt.show()
    print(lin.coef_) #the fitted coefficients are huge
    #Regularization techniques
    #(1) Ridge regression: shrink the coefficients to reduce variance; wrap it in a helper function
    import numpy as np
    import matplotlib.pyplot as plt
    x=np.random.uniform(-3,3,size=100)
    X=x.reshape(-1,1)
    y=0.5*x**2+x+2+np.random.normal(0,1,size=100)
    from sklearn.model_selection import train_test_split
    x_train,x_test,y_train,y_test=train_test_split(X,y,random_state=666)
    from sklearn.linear_model import Ridge
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.metrics import mean_squared_error
    def Ridgeregression(degree,alpha):
        return Pipeline([("poly", PolynomialFeatures(degree=degree)),
                         ("std_scaler", StandardScaler()),
                         ("Ridge_reg", Ridge(alpha=alpha))
                         ])
    r1=Ridgeregression(20,0.01) #as alpha grows, the fitted curve becomes flatter and flatter
    r1.fit(x_train,y_train)
    y11=r1.predict(x_test)
    print(mean_squared_error(y11,y_test))
    plt.figure()
    plt.scatter(X,y)
    x1=np.linspace(-3,3,100).reshape(100,1)
    y1=r1.predict(x1)
    plt.plot(x1,y1,"r")
    #plt.axis([-3,3,-1,10])
    plt.show()
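    #Optional sweep (not in the original post): refit the same Ridge pipeline with growing alpha values to
    #see the fitted curve flatten; the alpha values below are arbitrary choices for illustration
    plt.figure()
    plt.scatter(x,y)
    for a in [0.01,1,100,100000]:
        r=Ridgeregression(20,a)
        r.fit(x_train,y_train)
        print("alpha =",a,"test MSE:",mean_squared_error(y_test,r.predict(x_test)))
        plt.plot(x1[:,0],r.predict(x1),label="alpha=%s"%a)
    plt.legend()
    plt.show()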
    #(2) LASSO regression: another regularization that shrinks the coefficients; wrap it in a helper function
    #Train and predict with LASSO
    from sklearn.linear_model import Lasso
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.metrics import mean_squared_error
    def lassoregression(degree,alpha):
        return Pipeline([("poly", PolynomialFeatures(degree=degree)),
                         ("std_scaler", StandardScaler()),
                         ("LASSO_reg", Lasso(alpha=alpha))
                         ])
    LA1=lassoregression(20,1) #as alpha grows from 0 the fitted curve flattens and gradually approaches a straight line; this differs from the Ridge curve and follows from the L1 penalty in the LASSO objective
    LA1.fit(x_train,y_train)
    y11=LA1.predict(x_test)
    print(mean_squared_error(y11,y_test))
    plt.figure()
    plt.scatter(X,y)
    x1=np.linspace(-3,3,100).reshape(100,1)
    y1=LA1.predict(x1)
    plt.plot(x1,y1,"r")
    #plt.axis([-3,3,-1,10])
    plt.show()
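    #Optional check (not in the original post): the L1 penalty drives many coefficients exactly to zero,
    #which is the practical difference from Ridge; inspect the fitted Lasso step through named_steps
    lasso_step=LA1.named_steps["LASSO_reg"]
    print(lasso_step.coef_)
    print("non-zero coefficients:",np.sum(lasso_step.coef_!=0))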
    #(3) Plain (unregularized) polynomial regression for comparison
    import numpy as np
    import matplotlib.pyplot as plt
    x=np.random.uniform(-3,3,size=100)
    X=x.reshape(-1,1)
    y=0.5*x**2+x+2+np.random.normal(0,1,size=100)
    from sklearn.model_selection import train_test_split
    x_train,x_test,y_train,y_test=train_test_split(X,y,random_state=666)
    from sklearn.linear_model import Ridge
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.metrics import mean_squared_error
    from sklearn.linear_model import LinearRegression
    def polynomialRegression(degree):
        return Pipeline([("poly",PolynomialFeatures(degree=degree)),
                         ("std_scaler",StandardScaler()),
                         ("lin_reg",LinearRegression())
                         ])
    poly2_reg=polynomialRegression(20)
    poly2_reg.fit(X,y)
    y2=poly2_reg.predict(X)
    print(mean_squared_error(y,y2))
    print(poly2_reg.score(X,y))
    plt.figure()
    plt.scatter(X,y)
    x1=np.linspace(-3,3,100).reshape(100,1)
    y11=poly2_reg.predict(x1)
    plt.plot(x1,y11,"r")
    #plt.axis([-3,3,-1,10])
    plt.show()
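    #Optional comparison (not in the original post): fit the unregularized degree-20 pipeline on the training
    #split and score it on the test split; the data was regenerated at the top of this block, so the number is
    #not directly comparable to the Ridge/LASSO errors above, but it shows how poorly the plain model generalizes
    poly20=polynomialRegression(20)
    poly20.fit(x_train,y_train)
    print("plain degree-20 test MSE:",mean_squared_error(y_test,poly20.predict(x_test)))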


