  • Main tuning parameters of the supervised-learning models in sklearn

    #Regularization: reduces model complexity to avoid overfitting.
    
    #Load the required modules
    from sklearn.datasets import load_iris
    import joblib
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.linear_model import LinearRegression
    from sklearn.linear_model import Ridge
    from sklearn.linear_model import Lasso
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import LinearSVC
    from sklearn.model_selection import train_test_split
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.svm import SVC
    from sklearn.neural_network import MLPClassifier
    
    #Split the dataset
    data = load_iris()
    X = data.data
    y = data.target
    train_X,test_X,train_y,test_y = train_test_split(X,y,test_size=0.3,random_state=2)
    
    #Train the models
    #k-nearest neighbors
    kneighbor=KNeighborsClassifier(n_neighbors=3)#n_neighbors sets the number of neighbors
    #Linear regression
    lr = LinearRegression()
    #Ridge regression
    ridge=Ridge(alpha=.01)# alpha trades off model simplicity against training-set performance: larger alpha shrinks the coefficients toward zero (simpler model, possibly better generalization); as alpha approaches 0, Ridge behaves like plain LinearRegression
    #Lasso regression
    lasso= Lasso(alpha=.01,max_iter=100)# max_iter is the maximum number of iterations; the larger alpha is, the simpler the model (more coefficients are forced to exactly zero)
    #Logistic regression
    log=LogisticRegression(C=1)#C is the inverse of the regularization strength: larger C gives a more complex model that fits the training data better but may overfit
    #Linear support vector machine
    linearSVC=LinearSVC(C=10)#C is the inverse of the regularization strength: larger C gives a more complex model that fits the training data better but may overfit
    #Naive Bayes classifiers: GaussianNB (continuous data), BernoulliNB (binary features), MultinomialNB (count data); the latter two are mainly used for text classification
    #Decision tree
    tree=DecisionTreeClassifier(random_state=0,max_depth=4)#random_state makes the random tie-breaking between equally good splits reproducible;
                                              # max_depth limits the depth of the tree; max_depth, max_leaf_nodes and min_samples_leaf can all be used to prevent overfitting.
    #Random forest
    randomtree=RandomForestClassifier(n_estimators=4,random_state=2)#n_estimators is the number of trees; more trees average out more variance and help against overfitting.
                                                      # max_features controls how much randomness goes into each tree; smaller values help against overfitting; the default usually works well.
    #Kernelized support vector machine
    svc=SVC(C=1,gamma=0.1)#C is the regularization parameter: larger C gives a more complex model that fits the training data better but may overfit
                                       #gamma controls the width of the Gaussian (RBF) kernel, i.e. how far the influence of a single point reaches; larger gamma gives a more complex model and a faster-varying decision boundary.
    #Neural network
    mlp=MLPClassifier(hidden_layer_sizes=100,alpha=0.1)#hidden_layer_sizes sets the number of units per hidden layer (here a single layer of 100 units); alpha is the L2 regularization strength
    
    kneighbor.fit(train_X,train_y)
    lr.fit(train_X,train_y)
    ridge.fit(train_X,train_y)
    lasso.fit(train_X,train_y)
    log.fit(train_X,train_y)
    linearSVC.fit(train_X,train_y)
    tree.fit(train_X,train_y)
    randomtree.fit(train_X,train_y)
    svc.fit(train_X,train_y)
    mlp.fit(train_X,train_y)
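    
    # A quick check of the alpha trade-off described above: compare training and test
    # R^2 for a few Ridge alphas (the values below are arbitrary, chosen only for illustration)
    for a in (0.01, 1, 100):
        r = Ridge(alpha=a).fit(train_X, train_y)
        print('alpha=%s  train=%.3f  test=%.3f' % (a, r.score(train_X, train_y), r.score(test_X, test_y)))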
    
    #Save the trained models to disk (value=the fitted model object); files go to the current working directory by default
    joblib.dump(filename='kneighbor.model',value=kneighbor)
    joblib.dump(filename='LR.model',value=lr)
    joblib.dump(filename='Ridge.model',value=ridge)
    joblib.dump(filename='lasso.model',value=lasso)
    joblib.dump(filename='log.model',value=log)
    joblib.dump(filename='linearSVC.model',value=linearSVC)
    joblib.dump(filename='tree.model',value=tree)
    joblib.dump(filename='randomtree.model',value=randomtree)
    joblib.dump(filename='svc.model',value=svc)
    joblib.dump(filename='mlp.model',value=mlp)
    
    # Load the saved models back from disk
    model0 = joblib.load(filename="kneighbor.model")
    model1 = joblib.load(filename="LR.model")
    model2 = joblib.load(filename="Ridge.model")
    model3 = joblib.load(filename="lasso.model")
    model4 = joblib.load(filename="log.model")
    model5 = joblib.load(filename="linearSVC.model")
    model6 = joblib.load(filename="tree.model")
    model7 = joblib.load(filename="randomtree.model")
    model8 = joblib.load(filename="svc.model")
    model9 = joblib.load(filename="mlp.model")
    #Use each loaded model to predict the third test sample and print its score on the test set
    print(model0.predict(test_X)[2])
    print(model0.score(test_X,test_y))
    print(model1.predict(test_X)[2])
    print(model1.score(test_X,test_y))
    print(model2.predict(test_X)[2])
    print(model2.score(test_X,test_y))
    print(model3.predict(test_X)[2])
    print(model3.score(test_X,test_y))
    print(model4.predict(test_X)[2])
    print(model4.score(test_X,test_y))
    print(model5.predict(test_X)[2])
    print(model5.score(test_X,test_y))
    print(model6.predict(test_X)[2])
    print(model6.score(test_X,test_y))
    print(model7.predict(test_X)[2])
    print(model7.score(test_X,test_y))
    print(model8.predict(test_X)[2])
    print(model8.score(test_X,test_y))
    print(model9.predict(test_X)[2])
    print(model9.score(test_X,test_y))
    # Reset a model's parameters and retrain (kept commented out; note that LinearRegression's
    # normalize parameter has been removed in recent scikit-learn versions, so a still-supported
    # parameter such as fit_intercept would be needed here)
    '''model1.set_params(normalize=True).fit(train_X,train_y)
    
    #Predict with the re-trained model
    print(model1.predict(test_X))
    print(model1.score(test_X,test_y))'''
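    
    #All of the parameters above are set by hand. To search over them instead, any of these
    #estimators can be combined with GridSearchCV; the sketch below is only an illustration
    #(the choice of SVC and the grid values are examples, not prescribed above) of tuning
    #C and gamma by cross-validation on the training split. The same pattern applies to
    #alpha for Ridge/Lasso, n_neighbors for KNeighborsClassifier, max_depth for the trees, etc.
    from sklearn.model_selection import GridSearchCV
    
    param_grid = {'C': [0.01, 0.1, 1, 10, 100],        #illustrative candidate values
                  'gamma': [0.001, 0.01, 0.1, 1]}
    grid = GridSearchCV(SVC(), param_grid=param_grid, cv=5)
    grid.fit(train_X, train_y)          #cross-validates every C/gamma combination on the training split
    print(grid.best_params_)            #best combination found by cross-validation
    print(grid.score(test_X, test_y))   #best model is refit on the training split and scored on the held-out test split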
  • Original post: https://www.cnblogs.com/gao109214/p/13397795.html