sklearn Model-selection + Pipeline

    1 RandomizedSearchCV

    import numpy as np
    from time import time
    
    from scipy.stats import randint as sp_randint
    
    from sklearn.datasets import load_digits
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import GridSearchCV
    from sklearn.model_selection import RandomizedSearchCV
    
    # Load the digits dataset
    digits = load_digits()
    X, y = digits.data, digits.target
    
    # Base estimator whose hyperparameters will be searched
    meta_clf = RandomForestClassifier(n_estimators=20)
    
    # =================================================================
    # Parameter distributions to sample from
    param_dist = {"max_depth": [3, None],
                  "max_features": sp_randint(1, 11),
                  "min_samples_split": sp_randint(2, 11),
                  "min_samples_leaf": sp_randint(1, 11),
                  "bootstrap": [True, False],
                  "criterion": ["gini", "entropy"]}
    
    # Run the randomized search
    n_iter_search = 20
    rs_clf = RandomizedSearchCV(meta_clf, param_distributions=param_dist,
                                n_iter=n_iter_search)
    
    start = time()
    rs_clf.fit(X, y)
    print("RandomizedSearchCV took %.2f seconds for %d candidate"
          " parameter settings." % ((time() - start), n_iter_search))
    print(rs_clf.cv_results_)
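
    The raw cv_results_ dict is verbose; a small reporting helper (a sketch added here, not part of the original post) can pull out just the top-ranked candidates:

    # Sketch: print mean/std test score and parameters of the top-ranked candidates
    def report(cv_results, n_top=3):
        for rank in range(1, n_top + 1):
            for i in np.flatnonzero(cv_results["rank_test_score"] == rank):
                print("Rank %d: mean %.3f (std %.3f)" % (
                    rank,
                    cv_results["mean_test_score"][i],
                    cv_results["std_test_score"][i]))
                print("  params: %r" % cv_results["params"][i])
    
    report(rs_clf.cv_results_)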

    2 GridSearchCV

    # =================================================================
    # Parameter grid to search exhaustively
    param_grid = {"max_depth": [3, None],
                  "max_features": [1, 3, 10],
                  "min_samples_split": [2, 3, 10],
                  "min_samples_leaf": [1, 3, 10],
                  "bootstrap": [True, False],
                  "criterion": ["gini", "entropy"]}
    
    # Run the grid search
    gs_clf = GridSearchCV(meta_clf, param_grid=param_grid)
    start = time()
    gs_clf.fit(X, y)
    
    print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
          % (time() - start, len(gs_clf.cv_results_["params"])))
    print(gs_clf.cv_results_)
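
    Either fitted search object also exposes the winning configuration directly; the lines below are a minimal sketch, assuming rs_clf and gs_clf have been fitted as above:

    # Sketch: best_score_ is the mean cross-validated score of best_params_,
    # and best_estimator_ is that configuration refit on the full data.
    print("Randomized search best score: %.3f" % rs_clf.best_score_)
    print("Randomized search best params: %r" % rs_clf.best_params_)
    print("Grid search best score: %.3f" % gs_clf.best_score_)
    print("Grid search best params: %r" % gs_clf.best_params_)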

    3 Pipeline

    from sklearn import svm
    from sklearn.datasets import make_classification
    from sklearn.feature_selection import SelectKBest
    from sklearn.feature_selection import f_regression
    from sklearn.pipeline import Pipeline
    
    # Generate data
    X, y = make_classification(n_informative=5, n_redundant=0, random_state=42)
    
    # Define the pipeline: ANOVA feature selection first, then an SVM
    anova_filter = SelectKBest(f_regression, k=5)
    clf = svm.SVC(kernel='linear')
    pipe = Pipeline([('anova', anova_filter), ('svc', clf)])
    
    # Set anova's k=10 and svc's C=0.1 (step name and parameter are joined by "__")
    pipe.set_params(anova__k=10, svc__C=.1)
    pipe.fit(X, y)
    
    prediction = pipe.predict(X)
    
    print(pipe.score(X, y))
    
    # Boolean mask of the features selected by anova_filter
    s = pipe.named_steps['anova'].get_support()
    print(s)
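
    The two halves of this post combine naturally: the same step__parameter names used with set_params can also go into a parameter grid, so the whole pipeline is tuned at once. A minimal sketch, reusing the pipe, X, and y defined above (the names pipe_param_grid and pipe_search are illustrative):

    # Sketch: grid-search over both pipeline steps with <step>__<parameter> keys
    from sklearn.model_selection import GridSearchCV
    
    pipe_param_grid = {"anova__k": [5, 10, 15],
                       "svc__C": [0.1, 1, 10]}
    pipe_search = GridSearchCV(pipe, param_grid=pipe_param_grid, cv=5)
    pipe_search.fit(X, y)
    print(pipe_search.best_params_, pipe_search.best_score_)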
Original post: https://www.cnblogs.com/zle1992/p/6027207.html