zoukankan      html  css  js  c++  java
  • 集成学习算法模板

    # 下面针对多个模型进行集成操作
    from sklearn.svm import SVC, LinearSVC
    from sklearn.naive_bayes import GaussianNB
    from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.neural_network import MLPClassifier
    from sklearn.kernel_approximation import Nystroem
    from sklearn.kernel_approximation import RBFSampler
    from sklearn.pipeline import make_pipeline
    SEED=666
    def get_models():
        """Build the library of base learners.

        Returns:
            dict mapping a display name to a freshly constructed (not yet
            fitted) scikit-learn classifier. Estimators that accept a
            ``random_state`` are seeded with the module-level ``SEED``.
        """
        # A single literal keeps each name next to its configuration; the
        # insertion order matches what callers see from .items()/.values().
        return {
            'svm': SVC(C=100, probability=True),
            'knn': KNeighborsClassifier(n_neighbors=3),
            'naive bayes': GaussianNB(),
            'mlp-nn': MLPClassifier(
                (80, 10), early_stopping=False, random_state=SEED
            ),
            'random forest': RandomForestClassifier(
                n_estimators=10, max_features=3, random_state=SEED
            ),
            'gbm': GradientBoostingClassifier(
                n_estimators=100, random_state=SEED
            ),
            'logistic': LogisticRegression(C=100, random_state=SEED),
        }
    def train_predict(model_list):
        """Fit every model on the training set and collect test-set probabilities.

        Args:
            model_list: Mapping of display name -> estimator exposing ``fit`` and
                ``predict_proba`` (for example the dict returned by
                ``get_models``). Despite the parameter name it is consumed
                through ``.items()``.

        Returns:
            pandas.DataFrame with one row per entry of ``y_test`` and one column
            per model (named after the mapping key), holding column 1 of that
            model's ``predict_proba`` output on ``X_test``.

        Note:
            Reads the module-level ``X_train``, ``y_train``, ``X_test`` and
            ``y_test``; they must exist before this function is called.
        """
        P = pd.DataFrame(np.zeros((y_test.shape[0], len(model_list))))

        print("Fitting models.")
        cols = []
        # Iterate the argument itself; the previous version looped over a
        # global named `models` and silently ignored what the caller passed in.
        for i, (name, m) in enumerate(model_list.items()):
            print("%s..." % name, end=" ", flush=False)
            m.fit(X_train, y_train)
            P.iloc[:, i] = m.predict_proba(X_test)[:, 1]
            cols.append(name)
            print("done")

        P.columns = cols
        # The newline escape had been expanded into a raw line break, which
        # left an unterminated string literal.
        print("Done.\n")
        return P
    def score_models(P, y):
        """Print the ROC-AUC of each prediction column in ``P`` against ``y``.

        Args:
            P: DataFrame whose columns each hold one model's predicted scores.
            y: True labels, passed as the first argument to ``roc_auc_score``.

        Returns:
            None. Results are written to stdout, one line per column.

        Note:
            ``roc_auc_score`` is only imported further down in this file, so that
            import must have run before this function is called.
        """
        print("Scoring models.")
        for m in P.columns:
            score = roc_auc_score(y, P.loc[:, m])
            print("%-26s: %.3f" % (m, score))
        # The newline escape had been expanded into a raw line break, which
        # left an unterminated string literal.
        print("Done.\n")
    
    

    # Meta learner: a gradient-boosting classifier that is registered on the
    # ensemble through sl.add_meta(...) further down.
    meta_learner = GradientBoostingClassifier(
        n_estimators=1000,
        loss="exponential",
        max_features=4,
        max_depth=3,
        subsample=0.5,
        learning_rate=0.005,
        random_state=SEED,
    )

    from mlens.ensemble import SuperLearner

    # Instantiate the ensemble with 5 folds (see the `folds` argument below)
    sl = SuperLearner(
        folds=5,
        random_state=SEED,
        verbose=2,
        backend="multiprocessing"
    )
    
    # Add the base learners and the meta learner; proba=True is passed for
    # both the base layer and the meta layer.
    sl.add(list(get_models().values()), proba=True) 
    sl.add_meta(meta_learner, proba=True)
    
    # Train the ensemble
    # NOTE(review): Stan_X / Stan_X_test are not defined in this snippet --
    # presumably standardized versions of X_train / X_test; confirm.
    sl.fit(Stan_X,y_train)
    
    # Predict the test set
    p_sl = sl.predict_proba(Stan_X_test)

     效果查看:

    from sklearn.metrics import roc_auc_score
    # Report the ensemble's ROC-AUC, scoring column 1 of p_sl against y_test.
    print(" Super Learner ROC-AUC score: %.3f" % roc_auc_score(y_test, p_sl[:, 1]))

    根据预测结果,取出标签

    import numpy
    # Turn probabilities into labels: index of the largest value along axis 1.
    # NOTE(review): assumes p_sl is (n_samples, n_classes) with columns in
    # class-index order -- confirm.
    Y_prelast=numpy.argmax(p_sl,axis=1)
  • 相关阅读:
    jmeter linux使用经验小结
    同步两台linux服务器时间同步方案
    jsp空页面导致的jvm heap溢出
    Struts2 interceptor使用经验小结
    转--Server “**” has shut down the connection prematurely一例分析
    Tomcat HTTP/1.1 Connector 参数整理
    严重: The web application [] registered the JDBC driver 错误
    JavaScript那些事
    jstl c标签 ”test does not support runtime expressions“
    SpringMvc文件资源防止被外链链接
  • 原文地址:https://www.cnblogs.com/wangzhenghua/p/11240531.html
Copyright © 2011-2022 走看看