zoukankan      html  css  js  c++  java
  • 集成学习算法模板

    # The code below combines several models into one ensemble
    import numpy as np
    import pandas as pd
    from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
    from sklearn.kernel_approximation import Nystroem
    from sklearn.kernel_approximation import RBFSampler
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import roc_auc_score
    from sklearn.naive_bayes import GaussianNB
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.neural_network import MLPClassifier
    from sklearn.pipeline import make_pipeline
    from sklearn.svm import SVC, LinearSVC
    # Shared random seed passed as random_state to the estimators below.
    SEED = 666
    def get_models():
        """Build the library of base learners.

        Returns:
            A dict mapping a display name to a freshly constructed, unfitted
            scikit-learn classifier. The insertion order of the keys is
            preserved because callers take ``.values()`` of the result.
        """
        return {
            'svm': SVC(C=100, probability=True),
            'knn': KNeighborsClassifier(n_neighbors=3),
            'naive bayes': GaussianNB(),
            'mlp-nn': MLPClassifier(
                (80, 10), early_stopping=False, random_state=SEED
            ),
            'random forest': RandomForestClassifier(
                n_estimators=10, max_features=3, random_state=SEED
            ),
            'gbm': GradientBoostingClassifier(
                n_estimators=100, random_state=SEED
            ),
            'logistic': LogisticRegression(C=100, random_state=SEED),
        }
    def train_predict(model_list):
        """Fit each model on the training set and collect test-set predictions.

        Reads the module-level names ``X_train``, ``y_train``, ``X_test`` and
        ``y_test``; they must be defined before this function is called.

        Args:
            model_list: Mapping of display name -> estimator exposing ``fit``
                and ``predict_proba`` (e.g. the dict built by ``get_models``).

        Returns:
            A pandas DataFrame with one row per test sample and one column per
            model (named after the mapping key), holding column 1 of each
            model's ``predict_proba`` output -- presumably the positive-class
            probability of a binary problem; confirm against the data.
        """
        P = pd.DataFrame(np.zeros((y_test.shape[0], len(model_list))))

        print("Fitting models.")
        cols = []
        # Iterate the mapping that was passed in, not a global called `models`.
        for i, (name, m) in enumerate(model_list.items()):
            print("%s..." % name, end=" ", flush=False)
            m.fit(X_train, y_train)
            P.iloc[:, i] = m.predict_proba(X_test)[:, 1]
            cols.append(name)
            print("done")

        P.columns = cols
        print("Done.\n")
        return P
    def score_models(P, y):
        """Print the ROC-AUC score of every prediction column in ``P``.

        Args:
            P: DataFrame whose columns each hold one model's predicted scores.
            y: True labels, passed as the first argument to ``roc_auc_score``.

        Returns:
            None. The scores are only printed, one line per column.
        """
        print("Scoring models.")
        for m in P.columns:
            score = roc_auc_score(y, P.loc[:, m])
            print("%-26s: %.3f" % (m, score))
        # The literal had been split over two physical lines, which is a
        # syntax error; it is restored to a single string ending in "\n".
        print("Done.\n")
    
    

    # Second-level estimator; it is handed to the ensemble's add_meta() below.
    meta_learner = GradientBoostingClassifier(
        loss="exponential",
        n_estimators=1000,
        learning_rate=0.005,
        max_depth=3,
        max_features=4,
        subsample=0.5,
        random_state=SEED,
    )

    from mlens.ensemble import SuperLearner

    # Instantiate the ensemble with folds=5 (an earlier version of this
    # comment said 10 folds, which did not match the code).
    sl = SuperLearner(
        folds=5,
        random_state=SEED,
        verbose=2,
        backend="multiprocessing"
    )
    
    # Add the base learners and the meta learner; both layers are added with
    # proba=True. Only the estimator objects are passed, not their dict keys.
    sl.add(list(get_models().values()), proba=True) 
    sl.add_meta(meta_learner, proba=True)
    
    # Train the ensemble.
    # NOTE(review): Stan_X / Stan_X_test are not defined in this snippet --
    # presumably standardized training / test feature matrices; confirm.
    sl.fit(Stan_X,y_train)
    
    # Predict the test set
    p_sl = sl.predict_proba(Stan_X_test)

     效果查看:

    from sklearn.metrics import roc_auc_score
    # Score column 1 of the ensemble's probability output against y_test.
    _sl_auc = roc_auc_score(y_test, p_sl[:, 1])
    print(" Super Learner ROC-AUC score: %.3f" % _sl_auc)

    根据预测结果,取出标签

    import numpy
    # Predicted label = index of the largest probability in each row of p_sl.
    Y_prelast = numpy.argmax(p_sl, axis=1)
  • 相关阅读:
    8.20Java之反射机制的基本概念
    8.18Go语言之字符串
    Debug
    Feign
    Nacos
    SpringCloud Alibaba
    SpringCloud
    Maven
    Maven
    Jenkins
  • 原文地址:https://www.cnblogs.com/wangzhenghua/p/11240531.html
Copyright © 2011-2022 走看看