# Ensemble several models below.
import numpy as np  # used by train_predict (np.zeros)
import pandas as pd  # used by train_predict (pd.DataFrame)
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.kernel_approximation import Nystroem
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC, LinearSVC
# Seed shared by every estimator that accepts `random_state`.
SEED = 666


def get_models():
    """Generate a library of base learners.

    Returns:
        dict: maps a display name to a freshly constructed, unfitted
        scikit-learn classifier. Estimators that accept ``random_state``
        are seeded with ``SEED``.
    """
    nb = GaussianNB()
    svc = SVC(C=100, probability=True)
    knn = KNeighborsClassifier(n_neighbors=3)
    lr = LogisticRegression(C=100, random_state=SEED)
    nn = MLPClassifier((80, 10), early_stopping=False, random_state=SEED)
    gb = GradientBoostingClassifier(n_estimators=100, random_state=SEED)
    rf = RandomForestClassifier(n_estimators=10, max_features=3, random_state=SEED)

    models = {
        'svm': svc,
        'knn': knn,
        'naive bayes': nb,
        'mlp-nn': nn,
        'random forest': rf,
        'gbm': gb,
        'logistic': lr,
    }
    return models


def train_predict(model_list):
    """Fit models in list on training set and return preds.

    Reads the module-level ``X_train``, ``y_train``, ``X_test`` and
    ``y_test``; they must be defined before this function is called.

    Args:
        model_list: mapping of name -> estimator (e.g. the result of
            ``get_models()``). Each estimator must provide ``fit`` and
            ``predict_proba``.

    Returns:
        pandas.DataFrame: one row per entry of ``y_test`` and one column per
        model (named after its key), holding column 1 of that model's
        ``predict_proba(X_test)`` output.
    """
    P = np.zeros((y_test.shape[0], len(model_list)))
    P = pd.DataFrame(P)

    print("Fitting models.")
    cols = list()
    # Iterate over the argument itself; the previous code looped over a
    # global named `models` and ignored what the caller passed in.
    for i, (name, m) in enumerate(model_list.items()):
        print("%s..." % name, end=" ", flush=False)
        m.fit(X_train, y_train)
        P.iloc[:, i] = m.predict_proba(X_test)[:, 1]
        cols.append(name)
        print("done")

    P.columns = cols
    print("Done. ")
    return P


def score_models(P, y):
    """Score model in prediction DF.

    Prints ``roc_auc_score(y, P[col])`` for every column of ``P``.
    ``roc_auc_score`` is not imported here and must already be in scope
    when this function is called.

    Args:
        P: DataFrame of per-model scores, one column per model.
        y: true labels aligned with the rows of ``P``.
    """
    print("Scoring models.")
    for m in P.columns:
        score = roc_auc_score(y, P.loc[:, m])
        print("%-26s: %.3f" % (m, score))
    print("Done. ")
# Second-level estimator; it is handed to `sl.add_meta(...)` further down.
meta_learner = GradientBoostingClassifier(
    # Boosting schedule.
    loss="exponential",
    n_estimators=1000,
    learning_rate=0.005,
    # Per-tree limits and row/feature subsampling.
    max_depth=3,
    max_features=4,
    subsample=0.5,
    # Same seed as the base learners.
    random_state=SEED,
)
from mlens.ensemble import SuperLearner
# Instantiate the ensemble with 5 folds
sl = SuperLearner(
    folds=5,
    random_state=SEED,
    verbose=2,
    backend="multiprocessing",
)

# Add the base learners and the meta learner
sl.add(list(get_models().values()), proba=True)
sl.add_meta(meta_learner, proba=True)

# Train the ensemble
# NOTE(review): fits on Stan_X / predicts on Stan_X_test (defined elsewhere),
# not X_train / X_test -- presumably the standardized features; confirm.
sl.fit(Stan_X, y_train)

# Predict the test set
p_sl = sl.predict_proba(Stan_X_test)
# Check the results:
from sklearn.metrics import roc_auc_score
# Report ROC-AUC for the ensemble, scoring column 1 of its predicted
# probabilities against y_test. The leading "\n" prints a blank line first.
print("\nSuper Learner ROC-AUC score: %.3f" % roc_auc_score(y_test, p_sl[:, 1]))
# Extract the predicted labels from the prediction results:
import numpy

# Predicted label per row: the index of the largest value along axis 1 of
# p_sl (the ensemble's predict_proba output).
Y_prelast = numpy.argmax(p_sl, axis=1)