# Reposted from: https://github.com/LearningFromBest/CMB-credit-card-department-prediction-of-purchasing-behavior-in-consumer-finance-scenario/blob/master/stacking.py
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin, clone
from sklearn.model_selection import KFold, cross_val_score, train_test_split
import numpy as np
class Stacking(BaseEstimator):
    """Stacked ensemble trained on out-of-fold predictions.

    Every base model is cloned and fitted once per K-fold split. The
    predictions made on each held-out fold are assembled into one column per
    base model, and the meta model is fitted on that matrix. At prediction
    time the per-fold clones of each base model are averaged to produce the
    corresponding meta-feature column.

    Args:
        base_models: Sequence of unfitted estimators exposing ``fit`` and
            ``predict``.
        meta_model: Unfitted estimator trained on the out-of-fold matrix.
        folds: Number of K-fold splits used to build the out-of-fold matrix.
        random_state: Seed forwarded to ``KFold`` for the shuffled splits.
    """

    def __init__(self, base_models, meta_model, folds=5, random_state=156):
        # Each constructor argument is stored under an attribute of the same
        # name: BaseEstimator.get_params() (and therefore clone() and repr())
        # looks parameters up by the names in this signature.
        self.base_models = base_models
        self.meta_model = meta_model
        self.folds = folds
        self.random_state = random_state

    @property
    def meta_models(self):
        """Alias of ``meta_model`` kept for code using the former attribute name."""
        return self.meta_model

    @meta_models.setter
    def meta_models(self, value):
        self.meta_model = value

    @staticmethod
    def _take_rows(data, index):
        """Return the rows of ``data`` selected by positional ``index``."""
        # Plain ``frame[index]`` on a pandas DataFrame selects columns, so
        # positional row access has to go through ``.iloc`` when available.
        if hasattr(data, "iloc"):
            return data.iloc[index]
        return data[index]

    def fit(self, X, y):
        """Fit the per-fold base models and then the meta model.

        Args:
            X: Training samples, one row per sample (array-like or DataFrame).
            y: Training targets aligned with the rows of ``X``.

        Returns:
            This estimator, fitted.

        Raises:
            ValueError: If ``base_models`` is empty.
        """
        if len(self.base_models) == 0:
            raise ValueError("Stacking requires at least one base model.")
        # Plain Python sequences support neither ``.shape`` nor index arrays.
        if not hasattr(X, "shape"):
            X = np.asarray(X)
        if not hasattr(y, "shape"):
            y = np.asarray(y)

        self.base_models_ = [[] for _ in self.base_models]
        self.meta_models_ = clone(self.meta_model)
        kfold = KFold(
            n_splits=self.folds, shuffle=True, random_state=self.random_state
        )

        # Stores each estimator's held-out output for every fold, assembled
        # into one feature column per base model (one row per sample).
        out_of_fold = np.zeros((X.shape[0], len(self.base_models)))
        for column, model in enumerate(self.base_models):
            for train_index, test_index in kfold.split(X, y):
                fold_model = clone(model)
                fold_model.fit(
                    self._take_rows(X, train_index),
                    self._take_rows(y, train_index),
                )
                held_out = fold_model.predict(self._take_rows(X, test_index))
                # ravel() lets estimators that return an (n, 1) column fill
                # the 1-D slice of the matrix.
                out_of_fold[test_index, column] = np.ravel(held_out)
                self.base_models_[column].append(fold_model)

        self.meta_models_.fit(out_of_fold, y)
        return self

    def predict(self, X):
        """Predict with the meta model on averaged base-model outputs.

        Args:
            X: Samples to predict, in the same layout used for ``fit``.

        Returns:
            Whatever the fitted meta model's ``predict`` returns for the
            stacked meta-features.
        """
        # One column per base model: the mean over that model's fold clones.
        meta_features = np.column_stack([
            np.column_stack(
                [model.predict(X) for model in fold_models]
            ).mean(axis=1)
            for fold_models in self.base_models_
        ])
        return self.meta_models_.predict(meta_features)