zoukankan      html  css  js  c++  java
  • 封装GridSearchCV的训练包

    import xgboost as xgb
    from sklearn.model_selection import GridSearchCV
    from sklearn.metrics import make_scorer
    from sklearn.metrics import accuracy_score
    from sklearn.datasets import load_breast_cancer  
    from sklearn.model_selection import train_test_split
    from sklearn.model_selection import ParameterGrid
    from sklearn.model_selection import ParameterSampler
    from scipy.stats.distributions import expon
    import numpy as np
    
    
    
    # Draw 4 random parameter settings: "a" is picked from a list, "b" is
    # sampled from an exponential distribution. The global NumPy RNG is seeded
    # first so the draw is reproducible.
    np.random.seed(0)
    param = list(
        ParameterSampler(
            {"a": [1, 2], "b": expon()},
            n_iter=4,
        )
    )
    
    
    
    ##################################################################################
    
    
    def get_model_GridSearchCV(estimator, parameters, X_train, y_train, scoring, cv=5):
        """Run an exhaustive grid search and return the refit best estimator.

        Args:
            estimator: Unfitted estimator handed to ``GridSearchCV``.
            parameters: Parameter grid (dict or list of dicts) passed as
                ``param_grid``.
            X_train: Training features passed to ``fit``.
            y_train: Training targets passed to ``fit``.
            scoring: Scoring specification forwarded to ``GridSearchCV``
                (for example ``"roc_auc"``).
            cv: Cross-validation setting forwarded to ``GridSearchCV``.
                Defaults to 5.

        Returns:
            The ``best_estimator_`` of the fitted search, i.e. the estimator
            refit with the best found parameters on the whole training data.
        """
        # refit=True retrains the best parameter combination on the whole
        # training set, which is what makes best_estimator_ available below.
        # The caller's ``cv`` is forwarded instead of a hard-coded 5.
        search = GridSearchCV(
            estimator=estimator,
            param_grid=parameters,
            scoring=scoring,
            cv=cv,
            refit=True,
        )
        search.fit(X_train, y_train)

        # Report the outcome of the search
        print("best score in GridSearchCV:\n", search.best_score_)
        print("best param in GridSearchCV:\n", search.best_params_)

        return search.best_estimator_
    
    
    #########################################测试########################################
    
    X, y = load_breast_cancer(return_X_y=True)

    # Stratified 70/30 train/test split with a fixed seed
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=0, stratify=y
    )

    # Hyper-parameter grid to search (3 x 3 x 3 combinations)
    param = [{
        "learning_rate": [0.1, 0.3, 0.6],
        "max_depth": [5, 6, 7],
        "n_estimators": [100, 200, 300],
    }]

    # Alternative scorer: make_scorer(accuracy_score, greater_is_better=True)

    # This is a binary classification task, so use the binary classification
    # objective rather than the regression objective "reg:logistic".
    estimator = xgb.XGBClassifier(objective="binary:logistic")

    # Train: grid search with 5-fold cross-validation, selected by ROC AUC
    model = get_model_GridSearchCV(estimator=estimator,
                                   parameters=param,
                                   cv=5,
                                   X_train=X_train,
                                   y_train=y_train,
                                   scoring="roc_auc")

    # Evaluate the trained model on the held-out test set.
    # NOTE: the reference list below describes the methods of a GridSearchCV
    # object. `model` here is the best estimator returned by
    # get_model_GridSearchCV, not the search object itself, so only the
    # estimator's own methods (such as predict) are available on it.
    """
    decision_function(*args, **kwargs)    Call decision_function on the estimator with the best found parameters.
    fit(X[, y, groups])    Run fit with all sets of parameters.
    get_params([deep])    Get parameters for this estimator.
    inverse_transform(*args, **kwargs)    Call inverse_transform on the estimator with the best found params.
    predict(*args, **kwargs)    Call predict on the estimator with the best found parameters.
    predict_log_proba(*args, **kwargs)    Call predict_log_proba on the estimator with the best found parameters.
    predict_proba(*args, **kwargs)    Call predict_proba on the estimator with the best found parameters.
    score(X[, y])    Returns the score on the given data, if the estimator has been refit.
    set_params(**params)    Set the parameters of this estimator.
    transform(*args, **kwargs)    Call transform on the estimator with the best found parameters.
    """
    y_pred = model.predict(X_test)

    # Report test-set accuracy
    print(accuracy_score(y_test, y_pred))
  • 相关阅读:
    三年Android开发经验,挥泪整理字节跳动、微软中国凉经,你不看看吗?
    App怎么做才能永不崩溃
    做了八年的Android开发,谁不是一边崩溃,一边默默坚守!
    阿里员工年年绩效A,晒出收入后感叹:996虽然痛苦,发钱时候真香
    2021阅读书单
    不动产测绘概念
    Elasticsearch 集成
    Elasticsearch 环境
    Elasticsearch 优化
    Elasticsearch入门
  • 原文地址:https://www.cnblogs.com/wzdLY/p/9840192.html
Copyright © 2011-2022 走看看