zoukankan      html  css  js  c++  java
  • Xgboost参数调节

    转自:https://segmentfault.com/a/1190000014040317

    整体:

    # 1.调试n_estimators
    cv_params = {'n_estimators': [550, 575, 600, 650, 675]}
    other_params = {'learning_rate': 0.1, 'n_estimators': 600, 'max_depth': 5, 'min_child_weight': 1, 'seed': 0,
    'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0, 'reg_alpha': 0, 'reg_lambda': 1}
    # 2.调试max_depth、min_child_weight
    # cv_params = {'max_depth': [3, 4, 5, 6, 7, 8, 9, 10], 'min_child_weight': [1, 2, 3, 4, 5, 6]}
    # other_params = {'learning_rate': 0.1, 'n_estimators': 550, 'max_depth': 5, 'min_child_weight': 1, 'seed': 0,
    # 'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0, 'reg_alpha': 0, 'reg_lambda': 1}
    # 3.调试gamma
    # cv_params = {'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]}
    # other_params = {'learning_rate': 0.1, 'n_estimators': 550, 'max_depth': 4, 'min_child_weight': 5, 'seed': 0,
    # 'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0, 'reg_alpha': 0, 'reg_lambda': 1}
    # 4.调试subsample、colsample_bytree
    # cv_params = {'subsample': [0.6, 0.7, 0.8, 0.9], 'colsample_bytree': [0.6, 0.7, 0.8, 0.9]}
    # other_params = {'learning_rate': 0.1, 'n_estimators': 550, 'max_depth': 4, 'min_child_weight': 5, 'seed': 0,
    # 'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.1, 'reg_alpha': 0, 'reg_lambda': 1}
    # 5.调试reg_alpha、reg_lambda
    # cv_params = {'reg_alpha': [0.05, 0.1, 1, 2, 3], 'reg_lambda': [0.05, 0.1, 1, 2, 3]}
    # other_params = {'learning_rate': 0.1, 'n_estimators': 550, 'max_depth': 4, 'min_child_weight': 5, 'seed': 0,
    # 'subsample': 0.7, 'colsample_bytree': 0.7, 'gamma': 0.1, 'reg_alpha': 0, 'reg_lambda': 1}
    # 6.调试learning_rate
    # cv_params = {'learning_rate': [0.01, 0.05, 0.07, 0.1, 0.2]}
    # other_params = {'learning_rate': 0.1, 'n_estimators': 550, 'max_depth': 4, 'min_child_weight': 5, 'seed': 0,
    # 'subsample': 0.7, 'colsample_bytree': 0.7, 'gamma': 0.1, 'reg_alpha': 1, 'reg_lambda': 1}
    
    model = xgb.XGBClassifier(**other_params)
    optimized_GBM = GridSearchCV(estimator=model, param_grid=cv_params, cv=5, verbose=1, n_jobs=4)
    optimized_GBM.fit(X_train, y_train)
    evalute_result = optimized_GBM.grid_scores_
    print('每轮迭代运行结果:{0}'.format(evalute_result))
    print('参数的最佳取值:{0}'.format(optimized_GBM.best_params_))
    print('最佳模型得分:{0}'.format(optimized_GBM.best_score_))

    1.调节最大迭代次数n_estimators

    # 最佳迭代次数:n_estimators
    from xgboost import XGBRegressor
    from sklearn.model_selection import GridSearchCV
    cv_params = {'n_estimators': [20,30,40]}
    other_params = {'learning_rate': 0.1, 'n_estimators': 500, 'max_depth': 5, 'min_child_weight': 1, 'seed': 0,
                        'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0, 'reg_alpha': 0, 'reg_lambda': 1}
    model = XGBRegressor(**other_params)
    optimized_GBM = GridSearchCV(estimator=model, param_grid=cv_params, scoring='r2', cv=3, verbose=1, n_jobs=-1)
    optimized_GBM.fit(x_data, y_data)
    evalute_result =optimized_GBM.return_train_score
    print('每轮迭代运行结果:{0}'.format(evalute_result))
    print('参数的最佳取值:{0}'.format(optimized_GBM.best_params_))
    print('最佳模型得分:{0}'.format(optimized_GBM.best_score_))

    2.调试的参数是min_child_weight以及max_depth:

    # 调试的参数是min_child_weight以及max_depth:
    cv_params = {'max_depth': [3, 4, 5, 6, 7, 8, 9, 10], 'min_child_weight': [6,7,8]}
    other_params = {'learning_rate': 0.1, 'n_estimators': 20, 'max_depth': 5, 'min_child_weight': 1, 'seed': 0,
                        'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0, 'reg_alpha': 0, 'reg_lambda': 1}
    model = XGBRegressor(**other_params)
    optimized_GBM = GridSearchCV(estimator=model, param_grid=cv_params, scoring='r2', cv=3, verbose=1, n_jobs=-1)
    optimized_GBM.fit(x_data, y_data)
    evalute_result =optimized_GBM.return_train_score
    print('每轮迭代运行结果:{0}'.format(evalute_result))
    print('参数的最佳取值:{0}'.format(optimized_GBM.best_params_))
    print('最佳模型得分:{0}'.format(optimized_GBM.best_score_))

    3.调试参数:gamma:

    # 调试参数:gamma:
    cv_params = {'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]}
    other_params = {'learning_rate': 0.1, 'n_estimators': 20, 'max_depth': 4, 'min_child_weight': 6, 'seed': 0,
                        'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0, 'reg_alpha': 0, 'reg_lambda': 1}
    model = XGBRegressor(**other_params)
    optimized_GBM = GridSearchCV(estimator=model, param_grid=cv_params, scoring='r2', cv=3, verbose=1, n_jobs=-1)
    optimized_GBM.fit(x_data, y_data)
    evalute_result =optimized_GBM.return_train_score
    print('每轮迭代运行结果:{0}'.format(evalute_result))
    print('参数的最佳取值:{0}'.format(optimized_GBM.best_params_))
    print('最佳模型得分:{0}'.format(optimized_GBM.best_score_))

    4. 调试subsample以及colsample_bytree:

    # 调试subsample以及colsample_bytree:
    cv_params = {'subsample': [0.6, 0.7, 0.8, 0.9], 'colsample_bytree': [0.6, 0.7, 0.8, 0.9]}
    other_params = {'learning_rate': 0.1, 'n_estimators': 20, 'max_depth': 4, 'min_child_weight': 6, 'seed': 0,
                        'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.2, 'reg_alpha': 0, 'reg_lambda': 1}
    model = XGBRegressor(**other_params)
    optimized_GBM = GridSearchCV(estimator=model, param_grid=cv_params, scoring='r2', cv=3, verbose=1, n_jobs=4)
    optimized_GBM.fit(x_data, y_data)
    evalute_result =optimized_GBM.return_train_score
    print('每轮迭代运行结果:{0}'.format(evalute_result))
    print('参数的最佳取值:{0}'.format(optimized_GBM.best_params_))
    print('最佳模型得分:{0}'.format(optimized_GBM.best_score_))

    5.调试reg_alpha以及reg_lambda:

    # 调试reg_alpha以及reg_lambda:
    cv_params = {'reg_alpha': [0.05, 0.1, 1, 2, 3], 'reg_lambda': [0.05, 0.1, 1, 2, 3]}
    other_params = {'learning_rate': 0.1, 'n_estimators': 20, 'max_depth': 4, 'min_child_weight': 6, 'seed': 0,
                        'subsample': 0.8, 'colsample_bytree': 0.9, 'gamma': 0.2, 'reg_alpha': 0, 'reg_lambda': 1}
    model = XGBRegressor(**other_params)
    optimized_GBM = GridSearchCV(estimator=model, param_grid=cv_params, scoring='r2', cv=3, verbose=1, n_jobs=4)
    optimized_GBM.fit(x_data, y_data)
    evalute_result =optimized_GBM.return_train_score
    print('每轮迭代运行结果:{0}'.format(evalute_result))
    print('参数的最佳取值:{0}'.format(optimized_GBM.best_params_))
    print('最佳模型得分:{0}'.format(optimized_GBM.best_score_))

    6.调试learning_rate:

    # 调试learning_rate,一般这时候要调小学习率来测试:
    cv_params = {'learning_rate': [0.01, 0.05, 0.07, 0.1, 0.2]}
    other_params = {'learning_rate': 0.1, 'n_estimators': 20, 'max_depth': 4, 'min_child_weight': 6, 'seed': 0,
                        'subsample': 0.8, 'colsample_bytree': 0.9, 'gamma': 0.2, 'reg_alpha': 0.1, 'reg_lambda': 1}
    model = XGBRegressor(**other_params)
    optimized_GBM = GridSearchCV(estimator=model, param_grid=cv_params, scoring='r2', cv=3, verbose=1, n_jobs=4)
    optimized_GBM.fit(x_data, y_data)
    evalute_result =optimized_GBM.return_train_score
    print('每轮迭代运行结果:{0}'.format(evalute_result))
    print('参数的最佳取值:{0}'.format(optimized_GBM.best_params_))
    print('最佳模型得分:{0}'.format(optimized_GBM.best_score_))
  • 相关阅读:
    Python 入门的一些练习题
    Parallel Data Augmentation for Formality Style Transfer 阅读
    Windows Internals 笔记——用户模式下的线程同步
    RSA算法原理(转)
    Ubuntu Git Server 搭建(Gitosis)
    Linux Make 报错:make: *** /lib/modules/3.10.0-1127.el7.x86_64/build: no such file or directory. stop.
    Centos 配置串口连接
    编写一个程序,将字符数组s2中的全部字符复制到字符数组s1中,不用strcpy函数。复制时,‘’也要赋值过去。''之后的字符不复制
    编写一个程序,将连个字符串s1和s2比较,如果s1 > s2,输出一个整数;若s1 = s2,输出0;若s1 < s2,输出一个负数。不要用strcpy函数。两个字符串用gets函数读入。输出的正数或负数的绝对值应是相比较的两个字符串相对应字符的ASCII码的差值。
    编一程序,将两个字符串连接起来,不要用strcat函数
  • 原文地址:https://www.cnblogs.com/wangzhenghua/p/11251462.html
Copyright © 2011-2022 走看看