暂时还没有搞清楚 xgboost 中每一棵树的权重是怎样确定的,以及每棵树的结果与最终结果之间是什么关系,后面再补上。
下面介绍如何绘制 xgboost 中的决策树:
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 9 16:16:56 2021

@author: Administrator

Fit an XGBoost classifier on the credit-card fraud data set and render the
last boosted tree of the ensemble to ``tree.png``.
"""

#%% Import modules
import pandas as pd
import numpy as np
from scipy import stats
import seaborn as sns
import matplotlib.pyplot as plt
import xgboost as xgb
from xgboost import XGBClassifier
from xgboost import plot_tree

# ``%matplotlib inline`` is IPython-only syntax and a SyntaxError in plain
# Python.  Request the same backend through the IPython API when it is
# available, and do nothing when the script runs under a regular interpreter.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

# Use the SimHei font so that Chinese labels render instead of empty boxes.
plt.rc("font", family="SimHei", size="12")

# Number of boosting rounds; also used below to pick the last tree to plot,
# so the two values cannot drift apart.
N_ESTIMATORS = 200


def ceate_feature_map(features):
    """Write the XGBoost feature-map file ``xgb.fmap`` for *features*.

    One record is written per feature, in iteration order, in the form
    ``<index>\\t<name>\\tq\\n``: a zero-based feature index, the feature
    name, and the type flag ``q`` (quantitative).

    Args:
        features: Iterable of feature names (e.g. ``DataFrame.columns``).
            Names must not contain spaces.

    Note:
        The file is created in the current working directory and any
        existing ``xgb.fmap`` is overwritten.
    """
    # ``with`` guarantees the handle is closed even if a write fails.
    with open('xgb.fmap', 'w', encoding='utf-8') as outfile:
        outfile.writelines(
            '{0}\t{1}\tq\n'.format(index, feat)
            for index, feat in enumerate(features)
        )


#%% Load data
creditcard = pd.read_csv('D:/信用卡欺诈检测/creditcard.csv/creditcard.csv')
creditcard.info()

# Every column except the last is a feature; ``Class`` is the target.
X = creditcard.iloc[:, 0:-1]
y = creditcard.Class

# NOTE: the keyword is ``learning_rate``; the previous ``learn_rate`` is not
# an XGBClassifier parameter, so the intended 0.1 was never applied.
model = XGBClassifier(max_depth=4, n_estimators=N_ESTIMATORS, learning_rate=0.1)
model.fit(X, y)

# The feature map lets plot_tree label split nodes with column names.
# Pay special attention: column names must not contain spaces.
ceate_feature_map(X.columns)

# Tree indices are zero-based, so the last tree is N_ESTIMATORS - 1.
plot_tree(model, num_trees=N_ESTIMATORS - 1, fmap='xgb.fmap')
fig = plt.gcf()
fig.set_size_inches(150, 100)  # very large canvas so deep trees stay legible
# plt.show()
fig.savefig('tree.png')
下面简单介绍一下plot_tree参数