直接上代码,和我之前的SVM差不多,都是使用了sklearn库
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction import DictVectorizer
import numpy as np
import graphviz
def load_data(filename, test_size=.5, random_state=None):
    """Load a space-delimited numeric file and return a standardized split.

    Column 0 of every row is taken as the integer target and all remaining
    columns are taken as the features.

    Args:
        filename: Path of the text file, parsed with ``np.genfromtxt`` using
            a single space as the delimiter.
        test_size: Share of the samples held out for testing, forwarded to
            ``train_test_split``. The default of 0.5 keeps the original
            50/50 split.
        random_state: Seed forwarded to ``train_test_split``. The default of
            ``None`` keeps the original unseeded shuffle; pass an int for a
            reproducible split.

    Returns:
        A tuple ``(x_train, x_test, y_train, y_test)`` where both feature
        arrays are standardized with statistics from the training part.
    """
    data = np.genfromtxt(filename, delimiter=' ')
    x = data[:, 1:]
    y = data[:, 0].astype(int)
    # Split before scaling so that no information about the held-out samples
    # reaches the preprocessing step.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=random_state)
    # Fit the scaler on the training features only, then apply the same
    # mean/variance to the test features.
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)
    print(len(x_train), len(x_test), len(y_train), len(y_test))
    return x_train, x_test, y_train, y_test
def dec_tree(x_train, x_test, y_train, y_test):
    """Fit a decision tree, print its test score and render it with graphviz.

    Args:
        x_train: Training features passed to ``fit``.
        x_test: Test features passed to ``score``.
        y_train: Training targets passed to ``fit``.
        y_test: Test targets passed to ``score``.
    """
    # ``fit`` returns the estimator itself, so construction and training can
    # be chained.
    model = tree.DecisionTreeClassifier().fit(x_train, y_train)
    # ``model.predict(x_test)`` would give the per-sample predictions; only
    # the aggregate score is reported here.
    test_score = model.score(x_test, y_test)
    print(test_score)
    # With out_file=None the DOT description is returned as a string.
    dot_source = tree.export_graphviz(model, out_file=None)
    graphviz.Source(dot_source).render(r"tree.dot")
if __name__ == '__main__':
    # Load and split the data set, then train and export the tree.
    splits = load_data('txt/10/frame505/all.txt')
    dec_tree(*splits)
代码中的 import graphviz 导入的是 graphviz 的 Python 包,程序运行后会生成一个 dot 文件。如果大家想要以图形化的方式查看 dot 文件,除了这个 Python 包之外,还需要下载并安装 Graphviz 软件本身。
下载好之后,进入到终端,win+R,输入cmd,进入dot所在文件夹,使用
dot -Tpdf ***.dot -o ***.pdf
输入上面命令运行可以在当前文件夹生成pdf。
好了,这就是决策树的sklearn库实现,不调用库的实现方法后续给出