"""Train a decision tree on a film CSV and export the tree as a PDF.

The script reads ``fime.csv``, treats the first row as the header, drops the
first column, uses the last column as the class label and every column in
between as a categorical feature.  The fitted tree is rendered to
``film.pdf`` through Graphviz (via ``pydotplus``).
"""
import csv

import pydotplus
from sklearn import preprocessing
from sklearn import tree
from sklearn.feature_extraction import DictVectorizer

feature_list = []  # one {column name: value} dict per data row
result_list = []   # class label (last column) of each data row

# NOTE(review): the file name 'fime.csv' may be a typo for 'film.csv';
# it is left untouched because it is runtime behaviour -- confirm.
# The context manager guarantees the handle is closed even if parsing fails;
# newline='' is what the csv module recommends for files handed to a reader.
with open('fime.csv', 'rt', newline='') as film_data:
    reader = csv.reader(film_data)
    headers = next(reader)
    for row in reader:
        # The last column is the result.
        result_list.append(row[-1])
        # Drop the first column (not used as a feature) and the label column.
        feature_list.append(dict(zip(headers[1:-1], row[1:-1])))

# One-hot encode the categorical features; binarize the labels likewise.
vec = DictVectorizer()
dummyX = vec.fit_transform(feature_list).toarray()
dummyY = preprocessing.LabelBinarizer().fit_transform(result_list)

clf = tree.DecisionTreeClassifier(criterion='entropy', random_state=0)
clf = clf.fit(dummyX, dummyY)
print('clf:'+str(clf))

# Visualize the tree.
# get_feature_names() was removed in scikit-learn 1.2; prefer the replacement
# get_feature_names_out() and fall back only on releases that lack it.
_get_feature_names = getattr(vec, 'get_feature_names_out', None)
if _get_feature_names is None:
    _get_feature_names = vec.get_feature_names
dot_data = tree.export_graphviz(
    clf,
    # A plain list of str is accepted by every scikit-learn release.
    feature_names=list(_get_feature_names()),
    filled=True,
    rounded=True,
    special_characters=True,
    out_file=None,
)
graph = pydotplus.graph_from_dot_data(dot_data)
graph.write_pdf("film.pdf")

# Prediction.
# predict() requires a 2-D feature matrix; the original call passed nothing
# and always raised TypeError.  As a smoke test, predict the first training
# row (an empty slice when the CSV has no data rows would already have failed
# at fit time).  Replace dummyX[:1] with vec.transform([...]) for new samples.
predict_result = clf.predict(dummyX[:1])