RID age income student credit_rating Class:buys_computer 1 youth high no fair no 2 youth high no excellent no 3 middle_aged high no fair no 4 senior medium no fair yes 5 senior low yes fair yes 6 senior low yes excellent no 7 middle_aged low yes excellent yes 8 youth medium no fair no 9 youth low yes fair yes 10 senior medium yes fair yes 11 youth medium yes excellent yes 12 middle_aged medium no excellent yes 13 middle_aged high yes fair yes 14 senior medium no excellent no
"""Fit an entropy-based decision tree on the categorical table in DT.csv.

The script reads DT.csv, treats the first column as a row id (ignored), the
last column as the class label and every column in between as a categorical
feature. Features are vectorized with DictVectorizer, labels with
LabelBinarizer, and a DecisionTreeClassifier is fitted on the result. The
fitted tree is exported to dt.dot in Graphviz format and the predictions on
the training rows are printed next to the true labels.
"""
import csv

import numpy as np
from sklearn import preprocessing
from sklearn import tree
from sklearn.feature_extraction import DictVectorizer

feature_list = []
label_list = []

# newline='' is what the csv module asks for; the with-block guarantees the
# handle is closed even if parsing fails.
with open('DT.csv', 'rt', newline='') as all_csv_data:
    reader = csv.reader(all_csv_data)
    header = next(reader)
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; there is nothing to learn from them.
            continue
        label_list.append(row[-1])
        # Skip column 0 (row id) and the last column (class label).
        feature_list.append(dict(zip(header[1:-1], row[1:-1])))

# Turn each {column: category} dict into a numeric feature row.
vec = DictVectorizer()
dummyX = vec.fit_transform(feature_list).toarray()
print(dummyX)

# Encode the class labels numerically.
lb = preprocessing.LabelBinarizer()
dummyY = lb.fit_transform(label_list)

clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(dummyX, dummyY)

# get_feature_names() was removed in scikit-learn 1.2; prefer the replacement
# and fall back to the old method only on releases that lack it.
_get_feature_names = (
    getattr(vec, "get_feature_names_out", None) or vec.get_feature_names
)
with open("dt.dot", 'w') as f:
    tree.export_graphviz(
        clf,
        feature_names=list(_get_feature_names()),
        out_file=f,
    )

OneRowX = dummyX[0, :]
print("OneRowX: " + str(OneRowX))

# To experiment with a modified sample, work on a copy so that dummyX itself
# is not changed (dummyX[0, :] is a view into dummyX):
#   newRowX = OneRowX.copy()
#   newRowX[0] = 1
#   newRowX[1] = 0
#   print("NewRowX: " + str(newRowX))

# Predictions on the training rows, followed by the true labels for comparison.
predY = clf.predict(dummyX)
print("predY :" + str(predY))
print("dummyY:" + str(np.array(dummyY).transpose()))