'''
@author :Eric-chen
@contact:809512722@qq.com
@time :2017/12/19 16:19
@desc : Train an entropy-based decision tree on AllElectronics.csv, export it
        to a Graphviz .dot file, and predict the class of a modified copy of
        the first sample.
'''
import csv
import sys

from sklearn import preprocessing
from sklearn import tree
from sklearn.feature_extraction import DictVectorizer

try:
    from sklearn.externals.six import StringIO
except ImportError:
    # Newer scikit-learn releases no longer ship sklearn.externals.six;
    # fall back to the standard library so the name stays available.
    from io import StringIO

# NOTE(review): this path looks like it lost its directory separators
# (possibly F:\python\day01\AllElectronics.csv) -- confirm before running.
csvPath = r'F:pythonday01AllElectronics.csv'

# Read in the csv file and put the features in a list of dicts and the class
# labels in a list.
# The csv module wants a binary file on Python 2 but a text file opened with
# newline='' on Python 3.
if sys.version_info[0] >= 3:
    allData = open(csvPath, 'r', newline='')
else:
    allData = open(csvPath, 'rb')

featureList = []
labelList = []
with allData:  # guarantees the file is closed, even if parsing fails
    reader = csv.reader(allData)
    headers = next(reader)  # works on Python 2 and 3, unlike reader.next()
    print(headers)

    for row in reader:
        if not row:
            # Skip empty rows (e.g. a trailing blank line) instead of crashing.
            continue
        # The last column is the class label.
        labelList.append(row[-1])
        # The first column is skipped; every column between it and the label
        # becomes a feature keyed by its header.
        rowDict = {headers[i]: row[i] for i in range(1, len(row) - 1)}
        featureList.append(rowDict)

print(featureList)

# Vectorize features
vec = DictVectorizer()
dummyX = vec.fit_transform(featureList).toarray()

# get_feature_names() is absent from newer scikit-learn releases; prefer
# get_feature_names_out() when it exists and normalise to plain strings so the
# printed list looks the same either way.
if hasattr(vec, "get_feature_names_out"):
    featureNames = [str(name) for name in vec.get_feature_names_out()]
else:
    featureNames = vec.get_feature_names()

print("dummyX:" + str(dummyX))
print(featureNames)
print("labellist:" + str(labelList))

# Vectorize class labels
lb = preprocessing.LabelBinarizer()
dummY = lb.fit_transform(labelList)
print("dummY" + str(dummY))

# Using decision tree for classification
clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(dummyX, dummY)
print("clf:" + str(clf))

# Visualize model
with open("allData.dot", 'w') as f:
    # The return value is not needed; the graph is written to the open file.
    tree.export_graphviz(clf, feature_names=featureNames, out_file=f)

oneRowX = dummyX[0, :]
print("oneRowX:" + str(oneRowX))

# Copy before editing: dummyX[0, :] is a view, so modifying it in place would
# silently alter the training matrix (and oneRowX) as well.
newRowX = oneRowX.copy()
newRowX[0] = 1
newRowX[2] = 0
print("newRowx:" + str(newRowX))

# predict() expects a 2-D array, hence the reshape to a single-row matrix.
predictedY = clf.predict(newRowX.reshape(1, -1))
print("predictedY" + str(predictedY))