zoukankan      html  css  js  c++  java
  • 决策树python建模中的坑 :ValueError: Expected 2D array, got 1D array instead:

    决策树python建模中的坑

    代码

    #coding=utf-8
    # Decision-tree demo on the AllElectronics CSV: one-hot encode the features,
    # fit an entropy-based DecisionTreeClassifier, export the tree to Graphviz
    # and predict a single row.
    #
    # NOTE: this is the *failing* version. The final clf.predict() call is
    # deliberately left as written, because it produces the
    # "ValueError: Expected 2D array, got 1D array instead" traceback that
    # follows this listing.

    from sklearn.feature_extraction import DictVectorizer
    import csv
    from sklearn import tree
    from sklearn import preprocessing
    # NOTE(review): StringIO is not used in this listing, and
    # sklearn.externals.six appears to be gone from newer scikit-learn
    # releases -- confirm against the installed version.
    from sklearn.externals.six import StringIO

    featureList = []
    labelList = []

    # The context manager closes the CSV file even if parsing fails.
    with open(r"D:\workspace\python\files\AllElectronics.csv") as allElectronicsData:
        reader = csv.reader(allElectronicsData)
        # next(reader) works on Python 2.6+ and Python 3; reader.next() is
        # Python 2 only.
        headers = next(reader)
        print (headers)

        for row in reader:
            # The last column is the class label.
            labelList.append(row[len(row)-1])
            # Columns 1 .. len-2 are the features; column 0 is skipped.
            rowDict = {}
            for i in range(1,len(row)-1):
                rowDict[headers[i]]=row[i]
            featureList.append(rowDict)
    print (featureList)

    # Vectorize features: each "name=value" pair becomes one 0/1 column.
    vec = DictVectorizer()
    dummyX = vec.fit_transform(featureList).toarray()
    print ("dummyx:" + str(dummyX))
    # NOTE(review): newer scikit-learn releases expose this as
    # get_feature_names_out() -- confirm against the installed version.
    print (vec.get_feature_names())

    print ("labelList:" + str(labelList))
    # Vectorize class labels
    lb = preprocessing.LabelBinarizer()
    dummyY = lb.fit_transform(labelList)
    print ("dummyY:"+ str(dummyY))

    # Use a decision tree for classification
    clf = tree.DecisionTreeClassifier(criterion='entropy')
    clf = clf.fit(dummyX,dummyY)
    print ("clf:"+str(clf))

    # Visualize the model. The export writes into the open file, so its return
    # value is not bound to f.
    with open("allElectornicinformationGainOri.dot",'w') as f:
        tree.export_graphviz(clf,feature_names=vec.get_feature_names(),out_file=f)
    # Convert the .dot file to a PDF with Graphviz:
    #   dot -Tpdf allElectornicinformationGainOri.dot -o output.pdf

    # dummyX[0,:] is a 1-D array holding the first sample.
    oneRowx = dummyX[0,:]
    print ("oneRowx"+str(oneRowx))

    # Test the model
    newRowX = oneRowx
    # Pitfall: watch the numpy dimensions!
    newRowX[0] = 0
    newRowX[2] = 1
    # BUG (kept on purpose): reshape() returns a new array and does not modify
    # newRowX in place, so this statement has no effect and the row stays 1-D.
    newRowX.reshape(1, -1)
    print ("newRowx:" + str(newRowX))

    # BUG (kept on purpose): predict() expects a 2-D array, so passing the 1-D
    # row raises the ValueError shown in the traceback.
    predictedY = clf.predict(oneRowx)
    print ("predictedY"+str(predictedY))

    错误如下

    Traceback (most recent call last):
      File "D:/workspace/python/.idea/decision_tree.py", line 55, in <module>
        predictedY = clf.predict(oneRowx)
      File "C:\Python27\lib\site-packages\sklearn\tree\tree.py", line 412, in predict
        X = self._validate_X_predict(X, check_input)
      File "C:\Python27\lib\site-packages\sklearn\tree\tree.py", line 373, in _validate_X_predict
        X = check_array(X, dtype=DTYPE, accept_sparse="csr")
      File "C:\Python27\lib\site-packages\sklearn\utils\validation.py", line 441, in check_array
        "if it contains a single sample.".format(array))
    ValueError: Expected 2D array, got 1D array instead:
    array=[0. 0. 1. 0. 1. 1. 0. 0. 1. 0.].
    Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

    修正后代码:

    #coding=utf-8
    # Decision-tree demo on the AllElectronics CSV (corrected version):
    # one-hot encode the features, fit an entropy-based DecisionTreeClassifier,
    # export the tree to Graphviz and predict a single sample.
    #
    # The fix for "ValueError: Expected 2D array, got 1D array instead" is to
    # reshape the single sample to shape (1, n_features) before predict().

    from sklearn.feature_extraction import DictVectorizer
    import csv
    from sklearn import tree
    from sklearn import preprocessing
    # NOTE(review): StringIO is not used in this listing, and
    # sklearn.externals.six appears to be gone from newer scikit-learn
    # releases -- confirm against the installed version.
    from sklearn.externals.six import StringIO

    featureList = []
    labelList = []

    # The context manager closes the CSV file even if parsing fails.
    with open(r"D:\workspace\python\files\AllElectronics.csv") as allElectronicsData:
        reader = csv.reader(allElectronicsData)
        # next(reader) works on Python 2.6+ and Python 3; reader.next() is
        # Python 2 only.
        headers = next(reader)
        print (headers)

        for row in reader:
            # The last column is the class label.
            labelList.append(row[len(row)-1])
            # Columns 1 .. len-2 are the features; column 0 is skipped.
            rowDict = {}
            for i in range(1,len(row)-1):
                rowDict[headers[i]]=row[i]
            featureList.append(rowDict)
    print (featureList)

    # Vectorize features: each "name=value" pair becomes one 0/1 column.
    vec = DictVectorizer()
    dummyX = vec.fit_transform(featureList).toarray()
    print ("dummyx:" + str(dummyX))
    # NOTE(review): newer scikit-learn releases expose this as
    # get_feature_names_out() -- confirm against the installed version.
    print (vec.get_feature_names())

    print ("labelList:" + str(labelList))
    # Vectorize class labels
    lb = preprocessing.LabelBinarizer()
    dummyY = lb.fit_transform(labelList)
    print ("dummyY:"+ str(dummyY))

    # Use a decision tree for classification
    clf = tree.DecisionTreeClassifier(criterion='entropy')
    clf = clf.fit(dummyX,dummyY)
    print ("clf:"+str(clf))

    # Visualize the model. The export writes into the open file, so its return
    # value is not bound to f.
    with open("allElectornicinformationGainOri.dot",'w') as f:
        tree.export_graphviz(clf,feature_names=vec.get_feature_names(),out_file=f)
    # Convert the .dot file to a PDF with Graphviz:
    #   dot -Tpdf allElectornicinformationGainOri.dot -o output.pdf

    # predict() needs a 2-D array, so the first sample is reshaped from
    # (n_features,) to (1, n_features).
    oneRowx = dummyX[0,:].reshape(1, -1)
    print ("oneRowx"+str(oneRowx))

    # Test the model on a modified sample. Work on a copy: the reshaped row is
    # a view of dummyX, so editing it directly would also change the training
    # matrix.
    newRowX = oneRowx.copy()
    # Pitfall: watch the numpy dimensions! The row is 2-D now, so elements are
    # addressed as [row][column].
    newRowX[0][0] = 0
    newRowX[0][2] = 1
    print ("newRowx:" + str(newRowX))

    predictedY = clf.predict(newRowX)
    print ("predictedY"+str(predictedY))

    运行结果:

    C:\Python27\python.exe D:/workspace/python/.idea/decision_tree.py
    ['RID', 'age', 'income', 'student', 'credit_rating', 'class_buys_computer']
    [{'credit_rating': 'fair', 'age': 'youth', 'student': 'no', 'income': 'high'}, {'credit_rating': 'excellent', 'age': 'youth', 'student': 'no', 'income': 'high'}, {'credit_rating': 'fair', 'age': 'middle_aged', 'student': 'no', 'income': 'high'}, {'credit_rating': 'fair', 'age': 'senior', 'student': 'no', 'income': 'medium'}, {'credit_rating': 'fair', 'age': 'senior', 'student': 'yes', 'income': 'low'}, {'credit_rating': 'excellent', 'age': 'senior', 'student': 'yes', 'income': 'low'}, {'credit_rating': 'excellent', 'age': 'middle_aged', 'student': 'yes', 'income': 'low'}, {'credit_rating': 'fair', 'age': 'youth', 'student': 'no', 'income': 'medium'}, {'credit_rating': 'fair', 'age': 'youth', 'student': 'yes', 'income': 'low'}, {'credit_rating': 'fair', 'age': 'senior', 'student': 'yes', 'income': 'medium'}, {'credit_rating': 'excellent', 'age': 'youth', 'student': 'yes', 'income': 'medium'}, {'credit_rating': 'excellent', 'age': 'middle_aged', 'student': 'no', 'income': 'medium'}, {'credit_rating': 'fair', 'age': 'middle_aged', 'student': 'yes', 'income': 'high'}, {'credit_rating': 'excellent', 'age': 'senior', 'student': 'no', 'income': 'medium'}]
    dummyx:[[0. 0. 1. 0. 1. 1. 0. 0. 1. 0.]
    [0. 0. 1. 1. 0. 1. 0. 0. 1. 0.]
    [1. 0. 0. 0. 1. 1. 0. 0. 1. 0.]
    [0. 1. 0. 0. 1. 0. 0. 1. 1. 0.]
    [0. 1. 0. 0. 1. 0. 1. 0. 0. 1.]
    [0. 1. 0. 1. 0. 0. 1. 0. 0. 1.]
    [1. 0. 0. 1. 0. 0. 1. 0. 0. 1.]
    [0. 0. 1. 0. 1. 0. 0. 1. 1. 0.]
    [0. 0. 1. 0. 1. 0. 1. 0. 0. 1.]
    [0. 1. 0. 0. 1. 0. 0. 1. 0. 1.]
    [0. 0. 1. 1. 0. 0. 0. 1. 0. 1.]
    [1. 0. 0. 1. 0. 0. 0. 1. 1. 0.]
    [1. 0. 0. 0. 1. 1. 0. 0. 0. 1.]
    [0. 1. 0. 1. 0. 0. 0. 1. 1. 0.]]
    ['age=middle_aged', 'age=senior', 'age=youth', 'credit_rating=excellent', 'credit_rating=fair', 'income=high', 'income=low', 'income=medium', 'student=no', 'student=yes']
    labelList:['no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'no']
    dummyY:[[0]
    [0]
    [1]
    [1]
    [1]
    [0]
    [1]
    [0]
    [1]
    [1]
    [1]
    [1]
    [1]
    [0]]
    clf:DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None,
    max_features=None, max_leaf_nodes=None,
    min_impurity_decrease=0.0, min_impurity_split=None,
    min_samples_leaf=1, min_samples_split=2,
    min_weight_fraction_leaf=0.0, presort=False, random_state=None,
    splitter='best')
    oneRowx[[0. 0. 1. 0. 1. 1. 0. 0. 1. 0.]]
    newRowx:[[0. 0. 1. 0. 1. 1. 0. 0. 1. 0.]]
    predictedY[0]

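    总结:注意维度。clf.predict 需要二维数组,单个样本在预测前必须先用 reshape(1, -1) 变成形状为 (1, 特征数) 的二维数组,之后访问元素也要使用二维下标;另外 reshape 会返回一个新数组,不会原地修改原数组,必须把返回值赋给变量。(原文中标红的位置即修正后代码里做了上述改动的几行。)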

  • 相关阅读:
    开源数据汇集工具
    scrapy定时执行抓取任务
    xpath的常见操作
    ubuntu 安装python mysqldb
    sudo: /etc/sudoers is owned by uid 755, should be 0
    ubuntu 14.04安装mysql数据库
    win7 远程桌面连接centos 6.5
    本地启动spark-shell
    ubuntu 安装 2.10.x版本的scala
    unfolding maps支持中文
  • 原文地址:https://www.cnblogs.com/mobiwangyue/p/8243979.html
Copyright © 2011-2022 走看看