zoukankan      html  css  js  c++  java
  • mooc机器学习第六天-K近邻,决策树,朴素贝叶斯分类器简单尝试

    1.下面的代码是上一篇理论中的小例子

    from sklearn.neighbors import KNeighborsClassifier # K近邻分类器
    from sklearn.datasets import load_iris  # 鸢尾花数据
    from sklearn.tree import DecisionTreeClassifier  #决策树分类器
    from sklearn.model_selection import cross_val_score #交叉验证值函数
    from sklearn.naive_bayes import GaussianNB #朴素贝叶斯分类器
    import  numpy as np #科学计算库
    
    
    #小示例实现顺序与导包顺序相同
    
    
    # --- K-nearest neighbours: four 1-D samples split into two classes ---
    X = [[0], [1], [2], [3]]
    y = [0, 0, 1, 1]

    knn = KNeighborsClassifier(n_neighbors=3).fit(X, y)
    print("+++++K近邻+++++")
    print(knn.predict([[1.2]]))

    # --- Decision tree: 10-fold cross-validation scores on the iris data ---
    tree = DecisionTreeClassifier()
    iris = load_iris()
    fold_scores = cross_val_score(tree, iris.data, iris.target, cv=10)
    print("+++++交叉验证+++++")
    print(fold_scores)

    # The same tree object is then fitted on the toy samples defined above.
    print("+++++决策树+++++")
    tree.fit(X, y)
    print(tree.predict([[2.2]]))

    # --- Gaussian naive Bayes: six 2-D points, classes 1 and 2 ---
    points = np.array([[-1, -1], [-2, -1], [-3, -2], [2, 1], [1, 1], [3, 2]])
    classes = np.array([1, 1, 1, 2, 2, 2])

    bayes = GaussianNB(priors=None).fit(points, classes)
    prediction = bayes.predict([[-0.8, -1]])
    print("+++++朴素毕贝叶斯+++++")
    print(prediction)
    

    2.结果

      

     

    +++++K近邻+++++
    [0]
    +++++交叉验证+++++
    [ 1.          0.93333333  1.          0.93333333  0.93333333  0.86666667
      0.93333333  0.93333333  1.          1.        ]
    +++++决策树+++++
    [1]
    +++++朴素毕贝叶斯+++++
    [1]
    

     

     

    3.利用mooc给的feature数据实践

     

    import  numpy as np
    import  pandas as pd
    
    from  sklearn.preprocessing import  Imputer#数据预处理库
    from  sklearn.cross_validation import train_test_split  #打乱训练数据
    from  sklearn.metrics  import  classification_report #计算召回率,F1值,精准度
    
    
    from sklearn.neighbors import  KNeighborsClassifier
    from sklearn.tree import  DecisionTreeClassifier
    from sklearn.naive_bayes import GaussianNB
    
    
    def load_datasets(feature_path,lable_path):
        """Read feature and label files and stack each kind into one array.

        feature_path: iterable of comma-separated feature files without a
            header row. "?" marks a missing value; each one is replaced by
            the mean of its column, computed separately for every file.
        lable_path: iterable of label files, read without a header row.

        Returns a (feature, lable) tuple: the features stacked row-wise and
        the labels flattened to a 1-D array.
        """
        # A zero-row seed fixes the expected width (41 feature columns) and
        # the float dtype of the stacked result.
        feature_parts = [np.empty((0, 41))]
        for path in feature_path:
            frame = pd.read_csv(path, na_values="?", header=None)
            imputer = Imputer(missing_values='NaN', strategy='mean', axis=0)
            # Fit the column means on this file, then fill its missing cells.
            feature_parts.append(imputer.fit(frame).transform(frame))
        feature = np.concatenate(feature_parts)

        # Same scheme for the labels: a zero-row, single-column seed.
        label_parts = [np.empty((0, 1))]
        for path in lable_path:
            label_parts.append(pd.read_table(path, header=None))
        lable = np.ravel(np.concatenate(label_parts))

        return feature, lable
    
    
    if __name__ == '__main__':
        # Locations of the feature / label files for the five data sets A-E.
        featurepaths = ['/A/A.feature',
                        '/B/B.feature',
                        '/C/C.feature',
                        '/D/D.feature',
                        '/E/E.feature'
                        ]
        labelPaths = ['/A/A.label',
                      '/B/B.label',
                      '/C/C.label',
                      '/D/D.label',
                      '/E/E.label']

        # Load the data: the first four sets are for training, the last one
        # is held out for testing.
        x_train, y_train = load_datasets(featurepaths[:4], labelPaths[:4])
        x_test, y_test = load_datasets(featurepaths[4:], labelPaths[4:])

        # Shuffle the training data. One shared permutation keeps every
        # feature row aligned with its label. This replaces
        # train_test_split(..., test_size=0.0), which only served as a
        # shuffle, produced two unused empty splits, and is rejected by
        # newer scikit-learn releases (a zero test fraction is invalid there).
        order = np.random.permutation(len(x_train))
        x_train, y_train = x_train[order], y_train[order]

        # Train the three classifiers in turn and predict on the test set.
        classifiers = [
            ('knn', KNeighborsClassifier()),
            ('DT', DecisionTreeClassifier()),
            ('Bayes', GaussianNB()),
        ]
        answers = []
        for name, model in classifiers:
            print('Start training ' + name)
            model.fit(x_train, y_train)
            print('Training done')
            answers.append((name, model.predict(x_test)))
            print('Prediction done')

        # Show the results: classification_report builds a text report of
        # the main classification metrics (precision / recall / F1).
        # The headers need the "\n\n" escape; literal line breaks inside a
        # single-quoted string are a syntax error.
        for name, answer in answers:
            print('\n\nThe classification report for ' + name + ':')
            print(classification_report(y_test, answer))
    

      

     

     

  • 相关阅读:
    《JAVA与模式》之备忘录模式
    《JAVA与模式》之调停者模式
    如何将GTK+2.0的信号、回调处理映射成类成员函数的调用VC中测试(单类继承)
    第一个GTK程序:中国农历 附录2 libtool
    【转载】成员函数指针与高性能的C++委托
    第一个GTK程序:中国农历 附录4 动态库
    第一个GTK程序:中国农历 附录3 静态库
    【转载】C++中实现委托(Delegate)
    如何将GTK+2.0的信号、回调处理映射成类成员函数的调用VC中测试(虚拟继承类及多继承)
    第一个GTK程序:中国农历 附录5 变量名
  • 原文地址:https://www.cnblogs.com/cheflone/p/13205215.html
Copyright © 2011-2022 走看看