  • Python multi-label classification template
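The template below wraps a RandomForestClassifier in sklearn's MultiOutputClassifier to predict several label columns at once: first the major and minor problem categories together, then each group on its own, reporting exact-match accuracy on a held-out test file. Commented-out drafts of a scikit-multilearn BinaryRelevance approach and a Keras network follow, and the post closes with a working BinaryRelevance example using a Gaussian naive Bayes base classifier.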

from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
import numpy as np
from pandas import read_csv
import pandas as pd

root1="F:/goverment/shuili2/techproblem_text_train.csv"
root2="F:/goverment/shuili2/techproblem_text_test.csv"
root3="F:/goverment/shuili2/text_train_4problem.csv"
root4="F:/goverment/shuili2/text_test_4problem.csv"

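For reference: in these CSVs the first 28 columns are the input features and the columns from index 28 onward are the labels; each block below slices out the label columns it needs (all of them for the combined model, the first four label columns for the major categories, and the *_4problem files for the minor categories).
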
'''Predict major and minor categories together'''
#root2="./id="+str(id_num)+"_process_data.csv"
dataset1 = read_csv(root1)          # load training data and convert to an array
dataset1 = dataset1.values
dataset2 = read_csv(root2)          # load test data and convert to an array
dataset2 = dataset2.values
X_train = dataset1[:, :28]          # columns before 28 are the features
Y_train = dataset1[:, 28:]          # columns from 28 on are the labels
X_test = dataset2[:, :28]
Y_test = dataset2[:, 28:]

print('Ground-truth labels for the multi-output classifier:\n', Y_train)
n_samples, n_features = X_train.shape
n_outputs = Y_train.shape[1]        # number of label columns
n_classes = 50                      # each output has 50 possible classes
forest = RandomForestClassifier(n_estimators=500, random_state=1)   # random-forest base classifier
multi_target_forest = MultiOutputClassifier(forest)                 # multi-output multi-class wrapper
y_pred = multi_target_forest.fit(X_train, Y_train).predict(X_train)
print('Predicted labels on the training set:\n', y_pred)
pp = multi_target_forest.predict(X_test)

# exact-match accuracy on the test set: a row counts only if every label column matches
k = 0
for i in range(len(pp)):
    if all(pp[i][j] == Y_test[i][j] for j in range(Y_test.shape[1])):
        k += 1
print(k / len(pp))

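The per-row comparison above can also be written as a single vectorized expression; a minimal sketch, assuming pp and Y_test are the arrays produced above:

import numpy as np
# exact-match (subset) accuracy: fraction of rows where every label column is predicted correctly
exact_match = (pp == Y_test).all(axis=1).mean()
print(exact_match)
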
'''Predict only the major categories'''
#root2="./id="+str(id_num)+"_process_data.csv"
dataset3 = read_csv(root1)          # load training data and convert to an array
dataset3 = dataset3.values
dataset4 = read_csv(root2)          # load test data and convert to an array
dataset4 = dataset4.values
X_train_big = dataset3[:, :28]
Y_train_big = dataset3[:, 28:32]    # only the four major-category label columns
X_test_big = dataset4[:, :28]
Y_test_big = dataset4[:, 28:32]
print('Major categories only - ground-truth labels:\n', Y_train_big)
n_samples, n_features = X_train_big.shape
n_outputs = Y_train_big.shape[1]
n_classes = 11                      # each output has 11 possible classes
forest = RandomForestClassifier(n_estimators=200, random_state=1)
multi_target_forest = MultiOutputClassifier(forest)
y_pred = multi_target_forest.fit(X_train_big, Y_train_big).predict(X_train_big)
print('Predicted labels on the training set:\n', y_pred)
pp = multi_target_forest.predict(X_test_big)

# exact-match accuracy on the test set
k = 0
for i in range(len(pp)):
    if all(pp[i][j] == Y_test_big[i][j] for j in range(Y_test_big.shape[1])):
        k += 1
print(k / len(pp))

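Beyond the exact-match ratio it can help to look at each output on its own; a short sketch, assuming pp and Y_test_big from the block above:

from sklearn.metrics import accuracy_score
# accuracy of each major-category output, one label column at a time
for col in range(Y_test_big.shape[1]):
    print('output', col, 'accuracy:', accuracy_score(Y_test_big[:, col], pp[:, col]))
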
'''Predict only the minor categories'''
#root2="./id="+str(id_num)+"_process_data.csv"
dataset4 = read_csv(root3)          # load training data and convert to an array
dataset4 = dataset4.values
dataset5 = read_csv(root4)          # load test data and convert to an array
dataset5 = dataset5.values
X_train_small = dataset4[:, :28]
Y_train_small = dataset4[:, 28:32]  # the four minor-category label columns
X_test_small = dataset5[:, :28]
Y_test_small = dataset5[:, 28:32]
print('Minor categories only - ground-truth labels:\n', Y_train_small)
n_samples, n_features = X_train_small.shape
n_outputs = Y_train_small.shape[1]
n_classes = 61                      # each output has 61 possible classes
forest = RandomForestClassifier(n_estimators=200, random_state=1)
multi_target_forest = MultiOutputClassifier(forest)
y_pred = multi_target_forest.fit(X_train_small, Y_train_small).predict(X_train_small)
print('Predicted labels on the training set:\n', y_pred)
pp = multi_target_forest.predict(X_test_small)

# exact-match accuracy on the test set
k = 0
for i in range(len(pp)):
    if all(pp[i][j] == Y_test_small[i][j] for j in range(Y_test_small.shape[1])):
        k += 1
print(k / len(pp))

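The three blocks above repeat the same load/train/evaluate pattern; a hypothetical helper could factor it out. The function name, the label_cols slice argument, and the n_trees default below are illustrative, not from the original:

from pandas import read_csv
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier

def run_multioutput_rf(train_path, test_path, label_cols, n_trees=200):
    """Train a multi-output random forest and return exact-match accuracy on the test file."""
    train = read_csv(train_path).values
    test = read_csv(test_path).values
    X_tr, Y_tr = train[:, :28], train[:, label_cols]   # first 28 columns are features
    X_te, Y_te = test[:, :28], test[:, label_cols]
    clf = MultiOutputClassifier(RandomForestClassifier(n_estimators=n_trees, random_state=1))
    clf.fit(X_tr, Y_tr)
    return (clf.predict(X_te) == Y_te).all(axis=1).mean()

# e.g. the major-category experiment: print(run_multioutput_rf(root1, root2, slice(28, 32)))
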
'''
from pandas import read_csv
import pandas as pd
import numpy as np
from skmultilearn.problem_transform import BinaryRelevance
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score


root1="D:/Anaconda3-5.0.1-Windows-x86_64/anaconda/work/shuili2/data.csv"
#root2="./id="+str(id_num)+"_process_data.csv"
dataset = read_csv(root1)   # load the data and convert to an array
dataset = dataset.values
x_train = dataset[:4000, :29]
y_train = dataset[:4000, 29:]

x_test = dataset[4000:, :29]
y_test = dataset[4000:, 29:]

# initialize binary relevance multi-label classifier
# with a gaussian naive bayes base classifier
classifier = BinaryRelevance(GaussianNB())

# train
classifier.fit(x_train, y_train)

# predict
predictions = classifier.predict(x_test)
accuracy_score(y_test, predictions)
'''

'''---------------------------------'''
'''
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.preprocessing import LabelEncoder
from pandas import read_csv
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score


root1="D:/Anaconda3-5.0.1-Windows-x86_64/anaconda/work/shuili2/data.csv"
#root2="./id="+str(id_num)+"_process_data.csv"
dataset = read_csv(root1)   # load the data and convert to an array
dataset = dataset.values

# load dataset
dataframe = pd.read_csv("data.csv", header=None)
dataset = dataframe.values
X = dataset[:, 0:29].astype(float)
Y = dataset[:, 29:]

# encode class values as integers
#encoder = LabelEncoder()
#encoded_Y = encoder.fit_transform(Y)
# convert integers to dummy variables (one hot encoding)
#dummy_y = np_utils.to_categorical(encoded_Y)

# define model structure
# (note: output_dim and nb_epoch are the Keras 1 argument names; Keras 2 renamed them to units and epochs)
def baseline_model():
    model = Sequential()
    model.add(Dense(output_dim=10, input_dim=29, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(output_dim=8, input_dim=10, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
estimator = KerasClassifier(build_fn=baseline_model, nb_epoch=200, batch_size=50)
# splitting data into training set and test set. If random_state is set to an integer, the split datasets are fixed.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.01, random_state=0)
estimator.fit(X_train, Y_train)

# make predictions
pred = estimator.predict(X_test)


# inverse numeric variables to initial categorical labels
#init_lables = encoder.inverse_transform(pred)

# k-fold cross-validate
seed = 42
np.random.seed(seed)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, Y, cv=kfold)
'''
from pandas import read_csv

root1="F:/goverment/shuili2/techproblem_text_train.csv"
root2="F:/goverment/shuili2/techproblem_text_test.csv"
root3="F:/goverment/shuili2/text_train_4problem.csv"
root4="F:/goverment/shuili2/text_test_4problem.csv"

'''Predict major and minor categories together'''
#root2="./id="+str(id_num)+"_process_data.csv"
dataset1 = read_csv(root1)   # load training data and convert to an array
dataset1 = dataset1.values
dataset2 = read_csv(root2)   # load test data and convert to an array
dataset2 = dataset2.values
X_train = dataset1[:, :28]
Y_train = dataset1[:, 28:]
X_test = dataset2[:, :28]
Y_test = dataset2[:, 28:]

from pprint import pprint
pprint(dataset1)

## Binary relevance
# scikit-multilearn
from skmultilearn.problem_transform import BinaryRelevance
from sklearn.naive_bayes import GaussianNB

# initialize a binary relevance multi-label classifier
# with a Gaussian naive Bayes base classifier
classifier = BinaryRelevance(GaussianNB())

# train
classifier.fit(X_train, Y_train)

# predict
predictions = classifier.predict(X_test)

# compute the accuracy (subset accuracy on the test set)
from sklearn.metrics import accuracy_score
print(accuracy_score(Y_test, predictions))
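
One practical note, not from the original post: skmultilearn classifiers return their predictions as a scipy sparse matrix, so to inspect or compare them row by row it helps to densify them first. A small sketch:

# predictions is a scipy sparse matrix; convert it for easy inspection
dense_pred = predictions.toarray()
print(dense_pred[:5])   # first five predicted label rows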