zoukankan      html  css  js  c++  java
  • Python 分类方法记录

    使用GPU

    # Select which GPUs TensorFlow may use and limit per-process GPU memory.
    import os
    # Must be set *before* importing tensorflow for the visibility mask to apply.
    os.environ["CUDA_VISIBLE_DEVICES"] = "4,5,6,7"
    import tensorflow as tf
    # tf.ConfigProto is the TF 1.x session-config API (tf.compat.v1.ConfigProto in TF 2).
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.5  # cap at 50% of each GPU's memory
    config.gpu_options.allow_growth = True  # allocate on demand instead of grabbing it all upfront

    DNN

    def baseline_model():
        """Build and compile a small feed-forward classifier for 21 input features.

        Returns a compiled Keras ``Sequential`` model: two 16-unit ReLU hidden
        layers and a 2-unit output.
        """
        model = Sequential()
        model.add(Dense(16, input_shape=(21, ), activation="relu"))
        model.add(Dense(16, activation="relu"))
        # Labels are one-hot encoded (to_categorical) by the callers, so the two
        # output units are mutually exclusive classes: use softmax with
        # categorical cross-entropy rather than the original pairing of two
        # independent sigmoids with binary cross-entropy.
        model.add(Dense(2, activation="softmax"))

        model.compile(optimizer=RMSprop(lr=0.01), loss='categorical_crossentropy', metrics=['accuracy'])

        return model
    
    def cross_validation(X, new_y, num_feat):
        """Evaluate the baseline DNN with 5-fold CV on a standardized training split.

        Debug-prints the head of ``X``, one-hot encodes the labels, holds out
        10% of the data as a test set, standardizes features on the training
        split, then prints and returns the per-fold CV accuracy scores.
        """
        print("X=", X[:10])
        print("X.values=", X.values[:10])  # assumes X is a pandas DataFrame — TODO confirm
        labels = to_categorical(new_y)

        X_train, X_test, y_train, y_test = train_test_split(
            X, labels, test_size=0.1, random_state=666)

        scaler = StandardScaler()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

        # NOTE(review): the scaler is fit on all of X_train before CV, so each
        # fold's validation portion influences the scaling — minor leakage.
        dnn = KerasClassifier(build_fn=baseline_model, epochs=10, batch_size=1, verbose=1)
        folds = KFold(n_splits=5, shuffle=True, random_state=999)
        scores = cross_val_score(dnn, X_train, y_train, cv=folds)
        print("Accuracy of cross validation, mean %.2f, std %.2f" % (scores.mean(), scores.std()))

        return scores

    画准确率和损失曲线

    def show_acc(history):
        """Plot training vs. validation accuracy per epoch from a Keras History."""
        plt.clf()
        history_dict = history.history
        # NOTE(review): these keys exist only when the model was compiled with
        # metrics=['binary_accuracy']; metrics=['accuracy'] yields
        # 'accuracy'/'val_accuracy' instead — confirm against the compile call.
        acc = history_dict['binary_accuracy']
        val_acc = history_dict['val_binary_accuracy']

        epochs = range(1, len(val_acc) + 1)

        plt.plot(epochs, acc, 'bo', label='Training acc')
        # Fixed legend typo: 'Balidation acc' -> 'Validation acc'.
        plt.plot(epochs, val_acc, 'b', label='Validation acc')
        plt.xlabel('Epochs')
        plt.ylabel('Acc')
        plt.legend()

        plt.show()
    def show_loss(history):
        """Plot per-epoch training and validation loss from a Keras History."""
        plt.clf()
        metrics = history.history
        print("print history.history = ", metrics)
        train_loss = metrics['loss']
        valid_loss = metrics['val_loss']

        xs = range(1, len(valid_loss) + 1)

        plt.plot(xs, train_loss, 'bo', label='Training loss')
        plt.plot(xs, valid_loss, 'b', label='Validation loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()

        plt.show()

    Tensorboard

    def classify_data(X, y, class_names):
        """Train the 21-feature DNN with TensorBoard logging.

        One-hot encodes ``y``, holds out 10% as a test set, standardizes the
        features on the training split, then builds, compiles, and fits the
        model with a TensorBoard callback logging histograms and embeddings.
        (Reconstructed from a scrape-collapsed one-line paste.)
        """
        y = to_categorical(y)

        # Split first, then fit the scaler on the training data only.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.1, random_state=666)
        standScaler = StandardScaler()
        standScaler.fit(X_train)
        X_train = standScaler.transform(X_train)
        X_test = standScaler.transform(X_test)

        model = Sequential()
        model.add(Dense(16, input_shape=(21, ), activation="relu"))
        model.add(Dense(16, activation="relu"))
        model.add(Dense(2, activation="sigmoid"))
        model.summary()
        model.compile(optimizer=RMSprop(lr=0.001), loss=binary_crossentropy,
                      metrics=['accuracy'])
        # show_shapes expects a bool; the original passed the string 'True'.
        plot_model(model, show_shapes=True, to_file='model.png')

        callbacks = [keras.callbacks.TensorBoard(
            log_dir="my_log_dir",
            histogram_freq=1,
            embeddings_freq=1,
            embeddings_data=X[:20].astype("float32"))]
        # Fixed keyword typo: 'epoches' -> 'epochs' (raises TypeError otherwise).
        history = model.fit(X_train, y_train, epochs=20, batch_size=1,
                            validation_split=0.2, callbacks=callbacks)

    画混淆矩阵

    def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
        """Render confusion matrix ``cm`` as an annotated heatmap.

        Rows are true labels, columns are predicted labels. When ``normalize``
        is True each row is scaled to sum to 1 and cells show two decimals.
        (Fixes a scrape-broken comment that was split across two lines and
        caused a SyntaxError; comments translated to English.)
        """
        if normalize:
            cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
            print("Normalized confusion matrix")
        else:
            print('Confusion matrix, without normalization')

        print(cm)

        # Draws the matrix image; nothing is displayed until plt.show() is
        # called by the caller.
        plt.imshow(cm, interpolation='nearest', cmap=cmap)
        plt.title(title)
        plt.colorbar()  # show the color scale
        tick_marks = np.arange(len(classes))
        plt.xticks(tick_marks, classes, rotation=45)  # x-axis class labels
        plt.yticks(tick_marks, classes)  # y-axis class labels

        fmt = '.2f' if normalize else 'd'
        thresh = cm.max() / 2.
        # Annotate every cell; itertools.product walks all (row, col) pairs.
        # Row i is the true class, column j the predicted class.
        for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
            plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")

        plt.tight_layout()
        plt.ylabel('True label', fontsize=14)
        plt.xlabel('Predicted label', fontsize=14)
    
    def classify_data(X, y, class_names):
        """Train a linear SVM on a train/test split and return its test metrics.

        Splits with the module-level ``testSize`` fraction, fits an SVC with a
        linear kernel, and returns ``(precision, recall, f1, accuracy)`` on the
        held-out test set.

        NOTE(review): the three averaged scores use different averaging modes
        (macro / micro / weighted) — confirm this mix is intentional.
        """
        global f_cv_scores  # NOTE(review): declared but never assigned in this function

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=testSize, random_state=42)

        clf = svm.SVC(kernel='linear', C=2e3)
        y_pred = clf.fit(X_train, y_train).predict(X_test)

        precision = precision_score(y_test, y_pred, average='macro')
        recall = recall_score(y_test, y_pred, average='micro')
        f1 = f1_score(y_test, y_pred, average='weighted')
        acc = accuracy_score(y_test, y_pred)
        # Originally these metrics were computed and then discarded (the
        # function returned None); return them so callers can use the result.
        return precision, recall, f1, acc

    Keras训练集、测试集与验证集

    # Train/test split: 80% train, 20% test; model.fit then holds out 10% of
    # the training set as a validation set via validation_split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)
    # Fixed: rejoined the scrape-wrapped assignment and removed a stray
    # trailing ')' that made this line a SyntaxError.
    history = model.fit(X_train, y_train, epochs=20, batch_size=1, shuffle=True,
                        validation_split=0.1, verbose=1, callbacks=None,
                        validation_data=None)
  • 相关阅读:
    uva 10369 Arctic Network
    uvalive 5834 Genghis Khan The Conqueror
    uvalive 4848 Tour Belt
    uvalive 4960 Sensor Network
    codeforces 798c Mike And Gcd Problem
    codeforces 796c Bank Hacking
    codeforces 768c Jon Snow And His Favourite Number
    hdu 1114 Piggy-Bank
    poj 1276 Cash Machine
    bzoj 2423 最长公共子序列
  • 原文地址:https://www.cnblogs.com/taoyuyeit/p/11454477.html
Copyright © 2011-2022 走看看