zoukankan      html  css  js  c++  java
  • Python 分类方法记录

    使用GPU

    import os
    # Expose only GPUs 4-7 to this process. This is set before TensorFlow is
    # imported so the restriction is already in place when TensorFlow loads.
    os.environ["CUDA_VISIBLE_DEVICES"] = "4,5,6,7"
    import tensorflow as tf
    # TF1-style configuration object.
    config = tf.ConfigProto()
    # Cap this process at half of each visible GPU's memory.
    config.gpu_options.per_process_gpu_memory_fraction = 0.5
    # Grow GPU memory usage on demand instead of reserving it all up front.
    config.gpu_options.allow_growth = True
    # NOTE(review): `config` is not handed to a session in this snippet;
    # presumably it must be passed to tf.Session(config=config) to take effect.

    DNN

    def baseline_model():
        """Build and compile the small fully connected baseline classifier.

        The network takes 21 input features, passes them through two 16-unit
        ReLU layers and ends in a 2-unit sigmoid layer. It is compiled with
        RMSprop (lr=0.01), binary cross-entropy loss and the accuracy metric.

        Returns:
            The compiled Sequential model.
        """
        layers = [
            Dense(16, input_shape=(21, ), activation="relu"),
            Dense(16, activation="relu"),
            Dense(2, activation="sigmoid"),
        ]
        net = Sequential(layers)

        optimizer = RMSprop(lr=0.01)
        net.compile(optimizer=optimizer, loss=binary_crossentropy, metrics=['accuracy'])

        return net
    
    def cross_validation(X, new_y, num_feat):
        """Score the baseline Keras model with shuffled 5-fold cross-validation.

        The labels are one-hot encoded, 10% of the samples are split off as a
        test set, the features are standardized with statistics fitted on the
        training part, and a KerasClassifier wrapping ``baseline_model`` is
        cross-validated on the training part.

        Args:
            X: Feature table; must support slicing and expose ``.values``
                (presumably a pandas DataFrame -- confirm with the caller).
            new_y: Class labels, passed to ``to_categorical``.
            num_feat: Not used by this function.

        Returns:
            The per-fold scores returned by ``cross_val_score``.
        """
        # Debug output: the first ten rows, as given and as a raw array.
        print("X=", X[:10])
        print("X.values=", X.values[:10])
        one_hot_y = to_categorical(new_y)

        X_train, X_test, y_train, y_test = train_test_split(
            X, one_hot_y, test_size=0.1, random_state=666)

        # Fit the scaler on the training split only, then apply it to both.
        scaler = StandardScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

        folds = KFold(n_splits=5, shuffle=True, random_state=999)
        wrapped_net = KerasClassifier(build_fn=baseline_model, epochs=10, batch_size=1, verbose=1)
        scores = cross_val_score(wrapped_net, X_train, y_train, cv=folds)

        fold_mean, fold_std = scores.mean(), scores.std()
        print("Accuracy of cross validation, mean %.2f, std %.2f" %(fold_mean, fold_std))

        # Classical estimators previously tried here instead of the Keras
        # model, each scored with cross_val_score(clf, X, new_y, cv = 10):
        #   LogisticRegression(penalty = 'l2', solver = 'liblinear', class_weight = 'balanced')
        #   KNeighborsClassifier(weights = "distance", n_neighbors = 10, p =9)
        #   svm.SVC(kernel = 'rbf', C = 2e4, gamma = 2e-5)
        #   svm.SVC(kernel= 'linear', C = 2e3)
        #   RandomForestClassifier(n_estimators = 1000, class_weight = "balanced")
        #   GaussianNB()

        return scores

    画准确率和损失曲线

    def show_acc(history):
        """Plot training and validation accuracy per epoch.

        Args:
            history: Object exposing a ``history`` dict of per-epoch metric
                lists, such as the value returned by ``model.fit``.

        Raises:
            KeyError: If no known accuracy key (together with its ``val_``
                counterpart) is present in ``history.history``.
        """
        plt.clf()
        history_dict = history.history

        # The accuracy key depends on how the model was compiled and on the
        # Keras version, so accept the common spellings in order of preference.
        for key in ('binary_accuracy', 'accuracy', 'acc'):
            if key in history_dict and 'val_' + key in history_dict:
                acc = history_dict[key]
                val_acc = history_dict['val_' + key]
                break
        else:
            raise KeyError(
                "no accuracy metric with a 'val_' counterpart found in "
                "history; available keys: %s" % sorted(history_dict))

        epochs = range(1, len(val_acc) + 1)

        plt.plot(epochs, acc, 'bo', label='Training acc')
        plt.plot(epochs, val_acc, 'b', label='Validation acc')
        plt.xlabel('Epochs')
        plt.ylabel('Acc')
        plt.legend()

        plt.show()
    def show_loss(history):
        """Plot training and validation loss per epoch.

        Args:
            history: Object exposing a ``history`` dict that contains the
                ``'loss'`` and ``'val_loss'`` per-epoch lists.
        """
        plt.clf()
        metrics = history.history
        # Debug output of everything that was recorded.
        print("print history.history = ", metrics)
        train_loss, val_loss = metrics['loss'], metrics['val_loss']

        # Epochs are numbered from 1 on the x axis.
        xs = range(1, len(val_loss) + 1)

        curves = (
            (train_loss, 'bo', 'Training loss'),
            (val_loss, 'b', 'Validation loss'),
        )
        for series, line_style, legend_text in curves:
            plt.plot(xs, series, line_style, label=legend_text)

        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()

        plt.show()

    TensorBoard

    def classify_data(X, y, class_names):
        """Train the dense classifier while logging to TensorBoard.

        The labels are one-hot encoded, 10% of the samples are split off as a
        test set, the features are standardized with statistics fitted on the
        training part, and a 21-16-16-2 dense network is trained for 20
        epochs. The model diagram is written to ``model.png`` and TensorBoard
        logs to ``my_log_dir``.

        Args:
            X: Feature matrix with 21 columns; must support
                ``X[:20].astype("float32")``.
            y: Class labels, passed to ``to_categorical``.
            class_names: Not used by this function.

        Returns:
            The value returned by ``model.fit`` (the training history), which
            can be passed to ``show_acc`` / ``show_loss``.
        """
        y = to_categorical(y)

        # Split off the test set, then standardize with training statistics.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=666)
        standScaler = StandardScaler()
        standScaler.fit(X_train)
        X_train = standScaler.transform(X_train)
        X_test = standScaler.transform(X_test)

        model = Sequential()
        model.add(Dense(16, input_shape=(21, ), activation="relu"))
        model.add(Dense(16, activation="relu"))
        model.add(Dense(2, activation="sigmoid"))
        model.summary()

        model.compile(optimizer=RMSprop(lr=0.001), loss=binary_crossentropy, metrics=['accuracy'])
        plot_model(model, show_shapes=True, to_file='model.png')

        # NOTE(review): the embedding samples are taken from the unscaled X,
        # while the model is trained on standardized data -- confirm intent.
        callbacks = [
            keras.callbacks.TensorBoard(
                log_dir="my_log_dir",
                histogram_freq=1,
                embeddings_freq=1,
                embeddings_data=X[:20].astype("float32"),
            )
        ]
        # 20% of the training split is held out by Keras for validation.
        history = model.fit(X_train, y_train, epochs=20, batch_size=1, validation_split=0.2, callbacks=callbacks)
        return history

    画混淆矩阵

    def plot_confusion_matrix(cm, classes,normalize=False, title='Confusion matrix',cmap=plt.cm.Blues):
        """Print a confusion matrix and draw it as an annotated heat map.

        The figure is drawn on the current matplotlib axes; this function does
        not call ``plt.show()``, so the caller decides when to display or save.

        Args:
            cm: 2-D NumPy confusion matrix; row i is the true class and
                column j the predicted class.
            classes: Class names used as tick labels on both axes.
            normalize: If True, divide every row by its sum so that each cell
                is the fraction of that true class.
            title: Title of the plot.
            cmap: Matplotlib colour map used for the heat map.
        """
        if normalize:
            # Row-normalize; a class with no samples keeps a row of zeros
            # instead of producing NaN from a division by zero.
            row_sums = cm.sum(axis=1)[:, np.newaxis]
            cm = np.divide(cm.astype('float'), row_sums,
                           out=np.zeros(cm.shape, dtype='float'),
                           where=row_sums != 0)
            print("Normalized confusion matrix")
        else:
            print('Confusion matrix, without normalization')

        print(cm)

        # imshow renders the matrix onto the axes but does not display the figure.
        plt.imshow(cm, interpolation='nearest', cmap=cmap)
        plt.title(title)
        plt.colorbar()  # show the colour scale
        tick_marks = np.arange(len(classes))
        plt.xticks(tick_marks, classes, rotation=45)  # x-axis labels
        plt.yticks(tick_marks, classes)  # y-axis labels

        fmt = '.2f' if normalize else 'd'
        thresh = cm.max() / 2.
        # Visit every (row, column) cell. Row i is the true label and column j
        # the predicted label, so x is the prediction and y the truth.
        for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
            # White text on dark cells, black text on light cells.
            plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")

        plt.tight_layout()
        plt.ylabel('True label', fontsize = 14)
        plt.xlabel('Predicted label', fontsize = 14)
    
    def classify_data(X, y, class_names):
        """Fit a linear SVM on a train split and score it on the held-out split.

        The split size comes from the module-level ``testSize``. Precision
        (macro), recall (micro), F1 (weighted) and accuracy are computed on
        the held-out predictions.

        NOTE(review): in the visible code the metrics are neither returned nor
        stored, ``class_names`` is unused and ``f_cv_scores`` is declared
        global but never assigned -- the snippet is presumably truncated.

        Args:
            X: Feature matrix.
            y: Class labels.
            class_names: Not used in the visible code.
        """
        global f_cv_scores

        # Train / test split.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size = testSize, random_state = 42)

        # Other estimators that were tried here:
        #   LogisticRegression(penalty = 'l2', class_weight = 'balanced')
        #   RandomForestClassifier(n_estimators = 1000, class_weight = "balanced")
        #   KNeighborsClassifier(weights = "distance", n_neighbors = 10, p =9)
        #   svm.SVC(kernel = 'rbf', C = 2e4, gamma = 2e-5)
        #   GaussianNB()
        clf = svm.SVC(kernel= 'linear', C = 2e3)
        fitted = clf.fit(X_train, y_train)
        y_pred = fitted.predict(X_test)

        # NOTE(review): each score uses a different averaging mode.
        precision = precision_score(y_test, y_pred, average='macro')
        recall = recall_score(y_test, y_pred, average = 'micro')
        f1 = f1_score(y_test, y_pred, average = 'weighted')
        acc = accuracy_score(y_test, y_pred)

    Keras训练集、测试集与验证集

    # Split into training and test sets (20% held out for testing).
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)
    # Keras additionally holds out 10% of the training set for validation.
    history = model.fit(X_train, y_train, epochs=20, batch_size=1, shuffle=True, validation_split=0.1, verbose = 1, callbacks = None, validation_data = None)
  • 相关阅读:
    [POJ2104]K-th Number(区间第k值 记录初始状态)
    [POJ2007]Scrambled Polygon(计算几何 极角排序)
    [POJ1269]Intersecting Lines (计算几何)
    [POJ2318]TOYS (计算几何 行列式(叉乘)+二分)
    [HDOJ1394]Minimum Inversion Number(线段树,逆序数)
    Codeforces Round #319 (Div. 2) C. Vasya and Petya's Game 数学题
    BZOJ 1934 [Shoi2007]Vote 善意的投票 最小割
    BZOJ 1055 区间DP
    HDU4267 树状数组 不连续区间修改(三维)
    HDU 3308 线段树单点更新+区间查找最长连续子序列
  • 原文地址:https://www.cnblogs.com/taoyuyeit/p/11454477.html
Copyright © 2011-2022 走看看