zoukankan      html  css  js  c++  java
  • 吴裕雄--天生自然 PYTHON数据分析:基于Keras的CNN分析太空深处寻找系外行星数据

    #We import libraries for linear algebra, graphs, and evaluation of results
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.linear_model import LinearRegression
    from sklearn.preprocessing import StandardScaler
    from sklearn.metrics import roc_curve, roc_auc_score
    from scipy.ndimage.filters import uniform_filter1d
    #Keras is a high level neural networks library, based on either tensorflow or theano
    from keras.models import Sequential, Model
    from keras.layers import Conv1D, MaxPool1D, Dense, Dropout, Flatten, BatchNormalization, Input, concatenate, Activation
    from keras.optimizers import Adam
    INPUT_LIB = 'F:\kaggleDataSet\kepler-labelled\'
    raw_data = np.loadtxt(INPUT_LIB + 'exoTrain.csv', skiprows=1, delimiter=',')
    x_train = raw_data[:, 1:]
    y_train = raw_data[:, 0, np.newaxis] - 1.
    raw_data = np.loadtxt(INPUT_LIB + 'exoTest.csv', skiprows=1, delimiter=',')
    x_test = raw_data[:, 1:]
    y_test = raw_data[:, 0, np.newaxis] - 1.
    del raw_data
    x_train = ((x_train - np.mean(x_train, axis=1).reshape(-1,1))/ np.std(x_train, axis=1).reshape(-1,1))
    x_test = ((x_test - np.mean(x_test, axis=1).reshape(-1,1)) / np.std(x_test, axis=1).reshape(-1,1))
    x_train = np.stack([x_train, uniform_filter1d(x_train, axis=1, size=200)], axis=2)
    x_test = np.stack([x_test, uniform_filter1d(x_test, axis=1, size=200)], axis=2)
    model = Sequential()
    model.add(Conv1D(filters=8, kernel_size=11, activation='relu', input_shape=x_train.shape[1:]))
    model.add(MaxPool1D(strides=4))
    model.add(BatchNormalization())
    model.add(Conv1D(filters=16, kernel_size=11, activation='relu'))
    model.add(MaxPool1D(strides=4))
    model.add(BatchNormalization())
    model.add(Conv1D(filters=32, kernel_size=11, activation='relu'))
    model.add(MaxPool1D(strides=4))
    model.add(BatchNormalization())
    model.add(Conv1D(filters=64, kernel_size=11, activation='relu'))
    model.add(MaxPool1D(strides=4))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    def batch_generator(x_train, y_train, batch_size=32):
        """
        Gives equal number of positive and negative samples, and rotates them randomly in time
        """
        half_batch = batch_size // 2
        x_batch = np.empty((batch_size, x_train.shape[1], x_train.shape[2]), dtype='float32')
        y_batch = np.empty((batch_size, y_train.shape[1]), dtype='float32')
        
        yes_idx = np.where(y_train[:,0] == 1.)[0]
        non_idx = np.where(y_train[:,0] == 0.)[0]
        
        while True:
            np.random.shuffle(yes_idx)
            np.random.shuffle(non_idx)
        
            x_batch[:half_batch] = x_train[yes_idx[:half_batch]]
            x_batch[half_batch:] = x_train[non_idx[half_batch:batch_size]]
            y_batch[:half_batch] = y_train[yes_idx[:half_batch]]
            y_batch[half_batch:] = y_train[non_idx[half_batch:batch_size]]
        
            for i in range(batch_size):
                sz = np.random.randint(x_batch.shape[1])
                x_batch[i] = np.roll(x_batch[i], sz, axis = 0)
         
            yield x_batch, y_batch
    #Start with a slightly lower learning rate, to ensure convergence
    model.compile(optimizer=Adam(1e-5), loss = 'binary_crossentropy', metrics=['accuracy'])
    hist = model.fit_generator(batch_generator(x_train, y_train, 32), 
                               validation_data=(x_test, y_test), 
                               verbose=0, epochs=5,
                               steps_per_epoch=x_train.shape[1]//32)
    #Then speed things up a little
    model.compile(optimizer=Adam(4e-5), loss = 'binary_crossentropy', metrics=['accuracy'])
    hist = model.fit_generator(batch_generator(x_train, y_train, 32), 
                               validation_data=(x_test, y_test), 
                               verbose=2, epochs=40,
                               steps_per_epoch=x_train.shape[1]//32)

    plt.plot(hist.history['loss'], color='b')
    plt.plot(hist.history['val_loss'], color='r')
    plt.show()
    plt.plot(hist.history['acc'], color='b')
    plt.plot(hist.history['val_acc'], color='r')
    plt.show()

    non_idx = np.where(y_test[:,0] == 0.)[0]
    yes_idx = np.where(y_test[:,0] == 1.)[0]
    y_hat = model.predict(x_test)[:,0]
    plt.plot([y_hat[i] for i in yes_idx], 'bo')
    plt.show()
    plt.plot([y_hat[i] for i in non_idx], 'ro')
    plt.show()

    y_true = (y_test[:, 0] + 0.5).astype("int")
    fpr, tpr, thresholds = roc_curve(y_true, y_hat)
    plt.plot(thresholds, 1.-fpr)
    plt.plot(thresholds, tpr)
    plt.show()
    crossover_index = np.min(np.where(1.-fpr <= tpr))
    crossover_cutoff = thresholds[crossover_index]
    crossover_specificity = 1.-fpr[crossover_index]
    print("Crossover at {0:.2f} with specificity {1:.2f}".format(crossover_cutoff, crossover_specificity))
    plt.plot(fpr, tpr)
    plt.show()
    print("ROC area under curve is {0:.2f}".format(roc_auc_score(y_true, y_hat)))

    false_positives = np.where(y_hat * (1. - y_test) > 0.5)[0]
    for i in non_idx:
        if y_hat[i] > crossover_cutoff:
            print(i)
            plt.plot(x_test[i])
            plt.show()

  • 相关阅读:
    P2824 [HEOI2016/TJOI2016]排序
    P4930「FJ2014集训」采药人的路径
    FlexboxLayout 的一些基本介绍与基本用法
    Android——打造万能适配器(CommonAdapter)
    关于list、set、map的几点总结
    equals()和hashCode()区别?
    Android开发艺术探索(一)——Activity的生命周期和启动模式
    Android开发艺术探索(三)——View的事件体系
    Android五大布局重新回顾
    Android基础知识回顾
  • 原文地址:https://www.cnblogs.com/tszr/p/11258031.html
Copyright © 2011-2022 走看看