  Two-Layer Neural Network in Practice

    1 Overview

    A two-layer fully connected network, trained and evaluated on the CIFAR-10 image classification dataset.
    Implementation environment: Python 3
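
    The network is: input layer → hidden layer with ReLU → output layer with softmax. For an input batch $X$, the forward pass (matching the code in Section 3) is

    $$Z_1 = XW_1 + b_1,\qquad A_1 = \max(0, Z_1),\qquad S = A_1W_2 + b_2,$$

    and the training loss is the softmax cross-entropy over the scores $S$ plus an L2 penalty:

    $$L = -\frac{1}{N}\sum_{i}\log\frac{e^{S_{i,y_i}}}{\sum_{j}e^{S_{i,j}}} + \frac{\lambda}{2}\left(\lVert W_1\rVert^2 + \lVert W_2\rVert^2\right),$$

    where $N$ is the batch size and $\lambda$ is the regularization strength `reg`.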

    2 Data Processing

    2.1 Loading the Dataset

    Place the raw CIFAR-10 dataset in the "data/cifar10/" folder.
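
    If the raw data is not on disk yet, here is a minimal sketch for fetching it (this assumes the official Python-version archive from the dataset page, which extracts to a cifar-10-batches-py folder):

    # Download and unpack CIFAR-10 into data/cifar10/ (a sketch; assumes the
    # standard cifar-10-python.tar.gz archive from the official dataset page).
    import os
    import tarfile
    import urllib.request

    URL = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
    if not os.path.exists("data/cifar10/data_batch_1"):
        os.makedirs("data", exist_ok=True)
        archive, _ = urllib.request.urlretrieve(URL, "cifar-10-python.tar.gz")
        with tarfile.open(archive, "r:gz") as tar:
            tar.extractall("data")  # creates data/cifar-10-batches-py/
        os.rename("data/cifar-10-batches-py", "data/cifar10")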

    ### Load the CIFAR-10 dataset
    import os
    import pickle
    import numpy as np
    import matplotlib.pyplot as plt
    
    def load_CIFAR_batch(filename):
        """
        The CIFAR-10 dataset is stored in batches; this loads a single batch.

        @param filename: path of the CIFAR batch file
        @return: X, Y: the data and labels of the CIFAR batch
        """
    
        with open(filename,'rb') as f:
            datadict=pickle.load(f,encoding='bytes')
    
            X=datadict[b'data']
            Y=datadict[b'labels']
            
            X=X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float")
            Y=np.array(Y)
            
            return X, Y
    
    def load_CIFAR10(ROOT):
        """
        读取载入整个 CIFAR-10 数据集
    
        @参数 ROOT: 根目录名
        @return: X_train, Y_train: 训练集 data 和 labels
                 X_test, Y_test: 测试集 data 和 labels
        """
    
        xs=[]
        ys=[]
    
        for b in range(1,6):
            f=os.path.join(ROOT, "data_batch_%d" % (b, ))
            X, Y=load_CIFAR_batch(f)
            xs.append(X)
            ys.append(Y)
    
        X_train=np.concatenate(xs)
        Y_train=np.concatenate(ys)
    
        del X, Y
    
        X_test, Y_test=load_CIFAR_batch(os.path.join(ROOT, "test_batch"))
    
        return X_train, Y_train, X_test, Y_test
      
      
    X_train, y_train, X_test, y_test = load_CIFAR10('data/cifar10/') 
    
    print(X_train.shape)
    print(y_train.shape)
    print(X_test.shape)
    print(y_test.shape)
    

    The output is:

    (50000, 32, 32, 3)
    (50000,)
    (10000, 32, 32, 3)
    (10000,)
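
    Before reshaping anything, it is worth eyeballing the data. A small sketch (class names listed in the standard CIFAR-10 label order; `np` and `plt` were imported above) that shows a few training images per class:

    # Display a few random training images from each of the 10 classes.
    classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
    samples_per_class = 7
    for label, cls in enumerate(classes):
        idxs = np.flatnonzero(y_train == label)
        idxs = np.random.choice(idxs, samples_per_class, replace=False)
        for row, idx in enumerate(idxs):
            plt.subplot(samples_per_class, len(classes), row * len(classes) + label + 1)
            plt.imshow(X_train[idx].astype('uint8'))
            plt.axis('off')
            if row == 0:
                plt.title(cls)
    plt.show()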
    

    2.2 Splitting the Dataset

    Split the loaded data into a training set, a validation set, and a test set.

    ## Split into training, validation, and test sets
    
    num_train = 49000
    num_val = 1000
    num_test = 1000
    
    # Validation set
    mask = range(num_train, num_train + num_val)
    X_val = X_train[mask]
    y_val = y_train[mask]
    
    # Train set
    mask = range(num_train)
    X_train = X_train[mask]
    y_train = y_train[mask]
    
    # Test set
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]
    
    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    
    print('Train data shape: ', X_train.shape)
    print('Validation data shape: ', X_val.shape)
    print('Test data shape: ', X_test.shape)
    

    The output is:

    Train data shape:  (49000, 3072)
    Validation data shape:  (1000, 3072)
    Test data shape:  (1000, 3072)
    

    2.3 Normalization

    Zero-center every split by subtracting the mean image, which is computed over the training set only.

    # Preprocessing: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
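
    Only the training-set mean is used for all three splits, so no statistics leak from the validation or test data into preprocessing. A one-line sanity check that the training split is now centered:

    # After centering, the per-feature mean of the training set should be ~0.
    print(np.abs(np.mean(X_train, axis=0)).max())  # expect a value very close to 0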
    

    3 A Two-Layer Neural Network Classifier

    3.1 Defining the Two-Layer Network

    class TwoLayerNet(object):
        def __init__(self, input_size, hidden_size, output_size, std = 1e-4):
            """
            Initialize the model weights
            W1: First layer weights; has shape (D, H)
            b1: First layer biases; has shape (H,)
            W2: Second layer weights; has shape (H, C)
            b2: Second layer biases; has shape (C,)
            Inputs:
            - input_size: The dimension D of the input data.
            - hidden_size: The number of neurons H in the hidden layer.
            - output_size: The number of classes C.
            """
            self.params = {}
            self.params['W1'] = std * np.random.randn(input_size, hidden_size)
            self.params['b1'] = np.zeros(hidden_size)
            self.params['W2'] = std * np.random.randn(hidden_size, output_size)
            self.params['b2'] = np.zeros(output_size)
            
        def loss(self, X, y, reg = 0.0):
            """
            Two-layer network loss function, vectorized implementation (no explicit loops).
            Inputs:
            - X: A numpy array of shape (num_train, D) containing the training data:
              num_train samples, each of dimension D
            - y: A numpy array of shape (num_train,) containing the training labels,
              where y[i] is the label of X[i]
            - reg: float, regularization strength
            Returns:
            - loss: the loss value between the predictions and the ground truth
            - grads: Dictionary mapping parameter names to gradients of those parameters
              with respect to the loss function; has the same keys as self.params,
              i.e. 'W1', 'b1', 'W2', 'b2'
            """
            N, dim = X.shape
            grads = {}
            
            # input layer ==> hidden layer ==> ReLU ==> output layer ==> Softmax
            W1 = self.params['W1']
            b1 = self.params['b1']
            W2 = self.params['W2']
            b2 = self.params['b2']
            
            # input layer==> hidden layer
            Z1 = np.dot(X, W1) + b1
            # hidden layer ==> ReLU
            A1 = np.maximum(0, Z1)    # ReLU function
            # ReLU ==> output layer
            scores = np.dot(A1, W2) + b2
            # output layer ==> Softmax
            scores_shift = scores - np.max(scores, axis=1).reshape(-1, 1)
            Softmax_output = np.exp(scores_shift) / np.sum(np.exp(scores_shift), axis=1).reshape(-1, 1)
            loss = -np.sum(np.log(Softmax_output[range(N), list(y)]))
            loss /= N
            loss += 0.5 * reg * np.sum(W1 * W1) + 0.5 * reg * np.sum(W2 * W2)
            
            # grads
            dS = Softmax_output.copy()
            dS[range(N), list(y)] += -1
            dS /= N
            dW2 = np.dot(A1.T, dS)
            db2 = np.sum(dS, axis=0)
            dA1 = np.dot(dS, W2.T)
            dZ1 = dA1 * (A1 > 0)
            dW1 = np.dot(X.T, dZ1)
            db1 = np.sum(dZ1, axis=0)
            dW2 += reg * W2
            dW1 += reg * W1
            
            grads['W1'] = dW1
            grads['b1'] = db1
            grads['W2'] = dW2
            grads['b2'] = db2
            
            return loss, grads
        
        def predict(self, X):
            """
            Use the trained weights to predict labels for input data.
            Inputs:
            - X: A numpy array of shape (num_test, D) containing the data to classify
            Outputs:
            - y_pred: A numpy array of predicted labels for the data in X
            """
            W1 = self.params['W1']
            b1 = self.params['b1']
            W2 = self.params['W2']
            b2 = self.params['b2']
            
            Z1 = np.dot(X, W1) + b1
            A1 = np.maximum(0, Z1)    # ReLU function
            scores = np.dot(A1, W2) + b2
            y_pred = np.argmax(scores, axis=1)
                
            return y_pred
        
        def train(self, X, y, X_val, y_val, learning_rate=1e-3, learning_rate_decay=0.95,
                reg=5e-6, num_iters=100, batch_size=200, print_flag=False):
            """
            Train the two-layer neural network classifier using SGD.
            Inputs:
            - X: A numpy array of shape (num_train, D) containing the training data:
              num_train samples, each of dimension D
            - y: A numpy array of shape (num_train,) containing the training labels,
              where y[i] = c means X[i] has label c, with 0 <= c < C
            - X_val: A numpy array of shape (num_val, D) containing the validation data
            - y_val: A numpy array of shape (num_val,) containing the validation labels
            - learning_rate: (float) learning rate for optimization
            - learning_rate_decay: scalar factor used to decay the learning rate
              after each epoch
            - reg: (float) regularization strength
            - num_iters: (integer) number of optimization steps
            - batch_size: (integer) number of training examples to use at each step
            - print_flag: (boolean) if true, print progress during optimization
            Outputs:
            - A dictionary containing loss_history, train_accuracy_history and
              val_accuracy_history
            """
            num_train = X.shape[0]
            iterations_per_epoch = max(num_train // batch_size, 1)  # integer, for the epoch check below
            loss_history = []
            train_accuracy_history = []
            val_accuracy_history = []
            
            for t in range(num_iters):
                idx_batch = np.random.choice(num_train, batch_size, replace=True)
                X_batch = X[idx_batch]
                y_batch = y[idx_batch]
                loss, grads = self.loss(X_batch, y_batch, reg)
                loss_history.append(loss)
                self.params['W1'] += -learning_rate * grads['W1']
                self.params['b1'] += -learning_rate * grads['b1']
                self.params['W2'] += -learning_rate * grads['W2']
                self.params['b2'] += -learning_rate * grads['b2']
                
                # Every epoch, check train and val accuracy and decay learning rate.
                if t % iterations_per_epoch == 0:
                    train_accuracy = np.mean(self.predict(X_batch) == y_batch)
                    val_accuracy = np.mean(self.predict(X_val) == y_val)
                    train_accuracy_history.append(train_accuracy)
                    val_accuracy_history.append(val_accuracy)
                    
                    # Decay learning rate
                    learning_rate *= learning_rate_decay
                
                # print the progress during optimization
                if print_flag and t % 100 == 0:
                    print('iteration %d / %d: loss %f' % (t, num_iters, loss))
                
            return {
                'loss_history': loss_history,
                'train_accuracy_history': train_accuracy_history,
                'val_accuracy_history': val_accuracy_history,
            }
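
    Before training on the full dataset, it is prudent to check the analytic gradients in `loss` against numerical gradients on a tiny random model. A minimal sketch; the toy sizes, `std`, `reg`, and the step `h` are arbitrary choices:

    # Compare analytic gradients with centered-difference numerical gradients.
    def numerical_gradient(f, x, h=1e-5):
        grad = np.zeros_like(x)
        it = np.nditer(x, flags=['multi_index'])
        while not it.finished:
            ix = it.multi_index
            old = x[ix]
            x[ix] = old + h
            fxph = f()             # loss with this entry perturbed up
            x[ix] = old - h
            fxmh = f()             # loss with this entry perturbed down
            x[ix] = old            # restore the original value
            grad[ix] = (fxph - fxmh) / (2 * h)
            it.iternext()
        return grad

    toy_net = TwoLayerNet(input_size=4, hidden_size=10, output_size=3, std=1e-1)
    toy_X = np.random.randn(5, 4)
    toy_y = np.array([0, 1, 2, 2, 1])
    loss, grads = toy_net.loss(toy_X, toy_y, reg=0.05)
    for name in ['W1', 'b1', 'W2', 'b2']:
        f = lambda: toy_net.loss(toy_X, toy_y, reg=0.05)[0]
        num_grad = numerical_gradient(f, toy_net.params[name])
        rel_err = np.max(np.abs(num_grad - grads[name]) /
                         np.maximum(1e-8, np.abs(num_grad) + np.abs(grads[name])))
        print('%s max relative error: %e' % (name, rel_err))  # expect a tiny value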
            
    

    Train the model with stochastic gradient descent and evaluate it on the validation set.

    input_size = 32 * 32 * 3
    hidden_size = 50
    num_classes = 10
    net = TwoLayerNet(input_size, hidden_size, num_classes)
    
    # Train the network
    stats = net.train(X_train, y_train, X_val, y_val,
                num_iters=1000, batch_size=200,
                learning_rate=1e-4, learning_rate_decay=0.95,
                reg=0.25, print_flag=True)
    
    # Predict on the validation set
    val_acc = (net.predict(X_val) == y_val).mean()
    print('Validation accuracy: ', val_acc)
    

    The output is:

    iteration 0 / 1000: loss 2.302776
    iteration 100 / 1000: loss 2.302100
    iteration 200 / 1000: loss 2.296930
    iteration 300 / 1000: loss 2.261319
    iteration 400 / 1000: loss 2.174613
    iteration 500 / 1000: loss 2.075674
    iteration 600 / 1000: loss 2.103124
    iteration 700 / 1000: loss 2.036040
    iteration 800 / 1000: loss 1.931442
    iteration 900 / 1000: loss 1.906379
    Validation accuracy:  0.281
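
    The `stats` dictionary returned by `train` records the whole run, so it is easy to check that the loss is falling and to compare train/validation accuracy per epoch:

    # Plot the loss curve and the per-epoch accuracy curves.
    plt.subplot(2, 1, 1)
    plt.plot(stats['loss_history'])
    plt.title('Loss history')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')

    plt.subplot(2, 1, 2)
    plt.plot(stats['train_accuracy_history'], label='train')
    plt.plot(stats['val_accuracy_history'], label='val')
    plt.title('Classification accuracy history')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.tight_layout()
    plt.show()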
    

    3.2 Tuning the Hyperparameters

    # Hyperparameter search
    # Candidate hyperparameter values
    learning_rates = [1e-4, 5e-4, 9e-4, 13e-4, 15e-4]
    regularization_strengths = [0.25, 0.5, 0.75, 1.0]
    num_iters = 3000
    batch_size = 200
    learning_rate_decay = 0.98
    
    # Net structure
    input_size = 32 * 32 * 3
    hidden_size = [50, 100, 150]
    num_classes = 10
    
    # Initialization
    best_net = None
    best_hidden_size = None
    best_val = -1
    best_lr = None
    best_reg = None
    results = {}
    
    # Train the two layers network
    for i in range(len(hidden_size)):
        for lr in learning_rates:
            for reg in regularization_strengths:
                net = TwoLayerNet(input_size, hidden_size[i], num_classes)
                stats = net.train(X_train, y_train, X_val, y_val,
                    num_iters=num_iters, batch_size=batch_size,
                    learning_rate=lr, learning_rate_decay=learning_rate_decay,
                    reg=reg, print_flag=False)
                train_accuracy = stats['train_accuracy_history'][-1]
                val_accuracy = stats['val_accuracy_history'][-1]
                if val_accuracy > best_val:
                    best_lr = lr
                    best_reg = reg
                    best_val = val_accuracy
                    best_net = net
                    best_hidden_size = hidden_size[i]
                results[(hidden_size[i], lr, reg)] = (train_accuracy, val_accuracy)
                print('hidden_size: %d lr: %e reg: %e train accuracy: %f val accuracy: %f' %
                      (hidden_size[i], lr, reg, train_accuracy, val_accuracy))
    best_key = (best_hidden_size, best_lr, best_reg)
    print('Best hidden_size: %d\nBest lr: %e\nBest reg: %e\ntrain accuracy: %f\nval accuracy: %f' %
          (best_hidden_size, best_lr, best_reg, results[best_key][0], results[best_key][1]))
    

    The output is:

    hidden_size: 50 lr: 1.000000e-04 reg: 2.500000e-01 train accuracy: 0.410000 val accuracy: 0.407000
    hidden_size: 50 lr: 1.000000e-04 reg: 5.000000e-01 train accuracy: 0.415000 val accuracy: 0.399000
    hidden_size: 50 lr: 1.000000e-04 reg: 7.500000e-01 train accuracy: 0.370000 val accuracy: 0.400000
    hidden_size: 50 lr: 1.000000e-04 reg: 1.000000e+00 train accuracy: 0.420000 val accuracy: 0.403000
    hidden_size: 50 lr: 5.000000e-04 reg: 2.500000e-01 train accuracy: 0.595000 val accuracy: 0.494000
    hidden_size: 50 lr: 5.000000e-04 reg: 5.000000e-01 train accuracy: 0.595000 val accuracy: 0.505000
    hidden_size: 50 lr: 5.000000e-04 reg: 7.500000e-01 train accuracy: 0.495000 val accuracy: 0.483000
    hidden_size: 50 lr: 5.000000e-04 reg: 1.000000e+00 train accuracy: 0.600000 val accuracy: 0.475000
    hidden_size: 50 lr: 9.000000e-04 reg: 2.500000e-01 train accuracy: 0.670000 val accuracy: 0.496000
    hidden_size: 50 lr: 9.000000e-04 reg: 5.000000e-01 train accuracy: 0.675000 val accuracy: 0.510000
    hidden_size: 50 lr: 9.000000e-04 reg: 7.500000e-01 train accuracy: 0.610000 val accuracy: 0.485000
    hidden_size: 50 lr: 9.000000e-04 reg: 1.000000e+00 train accuracy: 0.635000 val accuracy: 0.487000
    hidden_size: 50 lr: 1.300000e-03 reg: 2.500000e-01 train accuracy: 0.700000 val accuracy: 0.486000
    hidden_size: 50 lr: 1.300000e-03 reg: 5.000000e-01 train accuracy: 0.590000 val accuracy: 0.500000
    hidden_size: 50 lr: 1.300000e-03 reg: 7.500000e-01 train accuracy: 0.615000 val accuracy: 0.466000
    hidden_size: 50 lr: 1.300000e-03 reg: 1.000000e+00 train accuracy: 0.620000 val accuracy: 0.481000
    hidden_size: 50 lr: 1.500000e-03 reg: 2.500000e-01 train accuracy: 0.660000 val accuracy: 0.485000
    hidden_size: 50 lr: 1.500000e-03 reg: 5.000000e-01 train accuracy: 0.585000 val accuracy: 0.466000
    hidden_size: 50 lr: 1.500000e-03 reg: 7.500000e-01 train accuracy: 0.655000 val accuracy: 0.483000
    hidden_size: 50 lr: 1.500000e-03 reg: 1.000000e+00 train accuracy: 0.630000 val accuracy: 0.484000
    hidden_size: 100 lr: 1.000000e-04 reg: 2.500000e-01 train accuracy: 0.325000 val accuracy: 0.413000
    hidden_size: 100 lr: 1.000000e-04 reg: 5.000000e-01 train accuracy: 0.340000 val accuracy: 0.416000
    hidden_size: 100 lr: 1.000000e-04 reg: 7.500000e-01 train accuracy: 0.375000 val accuracy: 0.421000
    hidden_size: 100 lr: 1.000000e-04 reg: 1.000000e+00 train accuracy: 0.480000 val accuracy: 0.409000
    hidden_size: 100 lr: 5.000000e-04 reg: 2.500000e-01 train accuracy: 0.605000 val accuracy: 0.496000
    hidden_size: 100 lr: 5.000000e-04 reg: 5.000000e-01 train accuracy: 0.580000 val accuracy: 0.510000
    hidden_size: 100 lr: 5.000000e-04 reg: 7.500000e-01 train accuracy: 0.605000 val accuracy: 0.496000
    hidden_size: 100 lr: 5.000000e-04 reg: 1.000000e+00 train accuracy: 0.540000 val accuracy: 0.508000
    hidden_size: 100 lr: 9.000000e-04 reg: 2.500000e-01 train accuracy: 0.720000 val accuracy: 0.509000
    hidden_size: 100 lr: 9.000000e-04 reg: 5.000000e-01 train accuracy: 0.665000 val accuracy: 0.507000
    hidden_size: 100 lr: 9.000000e-04 reg: 7.500000e-01 train accuracy: 0.630000 val accuracy: 0.512000
    hidden_size: 100 lr: 9.000000e-04 reg: 1.000000e+00 train accuracy: 0.610000 val accuracy: 0.497000
    hidden_size: 100 lr: 1.300000e-03 reg: 2.500000e-01 train accuracy: 0.720000 val accuracy: 0.495000
    hidden_size: 100 lr: 1.300000e-03 reg: 5.000000e-01 train accuracy: 0.775000 val accuracy: 0.524000
    hidden_size: 100 lr: 1.300000e-03 reg: 7.500000e-01 train accuracy: 0.640000 val accuracy: 0.503000
    hidden_size: 100 lr: 1.300000e-03 reg: 1.000000e+00 train accuracy: 0.665000 val accuracy: 0.478000
    hidden_size: 100 lr: 1.500000e-03 reg: 2.500000e-01 train accuracy: 0.650000 val accuracy: 0.516000
    hidden_size: 100 lr: 1.500000e-03 reg: 5.000000e-01 train accuracy: 0.675000 val accuracy: 0.499000
    hidden_size: 100 lr: 1.500000e-03 reg: 7.500000e-01 train accuracy: 0.635000 val accuracy: 0.493000
    hidden_size: 100 lr: 1.500000e-03 reg: 1.000000e+00 train accuracy: 0.600000 val accuracy: 0.493000
    hidden_size: 150 lr: 1.000000e-04 reg: 2.500000e-01 train accuracy: 0.410000 val accuracy: 0.420000
    hidden_size: 150 lr: 1.000000e-04 reg: 5.000000e-01 train accuracy: 0.475000 val accuracy: 0.415000
    hidden_size: 150 lr: 1.000000e-04 reg: 7.500000e-01 train accuracy: 0.385000 val accuracy: 0.422000
    hidden_size: 150 lr: 1.000000e-04 reg: 1.000000e+00 train accuracy: 0.375000 val accuracy: 0.425000
    hidden_size: 150 lr: 5.000000e-04 reg: 2.500000e-01 train accuracy: 0.605000 val accuracy: 0.524000
    hidden_size: 150 lr: 5.000000e-04 reg: 5.000000e-01 train accuracy: 0.550000 val accuracy: 0.512000
    hidden_size: 150 lr: 5.000000e-04 reg: 7.500000e-01 train accuracy: 0.565000 val accuracy: 0.511000
    hidden_size: 150 lr: 5.000000e-04 reg: 1.000000e+00 train accuracy: 0.580000 val accuracy: 0.501000
    hidden_size: 150 lr: 9.000000e-04 reg: 2.500000e-01 train accuracy: 0.635000 val accuracy: 0.526000
    hidden_size: 150 lr: 9.000000e-04 reg: 5.000000e-01 train accuracy: 0.615000 val accuracy: 0.512000
    hidden_size: 150 lr: 9.000000e-04 reg: 7.500000e-01 train accuracy: 0.655000 val accuracy: 0.523000
    hidden_size: 150 lr: 9.000000e-04 reg: 1.000000e+00 train accuracy: 0.585000 val accuracy: 0.507000
    hidden_size: 150 lr: 1.300000e-03 reg: 2.500000e-01 train accuracy: 0.745000 val accuracy: 0.519000
    hidden_size: 150 lr: 1.300000e-03 reg: 5.000000e-01 train accuracy: 0.655000 val accuracy: 0.501000
    hidden_size: 150 lr: 1.300000e-03 reg: 7.500000e-01 train accuracy: 0.625000 val accuracy: 0.530000
    hidden_size: 150 lr: 1.300000e-03 reg: 1.000000e+00 train accuracy: 0.645000 val accuracy: 0.506000
    hidden_size: 150 lr: 1.500000e-03 reg: 2.500000e-01 train accuracy: 0.660000 val accuracy: 0.478000
    hidden_size: 150 lr: 1.500000e-03 reg: 5.000000e-01 train accuracy: 0.675000 val accuracy: 0.485000
    hidden_size: 150 lr: 1.500000e-03 reg: 7.500000e-01 train accuracy: 0.670000 val accuracy: 0.512000
    hidden_size: 150 lr: 1.500000e-03 reg: 1.000000e+00 train accuracy: 0.655000 val accuracy: 0.495000
    Best hidden_size: 150
    Best lr: 1.300000e-03
    Best reg: 7.500000e-01
    train accuracy: 0.625000
    val accuracy: 0.530000
    

    Finally, evaluate the best network's performance on the test set:

    test_acc = (best_net.predict(X_test) == y_test).mean()
    print('Test accuracy: ', test_acc)
    

    The output is:

    Test accuracy:  0.504
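
    As a final qualitative check, the first-layer weights of `best_net` can be reshaped back into 32x32x3 images and displayed; a sketch (the min-max rescaling to [0, 255] is just one common display choice):

    # Visualize the learned first-layer weights as color images.
    W1 = best_net.params['W1'].reshape(32, 32, 3, -1).transpose(3, 0, 1, 2)
    cols = 10
    rows = int(np.ceil(W1.shape[0] / cols))
    for i in range(W1.shape[0]):
        plt.subplot(rows, cols, i + 1)
        w = W1[i]
        w = 255.0 * (w - w.min()) / (w.max() - w.min())  # rescale for display
        plt.imshow(w.astype('uint8'))
        plt.axis('off')
    plt.show()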
    