  • Miscellany: AI Intro course assignment, a CNN warm-up on the MNIST dataset

    MNIST Handwritten Digit Recognition: Lab Report
    1. Approach

    The dataset comes from http://yann.lecun.com/exdb/mnist/
    I have not yet fully understood the structure, behaviour, and theory of CNNs, so as a first step I wrote one by referring to existing models.
    Implementation: Keras 2.3.1 + TensorFlow 2.1 + CUDA 10.1
    The network is adapted from the AlexNet convolutional architecture, modified to suit the characteristics of the training samples.
    Since the training samples are all black-and-white images, only a single grayscale channel needs to be read, so the input shape is \(28 \times 28 \times 1\).
    The network structure is listed below; every activation function is ReLU and the optimizer is Adam.
    1: Convolution layer 1: \(5 \times 5\) kernels, stride \((2, 2)\), 64 filters, 'same' (zero) padding; output \(14 \times 14 \times 64\)
    2: Pooling layer 1: \(2 \times 2\) average pooling, stride \((2, 2)\), 'valid' padding; output \(7 \times 7 \times 64\)
    3: Batch normalization 1
    4: Convolution layer 2: \(3 \times 3\) kernels, stride \((1, 1)\), 192 filters, 'same' (zero) padding; output \(7 \times 7 \times 192\)
    5: Pooling layer 2: \(2 \times 2\) average pooling, stride \((1, 1)\), 'valid' padding; output \(6 \times 6 \times 192\)
    6: Batch normalization 2
    7: Fully connected layers + dropout (two 4096-unit dense layers with 0.5 dropout, then a 10-way softmax output)

    2. Code
    # CNNmod.py
    from tensorflow import keras
    
    def myNetwork(img_rows, img_cols):
        inputs = keras.Input(shape=[img_rows, img_cols, 1])
        # Convolution layer 1
        conv1 = keras.layers.Conv2D(filters=64, kernel_size=[5, 5], strides=[2, 2],
                                    activation=keras.activations.relu,
                                    use_bias=True, padding='same')(inputs)
        # Pooling layer 1
        pooling1 = keras.layers.AveragePooling2D(pool_size=[2, 2],
                                                 strides=[2, 2], padding='valid')(conv1)
        # Batch normalization 1 (axis=1 treats the row dimension as the feature axis;
        # for channels_last inputs the channel axis would be axis=-1)
        stand1 = keras.layers.BatchNormalization(axis=1)(pooling1)
        # Convolution layer 2
        conv2 = keras.layers.Conv2D(filters=192, kernel_size=[3, 3], strides=[1, 1],
                                    activation=keras.activations.relu,
                                    use_bias=True, padding='same')(stand1)
        # Pooling layer 2
        pooling2 = keras.layers.AveragePooling2D(pool_size=[2, 2],
                                                 strides=[1, 1], padding='valid')(conv2)
        # Batch normalization 2 (same axis note as above)
        stand2 = keras.layers.BatchNormalization(axis=1)(pooling2)
        # Fully connected layers
        flatten = keras.layers.Flatten()(stand2)
        fc1 = keras.layers.Dense(4096, activation=keras.activations.relu,
                                 use_bias=True)(flatten)
        drop1 = keras.layers.Dropout(0.5)(fc1)
        fc2 = keras.layers.Dense(4096, activation=keras.activations.relu,
                                 use_bias=True)(drop1)
        drop2 = keras.layers.Dropout(0.5)(fc2)
        fc3 = keras.layers.Dense(10, activation=keras.activations.softmax,
                                 use_bias=True)(drop2)
        # Build and return the model
        return keras.Model(inputs=inputs, outputs=fc3)
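
    A quick way to check the per-layer output shapes listed in Section 1 is to build the model and print its summary. A minimal sketch, assuming CNNmod.py lives in a model/ package as it does for the training script below:

    # check_shapes.py (sketch)
    import model.CNNmod as mod

    model = mod.myNetwork(28, 28)
    # Expected shapes: conv1 (None, 14, 14, 64), pool1 (None, 7, 7, 64),
    # conv2 (None, 7, 7, 192), pool2 (None, 6, 6, 192)
    model.summary()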
    

    Training main program:

    # train.py
    from tensorflow import keras
    import cv2
    import numpy as np
    import tensorflow as tf
    import model.CNNmod as mod
    
    batch_size = 128
    num_classes = 10
    epochs = 10
    img_shape = (28, 28, 1)
    img_rows, img_cols = 28, 28
    
    x_train = []
    x_test = []
    y_train = []
    y_test = []
    
    log_dir = "./logs/"
    
    # Read the training-set index file
    with open(r"./preprocess_train.txt", "r") as f:
        lines = f.readlines()
    for i in range(60000):
        name = lines[i].split(";")[0]
        img = cv2.imread(r"./train_set/" + name)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # convert to grayscale
        img = img / 255
        x_train.append(img)
        y_train.append(lines[i].split(';')[1])
    f.close()
    print("训练数据集读取完成")
    with open(r"./preprocess_test.txt", "r") as f:
        lines = f.readlines()
    for i in range(10000):
        name = lines[i].split(";")[0]
        img = cv2.imread(r"./test_set/" + name)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = img / 255
        x_test.append(img)
        y_test.append(lines[i].split(';')[1])
    f.close()
    print("测试数据集读取完成")
    # Convert the Python lists to numpy arrays
    x_train = np.array(x_train)
    x_test = np.array(x_test)
    y_train = np.array(y_train)
    y_test = np.array(y_test)
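    # One-hot encode the labels below: e.g. label 3 becomes [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]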
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)
    
    # Build and compile the model
    model = mod.myNetwork(28, 28)
    model.compile(optimizer=tf.optimizers.Adam(0.001),
                  loss=keras.losses.categorical_crossentropy,
                  metrics=['accuracy'])
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_test, y_test))
    model.save_weights(log_dir + 'latest.h5')
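
    After training, the saved weights can be reloaded to confirm the reported test error. A minimal sketch of a few lines that could be appended to train.py, reusing the x_test / y_test arrays and the names defined above:

    # Rebuild the model, load the saved weights, and evaluate on the test set
    eval_model = mod.myNetwork(28, 28)
    eval_model.compile(optimizer=tf.optimizers.Adam(0.001),
                       loss=keras.losses.categorical_crossentropy,
                       metrics=['accuracy'])
    eval_model.load_weights(log_dir + 'latest.h5')
    loss, acc = eval_model.evaluate(x_test, y_test, batch_size=batch_size)
    print("test error rate: {:.2%}".format(1 - acc))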
    

    Dataset preprocessing: OpenCV is used to convert the MNIST dataset into .jpg images, with the label stored in each file name; a .txt index file is also written to make the images easy to read back.

    # preprocess.py
    import os
    import struct
    import numpy as np
    import cv2
    
    def save_mnist_to_jpg(mnist_image_file, mnist_label_file, save_dir):
        if 'train' in os.path.basename(mnist_image_file):
            prefix = 'train'
        else:
            prefix = 'test'
        labelIndex = 0
        imageIndex = 0
        i = 0
        # Label file header: magic number and item count, big-endian
        lbdata = open(mnist_label_file, 'rb').read()
        magic, nums = struct.unpack_from(">II", lbdata, labelIndex)
        labelIndex += struct.calcsize('>II')
        # Image file header: magic number, item count, rows, columns
        imgdata = open(mnist_image_file, "rb").read()
        magic, nums, numRows, numColumns = struct.unpack_from(
            '>IIII', imgdata, imageIndex)
        imageIndex += struct.calcsize('>IIII')
        for i in range(nums):
            label = struct.unpack_from('>B', lbdata, labelIndex)[0]
            labelIndex += struct.calcsize('>B')
            im = struct.unpack_from('>784B', imgdata, imageIndex)
            imageIndex += struct.calcsize('>784B')
            im = np.array(im, dtype='uint8')
            img = im.reshape(28, 28)
            save_name = os.path.join(
                save_dir, '{}_{}_{}.jpg'.format(prefix, i, label))
            cv2.imwrite(save_name, img)
    
    if __name__ == '__main__':
        train_images = './dataset/train-images.idx3-ubyte'  # training-set image file
        train_labels = './dataset/train-labels.idx1-ubyte'  # training-set label file
        test_images = './dataset/t10k-images.idx3-ubyte'  # test-set image file
        test_labels = './dataset/t10k-labels.idx1-ubyte'  # test-set label file
        save_train_dir = './train_set'
        save_test_dir = './test_set'
        if not os.path.exists(save_train_dir):
            os.makedirs(save_train_dir)
        if not os.path.exists(save_test_dir):
            os.makedirs(save_test_dir)
    
        save_mnist_to_jpg(test_images, test_labels, save_test_dir)
        save_mnist_to_jpg(train_images, train_labels, save_train_dir)
    
        photos = os.listdir("./train_set")
        with open("./preprocess_train.txt", "w") as f:
            for photo in photos:
                num = photo.split("_")[2].split(".")[0]
                f.write(photo + ";" + num + "\n")
        f.close()
    
        photos = os.listdir("./test_set")
        with open("./preprocess_test.txt", "w") as f:
            for photo in photos:
                num = photo.split("_")[2].split(".")[0]
                f.write(photo + ";" + num + "\n")
        f.close()
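
    As a quick sanity check of the conversion, read back the first line of the index file and the matching image. A minimal sketch (the paths are those produced by the script above; the exact file name depends on os.listdir order):

    # check_preprocess.py (sketch)
    import cv2

    with open("./preprocess_train.txt", "r") as f:
        name, label = f.readline().strip().split(";")
    img = cv2.imread("./train_set/" + name, cv2.IMREAD_GRAYSCALE)
    print(name, label, img.shape)  # expect a (28, 28) image and its label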
    
    3. Experimental results and tweaks

    Training with the code above on all 60000 samples for 10 epochs with a batch_size of 128, and evaluating on all 10000 test images: with CUDA 10.1 + an RTX 2070, training finished in roughly 60 seconds and the test-set error rate was about 0.9%.
    Next, the code above was modified, changing only one variable at a time relative to the original model:
    (1) Change the pooling method: switching both pooling layers from average pooling to max pooling gives a test-set error rate of about 0.86% (see the snippet after this list).
    (2) Change the pooling layers' padding from 'valid' to 'same': no notable change in the test-set error rate.
    (3) Change the activation of convolution layer 1 from relu to sigmoid: the test error converges faster, and after the second epoch it is already down to about 1.78% (with relu it is about 5% at the same stage); however, with sigmoid the final error rate stops improving at about 1.45% (versus about 0.9% with relu).
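
    For reference, modification (1) only touches the two pooling layers in CNNmod.py; a minimal sketch of the replaced lines (everything else in myNetwork stays as in Section 2):

        # Pooling layer 1: max pooling instead of average pooling
        pooling1 = keras.layers.MaxPooling2D(pool_size=[2, 2],
                                             strides=[2, 2], padding='valid')(conv1)
        # Pooling layer 2: max pooling instead of average pooling
        pooling2 = keras.layers.MaxPooling2D(pool_size=[2, 2],
                                             strides=[1, 1], padding='valid')(conv2)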

    Reference: https://blog.csdn.net/weixin_41055137/article/details/81071226

  • Original post: https://www.cnblogs.com/allegro-vivace/p/12657288.html