zoukankan      html  css  js  c++  java
  • TensorFlow笔记五:将cifar10和Mnist数据集文件复原成图片格式

    一、cifar10数据集

    CIFAR-10 数据集(下载地址:http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz)的原始格式是经 pickle 序列化的数据文件,为了便于查看和训练,需要先把它转换成图片格式。

    转换代码:

    注意:请把代码中的文件路径改成自己的文件路径,并确保输出目录 train 和 test 存在(与脚本文件在同一目录下),然后等待转换完成。

    from scipy.misc import imsave
    import numpy as np
    
    # Deserialize a pickled batch file and return the stored object.
    def unpickle(file):
        """Load a pickle file and return the object it contains.

        Args:
            file: Path of the pickle file to read.

        Returns:
            The unpickled object. The callers in this script index the result
            with the keys 'data' and 'labels'.

        Note:
            Unpickling can execute arbitrary code, so only call this on files
            obtained from a trusted source.
        """
        import pickle as pk

        # The context manager closes the file even when pk.load raises.
        with open(file, 'rb') as fo:
            # The explicit encoding tells pickle how to decode 8-bit strings
            # that were written by Python 2.
            return pk.load(fo, encoding='iso-8859-1')
    
    # Export the training-set images: every row of data_batch_1..5 becomes
    # train/<label>_<index>.jpg.
    # NOTE(review): scipy.misc.imsave is, to my knowledge, gone from SciPy >= 1.2;
    # confirm the installed SciPy version still provides it.
    import os

    # The images are written into train/, so make sure the folder exists.
    os.makedirs('train', exist_ok=True)

    saved = 0  # running index across all batches, keeps file names unique
    for j in range(1, 6):
        dataName = "cifar-10-python/cifar-10-batches-py/data_batch_" + str(j)
        print (dataName + " is loading...")
        Xtr = unpickle(dataName)

        # Iterate over the rows that are actually present instead of assuming
        # exactly 10000 images per batch.
        for row, label in zip(Xtr['data'], Xtr['labels']):
            # Each row is a flat vector laid out channel-first (3, 32, 32);
            # move the channel axis last to get a 32x32x3 image.
            img = np.reshape(row, (3, 32, 32)).transpose(1, 2, 0)
            picName = 'train/' + str(label) + '_' + str(saved) + '.jpg'
            imsave(picName, img)
            saved += 1
        print (dataName + " loaded.")
    
    print ("test_batch is loading...")

    # Export the test-set images: every row of test_batch becomes
    # test/<label>_<index>.jpg.
    import os

    # The images are written into test/, so make sure the folder exists.
    os.makedirs('test', exist_ok=True)

    # NOTE(review): the training batches are read from
    # cifar-10-python/cifar-10-batches-py/ while test_batch is read from the
    # current directory; confirm the intended location.
    testXtr = unpickle("test_batch")

    # Iterate over the rows that are actually present instead of assuming
    # exactly 10000 images.
    for i, (row, label) in enumerate(zip(testXtr['data'], testXtr['labels'])):
        # Flat channel-first vector -> 32x32x3 image with channels last.
        img = np.reshape(row, (3, 32, 32)).transpose(1, 2, 0)
        picName = 'test/' + str(label) + '_' + str(i) + '.jpg'
        imsave(picName, img)
    print ("test_batch loaded.")

    二、mnist数据集的转化

    1、先解压出二进制文件,再依次运行下面两段脚本(分别转换训练集和测试集)

    import numpy as np  
    import struct  
    
    from PIL import Image  
    import os  
    
    # Convert the MNIST training set into PNG files stored as
    # <datas_root>/<label>/mnist_train_<index>.png.
    data_file = 'MNIST_data/train-images.idx3-ubyte'  # adjust to your own path
    label_file = 'MNIST_data/train-labels.idx1-ubyte'  # adjust to your own path
    datas_root = 'MNIST_data/mnist_train'  # adjust to your own path

    # Image file: a 16-byte big-endian header (magic, image count, rows,
    # columns) followed by one unsigned byte per pixel.
    with open(data_file, 'rb') as f:
        data_buf = f.read()
    magic, numImages, numRows, numColumns = struct.unpack_from(
        '>IIII', data_buf, 0)
    # Size the payload from the header rather than from a hard-coded file
    # size, and let NumPy view the bytes directly instead of unpacking every
    # pixel into a Python tuple first.
    datas = np.frombuffer(
        data_buf, dtype=np.uint8,
        count=numImages * numRows * numColumns,
        offset=struct.calcsize('>IIII'),
    ).reshape(numImages, 1, numRows, numColumns)

    # Label file: an 8-byte big-endian header (magic, label count) followed by
    # one unsigned byte per label.
    with open(label_file, 'rb') as f:
        label_buf = f.read()
    magic, numLabels = struct.unpack_from('>II', label_buf, 0)
    labels = np.frombuffer(
        label_buf, dtype=np.uint8,
        count=numLabels,
        offset=struct.calcsize('>II'),
    ).astype(np.int64)

    # One sub-folder per digit class; makedirs also creates datas_root itself.
    for i in range(10):
        os.makedirs(os.path.join(datas_root, str(i)), exist_ok=True)

    for ii in range(numLabels):
        # datas[ii, 0] is the full numRows x numColumns image of sample ii.
        img = Image.fromarray(datas[ii, 0])
        label = labels[ii]
        file_name = os.path.join(
            datas_root, str(label), 'mnist_train_' + str(ii) + '.png')
        img.save(file_name)
    import numpy as np  
    import struct  
    
    from PIL import Image  
    import os  
    
    # Convert the MNIST test set into PNG files stored as
    # <datas_root>/<label>/mnist_test_<index>.png.
    data_file = 'MNIST_data/t10k-images.idx3-ubyte'  # adjust to your own path
    label_file = 'MNIST_data/t10k-labels.idx1-ubyte'  # adjust to your own path
    datas_root = 'MNIST_data/mnist_test'  # adjust to your own path

    # Image file: a 16-byte big-endian header (magic, image count, rows,
    # columns) followed by one unsigned byte per pixel.
    with open(data_file, 'rb') as f:
        data_buf = f.read()
    magic, numImages, numRows, numColumns = struct.unpack_from(
        '>IIII', data_buf, 0)
    # Size the payload from the header rather than from a hard-coded file
    # size, and let NumPy view the bytes directly instead of unpacking every
    # pixel into a Python tuple first.
    datas = np.frombuffer(
        data_buf, dtype=np.uint8,
        count=numImages * numRows * numColumns,
        offset=struct.calcsize('>IIII'),
    ).reshape(numImages, 1, numRows, numColumns)

    # Label file: an 8-byte big-endian header (magic, label count) followed by
    # one unsigned byte per label.
    with open(label_file, 'rb') as f:
        label_buf = f.read()
    magic, numLabels = struct.unpack_from('>II', label_buf, 0)
    labels = np.frombuffer(
        label_buf, dtype=np.uint8,
        count=numLabels,
        offset=struct.calcsize('>II'),
    ).astype(np.int64)

    # One sub-folder per digit class; makedirs also creates datas_root itself.
    for i in range(10):
        os.makedirs(os.path.join(datas_root, str(i)), exist_ok=True)

    for ii in range(numLabels):
        # datas[ii, 0] is the full numRows x numColumns image of sample ii.
        img = Image.fromarray(datas[ii, 0])
        label = labels[ii]
        file_name = os.path.join(
            datas_root, str(label), 'mnist_test_' + str(ii) + '.png')
        img.save(file_name)

    2、接着构造出图片集noisy_test和noisy_train

    这两个图片集是加了高斯噪声的集合(代码中使用 np.random.normal 生成噪声,可用作图像去噪)

    import numpy as np  
    import struct  
    import numpy as np
    from PIL import Image  
    import os  
    
    # Build a noisy copy of the MNIST training set: each image gets additive
    # Gaussian noise and is saved as
    # <datas_root>/<label>/mnist_train_<index>.png.
    data_file = 'MNIST_data/train-images.idx3-ubyte'  # adjust to your own path
    label_file = 'MNIST_data/train-labels.idx1-ubyte'  # adjust to your own path
    datas_root = 'MNIST_data/noisy_train'  # adjust to your own path

    # Image file: a 16-byte big-endian header (magic, image count, rows,
    # columns) followed by one unsigned byte per pixel.
    with open(data_file, 'rb') as f:
        data_buf = f.read()
    magic, numImages, numRows, numColumns = struct.unpack_from(
        '>IIII', data_buf, 0)
    # Size the payload from the header rather than from a hard-coded file
    # size, and let NumPy view the bytes directly instead of unpacking every
    # pixel into a Python tuple first.
    datas = np.frombuffer(
        data_buf, dtype=np.uint8,
        count=numImages * numRows * numColumns,
        offset=struct.calcsize('>IIII'),
    ).reshape(numImages, 1, numRows, numColumns)

    # Label file: an 8-byte big-endian header (magic, label count) followed by
    # one unsigned byte per label.
    with open(label_file, 'rb') as f:
        label_buf = f.read()
    magic, numLabels = struct.unpack_from('>II', label_buf, 0)
    labels = np.frombuffer(
        label_buf, dtype=np.uint8,
        count=numLabels,
        offset=struct.calcsize('>II'),
    ).astype(np.int64)

    # One sub-folder per digit class; makedirs also creates datas_root itself.
    for i in range(10):
        os.makedirs(os.path.join(datas_root, str(i)), exist_ok=True)

    noise_factor = 0.5  # weight of the unit-variance noise added in [0, 1] space

    for ii in range(numLabels):
        label = labels[ii]
        file_name = os.path.join(
            datas_root, str(label), 'mnist_train_' + str(ii) + '.png')

        # Scale to [0, 1], add zero-mean Gaussian noise, clip back into range,
        # then return to 8-bit pixel values.
        x_train_noisy = datas[ii, 0].astype('float32') / 255.
        x_train_noisy = x_train_noisy + noise_factor * np.random.normal(
            loc=0.0, scale=1.0, size=x_train_noisy.shape)
        x_train_noisy = np.clip(x_train_noisy, 0., 1.)
        # The former astype(np.float) step was a no-op cast and np.float no
        # longer exists in current NumPy, so it is dropped.
        x_train_noisy = x_train_noisy.astype('float32') * 255
        x_train_noisy = x_train_noisy.astype(np.uint8)

        img = Image.fromarray(x_train_noisy)
        img.save(file_name)
    import numpy as np  
    import struct  
    
    from PIL import Image  
    import os  
    
    # Build a noisy copy of the MNIST test set: each image gets additive
    # Gaussian noise and is saved as
    # <datas_root>/<label>/mnist_test_<index>.png.
    data_file = 'MNIST_data/t10k-images.idx3-ubyte'  # adjust to your own path
    label_file = 'MNIST_data/t10k-labels.idx1-ubyte'  # adjust to your own path
    datas_root = 'MNIST_data/noisy_test'  # adjust to your own path

    # Image file: a 16-byte big-endian header (magic, image count, rows,
    # columns) followed by one unsigned byte per pixel.
    with open(data_file, 'rb') as f:
        data_buf = f.read()
    magic, numImages, numRows, numColumns = struct.unpack_from(
        '>IIII', data_buf, 0)
    # Size the payload from the header rather than from a hard-coded file
    # size, and let NumPy view the bytes directly instead of unpacking every
    # pixel into a Python tuple first.
    datas = np.frombuffer(
        data_buf, dtype=np.uint8,
        count=numImages * numRows * numColumns,
        offset=struct.calcsize('>IIII'),
    ).reshape(numImages, 1, numRows, numColumns)

    # Label file: an 8-byte big-endian header (magic, label count) followed by
    # one unsigned byte per label.
    with open(label_file, 'rb') as f:
        label_buf = f.read()
    magic, numLabels = struct.unpack_from('>II', label_buf, 0)
    labels = np.frombuffer(
        label_buf, dtype=np.uint8,
        count=numLabels,
        offset=struct.calcsize('>II'),
    ).astype(np.int64)

    # One sub-folder per digit class; makedirs also creates datas_root itself.
    for i in range(10):
        os.makedirs(os.path.join(datas_root, str(i)), exist_ok=True)

    noise_factor = 0.5  # weight of the unit-variance noise added in [0, 1] space

    for ii in range(numLabels):
        label = labels[ii]
        file_name = os.path.join(
            datas_root, str(label), 'mnist_test_' + str(ii) + '.png')

        # Scale to [0, 1], add zero-mean Gaussian noise, clip back into range,
        # then return to 8-bit pixel values.
        x_train_noisy = datas[ii, 0].astype('float32') / 255.
        x_train_noisy = x_train_noisy + noise_factor * np.random.normal(
            loc=0.0, scale=1.0, size=x_train_noisy.shape)
        x_train_noisy = np.clip(x_train_noisy, 0., 1.)
        # The former astype(np.float) step was a no-op cast and np.float no
        # longer exists in current NumPy, so it is dropped.
        x_train_noisy = x_train_noisy.astype('float32') * 255
        x_train_noisy = x_train_noisy.astype(np.uint8)

        img = Image.fromarray(x_train_noisy)
        img.save(file_name)
  • 相关阅读:
    JavaScript中{}+{}
    网站性能优化
    C++是如何从代码到游戏的?
    C++是如何从代码到游戏的?
    【力扣】至少是其他数字两倍的最大数 中速题解
    代码编辑器选择Atom还是VScode?
    TIOBE 4 月榜单:少儿编程语言 Scratch 进入 TOP 20
    熟悉一下oncontextmenu事件的知识
    input属性type为file打开文件资源管理器时,如何限制多次选取或只能一次选取的行为
    HTML5的拖放事件
  • 原文地址:https://www.cnblogs.com/dzzy/p/10824072.html
Copyright © 2011-2022 走看看