分批量读取数据

zoukankan html css js c++ java

分批量读取数据

分批量读取数据

import tensorflow as tf

def read_data(fileNameQue, image_shape=(92, 112)):
    """Read and decode one (image, label) example from a TFRecord filename queue.

    Args:
        fileNameQue: Queue of TFRecord file names that the reader pulls
            from (e.g. the result of ``tf.train.string_input_producer``).
        image_shape: Shape the flat decoded byte vector is reshaped to.
            Defaults to ``(92, 112)``, the size that used to be hard-coded
            here. Its element count must equal the number of bytes stored
            per image, otherwise the reshape fails at run time.

    Returns:
        A tuple ``(img, label)``: ``img`` is a ``tf.uint8`` tensor reshaped
        to ``image_shape`` and ``label`` is a scalar ``tf.int32`` tensor.
    """
    reader = tf.TFRecordReader()
    # The record key is not needed; only the serialized example is parsed.
    _, value = reader.read(fileNameQue)
    features = tf.parse_single_example(
        value,
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'img': tf.FixedLenFeature([], tf.string),
        },
    )
    # The image is stored as a raw byte string; decode it to a flat uint8 vector.
    img = tf.decode_raw(features["img"], tf.uint8)
    img = tf.reshape(img, list(image_shape))  # restore the original image size
    label = tf.cast(features["label"], tf.int32)
    return img, label

def batch_input(filename, batchSize, min_after_dequeue=1000, num_threads=1):
    """Build shuffled mini-batches of (image, label) from a TFRecord file.

    Args:
        filename: Path of the TFRecord file to read.
        batchSize: Number of examples per batch.
        min_after_dequeue: Minimum number of examples kept in the shuffle
            queue after a dequeue. Larger values mix better but use more
            memory. Defaults to 1000, the value previously hard-coded.
        num_threads: Number of threads enqueuing examples. Defaults to 1,
            which is also the default of ``tf.train.shuffle_batch``.

    Returns:
        A tuple ``(exampleBatch, labelBatch)``. Both tensors gain a leading
        batch dimension of size ``batchSize`` compared with what
        ``read_data`` returns.
    """
    fileNameQue = tf.train.string_input_producer([filename], shuffle=True)
    img, label = read_data(fileNameQue)  # fetch one image and its label

    # Recommended sizing: min_after_dequeue + (num_threads + safety margin) * batch_size.
    # With the defaults this is the original 1000 + 3 * batchSize.
    capacity = min_after_dequeue + (num_threads + 2) * batchSize

    # Prefetch examples, shuffle them and assemble batches.
    exampleBatch, labelBatch = tf.train.shuffle_batch(
        [img, label],
        batch_size=batchSize,
        capacity=capacity,
        min_after_dequeue=min_after_dequeue,
        num_threads=num_threads,
    )
    return exampleBatch, labelBatch

if __name__ == "__main__":
    # Build the input pipeline first: an initializer op only covers the
    # variables that already exist in the graph when it is created.
    exampleBatch, labelBatch = batch_input("./data/faceTF.tfrecords", batchSize=10)

    # Local variables are included because queue-based input ops may create
    # them (for example an epoch counter when an epoch limit is set).
    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())

    with tf.Session() as sess:
        sess.run(init)

        # Start the threads that fill the filename and example queues.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            for _ in range(100):
                example, _label = sess.run([exampleBatch, labelBatch])
                print(example.shape)
        finally:
            # Always stop and join the queue threads, even if a run fails.
            coord.request_stop()
            coord.join(threads)

读取和解码数据的方式与之前基本相同，针对不同格式的数据集换用相应的阅读器和解码器即可；后面是生成batch，核心是tf.train.shuffle_batch这个函数，它的作用相当于一个蓄水池：第一个参数是蓄水池的入水口，也就是逐个读取到的记录；batch_size就是batch的大小；capacity是蓄水池的容量，表示最多能容纳多少个样本；min_after_dequeue是指出队操作后队列中至少要保留的样本数，这些样本用于随机采样出批量数据。显然，capacity要大于min_after_dequeue，官网推荐的取值是：capacity = min_after_dequeue + (num_threads + a small safety margin) * batch_size。还有一个参数是num_threads，表示向队列中填充样本所用的线程数目。

min_after_dequeue这个值越大，随机采样的效果越好，但是消耗的内存也越大。

查看全文

相关阅读:
用Docker执行Percona Server
Java基础笔记（七）
VC与JavaScript交互(三) ———— JS调用C++
4456: [Zjoi2016]旅行者|分治+最短路
 Swift语法学习之方法
 JavaScript学习笔记二
 Latex 制作积分规则表格
 向MapReduce转换：计算共现关系
 王立平--switch case
组队训练1 回放

原文地址：https://www.cnblogs.com/yongfuxue/p/10095884.html