zoukankan html css js c++ java

Implementing TensorFlow's next_batch for your own data

Version for NumPy data

import numpy as np

class Dataset:
    """Serve fixed-size mini-batches from a NumPy array, epoch after epoch.

    Examples are indexed along the first axis of ``data``. When a batch
    runs past the end of the current epoch, the remaining examples are
    combined with the first examples of the next epoch so that every batch
    has exactly ``batch_size`` rows.
    """

    def __init__(self, data):
        """Wrap ``data``; its first axis enumerates the examples."""
        self._index_in_epoch = 0
        self._epochs_completed = 0
        self._data = data
        self._num_examples = data.shape[0]

    @property
    def data(self):
        """The examples in their current (possibly shuffled) order."""
        return self._data

    def _reshuffle(self):
        """Replace the stored data with a randomly permuted copy."""
        idx = np.arange(0, self._num_examples)
        np.random.shuffle(idx)
        self._data = self._data[idx]

    def next_batch(self, batch_size, shuffle=True):
        """Return the next ``batch_size`` examples.

        Args:
            batch_size: Number of examples to return; must satisfy
                ``1 <= batch_size <= number of examples``.
            shuffle: If true, the data is shuffled before the first batch
                and again each time an epoch is exhausted. If false, the
                examples are served cyclically in their stored order.

        Returns:
            An array with ``batch_size`` examples along the first axis.

        Raises:
            ValueError: If ``batch_size`` is outside the valid range.
        """
        if not 0 < batch_size <= self._num_examples:
            raise ValueError(
                "batch_size must be between 1 and the number of examples "
                f"({self._num_examples}), got {batch_size}"
            )

        start = self._index_in_epoch
        # Shuffle once before the very first batch is served.
        if shuffle and start == 0 and self._epochs_completed == 0:
            self._reshuffle()

        end = start + batch_size
        if end <= self._num_examples:
            # The whole batch fits inside the current epoch.
            self._index_in_epoch = end
            return self._data[start:end]

        # The batch straddles an epoch boundary: take what is left of this
        # epoch (empty when start == number of examples), start a new epoch,
        # and top the batch up from its beginning.
        self._epochs_completed += 1
        leftover = self._data[start:self._num_examples]
        if shuffle:
            self._reshuffle()
        self._index_in_epoch = end - self._num_examples
        head = self._data[0:self._index_in_epoch]
        return np.concatenate((leftover, head), axis=0)

# Demo: draw ten batches of six from the integers 0..9, then show the
# order in which the dataset currently holds its examples.
dataset = Dataset(np.arange(10))
for i in range(10):
    print(dataset.next_batch(batch_size=6))
print(dataset.data)

Version for pandas data

import numpy as np
import pandas as pd
class Dataset:
    """Serve fixed-size mini-batches from a pandas object, epoch after epoch.

    Rows are selected positionally with ``.iloc``, so the index labels of
    ``data`` do not matter. When a batch runs past the end of the current
    epoch, the remaining rows are combined with the first rows of the next
    epoch so that every batch has exactly ``batch_size`` rows.
    """

    def __init__(self, data):
        """Wrap ``data`` (a DataFrame or Series); each row is one example."""
        self._index_in_epoch = 0
        self._epochs_completed = 0
        self._data = data
        self._num_examples = data.shape[0]

    @property
    def data(self):
        """The rows in their current (possibly shuffled) order."""
        return self._data

    def _reshuffle(self):
        """Replace the stored data with a randomly row-permuted copy."""
        idx = np.arange(0, self._num_examples)
        np.random.shuffle(idx)
        self._data = self._data.iloc[idx]

    def next_batch(self, batch_size, shuffle=True):
        """Return the next ``batch_size`` rows.

        Args:
            batch_size: Number of rows to return; must satisfy
                ``1 <= batch_size <= number of rows``.
            shuffle: If true, the rows are shuffled before the first batch
                and again each time an epoch is exhausted. If false, the
                rows are served cyclically in their stored order.

        Returns:
            A pandas object of the same kind as the wrapped data holding
            ``batch_size`` rows; original index labels are preserved.

        Raises:
            ValueError: If ``batch_size`` is outside the valid range.
        """
        if not 0 < batch_size <= self._num_examples:
            raise ValueError(
                "batch_size must be between 1 and the number of examples "
                f"({self._num_examples}), got {batch_size}"
            )

        start = self._index_in_epoch
        # Shuffle once before the very first batch is served.
        if shuffle and start == 0 and self._epochs_completed == 0:
            self._reshuffle()

        end = start + batch_size
        if end <= self._num_examples:
            # The whole batch fits inside the current epoch. Use .iloc so
            # the slice is positional regardless of the index type.
            self._index_in_epoch = end
            return self._data.iloc[start:end]

        # The batch straddles an epoch boundary: take what is left of this
        # epoch (empty when start == number of rows), start a new epoch,
        # and top the batch up from its beginning.
        self._epochs_completed += 1
        leftover = self._data.iloc[start:self._num_examples]
        if shuffle:
            self._reshuffle()
        self._index_in_epoch = end - self._num_examples
        head = self._data.iloc[0:self._index_in_epoch]
        return pd.concat((leftover, head), axis=0)

# Demo: a two-column frame with ten rows, served in ten batches of five,
# followed by the order in which the dataset currently holds its rows.
df = pd.DataFrame({'a': np.arange(10), 'b': np.arange(10) * 10})
dataset = Dataset(df)
for i in range(10):
    print(dataset.next_batch(batch_size=5))
print(dataset.data)

查看全文

相关阅读:
【Python】学习笔记十四：循环进阶
 【Python】学习笔记十三：函数的参数对应
 【Python】学习笔记十二：模块
 输入法核心数据结构及算法的设计
 迭代式软件开发也有陷阱
 C++数组参数应用方式探讨（转）
数组，结构体初始化 {0} (转载）
宿主机为linux、windows分别实现VMware三种方式上网（转）
汽车导航系统背景介绍
 分解大量switch-case分支的两种方法

原文地址：https://www.cnblogs.com/ZeroTensor/p/10394989.html