dataloader.py
import random import torch import torch.multiprocessing as multiprocessing from torch._C import _set_worker_signal_handlers, _update_worker_pids, _remove_worker_pids, _error_if_any_worker_fails from . import SequentialSampler, RandomSampler, BatchSampler import signal import functools import collections import re import sys import threading import traceback import os import time from torch._six import string_classes, int_classes, FileNotFoundError IS_WINDOWS = sys.platform == "win32" if IS_WINDOWS: import ctypes from ctypes.wintypes import DWORD, BOOL, HANDLE if sys.version_info[0] == 2: import Queue as queue else: import queue class ExceptionWrapper(object): r"""Wraps an exception plus traceback to communicate across threads""" def __init__(self, exc_info): self.exc_type = exc_info[0] self.exc_msg = "".join(traceback.format_exception(*exc_info)) _use_shared_memory = False r"""Whether to use shared memory in default_collate""" MANAGER_STATUS_CHECK_INTERVAL = 5.0 if IS_WINDOWS: # On Windows, the parent ID of the worker process remains unchanged when the manager process # is gone, and the only way to check it through OS is to let the worker have a process handle # of the manager and ask if the process status has changed. class ManagerWatchdog(object): def __init__(self): self.manager_pid = os.getppid() self.kernel32 = ctypes.WinDLL('kernel32', use_last_error=True) self.kernel32.OpenProcess.argtypes = (DWORD, BOOL, DWORD) self.kernel32.OpenProcess.restype = HANDLE self.kernel32.WaitForSingleObject.argtypes = (HANDLE, DWORD) self.kernel32.WaitForSingleObject.restype = DWORD # Value obtained from https://msdn.microsoft.com/en-us/library/ms684880.aspx SYNCHRONIZE = 0x00100000 self.manager_handle = self.kernel32.OpenProcess(SYNCHRONIZE, 0, self.manager_pid) if not self.manager_handle: raise ctypes.WinError(ctypes.get_last_error()) def is_alive(self): # Value obtained from https://msdn.microsoft.com/en-us/library/windows/desktop/ms687032.aspx return self.kernel32.WaitForSingleObject(self.manager_handle, 0) != 0 else: class ManagerWatchdog(object): def __init__(self): self.manager_pid = os.getppid() def is_alive(self): return os.getppid() == self.manager_pid def _worker_loop(dataset, index_queue, data_queue, collate_fn, seed, init_fn, worker_id): global _use_shared_memory _use_shared_memory = True # Intialize C side signal handlers for SIGBUS and SIGSEGV. Python signal # module's handlers are executed after Python returns from C low-level # handlers, likely when the same fatal signal happened again already. # https://docs.python.org/3/library/signal.html Sec. 18.8.1.1 _set_worker_signal_handlers() torch.set_num_threads(1) random.seed(seed) torch.manual_seed(seed) if init_fn is not None: init_fn(worker_id) watchdog = ManagerWatchdog() while True: try: r = index_queue.get(timeout=MANAGER_STATUS_CHECK_INTERVAL) except queue.Empty: if watchdog.is_alive(): continue else: break if r is None: break idx, batch_indices = r try: samples = collate_fn([dataset[i] for i in batch_indices]) except Exception: data_queue.put((idx, ExceptionWrapper(sys.exc_info()))) else: data_queue.put((idx, samples)) del samples def _worker_manager_loop(in_queue, out_queue, done_event, pin_memory, device_id): if pin_memory: torch.cuda.set_device(device_id) while True: try: r = in_queue.get() except Exception: if done_event.is_set(): return raise if r is None: break if isinstance(r[1], ExceptionWrapper): out_queue.put(r) continue idx, batch = r try: if pin_memory: batch = pin_memory_batch(batch) except Exception: out_queue.put((idx, ExceptionWrapper(sys.exc_info()))) else: out_queue.put((idx, batch)) numpy_type_map = { 'float64': torch.DoubleTensor, 'float32': torch.FloatTensor, 'float16': torch.HalfTensor, 'int64': torch.LongTensor, 'int32': torch.IntTensor, 'int16': torch.ShortTensor, 'int8': torch.CharTensor, 'uint8': torch.ByteTensor, } def default_collate(batch): r"""Puts each data field into a tensor with outer dimension batch size""" error_msg = "batch must contain tensors, numbers, dicts or lists; found {}" elem_type = type(batch[0]) if isinstance(batch[0], torch.Tensor): out = None if _use_shared_memory: # If we're in a background process, concatenate directly into a # shared memory tensor to avoid an extra copy numel = sum([x.numel() for x in batch]) storage = batch[0].storage()._new_shared(numel) out = batch[0].new(storage) return torch.stack(batch, 0, out=out) elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' and elem_type.__name__ != 'string_': elem = batch[0] if elem_type.__name__ == 'ndarray': # array of string classes and object if re.search('[SaUO]', elem.dtype.str) is not None: raise TypeError(error_msg.format(elem.dtype)) return torch.stack([torch.from_numpy(b) for b in batch], 0) if elem.shape == (): # scalars py_type = float if elem.dtype.name.startswith('float') else int return numpy_type_map[elem.dtype.name](list(map(py_type, batch))) elif isinstance(batch[0], int_classes): return torch.LongTensor(batch) elif isinstance(batch[0], float): return torch.DoubleTensor(batch) elif isinstance(batch[0], string_classes): return batch elif isinstance(batch[0], collections.Mapping): return {key: default_collate([d[key] for d in batch]) for key in batch[0]} elif isinstance(batch[0], collections.Sequence): transposed = zip(*batch) return [default_collate(samples) for samples in transposed] raise TypeError((error_msg.format(type(batch[0])))) def pin_memory_batch(batch): if isinstance(batch, torch.Tensor): return batch.pin_memory() elif isinstance(batch, string_classes): return batch elif isinstance(batch, collections.Mapping): return {k: pin_memory_batch(sample) for k, sample in batch.items()} elif isinstance(batch, collections.Sequence): return [pin_memory_batch(sample) for sample in batch] else: return batch _SIGCHLD_handler_set = False r"""Whether SIGCHLD handler is set for DataLoader worker failures. Only one handler needs to be set for all DataLoaders in a process.""" def _set_SIGCHLD_handler(): # Windows doesn't support SIGCHLD handler if sys.platform == 'win32': return # can't set signal in child threads if not isinstance(threading.current_thread(), threading._MainThread): return global _SIGCHLD_handler_set if _SIGCHLD_handler_set: return previous_handler = signal.getsignal(signal.SIGCHLD) if not callable(previous_handler): previous_handler = None def handler(signum, frame): # This following call uses `waitid` with WNOHANG from C side. Therefore, # Python can still get and update the process status successfully. _error_if_any_worker_fails() if previous_handler is not None: previous_handler(signum, frame) signal.signal(signal.SIGCHLD, handler) _SIGCHLD_handler_set = True class _DataLoaderIter(object): r"""Iterates once over the DataLoader's dataset, as specified by the sampler""" def __init__(self, loader): self.dataset = loader.dataset self.collate_fn = loader.collate_fn self.batch_sampler = loader.batch_sampler self.num_workers = loader.num_workers self.pin_memory = loader.pin_memory and torch.cuda.is_available() self.timeout = loader.timeout self.done_event = threading.Event() self.sample_iter = iter(self.batch_sampler) base_seed = torch.LongTensor(1).random_().item() if self.num_workers > 0: self.worker_init_fn = loader.worker_init_fn self.index_queues = [multiprocessing.Queue() for _ in range(self.num_workers)] self.worker_queue_idx = 0 self.worker_result_queue = multiprocessing.SimpleQueue() self.batches_outstanding = 0 self.worker_pids_set = False self.shutdown = False self.send_idx = 0 self.rcvd_idx = 0 self.reorder_dict = {} self.workers = [ multiprocessing.Process( target=_worker_loop, args=(self.dataset, self.index_queues[i], self.worker_result_queue, self.collate_fn, base_seed + i, self.worker_init_fn, i)) for i in range(self.num_workers)] if self.pin_memory or self.timeout > 0: self.data_queue = queue.Queue() if self.pin_memory: maybe_device_id = torch.cuda.current_device() else: # do not initialize cuda context if not necessary maybe_device_id = None self.worker_manager_thread = threading.Thread( target=_worker_manager_loop, args=(self.worker_result_queue, self.data_queue, self.done_event, self.pin_memory, maybe_device_id)) self.worker_manager_thread.daemon = True self.worker_manager_thread.start() else: self.data_queue = self.worker_result_queue for w in self.workers: w.daemon = True # ensure that the worker exits on process exit w.start() _update_worker_pids(id(self), tuple(w.pid for w in self.workers)) _set_SIGCHLD_handler() self.worker_pids_set = True # prime the prefetch loop for _ in range(2 * self.num_workers): self._put_indices() def __len__(self): return len(self.batch_sampler) def _get_batch(self): if self.timeout > 0: try: return self.data_queue.get(timeout=self.timeout) except queue.Empty: raise RuntimeError('DataLoader timed out after {} seconds'.format(self.timeout)) else: return self.data_queue.get() def __next__(self): if self.num_workers == 0: # same-process loading indices = next(self.sample_iter) # may raise StopIteration batch = self.collate_fn([self.dataset[i] for i in indices]) if self.pin_memory: batch = pin_memory_batch(batch) return batch # check if the next sample has already been generated if self.rcvd_idx in self.reorder_dict: batch = self.reorder_dict.pop(self.rcvd_idx) return self._process_next_batch(batch) if self.batches_outstanding == 0: self._shutdown_workers() raise StopIteration while True: assert (not self.shutdown and self.batches_outstanding > 0) idx, batch = self._get_batch() self.batches_outstanding -= 1 if idx != self.rcvd_idx: # store out-of-order samples self.reorder_dict[idx] = batch continue return self._process_next_batch(batch) next = __next__ # Python 2 compatibility def __iter__(self): return self def _put_indices(self): assert self.batches_outstanding < 2 * self.num_workers indices = next(self.sample_iter, None) if indices is None: return self.index_queues[self.worker_queue_idx].put((self.send_idx, indices)) self.worker_queue_idx = (self.worker_queue_idx + 1) % self.num_workers self.batches_outstanding += 1 self.send_idx += 1 def _process_next_batch(self, batch): self.rcvd_idx += 1 self._put_indices() if isinstance(batch, ExceptionWrapper): raise batch.exc_type(batch.exc_msg) return batch def __getstate__(self): # TODO: add limited pickling support for sharing an iterator # across multiple threads for HOGWILD. # Probably the best way to do this is by moving the sample pushing # to a separate thread and then just sharing the data queue # but signalling the end is tricky without a non-blocking API raise NotImplementedError("_DataLoaderIter cannot be pickled") def _shutdown_workers(self): try: if not self.shutdown: self.shutdown = True self.done_event.set() for q in self.index_queues: q.put(None) # if some workers are waiting to put, make place for them try: while not self.worker_result_queue.empty(): self.worker_result_queue.get() except (FileNotFoundError, ImportError): # Many weird errors can happen here due to Python # shutting down. These are more like obscure Python bugs. # FileNotFoundError can happen when we rebuild the fd # fetched from the queue but the socket is already closed # from the worker side. # ImportError can happen when the unpickler loads the # resource from `get`. pass # done_event should be sufficient to exit worker_manager_thread, # but be safe here and put another None self.worker_result_queue.put(None) finally: # removes pids no matter what if self.worker_pids_set: _remove_worker_pids(id(self)) self.worker_pids_set = False def __del__(self): if self.num_workers > 0: self._shutdown_workers() class DataLoader(object): r""" Data loader. Combines a dataset and a sampler, and provides single- or multi-process iterators over the dataset. Arguments: dataset (Dataset): dataset from which to load the data. batch_size (int, optional): how many samples per batch to load (default: 1). shuffle (bool, optional): set to ``True`` to have the data reshuffled at every epoch (default: False). sampler (Sampler, optional): defines the strategy to draw samples from the dataset. If specified, ``shuffle`` must be False. batch_sampler (Sampler, optional): like sampler, but returns a batch of indices at a time. Mutually exclusive with batch_size, shuffle, sampler, and drop_last. num_workers (int, optional): how many subprocesses to use for data loading. 0 means that the data will be loaded in the main process. (default: 0) collate_fn (callable, optional): merges a list of samples to form a mini-batch. pin_memory (bool, optional): If ``True``, the data loader will copy tensors into CUDA pinned memory before returning them. drop_last (bool, optional): set to ``True`` to drop the last incomplete batch, if the dataset size is not divisible by the batch size. If ``False`` and the size of dataset is not divisible by the batch size, then the last batch will be smaller. (default: False) timeout (numeric, optional): if positive, the timeout value for collecting a batch from workers. Should always be non-negative. (default: 0) worker_init_fn (callable, optional): If not None, this will be called on each worker subprocess with the worker id (an int in ``[0, num_workers - 1]``) as input, after seeding and before data loading. (default: None) .. note:: By default, each worker will have its PyTorch seed set to ``base_seed + worker_id``, where ``base_seed`` is a long generated by main process using its RNG. However, seeds for other libraies may be duplicated upon initializing workers (w.g., NumPy), causing each worker to return identical random numbers. (See :ref:`dataloader-workers-random-seed` section in FAQ.) You may use ``torch.initial_seed()`` to access the PyTorch seed for each worker in :attr:`worker_init_fn`, and use it to set other seeds before data loading. .. warning:: If ``spawn`` start method is used, :attr:`worker_init_fn` cannot be an unpicklable object, e.g., a lambda function. """ __initialized = False def __init__(self, dataset, batch_size=1, subdivison=1, shuffle=False, sampler=None, batch_sampler=None, num_workers=0, collate_fn=default_collate, pin_memory=False, drop_last=False, timeout=0, worker_init_fn=None): self.dataset = dataset self.batch_size_real = batch_size self.subdivision = subdivison self.batch_size = batch_size/subdivison self.num_workers = num_workers self.collate_fn = collate_fn self.pin_memory = pin_memory self.drop_last = drop_last self.timeout = timeout self.worker_init_fn = worker_init_fn if timeout < 0: raise ValueError('timeout option should be non-negative') if batch_sampler is not None: if batch_size > 1 or shuffle or sampler is not None or drop_last: raise ValueError('batch_sampler option is mutually exclusive ' 'with batch_size, shuffle, sampler, and ' 'drop_last') self.batch_size = None self.drop_last = None if sampler is not None and shuffle: raise ValueError('sampler option is mutually exclusive with ' 'shuffle') if self.num_workers < 0: raise ValueError('num_workers option cannot be negative; ' 'use num_workers=0 to disable multiprocessing.') if batch_sampler is None: if sampler is None: if shuffle: sampler = RandomSampler(dataset) else: sampler = SequentialSampler(dataset) batch_sampler = BatchSampler(sampler, batch_size, drop_last) self.sampler = sampler self.batch_sampler = batch_sampler self.__initialized = True def __setattr__(self, attr, val): if self.__initialized and attr in ('batch_size', 'sampler', 'drop_last'): raise ValueError('{} attribute should not be set after {} is ' 'initialized'.format(attr, self.__class__.__name__)) super(DataLoader, self).__setattr__(attr, val) def __iter__(self): return _DataLoaderIter(self) def __len__(self): return len(self.batch_sampler)