  • Data augmentation methods for object detection

    import random

    import cv2
    import numpy as np

    # Helper functions used below (clip_box, rotate_im, get_corners, rotate_box,
    # get_enclosing_box, HorizontalFlip) are assumed to come from the same
    # data-augmentation library.

    def letterbox_image(img, inp_dim):
        '''resize image with unchanged aspect ratio using padding
        
        Parameters
        ----------
        
        img : numpy.ndarray
            Image 
        
        inp_dim: int
            side length of the resized (square) output image
            
        Returns
        -------
        
        numpy.ndarray:
            Resized image
        
        '''
    
        inp_dim = (inp_dim, inp_dim)
        img_w, img_h = img.shape[1], img.shape[0]
        w, h = inp_dim
        new_w = int(img_w * min(w/img_w, h/img_h))
        new_h = int(img_h * min(w/img_w, h/img_h))
        resized_image = cv2.resize(img, (new_w,new_h)) # resize with scale = target_size / longer side, then pad the remaining area
        
        canvas = np.full((inp_dim[1], inp_dim[0], 3), 0, dtype=np.uint8)  # black canvas of the target size
    
        canvas[(h-new_h)//2:(h-new_h)//2 + new_h,(w-new_w)//2:(w-new_w)//2 + new_w,  :] = resized_image
        
        return canvas
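
    # Worked example (illustrative numbers): letterboxing a 1280x720 image to 416
    # gives scale = min(416/1280, 416/720) = 0.325, so the image is resized to
    # 416x234 and pasted onto a 416x416 black canvas with (416-234)//2 = 91 rows
    # of padding above and below.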
    
    class Resize(object):
        """Resize the image in accordance to `image_letter_box` function in darknet 
        
        The aspect ratio is maintained. The longer side is resized to the input 
        size of the network, while the remaining space on the shorter side is filled 
        with black color. **This should be the last transform**
        
        
        Parameters
        ----------
        inp_dim : int
            the side length of the square to which the image will be letterboxed.
            
        Returns
        -------
        
        numpy.ndarray
            Resized image in the numpy format of shape `HxWxC`
        
        numpy.ndarray
            Resized bounding box co-ordinates of the format `n x 4` where n is 
            number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
            
        """
        
        def __init__(self, inp_dim):
            self.inp_dim = inp_dim
            
        def __call__(self, img, bboxes):
            w,h = img.shape[1], img.shape[0]
            img = letterbox_image(img, self.inp_dim) # resize with scale = target_size / longer side, then pad the remaining area
        
        
            scale = min(self.inp_dim/h, self.inp_dim/w)
            bboxes[:,:4] *= (scale)
        
            new_w = scale*w
            new_h = scale*h
            inp_dim = self.inp_dim   
        
            del_h = (inp_dim - new_h)/2
            del_w = (inp_dim - new_w)/2
        
            add_matrix = np.array([[del_w, del_h, del_w, del_h]]).astype(int)
        
            bboxes[:,:4] += add_matrix  # shift the boxes to account for the padding offsets
        
            img = img.astype(np.uint8)
        
            return img, bboxes 
    
    class RandomHorizontalFlip(object):
    
        """Randomly horizontally flips the Image with the probability *p*
    
        Parameters
        ----------
        p: float
            The probability with which the image is flipped
    
    
        Returns
        -------
    
        numpy.ndarray
            Flipped image in the numpy format of shape `HxWxC`
    
        numpy.ndarray
            Transformed bounding box co-ordinates of the format `n x 4` where n is
            number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
    
        """
    
        def __init__(self, p=0.5):
            self.p = p
    
        def __call__(self, img, bboxes):
            img_center = np.array(img.shape[:2])[::-1]/2  # image centre coordinates (x, y)
            img_center = np.hstack((img_center, img_center))
            if random.random() < self.p:
                img = img[:, ::-1, :]  # flip the image horizontally
                bboxes[:, [0, 2]] += 2*(img_center[[0, 2]] - bboxes[:, [0, 2]])  # reflect the box x coordinates (x1, x2) about the image centre

                box_w = abs(bboxes[:, 0] - bboxes[:, 2])

                bboxes[:, 0] -= box_w  # after reflection x1 > x2; shift by the box width so that x1 < x2 again
                bboxes[:, 2] += box_w

            return img, bboxes
    
    class RandomScale(object):
        """Randomly scales an image    
        
        
        Bounding boxes which have less than 25% of their area remaining in the
        transformed image are dropped. The resolution is maintained, and any
        remaining area is filled with black color.
        
        Parameters
        ----------
        scale: float or tuple(float)
            if **float**, the image is scaled by a factor drawn 
            randomly from a range (1 - `scale` , 1 + `scale`). If **tuple**,
            the `scale` is drawn randomly from values specified by the 
            tuple
            
        Returns
        -------
        
        numpy.ndarray
            Scaled image in the numpy format of shape `HxWxC`
        
        numpy.ndarray
            Transformed bounding box co-ordinates of the format `n x 4` where n is 
            number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
            
        """
    
        def __init__(self, scale = 0.2, diff = False):
            self.scale = scale
    
            
            if type(self.scale) == tuple:
                assert len(self.scale) == 2, "Invalid range"
                assert self.scale[0] > -1, "Scale factor can't be less than -1"
                assert self.scale[1] > -1, "Scale factor can't be less than -1"
            else:
                assert self.scale > 0, "Please input a positive float"
                self.scale = (max(-1, -self.scale), self.scale)
            
            self.diff = diff
    
            
    
        def __call__(self, img, bboxes):
        
            
            # Choose a random factor to scale by
            
            img_shape = img.shape
            
            if self.diff:
                scale_x = random.uniform(*self.scale)
                scale_y = random.uniform(*self.scale)
            else:
                scale_x = random.uniform(*self.scale)
                scale_y = scale_x
                
        
            
            resize_scale_x = 1 + scale_x
            resize_scale_y = 1 + scale_y
    
            # The logic of the Scale transformation is fairly simple.
            # We use the OpenCV function cv2.resize to scale our image, and scale our bounding boxes by the scale factor(s).
            img = cv2.resize(img, None, fx=resize_scale_x, fy=resize_scale_y)
            
            bboxes[:,:4] *= [resize_scale_x, resize_scale_y, resize_scale_x, resize_scale_y]
            
            
            
            canvas = np.zeros(img_shape, dtype = np.uint8)  # blank canvas with the original image size
            
            y_lim = int(min(resize_scale_y,1)*img_shape[0])
            x_lim = int(min(resize_scale_x,1)*img_shape[1])
            
            
            canvas[:y_lim,:x_lim,:] =  img[:y_lim,:x_lim,:]  # the resized image may be larger or smaller; if larger, keep only the region that fits, if smaller, the rest stays black
            
            img = canvas
            bboxes = clip_box(bboxes, [0,0,1 + img_shape[1], img_shape[0]], 0.25)  # clip boxes to the image and drop those whose remaining area is below the threshold
        
        
            return img, bboxes
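
    # clip_box is another assumed helper from the same library: it clamps boxes to the
    # clip window [x_min, y_min, x_max, y_max] and drops boxes that keep less than
    # `alpha` of their original area. A minimal sketch (not the library's exact code):
    def clip_box(bbox, clip_window, alpha):
        def box_area(b):
            return (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

        area_before = box_area(bbox)
        x_min = np.maximum(bbox[:, 0], clip_window[0]).reshape(-1, 1)
        y_min = np.maximum(bbox[:, 1], clip_window[1]).reshape(-1, 1)
        x_max = np.minimum(bbox[:, 2], clip_window[2]).reshape(-1, 1)
        y_max = np.minimum(bbox[:, 3], clip_window[3]).reshape(-1, 1)
        bbox = np.hstack((x_min, y_min, x_max, y_max, bbox[:, 4:]))
        kept_fraction = box_area(bbox) / area_before
        return bbox[kept_fraction > alpha, :]  # keep boxes retaining more than alpha of their area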
    
    class RandomTranslate(object): # random translation
        """Randomly Translates the image    
        
        
        Bounding boxes which have less than 25% of their area remaining in the
        transformed image are dropped. The resolution is maintained, and any
        remaining area is filled with black color.
        
        Parameters
        ----------
        translate: float or tuple(float)
            if **float**, the translation factors along x and y are drawn
            randomly from the range (-`translate`, `translate`), expressed as a
            fraction of the image dimensions. If **tuple**, `translate` is drawn
            randomly from values specified by the tuple
            
        Returns
        -------
        
        numpy.ndarray
            Translated image in the numpy format of shape `HxWxC`
        
        numpy.ndarray
            Transformed bounding box co-ordinates of the format `n x 4` where n is 
            number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
            
        """
    
        def __init__(self, translate = 0.2, diff = False):
            self.translate = translate
            
            if type(self.translate) == tuple:
                assert len(self.translate) == 2, "Invalid range"
                assert 0 < self.translate[0] < 1
                assert 0 < self.translate[1] < 1
            else:
                assert self.translate > 0 and self.translate < 1
                self.translate = (-self.translate, self.translate)  # the factor must lie in (0, 1)
                
                
            self.diff = diff
    
        def __call__(self, img, bboxes):        
            # Choose a random fraction to translate by
            img_shape = img.shape
            
            #translate the image
            
            #percentage of the dimension of the image to translate
            translate_factor_x = random.uniform(*self.translate)
            translate_factor_y = random.uniform(*self.translate)
            
            if not self.diff:
                translate_factor_y = translate_factor_x
                
            canvas = np.zeros(img_shape).astype(np.uint8)
        
        
            corner_x = int(translate_factor_x*img.shape[1])
            corner_y = int(translate_factor_y*img.shape[0])
    
            # change the origin to the top-left corner of the translated region (effectively a shift, with out-of-bounds handling)
            orig_box_cords =  [max(0,corner_y), max(corner_x,0), min(img_shape[0], corner_y + img.shape[0]), min(img_shape[1],corner_x + img.shape[1])]
    
            mask = img[max(-corner_y, 0):min(img.shape[0], -corner_y + img_shape[0]), max(-corner_x, 0):min(img.shape[1], -corner_x + img_shape[1]),:]
            canvas[orig_box_cords[0]:orig_box_cords[2], orig_box_cords[1]:orig_box_cords[3],:] = mask
            img = canvas
            
            bboxes[:,:4] += [corner_x, corner_y, corner_x, corner_y]  # translate the boxes by the same offset
            
            
            bboxes = clip_box(bboxes, [0,0,img_shape[1], img_shape[0]], 0.25)
            
            return img, bboxes
    
    class RandomRotate(object):
        """Randomly rotates an image    
        
        
        Bounding boxes which have less than 25% of their area remaining in the
        transformed image are dropped. The resolution is maintained, and any
        remaining area is filled with black color.
        
        Parameters
        ----------
        angle: float or tuple(float)
            if **float**, the image is rotated by a factor drawn 
            randomly from a range (-`angle`, `angle`). If **tuple**,
            the `angle` is drawn randomly from values specified by the 
            tuple
            
        Returns
        -------
        
        numpy.ndarray
            Rotated image in the numpy format of shape `HxWxC`
        
        numpy.ndarray
            Transformed bounding box co-ordinates of the format `n x 4` where n is 
            number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
            
        """
    
        def __init__(self, angle = 10):
            self.angle = angle
            
            if type(self.angle) == tuple:
                assert len(self.angle) == 2, "Invalid range"  
                
            else:
                self.angle = (-self.angle, self.angle)
                
        def __call__(self, img, bboxes):
        
            angle = random.uniform(*self.angle)
        
            w,h = img.shape[1], img.shape[0]
            cx, cy = w//2, h//2
        
            img = rotate_im(img, angle)  # compute the affine matrix and warp the whole image; the canvas grows so no image content is lost after rotation
        
            corners = get_corners(bboxes)  # get the four corner points of each box
        
            corners = np.hstack((corners, bboxes[:,4:]))
        
        
            corners[:,:8] = rotate_box(corners[:,:8], angle, cx, cy, h, w)  # rotate the corner points with the same affine matrix
        
            new_bbox = get_enclosing_box(corners) # we have to find the tightest rectangle parallel to the sides of the image containing the tilted rectangular box.
        
        
            scale_factor_x = img.shape[1] / w
        
            scale_factor_y = img.shape[0] / h
        
            img = cv2.resize(img, (w,h))  # shrink the enlarged rotated image back to the original size
        
            new_bbox[:,:4] /= [scale_factor_x, scale_factor_y, scale_factor_x, scale_factor_y] 
        
            bboxes  = new_bbox
        
            bboxes = clip_box(bboxes, [0,0,w, h], 0.25)
        
            return img, bboxes
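
    # The rotation transform above relies on helpers from the same augmentation
    # library (rotate_im, get_corners, rotate_box, get_enclosing_box). As an
    # illustration of the "tightest enclosing rectangle" step, a minimal sketch of
    # get_enclosing_box could look like this (assumed input: an n x (8 + k) array of
    # corner coordinates x1,y1,...,x4,y4 followed by any extra columns such as labels):
    def get_enclosing_box(corners):
        x_ = corners[:, [0, 2, 4, 6]]
        y_ = corners[:, [1, 3, 5, 7]]
        # tightest axis-aligned box around the four rotated corners
        xmin = np.min(x_, 1).reshape(-1, 1)
        ymin = np.min(y_, 1).reshape(-1, 1)
        xmax = np.max(x_, 1).reshape(-1, 1)
        ymax = np.max(y_, 1).reshape(-1, 1)
        return np.hstack((xmin, ymin, xmax, ymax, corners[:, 8:]))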
    
    class RandomShear(object): # a special case of rotation (horizontal shear)
        """Randomly shears an image in horizontal direction   
        
        
        Bounding boxes which have less than 25% of their area remaining in the
        transformed image are dropped. The resolution is maintained, and any
        remaining area is filled with black color.
        
        Parameters
        ----------
        shear_factor: float or tuple(float)
            if **float**, the image is sheared horizontally by a factor drawn 
            randomly from a range (-`shear_factor`, `shear_factor`). If **tuple**,
            the `shear_factor` is drawn randomly from values specified by the 
            tuple
            
        Returns
        -------
        
        numpy.ndarray
            Sheared image in the numpy format of shape `HxWxC`
        
        numpy.ndarray
            Transformed bounding box co-ordinates of the format `n x 4` where n is 
            number of bounding boxes and 4 represents `x1,y1,x2,y2` of the box
            
        """
    
        def __init__(self, shear_factor = 0.2):
            self.shear_factor = shear_factor
            
            if type(self.shear_factor) == tuple:
                assert len(self.shear_factor) == 2, "Invalid range for shear factor"
            else:
                self.shear_factor = (-self.shear_factor, self.shear_factor)
            
        def __call__(self, img, bboxes):
        
            shear_factor = random.uniform(*self.shear_factor)
        
            w,h = img.shape[1], img.shape[0]
        
            if shear_factor < 0:
                img, bboxes = HorizontalFlip()(img, bboxes)  # a neat trick: flip the image so a negative shear can be handled as a positive one
        
            M = np.array([[1, abs(shear_factor), 0],[0,1,0]])
        
            nW =  img.shape[1] + abs(shear_factor*img.shape[0])
        
            bboxes[:,[0,2]] += ((bboxes[:,[1,3]]) * abs(shear_factor) ).astype(int) 
        
        
            img = cv2.warpAffine(img, M, (int(nW), img.shape[0]))  # horizontal shear only
        
            if shear_factor < 0:
                img, bboxes = HorizontalFlip()(img, bboxes)
        
            img = cv2.resize(img, (w,h))
        
            scale_factor_x = nW / w
        
            bboxes[:,:4] /= [scale_factor_x, 1, scale_factor_x, 1] 
        
        
            return img, bboxes
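
    A minimal usage sketch (assumptions: the transforms above and their helper functions such as `clip_box` are importable, the image is read with OpenCV, and `bboxes` is an `n x 5` float array of `x1,y1,x2,y2,label`). The `Sequence` helper below is hypothetical and simply applies each transform in turn, keeping `Resize` last as its docstring requires:

    class Sequence(object):
        """Hypothetical composition helper: apply a list of transforms in order."""
        def __init__(self, transforms):
            self.transforms = transforms

        def __call__(self, img, bboxes):
            for t in self.transforms:
                img, bboxes = t(img, bboxes)
            return img, bboxes

    img = cv2.imread("example.jpg")                    # hypothetical input image
    bboxes = np.array([[40., 60., 200., 180., 0.]])    # one box: x1, y1, x2, y2, label

    aug = Sequence([RandomHorizontalFlip(p=0.5),
                    RandomScale(scale=0.2),
                    Resize(416)])                      # Resize must be the last transform
    img_aug, bboxes_aug = aug(img.copy(), bboxes.copy())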

    Speeding up preprocessing with a pool of worker processes:

    def parse_data(data):
        img = np.array(cv2.imread(data))
        h, w, c = img.shape
        assert c == 3
        img = cv2.resize(img, (scale_size, scale_size))
        img = img.astype(np.float32)
    
        shift = (scale_size - crop_size) // 2
        img = img[shift: shift + crop_size, shift: shift + crop_size, :]
        # Flip image at random if flag is selected
        if np.random.random() < 0.5:  # self.horizontal_flip and
            img = cv2.flip(img, 1)
        img = (img - np.array(127.5)) / 127.5
    
        return img
    
    
    def parse_data_without_augmentation(data):
        img = np.array(cv2.imread(data))
        h, w, c = img.shape
        assert c == 3
        img = cv2.resize(img, (crop_size, crop_size))
        img = img.astype(np.float32)
        img = (img - np.array(127.5)) / 127.5
        return img
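
    As a rough sketch of the idea (hypothetical file paths; `scale_size` and `crop_size` are assumed to be module-level constants, e.g. 256 and 224), the decode-and-augment work in `parse_data` can be farmed out to a process pool:

    import multiprocessing as mtp

    import cv2
    import numpy as np

    scale_size, crop_size = 256, 224   # assumed values, for illustration only

    if __name__ == "__main__":
        pool = mtp.Pool(8)                                          # 8 worker processes
        batch_paths = ["images/%04d.jpg" % i for i in range(32)]    # hypothetical file list
        # each worker runs parse_data on one path; results are stacked into a batch
        batch = np.stack(pool.map(parse_data, batch_paths))
        print(batch.shape)                                          # (32, crop_size, crop_size, 3)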
    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    # @Time    : 2019/3/10 11:15
    # @Author  : Whu_DSP
    # @File    : dped_dataloader.py
    
    import multiprocessing as mtp
    import os
    import cv2
    import numpy as np
    from scipy import misc
    
    
    def parse_data(filename):
        I = np.asarray(misc.imread(filename))  # note: scipy.misc.imread was removed in newer SciPy; imageio.imread is the usual replacement
        I = np.float16(I) / 255
        return I
    class Dataloader:
        def __init__(self, dped_dir, type_phone, batch_size, is_training, im_shape):
            self.works = mtp.Pool(10)  # pool of 10 worker processes for image decoding
            self.dped_dir = dped_dir
            self.phone_type = type_phone
            self.batch_size = batch_size
            self.is_training = is_training
            self.im_shape = im_shape
            self.image_list, self.dslr_list = self._get_data_file_list()
            self.num_images = len(self.image_list)
            self._cur = 0
            self._perm = None
            self._shuffle_index()  # init order

        def _get_data_file_list(self):
            if self.is_training:
                directory_phone = os.path.join(self.dped_dir, str(self.phone_type), 'training_data', str(self.phone_type))
                directory_dslr = os.path.join(self.dped_dir, str(self.phone_type), 'training_data', 'canon')
            else:
                directory_phone = os.path.join(self.dped_dir, str(self.phone_type), 'test_data', 'patches', str(self.phone_type))
                directory_dslr = os.path.join(self.dped_dir, str(self.phone_type), 'test_data', 'patches', 'canon')

            image_list = [os.path.join(directory_phone, name) for name in os.listdir(directory_phone)]
            dslr_list = [os.path.join(directory_dslr, name) for name in os.listdir(directory_dslr)]
            return image_list, dslr_list

        def _shuffle_index(self):
            """randomly permute the training order"""
            self._perm = np.random.permutation(np.arange(self.num_images))
            self._cur = 0

        def _get_next_minbatch_index(self):
            """return the indices for the next minibatch"""
            if self._cur + self.batch_size > self.num_images:
                self._shuffle_index()
            next_index = self._perm[self._cur:self._cur + self.batch_size]
            self._cur += self.batch_size
            return next_index

        def get_minibatch(self, minibatch_db):
            """return minibatch data for train/test (the same parser is used for both)"""
            jobs = self.works.map(parse_data, minibatch_db)
            images_data = np.zeros([self.batch_size, self.im_shape[0], self.im_shape[1], 3])
            for index_job in range(len(jobs)):
                images_data[index_job, :, :, :] = jobs[index_job]
            return images_data

        def next_batch(self):
            """Get next batch of phone images and DSLR targets"""
            db_index = self._get_next_minbatch_index()
            minibatch_db = [self.image_list[i] for i in db_index]
            minibatch_db_t = [self.dslr_list[i] for i in db_index]
            images_data = self.get_minibatch(minibatch_db)
            dslr_data = self.get_minibatch(minibatch_db_t)
            return images_data, dslr_data


    if __name__ == "__main__":
        data_dir = r"F:\ranjiewen\TF_EnhanceDPED\data\dped"  # raw string so the backslashes are not treated as escapes
        train_loader = Dataloader(data_dir, "iphone", 32, True, [100, 100])
        test_loader = Dataloader(data_dir, "iphone", 32, False, [100, 100])
        for i in range(10):
            image_batch, label_batch = train_loader.next_batch()
            print(image_batch.shape, label_batch.shape)
            print("-------------------------------------------")
            image_batch, label_batch = test_loader.next_batch()
            print(image_batch.shape, label_batch.shape)
  • Original article: https://www.cnblogs.com/ranjiewen/p/10720007.html