zoukankan      html  css  js  c++  java
  • pyfasterrcnn代码阅读3roidb.py

    roidb是比较复杂的数据结构,存放了数据集的roi信息。原始的roidb来自数据集,在train.py的get_training_roidb(imdb)函数中通过水平翻转扩充了数量,然后由prepare_roidb(imdb)【定义在roidb.py】为roidb添加了一些说明性的属性。

    在这里暂时记录下roidb的结构信息,后面继续看的时候可能会有些修正:

    roidb是由字典组成的list,roidb[img_index]包含了该图片索引所包含的roi信息,下面以roidb[img_index]为例说明:

    roidb[img_index]包含的key, value
    boxes box位置信息,box_num*4的np array
    gt_overlaps 所有box在不同类别上与gt box的重叠度(overlap),box_num*class_num的稀疏矩阵(使用时通过toarray()转为稠密数组)
    gt_classes 所有box的真实类别,box_num长度的list
    flipped 是否翻转
     image 该图片的路径,字符串
    width 图片的宽
    height  图片的高
    max_overlaps 每个box的在所有类别的得分最大值,box_num长度
    max_classes 每个box的得分最高所对应的类,box_num长度
    bbox_targets 每个box的类别,以及与最接近的gt-box的4个方位偏移

     参考iamzhangzhuping的博客,感谢!更多信息请移步iamzhangzhuping的博客

    下面是代码

    roidb.py
    
    import numpy as np
    from fast_rcnn.config import cfg
    from fast_rcnn.bbox_transform import bbox_transform
    from utils.cython_bbox import bbox_overlaps
    import PIL
    
    def prepare_roidb(imdb):
        """Enrich the imdb's roidb by adding some derived quantities that
        are useful for training.

        For every image entry this records the image path ('image') and the
        image size ('width', 'height'). For every ROI it also records the
        maximum of its ``gt_overlaps`` row taken over the class columns
        ('max_overlaps') and the class index that attains that maximum
        ('max_classes').

        The entries of ``imdb.roidb`` are modified in place; nothing is
        returned.

        Raises:
            AssertionError: if an ROI with zero max overlap is not assigned
                class 0 (background), or an ROI with positive max overlap is
                assigned class 0.
        """
        # `import PIL` alone does not load the Image submodule; import it
        # explicitly so PIL.Image.open does not rely on some other module
        # having imported it first.
        import PIL.Image

        # NOTE (from the original annotation): in the 'Stage 2 Fast R-CNN,
        # init from stage 2 RPN R-CNN model' stage the roidb is produced by
        # rpn_roidb(), so each image's boxes include the boxes from the
        # rpn_file in addition to the ground-truth boxes.
        roidb = imdb.roidb
        for i in range(len(imdb.image_index)):
            entry = roidb[i]
            image_path = imdb.image_path_at(i)

            # Only the image header is needed to learn the size. Opening the
            # file ourselves guarantees the handle is closed immediately
            # instead of lingering until garbage collection.
            with open(image_path, 'rb') as image_file:
                width, height = PIL.Image.open(image_file).size

            entry['image'] = image_path
            entry['width'] = width
            entry['height'] = height

            # gt_overlaps is stored sparse; a dense array is needed for
            # max/argmax along the class axis.
            gt_overlaps = entry['gt_overlaps'].toarray()
            # Max overlap with gt over classes (columns), one value per box.
            max_overlaps = gt_overlaps.max(axis=1)
            # Class that had the max overlap, one value per box.
            max_classes = gt_overlaps.argmax(axis=1)
            entry['max_classes'] = max_classes
            entry['max_overlaps'] = max_overlaps

            # Sanity checks.
            # Max overlap of 0 => class should be zero (background).
            zero_inds = np.where(max_overlaps == 0)[0]
            assert (max_classes[zero_inds] == 0).all()
            # Max overlap > 0 => class should not be zero (must be a fg class).
            nonzero_inds = np.where(max_overlaps > 0)[0]
            assert (max_classes[nonzero_inds] != 0).all()
    
    def add_bbox_regression_targets(roidb):
        """Add information needed to train bounding-box regressors.

        Stores a 'bbox_targets' array (built by _compute_targets) on every
        roidb entry, obtains per-class means and standard deviations of the
        four regression targets, and, when cfg.TRAIN.BBOX_NORMALIZE_TARGETS
        is set, normalizes the stored targets in place with them.

        Args:
            roidb: non-empty list of per-image dicts that has already been
                processed by prepare_roidb (the entries must provide 'boxes',
                'gt_overlaps', 'max_overlaps' and 'max_classes').

        Returns:
            A tuple (means, stds) of flattened arrays holding 4 values per
            class. They are needed at prediction time to un-normalize and
            un-center the regressor outputs.

        Raises:
            AssertionError: if roidb is empty or prepare_roidb has not been
                called on it.
        """
        assert len(roidb) > 0
        assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?'

        # Infer number of classes from the number of columns in gt_overlaps.
        num_classes = roidb[0]['gt_overlaps'].shape[1]
        for entry in roidb:
            # bbox_targets: per box, the class label followed by the four
            # regression offsets towards its assigned ground-truth box.
            entry['bbox_targets'] = _compute_targets(
                entry['boxes'], entry['max_overlaps'], entry['max_classes'])

        # NOTE (from the original annotation): this option is False in the
        # config the author was reading -- confirm against your own cfg.
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Use fixed / precomputed "means" and "stds" instead of
            # empirical values.
            means = np.tile(
                np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1))
            stds = np.tile(
                np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1))
        else:
            # Compute values needed for means and stds using
            # var(x) = E(x^2) - E(x)^2.
            # Per-class box counts; cfg.EPS avoids division by zero for
            # classes that never occur.
            class_counts = np.zeros((num_classes, 1)) + cfg.EPS
            # Per-class running sums of the four targets and their squares.
            sums = np.zeros((num_classes, 4))
            squared_sums = np.zeros((num_classes, 4))
            for entry in roidb:
                targets = entry['bbox_targets']
                # Class 0 (background) is skipped.
                for cls in range(1, num_classes):
                    cls_inds = np.where(targets[:, 0] == cls)[0]
                    if cls_inds.size > 0:
                        class_counts[cls] += cls_inds.size
                        sums[cls, :] += targets[cls_inds, 1:].sum(axis=0)
                        squared_sums[cls, :] += \
                                (targets[cls_inds, 1:] ** 2).sum(axis=0)

            means = sums / class_counts
            # Floating-point cancellation can leave a variance marginally
            # below zero; clamp it so sqrt never yields NaN.
            variances = squared_sums / class_counts - means ** 2
            stds = np.sqrt(np.maximum(variances, 0))

        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print('bbox target means:')
        print(means)
        print(means[1:, :].mean(axis=0))  # ignore bg class
        print('bbox target stdevs:')
        print(stds)
        print(stds[1:, :].mean(axis=0))  # ignore bg class

        # Normalize targets.
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS:
            print("Normalizing targets")
            for entry in roidb:
                # Same array object as stored in the entry, so the in-place
                # updates below modify the roidb directly.
                targets = entry['bbox_targets']
                for cls in range(1, num_classes):
                    cls_inds = np.where(targets[:, 0] == cls)[0]
                    targets[cls_inds, 1:] -= means[cls, :]
                    targets[cls_inds, 1:] /= stds[cls, :]
        else:
            print("NOT normalizing targets")

        # These values will be needed for making predictions
        # (the predictions will need to be unnormalized and uncentered).
        return means.ravel(), stds.ravel()
    
    def _compute_targets(rois, overlaps, labels):
        """Compute bounding-box regression targets for an image.

        Args:
            rois: array of boxes for a single image, one row per box.
            overlaps: per-box maximum overlap; boxes with overlap exactly 1
                are treated as the ground-truth boxes.
            labels: per-box class label.

        Returns:
            A float32 array with one row per box and 5 columns: the class
            label followed by the four values produced by bbox_transform.
            Rows of boxes whose overlap is below cfg.TRAIN.BBOX_THRESH stay
            all zero, as does the whole array when the image has no
            ground-truth box.
        """
        # Indices of ground-truth ROIs.
        gt_inds = np.where(overlaps == 1)[0]
        if len(gt_inds) == 0:
            # Bail if the image has no ground-truth ROIs: every box gets an
            # all-zero target row.
            return np.zeros((rois.shape[0], 5), dtype=np.float32)
        # Indices of examples for which we try to make predictions: only
        # ROIs whose overlap reaches the threshold are used as regression
        # training examples.
        ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

        # Get IoU overlap between each ex ROI and gt ROI.
        # np.float64 is the dtype the removed alias np.float referred to.
        ex_gt_overlaps = bbox_overlaps(
            np.ascontiguousarray(rois[ex_inds, :], dtype=np.float64),
            np.ascontiguousarray(rois[gt_inds, :], dtype=np.float64))

        # Find which gt ROI each ex ROI has max overlap with:
        # this will be the ex ROI's gt target. Rows of ex_gt_overlaps are
        # ex ROIs, columns are gt ROIs.
        gt_assignment = ex_gt_overlaps.argmax(axis=1)
        # One gt box per ex ROI, aligned row-for-row with ex_rois.
        gt_rois = rois[gt_inds[gt_assignment], :]
        ex_rois = rois[ex_inds, :]

        targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
        # Column 0 holds the label, columns 1-4 the regression offsets.
        targets[ex_inds, 0] = labels[ex_inds]
        targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
        return targets
  • 相关阅读:
    CSS3中制作倒影box-reflect
    JS中==与===区别
    CSS3-Animation
    CSS3-transform
    CSS3-transition
    CSS盒模型-box-sizing
    css兼容处理-hack
    CSS3-rgba
    CSS3-文本-word-wrap,word-break,white-space
    CSS3-文本-text-shadow
  • 原文地址:https://www.cnblogs.com/alanma/p/6803713.html
Copyright © 2011-2022 走看看