zoukankan      html  css  js  c++  java
  • Pytorch版本yolov3源码阅读

    Pytorch版本yolov3源码阅读

    1. 阅读test.py

    1.1 参数解读

    # Command-line options of test.py. Options use a single leading dash
    # (e.g. `-batch_size 16`) and are read back as attributes of `opt`.
    parser = argparse.ArgumentParser()
    parser.add_argument('-batch_size', type=int, default=32, help='size of each image batch')
    parser.add_argument('-cfg', type=str, default='cfg/yolov3.cfg', help='path to model config file')
    parser.add_argument('-data_config_path', type=str, default='cfg/coco.data', help='path to data config file')
    parser.add_argument('-weights_path', type=str, default='checkpoints/yolov3.pt', help='path to weights file')
    parser.add_argument('-class_path', type=str, default='data/coco.names', help='path to class label file')
    parser.add_argument('-iou_thres', type=float, default=0.5, help='iou threshold required to qualify as detected')
    parser.add_argument('-conf_thres', type=float, default=0.5, help='object confidence threshold')
    parser.add_argument('-nms_thres', type=float, default=0.45, help='iou threshold for non-maximum suppression')
    parser.add_argument('-n_cpu', type=int, default=0, help='number of cpu threads to use during batch generation')
    parser.add_argument('-img_size', type=int, default=608, help='size of each image dimension')
    opt = parser.parse_args()
    # Echo the effective configuration so a run can be reproduced from its log.
    print(opt)
    
    • batch_size: 每个batch大小,跟darknet不太一样,没有subdivision
    • cfg: 网络配置文件
    • data_config_path: coco.data文件,存储相关信息
    • weights_path: 权重文件路径
    • class_path: 类别文件,注意类别的顺序,coco.names
    • iou_thres: iou阈值
    • conf_thres: 目标置信度阈值
    • nms_thres: 非极大抑制阈值
    • n_cpu: 使用多少个线程来创建batch
    • img_size: 设置初始图片大小

    1.2 data文件解析

    def parse_data_config(path):
        """Parse a ``key = value`` data configuration file into a dict.

        Blank lines and lines starting with ``#`` are ignored. Keys and values
        are stripped of surrounding whitespace and kept as strings.

        Args:
            path: Path of the data configuration file (e.g. ``cfg/coco.data``).

        Returns:
            dict mapping option names to string values. ``gpus`` and
            ``num_workers`` are pre-populated with defaults (``'0,1'`` and
            ``'10'``) that the file may override.

        Raises:
            ValueError: If a non-comment line contains no ``=``.
        """
        # Defaults; entries found in the file override them.
        options = {'gpus': '0,1', 'num_workers': '10'}
        with open(path, 'r') as fp:
            for line in fp:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                # Split on the first '=' only so that values may contain '='.
                key, value = line.split('=', 1)
                options[key.strip()] = value.strip()
        return options
    

    将data文件中内容存储到options这个dict中,获取的时候就可以对这个对象通过key进行提取value。

    1.3 cfg文件解析

    def parse_model_config(path):
        """Parses the yolo-v3 layer configuration file and returns module definitions

        Each ``[section]`` header starts a new dict whose ``'type'`` is the
        section name; the following ``key=value`` lines are stored in that dict
        as stripped strings. Blank lines and ``#`` comments are skipped.

        Args:
            path: Path of the ``.cfg`` file.

        Returns:
            list of dicts, one per section, in file order. ``convolutional``
            sections get a default ``'batch_normalize'`` of ``0`` (an int; values
            read from the file are strings).

        Raises:
            ValueError: If a non-header line contains no ``=``.
            IndexError: If a ``key=value`` line appears before any section header.
        """
        module_defs = []
        # Context manager so the file handle is always closed.
        with open(path, 'r') as file:
            for raw_line in file.read().split('\n'):
                # Strip first, then filter: indented comments and
                # whitespace-only lines must be skipped as well.
                line = raw_line.strip()
                if not line or line.startswith('#'):
                    continue
                if line.startswith('['):  # This marks the start of a new block
                    module_defs.append({'type': line[1:-1].rstrip()})
                    if module_defs[-1]['type'] == 'convolutional':
                        # Default for conv blocks that do not specify it.
                        module_defs[-1]['batch_normalize'] = 0
                else:
                    # Split on the first '=' only so values may contain '='.
                    key, value = line.split('=', 1)
                    module_defs[-1][key.rstrip()] = value.strip()

        return module_defs
    

    返回的module_defs存储的是所有的网络参数信息,一个list中套了很多个dict.

    1.4 根据cfg文件创建模块

    def create_modules(module_defs):
        """
        Constructs module list of layer blocks from module configuration in module_defs

        The first entry of ``module_defs`` is popped (the list is mutated in
        place) and treated as the network hyper-parameters; every remaining
        entry produces one ``nn.Sequential`` in the returned ``nn.ModuleList``.

        Args:
            module_defs: list of dicts as returned by ``parse_model_config``.

        Returns:
            (hyperparams, module_list): the popped hyper-parameter dict and the
            ``nn.ModuleList`` holding one ``nn.Sequential`` per remaining block.
        """
        # The first block holds the network hyper-parameters, not a layer.
        hyperparams = module_defs.pop(0)

        # output_filters[0] is the input channel count; output_filters[k + 1]
        # is the output channel count of layer k.
        output_filters = [int(hyperparams['channels'])]

        def channels_of(layer_idx):
            # Negative indices are relative to the current layer and can index
            # output_filters directly. Non-negative indices are absolute layer
            # numbers and must skip the seed entry, so that they refer to the
            # same layer as layer_outputs[layer_idx] in Darknet.forward.
            return output_filters[layer_idx + 1 if layer_idx >= 0 else layer_idx]

        module_list = nn.ModuleList()
        for i, module_def in enumerate(module_defs):
            # Sequential container: sub-modules run in the order they are added.
            modules = nn.Sequential()
            # Build the block according to its type.
            if module_def['type'] == 'convolutional':
                bn = int(module_def['batch_normalize'])
                filters = int(module_def['filters'])
                kernel_size = int(module_def['size'])
                pad = (kernel_size - 1) // 2 if int(module_def['pad']) else 0
                # add_module registers a child module under the given name.
                # The conv bias is dropped when batch norm follows.
                modules.add_module('conv_%d' % i, nn.Conv2d(in_channels=output_filters[-1],
                                                            out_channels=filters,
                                                            kernel_size=kernel_size,
                                                            stride=int(module_def['stride']),
                                                            padding=pad,
                                                            bias=not bn))
                if bn:
                    modules.add_module('batch_norm_%d' % i, nn.BatchNorm2d(filters))
                if module_def['activation'] == 'leaky':
                    modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1))

            elif module_def['type'] == 'upsample':
                # Nearest-neighbour upsampling by the configured stride.
                # `filters` is intentionally left unchanged.
                upsample = nn.Upsample(scale_factor=int(module_def['stride']), mode='nearest')
                modules.add_module('upsample_%d' % i, upsample)

            elif module_def['type'] == 'route':
                # e.g. layers = -1, 14
                layers = [int(x) for x in module_def['layers'].split(',')]
                # The referenced layers are concatenated along the channel
                # dimension in Darknet.forward (torch.cat(..., 1)), so the
                # output channel count is the sum of their channel counts.
                filters = sum(channels_of(layer_i) for layer_i in layers)
                # Placeholder only; the actual routing happens in Darknet.forward.
                modules.add_module('route_%d' % i, EmptyLayer())

            elif module_def['type'] == 'shortcut':
                # e.g. from yolov3.cfg:
                #   from=-3
                #   activation = linear
                # The `activation` entry is not read here.
                filters = channels_of(int(module_def['from']))
                # Placeholder only; the addition happens in Darknet.forward.
                modules.add_module('shortcut_%d' % i, EmptyLayer())

            elif module_def['type'] == 'yolo':
                anchor_idxs = [int(x) for x in module_def['mask'].split(',')]
                # Extract anchors: flat "w0,h0,w1,h1,..." list -> (w, h) pairs,
                # then keep only the pairs selected by the mask.
                anchors = [float(x) for x in module_def['anchors'].split(',')]
                anchors = [(anchors[k], anchors[k + 1]) for k in range(0, len(anchors), 2)]
                anchors = [anchors[k] for k in anchor_idxs]
                num_classes = int(module_def['classes'])
                img_height = int(hyperparams['height'])
                # Define detection layer
                yolo_layer = YOLOLayer(anchors, num_classes, img_height, anchor_idxs)
                modules.add_module('yolo_%d' % i, yolo_layer)

            # Register module list and number of output filters.
            # Blocks that do not set `filters` (upsample, yolo) record the
            # value left over from the previous block.
            module_list.append(modules)
            output_filters.append(filters)

        return hyperparams, module_list
    

    这里开始就涉及到pytorch部分的内容了:

    • module_list = nn.ModuleList(): 创建一个list,其中存放的是module
    • nn.Sequential(): 一个时序容器。Modules 会以他们传入的顺序被添加到容器中。当然,也可以传入一个OrderedDict
    • add_module(name,module):将一个 child module 添加到当前 module。被添加的module可以通过 name 属性来获取。

    1.5 YOLOLayer

    class YOLOLayer(nn.Module):
        """YOLO detection head for one output scale.

        Decodes a raw feature map into boxes, objectness and class scores at
        inference time, or computes the loss terms when targets are given.

        Args:
            anchors: iterable of (width, height) anchor pairs in pixels.
            nC: number of classes.
            img_dim: network input size taken from the cfg hyper-parameters.
            anchor_idxs: mask indices of the anchors used by this head; the
                first index selects the stride (2 * nA -> 32, nA -> 16,
                anything else -> 8).
        """

        def __init__(self, anchors, nC, img_dim, anchor_idxs):
            super(YOLOLayer, self).__init__()

            anchors = [(a_w, a_h) for a_w, a_h in anchors]  # (pixels)
            nA = len(anchors)

            self.anchors = anchors
            self.nA = nA  # number of anchors (3)
            self.nC = nC  # number of classes (80)
            self.bbox_attrs = 5 + nC  # x, y, w, h, conf + class scores
            self.img_dim = img_dim  # from hyperparams in cfg file, NOT from parser

            # The stride of this head is inferred from its first anchor index.
            if anchor_idxs[0] == (nA * 2):  # 6
                stride = 32
            elif anchor_idxs[0] == nA:  # 3
                stride = 16
            else:
                stride = 8

            # Build anchor grids: per-cell x / y offsets and anchors in grid units
            nG = int(self.img_dim / stride)
            self.grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).float()
            self.grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).float()
            self.scaled_anchors = torch.FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in anchors])
            self.anchor_w = self.scaled_anchors[:, 0:1].view((1, nA, 1, 1))
            self.anchor_h = self.scaled_anchors[:, 1:2].view((1, nA, 1, 1))

        def forward(self, p, targets=None, requestPrecision=False):
            """Decode predictions or compute the loss.

            Args:
                p: raw head output viewed as (bs, nA * (5 + nC), nG, nG).
                targets: ground-truth targets; when not None the training
                    branch runs and losses are returned.
                requestPrecision: when True (training only), corner-format
                    boxes are filled in before calling ``build_targets``.

            Returns:
                Inference (``targets is None``): tensor of shape
                (bs, nA * nG * nG, 5 + nC) holding box centre x/y and
                width/height multiplied by the stride, sigmoid objectness and
                the raw (un-activated) class scores.
                Training: the 14-tuple ``(loss, loss.item(), lx, ly, lw, lh,
                lconf, lcls, nT, TP, FP, FPe, FN, TC)``.
            """
            FT = torch.cuda.FloatTensor if p.is_cuda else torch.FloatTensor

            bs = p.shape[0]  # batch size
            nG = p.shape[2]  # number of grid points
            stride = self.img_dim / nG

            # Lazily move the cached grids/anchors to the GPU on first CUDA input
            if p.is_cuda and not self.grid_x.is_cuda:
                self.grid_x, self.grid_y = self.grid_x.cuda(), self.grid_y.cuda()
                self.anchor_w, self.anchor_h = self.anchor_w.cuda(), self.anchor_h.cuda()

            # (bs, nA * (5 + nC), nG, nG) --> (bs, nA, nG, nG, 5 + nC)
            # e.g. (12, 255, 13, 13) --> (12, 3, 13, 13, 85)
            p = p.view(bs, self.nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()  # prediction

            # Get outputs
            x = torch.sigmoid(p[..., 0])  # Center x
            y = torch.sigmoid(p[..., 1])  # Center y

            # Width and height (yolo method)
            w = p[..., 2]  # Width
            h = p[..., 3]  # Height
            width = torch.exp(w.data) * self.anchor_w
            height = torch.exp(h.data) * self.anchor_h

            # Width and height (power method)
            # w = torch.sigmoid(p[..., 2])  # Width
            # h = torch.sigmoid(p[..., 3])  # Height
            # width = ((w.data * 2) ** 2) * self.anchor_w
            # height = ((h.data * 2) ** 2) * self.anchor_h

            # Add offset and scale with anchors (in grid space, i.e. 0-13)
            # NOTE(review): FT(sizes) allocates without initialising; in the
            # training branch with requestPrecision=False this buffer is passed
            # to build_targets unfilled -- confirm build_targets ignores it then.
            pred_boxes = FT(bs, self.nA, nG, nG, 4)
            pred_conf = p[..., 4]  # Conf
            pred_cls = p[..., 5:]  # Class

            # Training
            if targets is not None:
                MSELoss = nn.MSELoss(size_average=True)
                BCEWithLogitsLoss = nn.BCEWithLogitsLoss(size_average=True)
                CrossEntropyLoss = nn.CrossEntropyLoss()

                if requestPrecision:
                    # Corner-format boxes (x1, y1, x2, y2) in grid units
                    gx = self.grid_x[:, :, :nG, :nG]
                    gy = self.grid_y[:, :, :nG, :nG]
                    pred_boxes[..., 0] = x.data + gx - width / 2
                    pred_boxes[..., 1] = y.data + gy - height / 2
                    pred_boxes[..., 2] = x.data + gx + width / 2
                    pred_boxes[..., 3] = y.data + gy + height / 2

                # Parenthesised so the call can span several lines.
                tx, ty, tw, th, mask, tcls, TP, FP, FN, TC = (
                    build_targets(pred_boxes, pred_conf, pred_cls, targets, self.scaled_anchors, self.nA, self.nC, nG,
                                  requestPrecision))
                tcls = tcls[mask]
                if x.is_cuda:
                    tx, ty, tw, th, mask, tcls = tx.cuda(), ty.cuda(), tw.cuda(), th.cuda(), mask.cuda(), tcls.cuda()

                # Mask outputs to ignore non-existing objects (but keep confidence predictions)
                nT = sum([len(x) for x in targets])  # number of targets
                nM = mask.sum().float()  # number of anchors (assigned to targets)
                nB = len(targets)  # batch size
                k = nM / nB
                if nM > 0:
                    lx = k * MSELoss(x[mask], tx[mask])
                    ly = k * MSELoss(y[mask], ty[mask])
                    lw = k * MSELoss(w[mask], tw[mask])
                    lh = k * MSELoss(h[mask], th[mask])

                    # lconf = k * BCEWithLogitsLoss(pred_conf[mask], mask[mask].float())
                    lconf = k * BCEWithLogitsLoss(pred_conf, mask.float())

                    lcls = k * CrossEntropyLoss(pred_cls[mask], torch.argmax(tcls, 1))
                    # lcls = k * BCEWithLogitsLoss(pred_cls[mask], tcls.float())
                else:
                    lx, ly, lw, lh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0]), FT([0]), FT([0])

                # Add confidence loss for background anchors (noobj)
                # lconf += k * BCEWithLogitsLoss(pred_conf[~mask], mask[~mask].float())

                # Sum loss components
                loss = lx + ly + lw + lh + lconf + lcls

                # Sum False Positives from unassigned anchors
                i = torch.sigmoid(pred_conf[~mask]) > 0.9
                if i.sum() > 0:
                    FP_classes = torch.argmax(pred_cls[~mask][i], 1)
                    FPe = torch.bincount(FP_classes, minlength=self.nC).float().cpu()  # extra FPs
                else:
                    FPe = torch.zeros(self.nC)

                # Parenthesised so the tuple can span several lines.
                return (loss, loss.item(), lx.item(), ly.item(), lw.item(), lh.item(), lconf.item(), lcls.item(),
                        nT, TP, FP, FPe, FN, TC)

            else:
                # Centre-format boxes (cx, cy, w, h) in grid units
                pred_boxes[..., 0] = x.data + self.grid_x
                pred_boxes[..., 1] = y.data + self.grid_y
                pred_boxes[..., 2] = width
                pred_boxes[..., 3] = height

                # If not in training phase return predictions
                # (boxes are multiplied by the stride; class scores stay raw)
                output = torch.cat((pred_boxes.view(bs, -1, 4) * stride,
                                    torch.sigmoid(pred_conf.view(bs, -1, 1)), pred_cls.view(bs, -1, self.nC)), -1)
                return output.data
    

    暂且放到这里,之后再做解析

    1.6 初始化模型

    model = Darknet(opt.cfg, opt.img_size)

    转到定义:

    class Darknet(nn.Module):
        """YOLOv3 object detection model

        Built from a cfg file: the parsed block definitions are turned into an
        nn.ModuleList by create_modules, and forward() walks both lists in
        lockstep.
        """
    
        def __init__(self, config_path, img_size=416):
            super(Darknet, self).__init__()
            self.module_defs = parse_model_config(config_path)
            # Override the cfg 'height' with the requested size before the first
            # block is consumed by create_modules (which pops it from the list).
            self.module_defs[0]['height'] = img_size
            self.hyperparams, self.module_list = create_modules(self.module_defs)
            self.img_size = img_size
            # Names for the values a YOLO layer returns after the loss tensor
            # in training mode, in order.
            self.loss_names = ['loss', 'x', 'y', 'w', 'h', 'conf', 'cls', 'nT', 'TP', 'FP', 'FPe', 'FN', 'TC']
    
        def forward(self, x, targets=None, requestPrecision=False):
            """Run the network.

            Returns the sum of the YOLO-layer losses when `targets` is given,
            otherwise the YOLO-layer detections concatenated along dim 1.
            Per-call statistics are accumulated in `self.losses`.
            """
            is_training = targets is not None
            output = []
            self.losses = defaultdict(float)
            # Output of every block so far, indexed by block position; used by
            # route and shortcut blocks.
            layer_outputs = []
    
            for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
                if module_def['type'] in ['convolutional', 'upsample']:
                    x = module(x)
                elif module_def['type'] == 'route':
                    # Concatenate the referenced layer outputs along the channel dim
                    layer_i = [int(x) for x in module_def['layers'].split(',')]
                    x = torch.cat([layer_outputs[i] for i in layer_i], 1)
                elif module_def['type'] == 'shortcut':
                    # Residual connection: previous output + referenced output
                    layer_i = int(module_def['from'])
                    x = layer_outputs[-1] + layer_outputs[layer_i]
                elif module_def['type'] == 'yolo':
                    # Train phase: get loss
                    if is_training:
                        # module[0] is the YOLOLayer inside the Sequential; its
                        # first return value is the loss tensor, the rest are
                        # accumulated under loss_names.
                        x, *losses = module[0](x, targets, requestPrecision)
                        for name, loss in zip(self.loss_names, losses):
                            self.losses[name] += loss
                    # Test phase: Get detections
                    else:
                        x = module(x)
                    output.append(x)
                layer_outputs.append(x)
    
            if is_training:
                # nT and TC were accumulated once per YOLO layer.
                # NOTE(review): the divisor 3 presumably assumes exactly three
                # YOLO layers in the cfg -- confirm.
                self.losses['nT'] /= 3
                self.losses['TC'] /= 3
                metrics = torch.zeros(4, len(self.losses['FPe']))  # TP, FP, FN, target_count
    
                # [1:] drops the smallest unique value.
                # NOTE(review): presumably the 0 used for "no target" -- confirm
                # against build_targets.
                ui = np.unique(self.losses['TC'])[1:]
                for i in ui:
                    j = self.losses['TC'] == float(i)
                    metrics[0, i] = (self.losses['TP'][j] > 0).sum().float()  # TP
                    metrics[1, i] = (self.losses['FP'][j] > 0).sum().float()  # FP
                    metrics[2, i] = (self.losses['FN'][j] == 3).sum().float()  # FN
                # Row 3 is computed before the extra false positives are added to row 1.
                metrics[3] = metrics.sum(0)
                metrics[1] += self.losses['FPe']
    
                # Replace the accumulated per-target values with scalar totals.
                self.losses['TP'] = metrics[0].sum()
                self.losses['FP'] = metrics[1].sum()
                self.losses['FN'] = metrics[2].sum()
                self.losses['TC'] = 0
                self.losses['metrics'] = metrics
    
            return sum(output) if is_training else torch.cat(output, 1)
    

    梳理一下属性值,以便更好理解:

    • module_defs: list类型,其中每个元素(module_def)是一个dict,存储cfg文件中一个block的配置
    • hyperparams: 超参数,整个网络需要的参数被存储到该属性中
    • module_list:整个网络所有的模型加载到pytorch中的nn.ModuleList()
    • loss_names: 有必要理解一下这里的loss中参数的含义
      • loss
      • x,y,w,h
      • conf
      • cls
      • nT
      • TP,FP,FPe,FN,TC

    loss参数含义还不是很明白,留坑,待填坑

    1.7 加载权重

    都知道,pytorch版的yolov3权重文件是.pt结尾的,darknet版本的yolov3权重文件是.weights结尾的。

    从下面的代码可以看出,这个版本同时支持加载.weights和.pt两种格式的权重文件。

    # Load weights
    # The file extension selects the loader; any other extension matches
    # neither branch, so no weights are loaded.
    if opt.weights_path.endswith('.weights'):  # darknet format
        load_weights(model, opt.weights_path)
    elif opt.weights_path.endswith('.pt'):  # pytorch format
        # map_location='cpu' loads the stored tensors onto the CPU.
        checkpoint = torch.load(opt.weights_path, map_location='cpu')
        # The checkpoint is a dict; the model state dict lives under 'model'.
        model.load_state_dict(checkpoint['model'])
        # Drop the reference to the loaded checkpoint once it has been applied.
        del checkpoint
    

    1.8 计算mAP

    # Evaluate the model on the dataloader and report a per-image AP plus
    # the running mean over all images processed so far.
    print('Compute mAP...')
    
    # NOTE(review): `correct` is re-initialised per sample below; `outputs`, `TP`,
    # `confidence`, `pred_class` and `target_class` are not used in this excerpt.
    correct = 0
    targets = None
    outputs, mAPs, TP, confidence, pred_class, target_class = [], [], [], [], [], []
    for batch_i, (imgs, targets) in enumerate(dataloader):
        imgs = imgs.to(device)
    
        # Inference only: no gradients needed
        with torch.no_grad():
            output = model(imgs)
            output = non_max_suppression(output, conf_thres=opt.conf_thres, nms_thres=opt.nms_thres)
    
        # Compute average precision for each sample
        for sample_i in range(len(targets)):
            # One 0/1 flag per detection of this sample, in confidence order
            correct = []
    
            # Get labels for sample where width is not zero (dummies)
            annotations = targets[sample_i]
            # Extract detections
            detections = output[sample_i]
    
            if detections is None:
                # If there are no detections but there are annotations mask as zero AP
                if annotations.size(0) != 0:
                    mAPs.append(0)
                continue
    
            # Get detections sorted by decreasing confidence scores
            detections = detections[np.argsort(-detections[:, 4])]
    
            # If no annotations add number of detections as incorrect
            if annotations.size(0) == 0:
                target_cls = []
                #correct.extend([0 for _ in range(len(detections))])
                mAPs.append(0)
                continue
            else:
                # Column 0 of an annotation row is the class, columns 1-4 the box
                target_cls = annotations[:, 0]
    
                # Extract target boxes as (x1, y1, x2, y2)
                target_boxes = xywh2xyxy(annotations[:, 1:5])
                # Scale the boxes by the network input size.
                # NOTE(review): assumes annotation boxes are normalised to [0, 1] -- confirm
                target_boxes *= opt.img_size
    
                # Indices of targets already matched, so each target counts once
                detected = []
                # Each row: leading box coordinates, then conf, obj_conf, obj_pred
                for *pred_bbox, conf, obj_conf, obj_pred in detections:
    
                    pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1)
                    # Compute iou with target boxes
                    iou = bbox_iou(pred_bbox, target_boxes)
                    # Extract index of largest overlap
                    best_i = np.argmax(iou)
                    # If overlap exceeds threshold and classification is correct mark as correct
                    if iou[best_i] > opt.iou_thres and obj_pred == annotations[best_i, 0] and best_i not in detected:
                        correct.append(1)
                        detected.append(best_i)
                    else:
                        correct.append(0)
    
            # Compute Average Precision (AP) per class
            AP = ap_per_class(tp=correct, conf=detections[:, 4], pred_cls=detections[:, 6], target_cls=target_cls)
    
            # Compute mean AP for this image
            mAP = AP.mean()
    
            # Append image mAP to list
            mAPs.append(mAP)
    
            # Print image mAP and running mean mAP
            # (the total, len(dataloader) * batch_size, is an upper bound when the last batch is smaller)
            print('+ Sample [%d/%d] AP: %.4f (%.4f)' % (len(mAPs), len(dataloader) * opt.batch_size, mAP, np.mean(mAPs)))
    
    print('Mean Average Precision: %.4f' % np.mean(mAPs))
    

    留坑,待填

    2. 阅读train.py

    2.1 参数解读

    # Command-line options of train.py (single-dash long options).
    parser = argparse.ArgumentParser()
    parser.add_argument('-epochs', type=int, default=68, help='number of epochs')
    parser.add_argument('-batch_size', type=int, default=12, help='size of each image batch')
    parser.add_argument('-data_config_path', type=str, default='cfg/coco.data', help='data config file path')
    parser.add_argument('-cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
    # 32 * 13 = 416
    parser.add_argument('-img_size', type=int, default=32 * 13, help='size of each image dimension')
    # NOTE(review): no `type` is given, so a value passed on the command line
    # arrives as a string; any non-empty string (including "False") is truthy.
    parser.add_argument('-resume', default=False, help='resume training flag')
    opt = parser.parse_args()
    print(opt)
    
    • epochs: 训练的epoch总数
    • batch_size: 设置batch
    • data_config_path: data文件位置
    • cfg: 记录cfg文件的位置
    • img_size: 设置图片大小
    • resume: 是否恢复训练(True or False)

    2.2 随机初始化

    # Seed every random number generator in use (Python, NumPy, PyTorch CPU
    # and, when CUDA is used, all GPUs) with the same fixed value.
    random.seed(0)
    np.random.seed(0)
    torch.manual_seed(0)
    if cuda:
        torch.cuda.manual_seed(0)
        torch.cuda.manual_seed_all(0)
        # NOTE(review): cudnn.benchmark lets cuDNN auto-select kernels, which
        # may not be deterministic between runs -- confirm whether exact
        # reproducibility is required despite the fixed seeds.
        torch.backends.cudnn.benchmark = True
    

    2.3 设置优化器

    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3,momentum=.9, weight_decay=5e-4, nesterov=True)
    

    使用SGD优化器,learning_rate=0.001,momentum=0.9,weight_decay=5e-4,使用nesterov动量

    2.4 更新优化器

    根据当前epoch来确定使用哪一个lr:

            # Update scheduler (automatic)
            # scheduler.step()
    
            # Update scheduler (manual)
            if epoch < 54:
                lr = 1e-3
            elif epoch < 61:
                lr = 1e-4
            else:
                lr = 1e-5
            for g in optimizer.param_groups:
                g['lr'] = lr
    

    学习率既可以通过scheduler自动更新,也可以像上面这样按epoch手动更新。

    2.5 loss指标

    • mean_precision:
                # Precision
                precision = metrics[0] / (metrics[0] + metrics[1] + 1e-16)
                k = (metrics[0] + metrics[1]) > 0
                if k.sum() > 0:
                    mean_precision = precision[k].mean()
                else:
                    mean_precision = 0
    
    • mean_recall:
                # Recall
                recall = metrics[0] / (metrics[0] + metrics[2] + 1e-16)
                k = (metrics[0] + metrics[2]) > 0
                if k.sum() > 0:
                    mean_recall = recall[k].mean()
                else:
                    mean_recall = 0
    

    然后将所有指标写到results.txt文件中

    2.6 checkpoint相关

    checkpoint参数:epoch, best_loss,model,optimizer

    latest.pt: 最新的权重文件

    best.pt: 当前最好的权重文件

            # Save latest checkpoint
            checkpoint = {'epoch': epoch,
                          'best_loss': best_loss,
                          'model': model.state_dict(),
                          'optimizer': optimizer.state_dict()}
            torch.save(checkpoint, 'checkpoints/latest.pt')
    
            # Save best checkpoint
            if best_loss == loss_per_target:
                os.system('cp checkpoints/latest.pt checkpoints/best.pt')
    
            # Save backup checkpoint
            if (epoch > 0) & (epoch % 5 == 0):
                os.system('cp checkpoints/latest.pt checkpoints/backup' + str(epoch) + '.pt')
    

    3. 阅读detect.py

    3.1 参数解读

    # Command-line options of detect.py (single-dash long options).
    parser.add_argument('-image_folder', type=str, default='data/samples', help='path to images')
    parser.add_argument('-output_folder', type=str, default='output', help='path to outputs')
    # NOTE(review): with type=bool argparse calls bool() on the raw string, so
    # any non-empty value -- including "False" -- is parsed as True. Only the
    # defaults behave as expected.
    parser.add_argument('-plot_flag', type=bool, default=True)
    parser.add_argument('-txt_out', type=bool, default=False)
    parser.add_argument('-cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
    parser.add_argument('-class_path', type=str, default='data/coco.names', help='path to class label file')
    parser.add_argument('-conf_thres', type=float, default=0.50, help='object confidence threshold')
    parser.add_argument('-nms_thres', type=float, default=0.45, help='iou threshold for non-maximum suppression')
    parser.add_argument('-batch_size', type=int, default=1, help='size of the batches')
    # 32 * 13 = 416
    parser.add_argument('-img_size', type=int, default=32 * 13, help='size of each image dimension')
    opt = parser.parse_args()
    print(opt)
    
    • image_folder: data/samples, 待检测的图片的文件夹
    • output_folder: output,结果输出文件
    • plot_flag: True or False, 添加bbox, 保存图片
    • txt_out: True or False, 是否保存图片检测结果
    • cfg: cfg文件路径
    • class_path: 类别名称文件位置
    • conf_thres, nms_thres: 目标检测置信度,非极大抑制阈值
    • batch_size: 一般设置为1,选用默认的即可
    • img_size: 设置加载图片时候的图片大小

    3.2 预测框的获取

            # Get detections
            with torch.no_grad():
                chip = torch.from_numpy(img).unsqueeze(0).to(device)
                pred = model(chip)
                pred = pred[pred[:, :, 4] > opt.conf_thres]
    
                if len(pred) > 0:
                    detections = non_max_suppression(pred.unsqueeze(0), opt.conf_thres, opt.nms_thres)
                    img_detections.extend(detections)
                    imgs.extend(img_paths)
    

    获取预测框,非极大值抑制。

    3.3 核心-迭代图片画出预测框

    # Iterate through images and save plot of detections
        for img_i, (path, detections) in enumerate(zip(imgs, img_detections)):
            print("image %g: '%s'" % (img_i, path))

            # The image shape is needed to undo the padding/rescaling even when
            # nothing is plotted, so the image is always loaded.
            img = cv2.imread(path)

            # The amount of padding that was added
            pad_x = max(img.shape[0] - img.shape[1], 0) * (opt.img_size / max(img.shape))
            pad_y = max(img.shape[1] - img.shape[0], 0) * (opt.img_size / max(img.shape))
            # Image height and width after padding is removed
            unpad_h = opt.img_size - pad_y
            unpad_w = opt.img_size - pad_x

            # Output paths are derived from the input file name. They are
            # computed up front so the image can still be saved when this
            # image has no detections.
            results_img_path = os.path.join(opt.output_folder, path.split('/')[-1])
            results_txt_path = results_img_path + '.txt'

            # Draw bounding boxes and labels of detections
            if detections is not None:
                unique_classes = detections[:, -1].cpu().unique()
                bbox_colors = random.sample(color_list, len(unique_classes))

                # Results are appended per detection below, so remove any
                # stale .txt file from a previous run first
                if os.path.isfile(results_txt_path):
                    os.remove(results_txt_path)

                # Per-class detection counts
                for i in unique_classes:
                    n = (detections[:, -1].cpu() == i).sum()
                    print('%g %ss' % (n, classes[int(i)]))

                for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
                    # Rescale coordinates to original dimensions
                    box_h = ((y2 - y1) / unpad_h) * img.shape[0]
                    box_w = ((x2 - x1) / unpad_w) * img.shape[1]
                    y1 = (((y1 - pad_y // 2) / unpad_h) * img.shape[0]).round().item()
                    x1 = (((x1 - pad_x // 2) / unpad_w) * img.shape[1]).round().item()
                    x2 = (x1 + box_w).round().item()
                    y2 = (y1 + box_h).round().item()
                    # Clamp to the top/left image border
                    x1, y1, x2, y2 = max(x1, 0), max(y1, 0), max(x2, 0), max(y2, 0)

                    # write to file: one line per box,
                    # "x1 y1 x2 y2 class score" with score = cls_conf * conf
                    if opt.txt_out:
                        with open(results_txt_path, 'a') as file:
                            file.write(('%g %g %g %g %g %g \n') % (x1, y1, x2, y2, cls_pred, cls_conf * conf))

                    if opt.plot_flag:
                        # Add the bbox to the plot
                        label = '%s %.2f' % (classes[int(cls_pred)], conf)
                        color = bbox_colors[int(np.where(unique_classes == int(cls_pred))[0])]
                        plot_one_box([x1, y1, x2, y2], img, label=label, color=color)

            if opt.plot_flag:
                # Save generated image with detections
                cv2.imwrite(results_img_path.replace('.bmp', '.jpg').replace('.tif', '.jpg'), img)
    
  • 相关阅读:
    给我30000出租车,还你一个不堵车的北京
    使用vim代替IDE
    (转)声明,函数与函数指针
    想恶作剧的请看过来
    bash命令提示符的更改
    (转)微软面试
    140个Google面试问题
    UTF8 GBK UTF8 GB2312 之间的区别和关系(转)
    MyBooksReadingStatic
    让SlickEdit 自动编译Keil C51工程
  • 原文地址:https://www.cnblogs.com/pprp/p/10152698.html
Copyright © 2011-2022 走看看