zoukankan      html  css  js  c++  java
  • few-shot-learning for object detection

    github  https://github.com/LiuXinyu12378/few-shot-learning-for-object-detection

    train.py

    from __future__ import print_function
    import sys
    
    import time
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    import torch.optim as optim
    import torch.backends.cudnn as cudnn
    from torchvision import datasets, transforms
    from torch.autograd import Variable
    from tqdm import tqdm
    
    import dataset
    import random
    import math
    import os
    from utils import *
    from cfg import parse_cfg, cfg
    from darknet import Darknet
    import pdb
    
    # Training settings
    # The config paths used to come from the command line; they are now
    # hard-coded below and the original argv parsing is kept for reference.
    # datacfg = sys.argv[1]
    # darknetcfg = parse_cfg(sys.argv[2])
    # learnetcfg = parse_cfg(sys.argv[3])

    datacfg = "cfg/fewyolov3_voc.data"
    darknetcfg = parse_cfg("cfg/darknet_yolov3_spp.cfg")
    learnetcfg = parse_cfg("cfg/reweighting_net.cfg")
    weightfile = "tmp/000050.weights"
    # The weight file can still be overridden, but only when exactly four
    # command-line arguments are supplied (the legacy calling convention).
    if len(sys.argv) == 5:
        weightfile = sys.argv[4]

    data_options = read_data_cfg(datacfg)
    # The first parsed section of each cfg is used as that network's options.
    net_options = darknetcfg[0]
    meta_options = learnetcfg[0]

    # Configure options
    # Populate the shared `cfg` object from the three option dictionaries.
    cfg.config_data(data_options)
    cfg.config_meta(meta_options)
    cfg.config_net(net_options)

    # Parameters
    metadict = data_options['meta']
    # NOTE: `trainlist` is rebound further down to the result of
    # dataset.build_dataset(data_options).
    trainlist = data_options['train']

    testlist = data_options['valid']
    # NOTE: `backupdir` is rebound to cfg.backup in the MAIN section below.
    backupdir = data_options['backup']
    gpus = data_options['gpus']  # e.g. 0,1,2,3
    ngpus = len(gpus.split(','))
    num_workers = int(data_options['num_workers'])

    batch_size = int(net_options['batch'])
    print("batch_size:",batch_size)
    max_batches = int(net_options['max_batches'])
    learning_rate = float(data_options['learning_rate'])
    # `momentum` and `decay` are parsed here but are not passed to the
    # optimizer created at the end of this section.
    momentum = float(net_options['momentum'])
    decay = float(net_options['decay'])
    # Learning-rate schedule: batch counts at which the LR is rescaled, and the
    # multiplicative factor applied at each of those steps.
    steps = [float(step) for step in data_options['steps'].split(',')]
    scales = [float(scale) for scale in data_options['scales'].split(',')]

    # Train parameters
    use_cuda = True
    # Seed from the wall clock, so runs are not reproducible by default.
    seed = int(time.time())

    ## --------------------------------------------------------------------------
    ## MAIN
    backupdir = cfg.backup
    print('logging to ' + backupdir)
    if not os.path.exists(backupdir):
        os.makedirs(backupdir)

    torch.manual_seed(seed)
    if use_cuda:
        # Restrict the visible devices to the ones listed in the data config.
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
        torch.cuda.manual_seed(seed)

    model = Darknet(darknetcfg, learnetcfg)
    region_loss = model.loss

    model.print_network()
    # Weights are always loaded (the argv guard below is disabled).
    # if len(sys.argv) == 5:
    model.load_weights(weightfile)

    ###################################################
    ### Meta-model parameters
    # Keep the loss module's sample counter in sync with the loaded model.
    region_loss.seen = model.seen
    # When fine-tuning (cfg.tuning) the batch/epoch counters restart from zero;
    # otherwise they resume from the number of samples already seen.
    # NOTE(review): `/` is true division under Python 3, so these may be floats.
    processed_batches = 0 if cfg.tuning else model.seen / batch_size
    trainlist = dataset.build_dataset(data_options)
    nsamples = len(trainlist)
    init_width = model.width
    init_height = model.height
    init_epoch = 0 if cfg.tuning else model.seen / nsamples
    max_epochs = max_batches * batch_size / nsamples + 1
    max_epochs = int(math.ceil(cfg.max_epoch * 1. / cfg.repeat)) if cfg.tuning else max_epochs
    print(cfg.repeat, nsamples, max_batches, batch_size)
    print(num_workers)

    # Extra DataLoader keyword arguments used by train().
    kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}

    if use_cuda:
        if ngpus > 1:
            model = torch.nn.DataParallel(model).cuda()
        else:
            model = model.cuda()

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    
    def adjust_learning_rate(optimizer, processed_batches):
        """Apply the step-wise learning-rate schedule to every parameter group.

        Starting from the module-level ``learning_rate``, the rate is multiplied
        by ``scales[i]`` for each entry of the module-level ``steps`` list that
        ``processed_batches`` has reached (a missing scale counts as 1).
        Scanning stops at the first step not yet reached, or right after a step
        that equals ``processed_batches`` exactly.

        Args:
            optimizer: object exposing ``param_groups``, a list of dicts whose
                ``'lr'`` entry is overwritten.
            processed_batches: number of batches processed so far.

        Returns:
            The learning rate that was written into the optimizer.
        """
        lr = learning_rate
        for idx, step in enumerate(steps):
            if processed_batches < step:
                break
            factor = scales[idx] if idx < len(scales) else 1
            lr = lr * factor
            if processed_batches == step:
                break
        for group in optimizer.param_groups:
            group['lr'] = lr
        return lr
    
    
    def train(epoch):
        """Run one training epoch over the detection data and the meta data.

        For every detection batch one batch of meta inputs is drawn from the
        meta loader, the model is run on both, and the region loss is
        back-propagated. The module-level ``processed_batches`` counter and the
        ``seen`` counters on the model and on the loss are advanced as a side
        effect. Weights are written to ``backupdir`` whenever ``epoch + 1`` is a
        multiple of ``cfg.save_interval``.

        Args:
            epoch: zero-based index of the epoch being trained.
        """
        global processed_batches
        t0 = time.time()
        # With DataParallel the real network lives in `.module`.
        if ngpus > 1:
            cur_model = model.module
        else:
            cur_model = model

        train_loader = torch.utils.data.DataLoader(
            dataset.listDataset(trainlist, shape=(init_width, init_height),
                                shuffle=False,
                                transform=transforms.Compose([
                                    transforms.ToTensor(),
                                ]),
                                train=True,
                                seen=cur_model.seen,
                                batch_size=batch_size,
                                num_workers=num_workers),
            batch_size=batch_size, shuffle=False, **kwargs)

        metaset = dataset.MetaDataset(metafiles=metadict, train=True)
        metaloader = torch.utils.data.DataLoader(
            metaset,
            batch_size=metaset.batch_size,
            shuffle=False,
            num_workers=num_workers,
            pin_memory=True
        )
        metaloader = iter(metaloader)

        lr = adjust_learning_rate(optimizer, processed_batches)
        logging('epoch %d/%d, processed %d samples, lr %e' % (epoch, max_epochs, epoch * len(train_loader.dataset), lr))

        model.train()
        t1 = time.time()
        avg_time = torch.zeros(9)
        with tqdm(total=len(train_loader)) as t:

            for batch_idx, (data, target) in enumerate(train_loader):
                # Use the built-in next(): iterator objects only guarantee
                # __next__ on Python 3, the `.next()` method is a Python 2 relic.
                metax, mask = next(metaloader)
                t2 = time.time()
                adjust_learning_rate(optimizer, processed_batches)
                processed_batches = processed_batches + 1
                if use_cuda:
                    data = data.cuda()
                    metax = metax.cuda()
                    mask = mask.cuda()
                    # target= target.cuda()
                t3 = time.time()
                data, target = Variable(data), Variable(target)
                metax, mask = Variable(metax), Variable(mask)
                t4 = time.time()
                optimizer.zero_grad()
                t5 = time.time()
                output = model(data, metax, mask)
                t6 = time.time()
                # Keep the sample counters of the loss and the model in sync and
                # tell the loss the spatial size of the current input batch.
                region_loss.seen = region_loss.seen + data.data.size(0)
                cur_model.seen = region_loss.seen
                region_loss.input_size = (data.data.size(2), data.data.size(3))
                loss,loss_box,loss_conf,loss_cls,cls_acc,recall50,recall75,nProposals = region_loss(output, target)
                t.set_description('Epoch %d' % epoch)
                t.set_postfix(loss=loss.item(), loss_bbox=loss_box,loss_conf=loss_conf,loss_cls=loss_cls,
                              cls_acc=cls_acc, recall50=recall50, recall75=recall75,Proposals=nProposals)
                t.update()

                t7 = time.time()
                loss.backward()
                t8 = time.time()
                optimizer.step()
                t9 = time.time()
                # Per-stage timing report; disabled, flip `False` to profile.
                if False and batch_idx > 1:
                    avg_time[0] = avg_time[0] + (t2 - t1)
                    avg_time[1] = avg_time[1] + (t3 - t2)
                    avg_time[2] = avg_time[2] + (t4 - t3)
                    avg_time[3] = avg_time[3] + (t5 - t4)
                    avg_time[4] = avg_time[4] + (t6 - t5)
                    avg_time[5] = avg_time[5] + (t7 - t6)
                    avg_time[6] = avg_time[6] + (t8 - t7)
                    avg_time[7] = avg_time[7] + (t9 - t8)
                    avg_time[8] = avg_time[8] + (t9 - t1)
                    print('-------------------------------')
                    print('       load data : %f' % (avg_time[0] / (batch_idx)))
                    print('     cpu to cuda : %f' % (avg_time[1] / (batch_idx)))
                    print('cuda to variable : %f' % (avg_time[2] / (batch_idx)))
                    print('       zero_grad : %f' % (avg_time[3] / (batch_idx)))
                    print(' forward feature : %f' % (avg_time[4] / (batch_idx)))
                    print('    forward loss : %f' % (avg_time[5] / (batch_idx)))
                    print('        backward : %f' % (avg_time[6] / (batch_idx)))
                    print('            step : %f' % (avg_time[7] / (batch_idx)))
                    print('           total : %f' % (avg_time[8] / (batch_idx)))
                t1 = time.time()
            print('')
            t1 = time.time()
            logging('training with %f samples/s' % (len(train_loader.dataset) / (t1 - t0)))

            if (epoch + 1) % cfg.save_interval == 0:
                logging('save weights to %s/%06d.weights' % (backupdir, epoch + 1))
                cur_model.save_weights('%s/%06d.weights' % (backupdir, epoch + 1))
    
    # The epoch bounds computed above may be floats; range() needs integers.
    init_epoch, max_epochs = int(init_epoch), int(max_epochs)
    print("init_epoch:", init_epoch)
    print("max_epochs:", max_epochs)
    # Train from the resume point up to, but not including, max_epochs.
    for epoch in range(init_epoch, max_epochs):
        train(epoch)
    

    region_loss.py

    import time
    import torch
    import math
    import torch.nn as nn
    import torch.nn.functional as F
    import numpy as np
    from torch.autograd import Variable
    from utils import *
    from cfg import cfg
    from numbers import Number
    from random import random, randint
    import pdb
    
    
    def neg_filter(pred_boxes, target, withids=False):
        """Randomly drop negative rows to balance positives and negatives.

        A row of ``target`` counts as positive when its sum along dimension 1 is
        non-zero. With ``cfg.neg_ratio == 'full'`` nothing is dropped. With a
        numeric ``cfg.neg_ratio`` every positive row is kept and each negative
        row is kept with probability ``neg_ratio * n_pos / n_neg``; if that
        probability is >= 1, or there are no negative rows at all, everything
        is kept.

        Args:
            pred_boxes: tensor whose first dimension matches ``target``.
            target: tensor of targets, one row per entry of ``pred_boxes``.
            withids: when true, also return the indices of the kept rows.

        Returns:
            ``(pred_boxes, target)`` or ``(pred_boxes, target, inds)``; ``inds``
            is a list when everything is kept, otherwise a 1-D numpy array.
            The selection may be empty when no row is positive.

        Raises:
            NotImplementedError: if ``cfg.neg_ratio`` is neither ``'full'`` nor
                a number.
        """
        assert pred_boxes.size(0) == target.size(0)
        if cfg.neg_ratio == 'full':
            inds = list(range(pred_boxes.size(0)))
        elif isinstance(cfg.neg_ratio, Number):
            flags = (torch.sum(target, 1) != 0).cpu().tolist()
            num_pos = sum(flags)
            num_neg = len(flags) - num_pos
            # Without negatives there is nothing to sub-sample; this also
            # avoids dividing by zero when computing the keep ratio.
            if num_neg == 0:
                inds = list(range(pred_boxes.size(0)))
            else:
                ratio = cfg.neg_ratio * num_pos * 1. / num_neg
                if ratio >= 1:
                    inds = list(range(pred_boxes.size(0)))
                else:
                    # random() is only drawn for negative rows.
                    keep = [1 if f or random() <= ratio else 0 for f in flags]
                    # nonzero()[0] always yields a 1-D index array, so a single
                    # surviving row does not lose its leading dimension.
                    inds = np.nonzero(keep)[0]
                    pred_boxes, target = pred_boxes[inds], target[inds]
        else:
            raise NotImplementedError('neg_ratio not recognized')
        if withids:
            return pred_boxes, target, inds
        else:
            return pred_boxes, target
    
    
    def neg_filter_v2(pred_boxes, target, withids=False):
        """Randomly drop negative rows, always keeping at least one row.

        A row of ``target`` counts as positive when its sum along dimension 1 is
        non-zero. With ``cfg.neg_ratio == 'full'`` nothing is dropped. With a
        numeric ``cfg.neg_ratio`` every positive row is kept and each negative
        row is kept with probability ``neg_ratio * n_pos / n_neg``; if that
        probability is >= 1, or there are no negative rows at all, everything
        is kept. If the sampling would discard every row, one random row is
        kept so the result is never empty.

        Args:
            pred_boxes: tensor whose first dimension matches ``target``.
            target: tensor of targets, one row per entry of ``pred_boxes``.
            withids: when true, also return the indices of the kept rows.

        Returns:
            ``(pred_boxes, target)`` or ``(pred_boxes, target, inds)``; ``inds``
            is a list when everything is kept, otherwise a 1-D numpy array.

        Raises:
            NotImplementedError: if ``cfg.neg_ratio`` is neither ``'full'`` nor
                a number.
        """
        assert pred_boxes.size(0) == target.size(0)
        if cfg.neg_ratio == 'full':
            inds = list(range(pred_boxes.size(0)))
        elif isinstance(cfg.neg_ratio, Number):
            flags = (torch.sum(target, 1) != 0).cpu().tolist()
            num_pos = sum(flags)
            num_neg = len(flags) - num_pos
            # Without negatives there is nothing to sub-sample; this also
            # avoids dividing by zero when computing the keep ratio.
            if num_neg == 0:
                inds = list(range(pred_boxes.size(0)))
            else:
                ratio = cfg.neg_ratio * num_pos * 1. / num_neg
                if ratio >= 1:
                    inds = list(range(pred_boxes.size(0)))
                else:
                    # random() is only drawn for negative rows.
                    keep = [1 if f or random() <= ratio else 0 for f in flags]
                    if sum(keep) == 0:
                        # Never return an empty batch: keep one row at random.
                        keep[randint(0, len(keep) - 1)] = 1
                    inds = np.nonzero(keep)[0]
                    pred_boxes, target = pred_boxes[inds], target[inds]
        else:
            raise NotImplementedError('neg_ratio not recognized')
        if withids:
            return pred_boxes, target, inds
        else:
            return pred_boxes, target
    
    
    def build_targets(pred_boxes, target, conf, anchors, num_anchors, feature_size, input_size, ignore_thresh):
        """Build per-cell regression/objectness targets for one feature map.

        All tensors are created on (or moved to) the CUDA device.

        Args:
            pred_boxes: predicted boxes; viewed below as
                (nB, nA, feature_size[0], feature_size[1], 4).
            target: ground-truth tensor; viewed below as rows of 5 values, where
                column 0 is used as the class and columns 1-4 as normalised
                x, y, w, h (x/y are scaled by the feature size, w/h by the
                input size).
            conf: predicted confidences; viewed below as
                (nB, nA, feature_size[0], feature_size[1]).
            anchors: 2-column tensor, column 0 = anchor width, column 1 = height
                (presumably in input pixels, since they are compared with
                gw/gh -- TODO confirm).
            num_anchors: number of anchors (nA).
            feature_size: (height, width) of the feature map.
            input_size: (height, width) of the network input.
            ignore_thresh: anchors whose IoU with a ground-truth box exceeds
                this value are removed from the no-object mask.

        Returns:
            Tuple ``(nbox, iou_scores, obj_mask, noobj_mask, tx, ty, tw, th,
            tconf, tcls, detected50, detected75)``.
        """
        nB = target.size(0)
        nA = num_anchors
        # print('anchor_step: ', anchor_step)
        # obj_mask marks responsible cells; noobj_mask starts as "everything".
        obj_mask = torch.cuda.ByteTensor(nB, nA, feature_size[0], feature_size[1]).fill_(0)
        noobj_mask = torch.cuda.ByteTensor(nB, nA, feature_size[0], feature_size[1]).fill_(1)
        tx = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
        ty = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
        tw = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
        th = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
        tcls = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
        iou_scores = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()

        # Flatten to one row per box and keep only rows with a positive
        # column 3 (the width), i.e. drop padding entries.
        tboxes = target.view(-1, 5)
        nonzero_ind = tboxes[:, 3] > 0
        tboxes = tboxes[nonzero_ind.unsqueeze(1).repeat(1, 5)].view(-1, 5)
        # Batch index of every box. NOTE: the hard-coded 50 assumes each target
        # row holds exactly 50 boxes (50 * 5 values).
        ind_B = torch.linspace(0, nB - 1, nB).unsqueeze(1).repeat(1, 50).view(-1).long().cuda()
        ind_B = ind_B[nonzero_ind]
        # Box centres in feature-map cells, box sizes in input-size units.
        gx = (tboxes[:, 1] * feature_size[1]).float()
        gy = (tboxes[:, 2] * feature_size[0]).float()
        gw = (tboxes[:, 3] * input_size[1]).float()
        gh = (tboxes[:, 4] * input_size[0]).float()
        aw = anchors[:, 0]
        ah = anchors[:, 1]
        nbox = tboxes.size(0)
        # Centre both box sets at the origin so only width/height are compared.
        gt_box = torch.cat([torch.zeros(1, nbox).cuda(), torch.zeros(1, nbox).cuda(), gw.unsqueeze(0), gh.unsqueeze(0)], 0)
        anchor_box = torch.cat([torch.zeros(1, nA).cuda(), torch.zeros(1, nA).cuda(), aw.unsqueeze(0), ah.unsqueeze(0)], 0)
        # presumably `ious` is (nbox, nA) -- depends on utils.bbox_ious; TODO confirm
        ious = bbox_ious(gt_box.unsqueeze(2).repeat(1, 1, nA), anchor_box.unsqueeze(1).repeat(1, nbox, 1), x1y1x2y2=False)
        # Best-matching anchor for every ground-truth box.
        best_ious, best_a = ious.max(1)
        # Integer cell coordinates of each box centre.
        gj = gy.long()
        gi = gx.long()
        obj_mask[ind_B, best_a, gj, gi] = 1
        noobj_mask[ind_B, best_a, gj, gi] = 0

        # Anchors that overlap a box above the threshold are not penalised as
        # background in that box's cell.
        for i, iou in enumerate(ious):
            if (iou > ignore_thresh).sum():
                noobj_mask[ind_B[i:i + 1], (iou > ignore_thresh).nonzero().squeeze(1), gj[i:i + 1], gi[i:i + 1]] = 0

        # Regression targets: fractional offset inside the cell and log-ratio
        # of the box size to the matched anchor size.
        tx[ind_B, best_a, gj, gi] = gx - gx.floor()
        ty[ind_B, best_a, gj, gi] = gy - gy.floor()
        tw[ind_B, best_a, gj, gi] = torch.log(gw / anchors[best_a][:, 0])
        th[ind_B, best_a, gj, gi] = torch.log(gh / anchors[best_a][:, 1])
        tcls[ind_B, best_a, gj, gi] = tboxes[:, 0].float()
        tconf = obj_mask.float()
        pred_boxes = pred_boxes.contiguous().view(nB, nA, feature_size[0], feature_size[1], 4).cuda()
        conf = conf.contiguous().view(nB, nA, feature_size[0], feature_size[1]).data
        # Ground-truth boxes expressed in input-size units (x, y, w, h).
        target_boxes = torch.cat([(tboxes[:, 1] * input_size[1]).float().unsqueeze(0),
                                  (tboxes[:, 2] * input_size[0]).float().unsqueeze(0),
                                  gw.unsqueeze(0),
                                  gh.unsqueeze(0)], 0)

        # IoU between each responsible prediction and its ground-truth box.
        iou_scores[ind_B, best_a, gj, gi] = bbox_ious(pred_boxes[ind_B, best_a, gj, gi].t(), target_boxes, x1y1x2y2=False)
        # A box counts as detected when confidence > 0.5 and IoU exceeds the
        # respective threshold (0.5 / 0.75).
        conf50 = (conf[ind_B, best_a, gj, gi] > 0.5).float()
        detected50 = (iou_scores[ind_B, best_a, gj, gi] > 0.5).float() * conf50
        detected75 = (iou_scores[ind_B, best_a, gj, gi] > 0.75).float() * conf50

        return nbox, iou_scores, obj_mask, noobj_mask, tx, ty, tw, th, tconf, tcls, detected50, detected75
    
    
    class RegionLoss(nn.Module):
        """Single-scale YOLO region loss (legacy variant).

        NOTE(review): ``forward`` still calls ``build_targets`` with 11
        positional arguments and unpacks 11 results, whereas the
        ``build_targets`` defined next to this class takes 8 parameters and
        returns 12 values. That mismatch is not addressed here; confirm which
        ``build_targets`` this class is meant to be used with before relying on
        ``forward``.
        """

        def __init__(self, num_classes=0, anchors=[], num_anchors=1):
            """Store the loss hyper-parameters.

            Args:
                num_classes: number of object classes.
                anchors: flat list of anchor values.
                num_anchors: number of anchors described by ``anchors``.
            """
            super(RegionLoss, self).__init__()
            self.num_classes = num_classes
            self.anchors = anchors
            self.num_anchors = num_anchors
            # Integer division: anchor_step is used as a view() dimension in
            # forward(), which rejects floats under Python 3.
            self.anchor_step = len(anchors) // num_anchors
            self.coord_scale = 1
            self.noobject_scale = 1
            self.object_scale = 5
            self.class_scale = 1
            self.thresh = 0.6
            self.seen = 0

        def forward(self, output, target):
            """Compute the summed coordinate, confidence and class losses.

            Args:
                output: network output, viewed as (B, A, 5 + num_classes, H, W).
                target: ground-truth tensor; a 3-D target is flattened to 2-D.

            Returns:
                Scalar loss tensor.
            """
            # output : BxAs*(4+1+num_classes)*H*W
            if target.dim() == 3:
                target = target.view(-1, target.size(-1))
            output, target = neg_filter(output, target)

            t0 = time.time()
            nB = output.data.size(0)
            nA = self.num_anchors
            nC = self.num_classes
            nH = output.data.size(2)
            nW = output.data.size(3)

            output = output.view(nB, nA, (5 + nC), nH, nW)
            x = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW))
            y = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW))
            w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW)
            h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW)
            conf = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW))
            # [nB, nA, nC, nW, nH] | (bs, 5, 1, 13, 13)
            cls = output.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda()))
            cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(nB * nA * nH * nW, nC)

            t1 = time.time()

            # Decode predictions into boxes: cell offset + grid position, and
            # exp(size) * anchor size.
            pred_boxes = torch.cuda.FloatTensor(4, nB * nA * nH * nW)
            grid_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
            grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
            anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda()
            anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda()
            anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
            anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
            pred_boxes[0] = x.data + grid_x
            pred_boxes[1] = y.data + grid_y
            pred_boxes[2] = torch.exp(w.data) * anchor_w
            pred_boxes[3] = torch.exp(h.data) * anchor_h
            pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4))
            t2 = time.time()

            # NOTE(review): legacy build_targets signature, see class docstring.
            nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls = build_targets(pred_boxes,
                                                                                                        target.data,
                                                                                                        self.anchors, nA,
                                                                                                        nC,
                                                                                                        nH, nW,
                                                                                                        self.noobject_scale,
                                                                                                        self.object_scale,
                                                                                                        self.thresh,
                                                                                                        self.seen)
            cls_mask = (cls_mask == 1)
            if cfg.metayolo:
                tcls.zero_()
            # .item() instead of .data[0]: indexing a 0-dim tensor is an error
            # on current PyTorch.
            nProposals = int((conf > 0.25).float().sum().item())

            tx = Variable(tx.cuda())
            ty = Variable(ty.cuda())
            tw = Variable(tw.cuda())
            th = Variable(th.cuda())
            tconf = Variable(tconf.cuda())
            tcls = Variable(tcls.view(-1)[cls_mask].long().cuda())

            coord_mask = Variable(coord_mask.cuda())
            conf_mask = Variable(conf_mask.cuda().sqrt())
            cls_mask = Variable(cls_mask.view(-1, 1).repeat(1, nC).cuda())
            cls = cls[cls_mask].view(-1, nC)

            t3 = time.time()

            # reduction='sum' is the current spelling of size_average=False.
            mse_sum = nn.MSELoss(reduction='sum')
            loss_x = self.coord_scale * mse_sum(x * coord_mask, tx * coord_mask) / 2.0
            loss_y = self.coord_scale * mse_sum(y * coord_mask, ty * coord_mask) / 2.0
            loss_w = self.coord_scale * mse_sum(w * coord_mask, tw * coord_mask) / 2.0
            loss_h = self.coord_scale * mse_sum(h * coord_mask, th * coord_mask) / 2.0
            loss_conf = mse_sum(conf * conf_mask, tconf * conf_mask) / 2.0
            loss_cls = self.class_scale * nn.CrossEntropyLoss(reduction='sum')(cls, tcls)
            loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
            t4 = time.time()
            # Per-stage timing report; disabled, flip `False` to profile.
            if False:
                print('-----------------------------------')
                print('        activation : %f' % (t1 - t0))
                print(' create pred_boxes : %f' % (t2 - t1))
                print('     build targets : %f' % (t3 - t2))
                print('       create loss : %f' % (t4 - t3))
                print('             total : %f' % (t4 - t0))
            print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % (
                self.seen, nGT, nCorrect, nProposals, loss_x.item(), loss_y.item(), loss_w.item(), loss_h.item(),
                loss_conf.item(), loss_cls.item(), loss.item()))
            return loss
    
    
    class RegionLossV2(nn.Module):
        """
        Yolo region loss + Softmax classification across meta-inputs
        """

        def __init__(self, num_classes=0, anchors=[], num_anchors=1, input_size=(832, 832)):
            """Store the loss hyper-parameters.

            Args:
                num_classes: number of class channels per anchor in the output.
                anchors: flat list of anchor values; ``forward`` reads it in
                    slices of 6 values, one slice per feature scale.
                num_anchors: number of anchors per feature scale.
                input_size: (height, width) of the network input; the training
                    loop overwrites this attribute for every batch.
            """
            super(RegionLossV2, self).__init__()
            self.num_classes = num_classes
            self.anchors = anchors
            self.num_anchors = num_anchors
            self.coord_scale = 1
            self.class_scale = 1
            self.obj_scale = 1
            self.noobj_scale = 100
            self.thresh = 0.5
            self.seen = 0
            self.input_size = input_size
            # Down-sampling factor of each of the three detection heads.
            self.feature_scale = [32, 16, 8]
            print('class_scale', self.class_scale)

        def forward(self, output, target):
            """Compute the multi-scale detection loss and training statistics.

            Args:
                output: network output of shape (bs*cs, nA*(5+nC), N), where N
                    is the total number of cells over all feature scales.
                target: ground truth of shape (bs, cs, 50*5).

            Returns:
                Tuple ``(loss, loss_box, loss_conf, loss_cls, cls_acc, recall50,
                recall75, nProposals)``; ``loss`` is a tensor, the remaining
                entries are plain Python numbers.
            """
            # output : (bs*cs, nA*(5+1), N)
            # target : (bs, cs, 50*5)
            bs = target.size(0)
            cs = target.size(1)
            nA = self.num_anchors
            nC = self.num_classes
            N = output.data.size(2)

            # Class scores are compared across the cs meta-inputs: rearrange
            # them into one row of cs logits per (image, anchor, cell).
            cls = output.view(output.size(0), nA, (5 + nC), N)
            cls = cls.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda())).squeeze()
            cls = cls.view(bs, cs, nA * N).transpose(1, 2).contiguous().view(bs * nA * N, cs)
            cls_conf = F.softmax(cls, 1)
            _, cls_max_ids = torch.max(cls_conf, 1)
            cls_max_ids = cls_max_ids.data

            # Rearrange target and perform filtering operation
            target = target.view(-1, target.size(-1))
            output, target, inds = neg_filter_v2(output, target, withids=True)
            # Number of kept (image, class) rows belonging to each image.
            counts, _ = np.histogram(inds, bins=bs, range=(0, bs * cs))

            nB = output.data.size(0)

            output = output.view(nB, nA, (5 + nC), N)  # (nB, nA, (5+nC), N)
            x = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).squeeze(2))  # (nB, nA, N)
            y = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).squeeze(2))
            w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).squeeze(2)
            h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).squeeze(2)
            conf = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).squeeze(2))

            # Build, per feature scale, the cell grid, the stride and the
            # anchor sizes, then concatenate them along the cell dimension.
            pred_boxes = torch.cuda.FloatTensor(4, nB, nA, N)
            grid_x = []
            grid_y = []
            anchor_w = []
            anchor_h = []
            scale = []
            feature_size = []
            for fs in self.feature_scale:
                feature_h = self.input_size[0] // fs
                feature_w = self.input_size[1] // fs
                n_cells = feature_h * feature_w
                feature_size.append([feature_h, feature_w])
                grid_x.append(torch.linspace(0, feature_w - 1, feature_w).repeat(feature_h, 1)
                              .repeat(nB * nA, 1, 1).view(nB, nA, n_cells).cuda())
                grid_y.append(torch.linspace(0, feature_h - 1, feature_h).repeat(feature_w, 1).t()
                              .repeat(nB * nA, 1, 1).view(nB, nA, n_cells).cuda())
                scale.append((torch.ones(nB, nA, n_cells) * fs).cuda())
            grid_x = torch.cat(grid_x, 2)  # (nB, nA, N)
            grid_y = torch.cat(grid_y, 2)
            scale = torch.cat(scale, 2)
            for i in range(3):
                n_cells = feature_size[i][0] * feature_size[i][1]
                # Anchors of this scale as rows of (width, height).
                scale_anchors = torch.Tensor(self.anchors[6 * i:6 * (i + 1)]).view(nA, -1)
                aw = scale_anchors.index_select(1, torch.LongTensor([0])).cuda()
                ah = scale_anchors.index_select(1, torch.LongTensor([1])).cuda()
                anchor_w.append(aw.repeat(nB, n_cells).view(nB, nA, n_cells))
                anchor_h.append(ah.repeat(nB, n_cells).view(nB, nA, n_cells))
            anchor_w = torch.cat(anchor_w, 2)
            anchor_h = torch.cat(anchor_h, 2)
            # Decode predictions: centres are scaled by the stride, sizes by
            # the anchor.
            pred_boxes[0] = (x.data + grid_x) * scale
            pred_boxes[1] = (y.data + grid_y) * scale
            pred_boxes[2] = torch.exp(w.data) * anchor_w
            pred_boxes[3] = torch.exp(h.data) * anchor_h
            pred_boxes = convert2cpu(pred_boxes.permute(1, 2, 3, 0).contiguous())  # (nB, nA, N, 4)

            # Build targets for every feature scale and stitch them together.
            nGT = 0
            iou_scores = []
            obj_mask = []
            noobj_mask = []
            tx = []
            ty = []
            tw = []
            th = []
            tconf = []
            tcls = []
            start_N = 0
            detected50 = torch.zeros(0)
            detected75 = torch.zeros(0)
            for imap in range(3):
                n_cells = feature_size[imap][0] * feature_size[imap][1]
                (nGT, iou_scores_temp, obj_mask_temp, noobj_mask_temp, tx_temp, ty_temp, tw_temp, th_temp,
                 tconf_temp, tcls_temp, detected50_temp, detected75_temp) = build_targets(
                    pred_boxes[:, :, start_N:start_N + n_cells, :],
                    target.data.cuda(),
                    conf[:, :, start_N:start_N + n_cells],
                    torch.Tensor(self.anchors[6 * imap:6 * (imap + 1)]).view(nA, -1).cuda(),
                    nA,
                    feature_size[imap],
                    self.input_size,
                    self.thresh)
                # The accumulators are sized lazily once nGT is known.
                if not len(detected50):
                    detected50 = torch.zeros(nGT).cuda()
                if not len(detected75):
                    detected75 = torch.zeros(nGT).cuda()
                detected50 += detected50_temp
                detected75 += detected75_temp
                start_N += n_cells
                iou_scores.append(iou_scores_temp.view(nB, nA, n_cells))
                obj_mask.append(obj_mask_temp.view(nB, nA, n_cells))
                noobj_mask.append(noobj_mask_temp.view(nB, nA, n_cells))
                tx.append(tx_temp.view(nB, nA, n_cells))
                ty.append(ty_temp.view(nB, nA, n_cells))
                tw.append(tw_temp.view(nB, nA, n_cells))
                th.append(th_temp.view(nB, nA, n_cells))
                tconf.append(tconf_temp.view(nB, nA, n_cells))
                tcls.append(tcls_temp.view(nB, nA, n_cells))

            iou_scores = torch.cat(iou_scores, 2)
            obj_mask = torch.cat(obj_mask, 2)
            noobj_mask = torch.cat(noobj_mask, 2)
            tx = torch.cat(tx, 2)
            ty = torch.cat(ty, 2)
            tw = torch.cat(tw, 2)
            th = torch.cat(th, 2)
            tconf = torch.cat(tconf, 2)
            tcls = torch.cat(tcls, 2)

            # Take care of class mask
            # Collapse the kept rows of each image into one per-image mask and
            # class target; images with no kept row get all-zero entries.
            idx_start = 0
            cls_mask_list = []
            tcls_list = []
            for i in range(len(counts)):
                if counts[i] == 0:
                    cur_mask = torch.zeros(nA, N).cuda()
                    cur_tcls = torch.zeros(nA, N).cuda()
                else:
                    cur_mask = torch.sum(obj_mask[idx_start:idx_start + counts[i]].float(), dim=0)
                    cur_tcls = torch.sum(tcls[idx_start:idx_start + counts[i]], dim=0)
                cls_mask_list.append(cur_mask)
                tcls_list.append(cur_tcls)
                idx_start += counts[i]
            cls_mask = torch.stack(cls_mask_list)  # (bs, nA, N)
            tcls = torch.stack(tcls_list)

            # Only cells claimed by exactly one object take part in the
            # classification loss.
            cls_mask = (cls_mask == 1)
            detected50 = (detected50 > 0).float()
            detected75 = (detected75 > 0).float()
            recall50 = detected50.sum() / (nGT + 1e-16)
            recall75 = detected75.sum() / (nGT + 1e-16)
            nProposals = int((conf > 0.25).float().sum().item())
            tx = Variable(tx)
            ty = Variable(ty)
            tw = Variable(tw)
            th = Variable(th)
            tconf = Variable(tconf)

            obj_mask = Variable(obj_mask.bool())
            noobj_mask = Variable(noobj_mask.bool())
            cls = cls[Variable(cls_mask.view(-1, 1).repeat(1, cs))].view(-1, cs)
            cls_max_ids = cls_max_ids[cls_mask.view(-1)]
            tcls = Variable(tcls[cls_mask].long())
            cls_acc = float(torch.sum(cls_max_ids == tcls.data)) / (cls_max_ids.numel() + 1e-16)

            ClassificationLoss = nn.CrossEntropyLoss()
            MseLoss = nn.MSELoss()
            BceLoss = nn.BCELoss()

            loss_x = self.coord_scale * MseLoss(x[obj_mask], tx[obj_mask])
            loss_y = self.coord_scale * MseLoss(y[obj_mask], ty[obj_mask])
            loss_w = self.coord_scale * MseLoss(w[obj_mask], tw[obj_mask])
            loss_h = self.coord_scale * MseLoss(h[obj_mask], th[obj_mask])
            loss_conf_obj = BceLoss(conf[obj_mask], tconf[obj_mask])
            loss_conf_noobj = BceLoss(conf[noobj_mask], tconf[noobj_mask])
            loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
            # With no classified cell the class loss is a constant zero.
            if len(cls):
                loss_cls = self.class_scale * ClassificationLoss(cls, tcls)
            else:
                loss_cls = Variable(torch.Tensor([0]).float().cuda())

            loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
            return loss,loss_x.item() + loss_y.item() + loss_w.item() + loss_h.item(),loss_conf.item(),loss_cls.item(),cls_acc,recall50.item(),recall75.item(),nProposals
    
    
    def select_classes(pred, tgt, ids):
        """Restrict predictions and targets to a subset of classes.

        Keeps the samples whose target class appears in ``ids`` and relabels
        each of them with the position of its class inside ``ids``. The
        prediction columns are reduced to ``ids`` in the same order, so the new
        labels index the new columns.

        Samples of class ``ids[0]`` (new label 0) are kept, and a single
        matching sample still yields a 1-D index, so the outputs keep their
        leading dimension.

        Args:
            pred: 2-D tensor of per-class scores, one row per sample.
            tgt: 1-D tensor of class indices, one per sample.
            ids: sequence of distinct class indices to keep.

        Returns:
            ``(new_pred, new_tgt)`` where ``new_pred`` is a tensor of shape
            (n_kept, len(ids)) and ``new_tgt`` is a numpy integer array of
            length n_kept.
        """
        # convert tgt to numpy
        tgt = tgt.cpu().data.numpy().reshape(-1)
        # matches[s, k] is True when sample s belongs to class ids[k].
        matches = tgt[:, None] == np.asarray(ids)[None, :]
        idxes = np.nonzero(matches.any(axis=1))[0]
        new_tgt = matches[idxes].argmax(axis=1)
        new_pred = pred[idxes]
        new_pred = new_pred[:, ids]
        return new_pred, new_tgt
    

      

    多思考也是一种努力,做出正确的分析和选择,因为我们的时间和精力都有限,所以把时间花在更有价值的地方。
  • 相关阅读:
    第七、八章学习笔记
    sort
    团队作业
    Linux C语言编程
    myod
    第十章学习笔记
    团队作业二—《需求分析》
    学习笔记4
    学习笔记1
    学习笔记2
  • 原文地址:https://www.cnblogs.com/LiuXinyu12378/p/14821803.html
Copyright © 2011-2022 走看看