
Semantic Segmentation: Building Footprint Recognition

    Competition description and data download: 零基础入门语义分割-地表建筑物识别 (Beginner's Semantic Segmentation: Building Footprint Recognition), Tianchi Competition, Alibaba Cloud Tianchi (aliyun.com)

    Experiment Log

    1. Understanding the task and the baseline

    1) Backbone code

    main.py

    import numpy as np
    import pandas as pd
    import os
    import cv2, time
    from tqdm import tqdm
    import matplotlib.pyplot as plt
    import warnings
    warnings.filterwarnings('ignore')
    import albumentations as A
    import torch
    import torch.nn as nn
    import torch.utils.data as D
    import torchvision
    from rle import rle_encode,rle_decode
    from Tianchidataset import TianChiDataset
    from loss import loss_fn
    import argparse
    from torchvision import transforms as T
    
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    def get_model():
        # FCN with a ResNet-101 backbone, pretrained on COCO
        model = torchvision.models.segmentation.fcn_resnet101(pretrained=True)
    
        #     pth = torch.load("../input/pretrain-coco-weights-pytorch/fcn_resnet50_coco-1167a1af.pth")
        #     for key in ["aux_classifier.0.weight", "aux_classifier.1.weight", "aux_classifier.1.bias", "aux_classifier.1.running_mean", "aux_classifier.1.running_var", "aux_classifier.1.num_batches_tracked", "aux_classifier.4.weight", "aux_classifier.4.bias"]:
        #         del pth[key]
    
        # replace the final classifier layer with a single-channel (building vs. background) output
        model.classifier[4] = nn.Conv2d(512, 1, kernel_size=(1, 1), stride=(1, 1))
        return model
    
    
    @torch.no_grad()
    def validation(model, loader, loss_fn):
        losses = []
        model.eval()
        for image, target in loader:
            image, target = image.to(DEVICE), target.float().to(DEVICE)
            output = model(image)['out']
            loss = loss_fn(output, target)
            losses.append(loss.item())
    
        return np.array(losses).mean()
    
    def parse_args():
        parser = argparse.ArgumentParser(description='Train semantic segmentation network')
        parser.add_argument('--modelDir',
                            help='saved model path name',
                            default="./checkpoints/model_best.pth",
                            type=str)
        parser.add_argument('--data_path',
                            help='dataset path',
                            default='/home/dzh/Desktop/data/dataset/segmentation/tianchi',
                            type=str)
        parser.add_argument('--epoch',
                            help='total train epoch num',
                            default=30,
                            type=int)
        parser.add_argument('--batch_size',
                            help='training batch size',
                            default=160,
                            type=int)
        parser.add_argument('--image_size',
                            help='input image size (images are resized to this)',
                            default=256,
                            type=int)
        parser.add_argument('--gpu_ids',
                            help='comma-separated gpu ids, e.g. "0" or "0,1,2,3"',
                            default='0,1,2,3',
                            type=str)
        args = parser.parse_args()
        # turn the comma-separated string into a list of ints for DataParallel
        args.gpu_ids = [int(i) for i in args.gpu_ids.split(',')]
        return args
    
    
    def main():
        args = parse_args()
        #-------------------------- load data and set up augmentation ----------------------------
        train_mask = pd.read_csv(os.path.join(args.data_path, 'train_mask.csv'), sep='\t', names=['name', 'mask'])
        train_mask['name'] = train_mask['name'].apply(lambda x: os.path.join(args.data_path, 'train/') + x)
        # sanity check: encoding a decoded mask should reproduce the original RLE string
        mask = rle_decode(train_mask['mask'].iloc[0])
        print(rle_encode(mask) == train_mask['mask'].iloc[0])
    
        trfm = A.Compose([
            A.Resize(args.image_size, args.image_size),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.RandomRotate90(),
        ])
        dataset = TianChiDataset(
            train_mask['name'].values,
            train_mask['mask'].fillna('').values,
            trfm, False
        )
        valid_idx, train_idx = [], []
        for i in range(len(dataset)):
            if i % 7 == 0:
                valid_idx.append(i)
            elif i % 7 == 1:  # only ~1/7 of the data is used for training; change to `else:` to use the full split
                train_idx.append(i)
    
        train_ds = D.Subset(dataset, train_idx)
        valid_ds = D.Subset(dataset, valid_idx)
        # define training and validation data loaders
        loader = D.DataLoader(train_ds, batch_size=args.batch_size, shuffle=True, num_workers=0)
        vloader = D.DataLoader(valid_ds, batch_size=args.batch_size, shuffle=False, num_workers=0)
    
        #---------------------------- build the model and optimizer ------------------------------------
        model = get_model()
        model.to(DEVICE)
        model = torch.nn.DataParallel(model, device_ids=args.gpu_ids, output_device=0)
    
        optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-3)
        train_loss = []
        start_epoch = 0
        # resume from a checkpoint if one exists
        if os.path.exists(args.modelDir):
            checkpoint = torch.load(args.modelDir)
            model.load_state_dict(checkpoint['state_dict'])
            if 'epoch' in checkpoint:
                start_epoch = checkpoint['epoch']
            if 'optimizer' in checkpoint:
                optimizer.load_state_dict(checkpoint['optimizer'])
            if 'train_loss' in checkpoint:
                train_loss = checkpoint['train_loss']
            print("load model from {}".format(args.modelDir))
        else:
            print("==> no checkpoint found at '{}'".format(args.modelDir))
    
        # ---------------------------- training -----------------------------------
        header = r'''
                Train | Valid
        Epoch |  Loss |  Loss | Time, m
        '''
        #          Epoch         metrics            time
        raw_line = '{:6d}' + '\u2502{:7.3f}' * 2 + '\u2502{:6.2f}'
        print(header)
        best_loss = 10
    
        for epoch in range(start_epoch, args.epoch):
            losses = []
            start_time = time.time()
            model.train()
            for image, target in tqdm(loader):
                image, target = image.to(DEVICE), target.float().to(DEVICE)
                optimizer.zero_grad()
                output = model(image)['out']
                loss = loss_fn(output, target)
                loss.backward()
                optimizer.step()
                losses.append(loss.item())
                # print(loss.item())
    
            vloss = validation(model, vloader, loss_fn)
            print(raw_line.format(epoch, np.array(losses).mean(), vloss, (time.time() - start_time) / 60))
            train_loss.append(np.array(losses).mean())
            if vloss < best_loss:
                best_loss = vloss
                state={
                    'epoch':epoch,
                    'state_dict':model.state_dict(),
                    'optimizer':optimizer.state_dict(),
                    'train_loss':train_loss
    
                }
                torch.save(state,args.modelDir)
    
        plt.figure(figsize=(10, 5))
        plt.title("Loss During Training")
        plt.plot(train_loss, label="loss")
        plt.xlabel("iterations")
        plt.ylabel("Loss")
        plt.legend()
        plt.savefig('./loss.png')  # save before show(), which clears the current figure
        plt.show()
    #-------------------------------- inference on the test set -----------------------------------
    def valid():
        args = parse_args()
        trfm = T.Compose([
            T.ToPILImage(),
            T.Resize(args.image_size),
            T.ToTensor(),
            T.Normalize([0.625, 0.448, 0.688],
                        [0.131, 0.177, 0.101]),
        ])
        subm = []
        model = get_model()
        model.to(DEVICE)
        model = torch.nn.DataParallel(model, device_ids=args.gpu_ids, output_device=0)
        if os.path.exists(args.modelDir):
            checkpoint = torch.load(args.modelDir)
            model.load_state_dict(checkpoint['state_dict'])
            print("load model from {}".format(args.modelDir))
        model.eval()
        test_mask = pd.read_csv(os.path.join(args.data_path, 'test_a_samplesubmit.csv'), sep='\t', names=['name', 'mask'])
        test_mask['name'] = test_mask['name'].apply(lambda x: os.path.join(args.data_path,'test_a/') + x)
    
        for idx, name in enumerate(tqdm(test_mask['name'].iloc[:])):
            image = cv2.imread(name)
            image = trfm(image)
            with torch.no_grad():
                image = image.to(DEVICE)[None]
                score = model(image)['out'][0][0]
                score_sigmoid = score.sigmoid().cpu().numpy()
                score_sigmoid = (score_sigmoid > 0.5).astype(np.uint8)
                # nearest-neighbour resize back to 512x512 keeps the mask strictly binary
                score_sigmoid = cv2.resize(score_sigmoid, (512, 512), interpolation=cv2.INTER_NEAREST)
                # break
            subm.append([name.split('/')[-1], rle_encode(score_sigmoid)])
        subm = pd.DataFrame(subm)
        subm.to_csv('./tmp.csv', index=False, header=False, sep='\t')
        # plt.imsave('./output.png',rle_decode(subm[1].fillna('').iloc[0]), cmap='gray')
    
    if __name__ == '__main__':
    
        main()
        valid()
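
    To train and then predict with the defaults above (the paths are examples; point --data_path at your local copy of the competition data):

    python main.py --data_path /home/dzh/Desktop/data/dataset/segmentation/tianchi --epoch 30 --batch_size 160 --gpu_ids 0,1,2,3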
    

    Tianchidataset.py

    import torch.utils.data as D
    import cv2
    from torchvision import transforms as T
    from rle import rle_decode
    IMAGE_SIZE = 256
    class TianChiDataset(D.Dataset):
        def __init__(self, paths, rles, transform, test_mode=False):
            self.paths = paths
            self.rles = rles
            self.transform = transform
            self.test_mode = test_mode
    
            self.len = len(paths)
            self.as_tensor = T.Compose([
                T.ToPILImage(),
                T.Resize(IMAGE_SIZE),
                T.ToTensor(),
                T.Normalize([0.625, 0.448, 0.688],
                            [0.131, 0.177, 0.101]),
            ])
    
        # get data operation
        def __getitem__(self, index):
            img = cv2.imread(self.paths[index])
            if not self.test_mode:
                mask = rle_decode(self.rles[index])
                augments = self.transform(image=img, mask=mask)
                return self.as_tensor(augments['image']), augments['mask'][None]
            else:
                return self.as_tensor(img), ''  # no mask is available in test mode
    
        def __len__(self):
            """
            Total number of samples in the dataset
            """
            return self.len
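
    The Normalize statistics above (mean [0.625, 0.448, 0.688], std [0.131, 0.177, 0.101]) are dataset-specific rather than the usual ImageNet values. A minimal sketch of how such per-channel statistics can be estimated (illustrative; estimate_channel_stats is not part of the original code, and since cv2.imread returns BGR the channel order matches the dataset loader):

    import cv2
    import numpy as np

    def estimate_channel_stats(paths, sample=500):
        # per-channel mean/std over a sample of images, pixel values scaled to [0, 1]
        means, stds = [], []
        for p in paths[:sample]:
            img = cv2.imread(p).astype(np.float32) / 255.0  # BGR, same as TianChiDataset
            means.append(img.reshape(-1, 3).mean(axis=0))
            stds.append(img.reshape(-1, 3).std(axis=0))
        return np.mean(means, axis=0), np.mean(stds, axis=0)

    # e.g. estimate_channel_stats(train_mask['name'].values)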
    

    loss.py

    import torch.nn as nn
    class SoftDiceLoss(nn.Module):
        def __init__(self, smooth=1., dims=(-2, -1)):
            super(SoftDiceLoss, self).__init__()
            self.smooth = smooth
            self.dims = dims
    
        def forward(self, x, y):
            tp = (x * y).sum(self.dims)
            fp = (x * (1 - y)).sum(self.dims)
            fn = ((1 - x) * y).sum(self.dims)
    
            dc = (2 * tp + self.smooth) / (2 * tp + fp + fn + self.smooth)
            dc = dc.mean()
            return 1 - dc
    def loss_fn(y_pred, y_true):
        bce_fn = nn.BCEWithLogitsLoss()
        dice_fn = SoftDiceLoss()
        bce = bce_fn(y_pred, y_true)
        dice = dice_fn(y_pred.sigmoid(), y_true)
        return 0.8 * bce + 0.2 * dice
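
    A quick, illustrative sanity check of the combined BCE + Dice loss (not in the original code): a confidently correct prediction should drive the loss towards zero, while a confidently wrong one is heavily penalised:

    import torch
    from loss import loss_fn

    y_true = torch.randint(0, 2, (4, 1, 256, 256)).float()
    good_logits = (y_true * 2 - 1) * 10   # large-magnitude logits with the correct sign
    bad_logits = -good_logits
    print(loss_fn(good_logits, y_true).item())  # close to 0
    print(loss_fn(bad_logits, y_true).item())   # roughly 0.8 * 10 + 0.2 * 1 = 8.2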
    
    

    rle.py

    import numpy as np
    def rle_encode(im):
        '''
        im: numpy array, 1 - mask, 0 - background
        Returns the run-length encoding as a formatted string
        '''
        pixels = im.flatten(order = 'F')
        pixels = np.concatenate([[0], pixels, [0]])
        runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
        runs[1::2] -= runs[::2]
        return ' '.join(str(x) for x in runs)
    
    def rle_decode(mask_rle, shape=(512, 512)):
        '''
        mask_rle: run-length as a formatted string (start length)
        shape: (height,width) of array to return
        Returns numpy array, 1 - mask, 0 - background
    
        '''
        s = mask_rle.split()
        starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
        starts -= 1
        ends = starts + lengths
        img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
        for lo, hi in zip(starts, ends):
            img[lo:hi] = 1
        return img.reshape(shape, order='F')
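
    A minimal round-trip check (illustrative, not part of the original code) showing that the column-major run-length encoding is lossless for binary masks:

    import numpy as np
    from rle import rle_encode, rle_decode

    mask = np.zeros((512, 512), dtype=np.uint8)
    mask[100:200, 150:300] = 1              # a rectangular "building" footprint
    rle = rle_encode(mask)                  # "76901 100 77413 100 ..." (column-major, 1-based)
    assert (rle_decode(rle) == mask).all()  # decoding recovers the exact mask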
    

    2) Submitting test results

    1) Data augmentation methods

    A.Rotate(),
    A.ShiftScaleRotate(),
    A.Cutout(),
    # A.RandomScale(),
    

    This iteration adds the data augmentation methods listed above to the original program and switches the model to deeplabv3_resnet101; because the server was occupied, only this version was trained. The corresponding model change is sketched below.
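
    A minimal sketch of that change (illustrative: it mirrors the baseline's single-channel head replacement, noting that DeepLabHead's final conv takes 256 input channels rather than the FCN head's 512):

    import torch.nn as nn
    import torchvision

    def get_model():
        model = torchvision.models.segmentation.deeplabv3_resnet101(pretrained=True)
        # DeepLabHead ends in Conv2d(256, num_classes, 1); swap in a 1-channel output
        model.classifier[4] = nn.Conv2d(256, 1, kernel_size=(1, 1), stride=(1, 1))
        return model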
