zoukankan      html  css  js  c++  java
  • pytorch model

    网络定义

    import torch as torch
    import torch.nn as nn
    class LeNet(nn.Module):
        """LeNet-style CNN built from three named ``nn.Sequential`` stages.

        ``layer1`` and ``layer2`` are each a 5x5 convolution followed by 2x2
        max pooling; ``layer3`` is a stack of three linear layers ending in 10
        outputs. No activation functions are inserted between the layers.
        ``fc1`` expects 16*5*5 features, which is what a 1x32x32 input yields
        after the two conv/pool stages.
        """

        def __init__(self):
            super().__init__()
            # (stage attribute, ((submodule name, submodule), ...)), listed in
            # the exact order the submodules are created and registered.
            stages = (
                ('layer1', (('conv1', nn.Conv2d(1, 6, 5)),
                            ('pool1', nn.MaxPool2d(2, 2)))),
                ('layer2', (('conv2', nn.Conv2d(6, 16, 5)),
                            ('pool2', nn.MaxPool2d(2, 2)))),
                ('layer3', (('fc1', nn.Linear(16 * 5 * 5, 120)),
                            ('fc2', nn.Linear(120, 84)),
                            ('fc3', nn.Linear(84, 10)))),
            )
            for stage_name, members in stages:
                stage = nn.Sequential()
                for member_name, member in members:
                    stage.add_module(member_name, member)
                setattr(self, stage_name, stage)

        def forward(self, x):
            """Run both conv stages, flatten per sample, then apply the linear head."""
            features = self.layer2(self.layer1(x))
            # Collapse everything except the batch dimension before the linear layers.
            flat = features.view(features.size(0), -1)
            return self.layer3(flat)
    
    # Sanity check: push one random 1x1x32x32 input through the network and
    # print both the module structure and the resulting output tensor.
    y = torch.randn(1,1,32,32)
    model = LeNet()
    out = model(y)
    print(model)
    print(out)
    

    输出如下:

    LeNet(
      (layer1): Sequential(
        (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
        (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
      (layer2): Sequential(
        (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
        (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
      (layer3): Sequential(
        (fc1): Linear(in_features=400, out_features=120, bias=True)
        (fc2): Linear(in_features=120, out_features=84, bias=True)
        (fc3): Linear(in_features=84, out_features=10, bias=True)
      )
    )
    tensor([[ 0.0211,  0.1407, -0.1831, -0.1182,  0.0221,  0.1467, -0.0523, -0.0663,
             -0.0351, -0.0434]], grad_fn=<AddmmBackward>)
    

    def set_bn_momentum(model, momentum=0.1):
        """Set the ``momentum`` attribute of every ``nn.BatchNorm2d`` inside ``model``.

        Args:
            model: module whose sub-modules (including itself) are scanned.
            momentum: value assigned to each matching layer's ``momentum``.

        Only ``nn.BatchNorm2d`` instances are touched; other layers are left as is.
        """
        for m in model.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.momentum = momentum

    def fix_bn(model):
        """Put every ``nn.BatchNorm2d`` inside ``model`` into eval mode.

        Only the BatchNorm2d layers are switched; the mode of all other modules
        is left unchanged. A later ``model.train()`` call switches these layers
        back to training mode, so call this after the model has been put into
        train mode.
        """
        for m in model.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()

    model.named_children() 返回直接子模块的 (名字, 模块) 对

    print("*"*50)
    # named_children() yields (name, module) pairs for the direct children only
    # (here: layer1, layer2, layer3), without descending into them.
    for name, module in model.named_children():
        print(name)
        print(module)
    

    打印如下:

    layer1
    Sequential(
      (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
      (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    layer2
    Sequential(
      (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
      (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    layer3
    Sequential(
      (fc1): Linear(in_features=400, out_features=120, bias=True)
      (fc2): Linear(in_features=120, out_features=84, bias=True)
      (fc3): Linear(in_features=84, out_features=10, bias=True)
    )
    

    可以用于forward,直接对输入遍历操作

      def forward(self, x):
          """Feed ``x`` through every direct child module in registration order.

          Each child's output becomes the next child's input, so this only
          works when the children can be chained directly (no reshaping is
          done between them).

          Returns:
              The output of the last child, or ``x`` unchanged if there are none.
          """
          for _name, module in self.named_children():
              x = module(x)
          # Without this return the module call would evaluate to None.
          return x
    

    model.modules() 可用于参数初始化

    print("#"*200)
    cnt = 0
    # NOTE: despite its name, `name` is a module object, not a string.
    # modules() yields the model itself first and then every nested sub-module,
    # so cnt ends up as the total number of modules in the tree.
    for name in model.modules():
        cnt += 1
        print('-------------------------------------------------------cnt=',cnt)
        print(name)
    

    输出如下:

    ########################################################################################################################################################################################################
    -------------------------------------------------------cnt= 1
    LeNet(
      (layer1): Sequential(
        (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
        (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
      (layer2): Sequential(
        (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
        (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
      (layer3): Sequential(
        (fc1): Linear(in_features=400, out_features=120, bias=True)
        (fc2): Linear(in_features=120, out_features=84, bias=True)
        (fc3): Linear(in_features=84, out_features=10, bias=True)
      )
    )
    -------------------------------------------------------cnt= 2
    Sequential(
      (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
      (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    -------------------------------------------------------cnt= 3
    Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    -------------------------------------------------------cnt= 4
    MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    -------------------------------------------------------cnt= 5
    Sequential(
      (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
      (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    -------------------------------------------------------cnt= 6
    Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    -------------------------------------------------------cnt= 7
    MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    -------------------------------------------------------cnt= 8
    Sequential(
      (fc1): Linear(in_features=400, out_features=120, bias=True)
      (fc2): Linear(in_features=120, out_features=84, bias=True)
      (fc3): Linear(in_features=84, out_features=10, bias=True)
    )
    -------------------------------------------------------cnt= 9
    Linear(in_features=400, out_features=120, bias=True)
    -------------------------------------------------------cnt= 10
    Linear(in_features=120, out_features=84, bias=True)
    -------------------------------------------------------cnt= 11
    Linear(in_features=84, out_features=10, bias=True)
    

    model.modules()用于参数初始化

    # Walk every module of the model (the model itself included), print it, and
    # re-initialise the layers we care about along the way.
    cnt = 0
    for module in model.modules():
        cnt += 1
        print('-------------------------------------------------------cnt=', cnt)
        print(module)

        if isinstance(module, nn.Conv2d):
            # Dump the convolution parameters as they are *before* re-initialisation.
            print('------------------isinstance(name, nn.Conv2d)------------------')
            print(module.weight)
            print(module.bias)
            print('--end----------------isinstance(name, nn.Conv2d)------------end------')
            # Only the weight is re-initialised; the bias keeps its current values.
            nn.init.kaiming_normal_(module.weight)
            continue

        if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
            # Normalisation layers start as an identity transform: scale 1, shift 0.
            nn.init.constant_(module.weight, 1)
            nn.init.constant_(module.bias, 0)
    

    其中参数部分输出如下:

    ------------------isinstance(name, nn.Conv2d)------------------
    Parameter containing:
    tensor([[[[-0.1561, -0.0194, -0.0260, -0.0042,  0.1716],
              [ 0.1181, -0.1380, -0.0448,  0.0674, -0.1972],
              [-0.0197,  0.0359,  0.1186,  0.0876, -0.0395],
              [-0.0619,  0.0095, -0.0702,  0.0122,  0.1573],
              [ 0.1170,  0.1758, -0.1655,  0.1489, -0.0956]]],
           ...
      [[[-0.1337, -0.0562, -0.0624,  0.0885, -0.0640],
              [-0.0302, -0.1192, -0.0637,  0.0083,  0.0181],
              [ 0.1388, -0.1690,  0.1132,  0.1686, -0.1189],
              [-0.0246, -0.1649, -0.1817, -0.0330, -0.0430],
              [ 0.0672, -0.0671,  0.0469,  0.1284,  0.1420]]]], requires_grad=True)
    Parameter containing:
    tensor([ 0.0548,  0.0547,  0.1328, -0.0452,  0.1668, -0.1915],
           requires_grad=True)
    --end----------------isinstance(name, nn.Conv2d)------------end------
    

    model.modules()用于设置bn参数和冻结bn

    def set_bn_momentum(model, momentum=0.1):
        """Assign ``momentum`` to each ``nn.BatchNorm2d`` found anywhere in ``model``."""
        bn_layers = [layer for layer in model.modules() if isinstance(layer, nn.BatchNorm2d)]
        for bn in bn_layers:
            bn.momentum = momentum
    
    def fix_bn(model):
        """Switch each ``nn.BatchNorm2d`` found anywhere in ``model`` to eval mode."""
        for layer in model.modules():
            if not isinstance(layer, nn.BatchNorm2d):
                continue
            layer.eval()
    

    其他的可以参考:

    https://blog.csdn.net/MrR1ght/article/details/105246412
    model.children():返回模型直接子模块的迭代器(不递归,不包含当前模块)
    model.modules():返回模型的所有模块的迭代器(递归遍历,不仅仅是子模块,还包含当前模块)
    model.named_children():返回直接子模块的迭代器,每一项为 (名字, 模块)
    model.named_modules():返回所有模块(包含当前模块)的迭代器,每一项为 (名字, 模块)

    model.parameters() 与 torch.optim.SGD(params, lr, momentum=0, dampening=0, weight_decay=0, nesterov=False)

    参数:

    params (iterable) – 待优化参数的iterable或者是定义了参数组的dict
    lr (float) – 学习率
    momentum (float, 可选) – 动量因子(默认:0)
    weight_decay (float, 可选) – 权重衰减(L2惩罚)(默认:0)
    dampening (float, 可选) – 动量的抑制因子(默认:0)
    nesterov (bool, 可选) – 使用Nesterov动量(默认:False)
    

    例子:

    # One optimisation step. `loss_fn`, `input` and `target` are not defined in
    # this snippet and must be supplied by the surrounding training code.
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
    # Clear gradients left over from a previous step before back-propagating.
    optimizer.zero_grad()
    # Forward pass, loss computation and backward pass in one expression.
    loss_fn(model(input), target).backward()
    # Apply the parameter update using the gradients just computed.
    optimizer.step()

    这里对model.parameters()比较好奇
    于是我打印:

    # parameters() returns a generator, so this prints the generator object
    # itself rather than the parameter tensors.
    print(model.parameters())
    

    打印出这玩意:
    <generator object Module.parameters at 0x7f1d2272d728>
    这其实是一个生成器(generator),需要遍历或解包才能看到其中的内容,于是我再这样打印:

    # Unpacking with * consumes the generator and passes every Parameter to
    # print() as a separate argument.
    print(*model.parameters())
    

    这回输出一大串数字:部分如下:

    Parameter containing:
    tensor([[[[-0.1751,  0.1829,  0.1973,  0.0780,  0.1220],
              [-0.0497,  0.0943,  0.0827,  0.1829,  0.0239],
              [-0.1044,  0.1268,  0.0716, -0.0100,  0.1991],
              [-0.0730,  0.1762, -0.0787,  0.0686, -0.0069],
              [ 0.1316,  0.0897, -0.1068,  0.0744,  0.0524]]],
    
            [[[-0.1034, -0.1946, -0.1312,  0.1076,  0.0129],
              [ 0.0450,  0.0552,  0.1448, -0.1283, -0.1868],
              [-0.0260, -0.1928,  0.0519, -0.0493, -0.1028],
              [-0.0936,  0.1719, -0.0997,  0.0008,  0.0871],
              [ 0.0995, -0.1274,  0.0388,  0.0779,  0.0006]]],
    
            [[[ 0.1846, -0.0723,  0.0649, -0.0169, -0.1595],
              [ 0.0145, -0.1893,  0.0784, -0.0886, -0.0044],
              [ 0.1914, -0.1009, -0.0736, -0.0992, -0.1618],
              [-0.0291,  0.0997,  0.0549,  0.1267, -0.1661],
              [-0.1333,  0.0168,  0.0648,  0.1047, -0.1506]]],
                ...
         -4.0503e-03,  9.4014e-02, -8.5686e-02,  7.7082e-02]],
           requires_grad=True) Parameter containing:
    tensor([-0.0106,  0.0448, -0.0001, -0.0914, -0.0310, -0.0628,  0.0899, -0.0047,
            -0.0390, -0.0291], requires_grad=True)
    

    自定义参数

    # Two parameter groups with their own learning rates: the backbone is
    # trained at 0.1x the base rate, the classifier at the full base rate.
    # The keyword arguments after the list (lr, momentum, weight_decay) act as
    # defaults for any option a group does not set itself.
    optimizer = torch.optim.SGD(params=[
            {'params': model.backbone.parameters(), 'lr': 0.1*opts.lr},
            {'params': model.classifier.parameters(), 'lr': opts.lr},
        ], lr=opts.lr, momentum=0.9, weight_decay=opts.weight_decay)
    
    

    还看到另外的写法:

        def get_1x_lr_params(self):
            """Yield the trainable parameters of ``self.backbone``.

            With ``self.freeze_bn`` set, only ``nn.Conv2d`` parameters are
            yielded; otherwise the parameters of ``SynchronizedBatchNorm2d`` and
            ``nn.BatchNorm2d`` layers are yielded as well. Parameters whose
            ``requires_grad`` is False are always skipped.

            Yields:
                Parameters in module traversal order.
            """
            # Batch-norm layers are left out of the optimiser when BN is frozen.
            if self.freeze_bn:
                trainable_types = (nn.Conv2d,)
            else:
                trainable_types = (nn.Conv2d, SynchronizedBatchNorm2d, nn.BatchNorm2d)

            for root in [self.backbone]:
                for _name, module in root.named_modules():
                    if not isinstance(module, trainable_types):
                        continue
                    for p in module.parameters():
                        if p.requires_grad:
                            yield p
    
        def get_10x_lr_params(self):
            """Yield the trainable parameters of ``self.aspp`` and ``self.decoder``.

            With ``self.freeze_bn`` set, only ``nn.Conv2d`` parameters are
            yielded; otherwise the parameters of ``SynchronizedBatchNorm2d`` and
            ``nn.BatchNorm2d`` layers are yielded as well. Parameters whose
            ``requires_grad`` is False are always skipped.

            Yields:
                Parameters of ``self.aspp`` first, then those of ``self.decoder``.
            """
            # Batch-norm layers are left out of the optimiser when BN is frozen.
            if self.freeze_bn:
                trainable_types = (nn.Conv2d,)
            else:
                trainable_types = (nn.Conv2d, SynchronizedBatchNorm2d, nn.BatchNorm2d)

            for root in [self.aspp, self.decoder]:
                for _name, module in root.named_modules():
                    if not isinstance(module, trainable_types):
                        continue
                    for p in module.parameters():
                        if p.requires_grad:
                            yield p
    
    # Two parameter groups: the first uses the base learning rate, the second
    # uses ten times the base learning rate.
    train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
                            {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]
    
    # Define the optimizer. No global lr is passed here; each group carries its own.
    optimizer = torch.optim.SGD(train_params, momentum=args.momentum,
                           weight_decay=args.weight_decay, nesterov=args.nesterov)
    

    打印网络总参数量

     # Print the shape and element count of every parameter tensor, followed by
     # the total number of parameters in the model.
     params = list(model.parameters())
     k = 0  # running total over all parameter tensors
     for i in params:
         # numel() is the product of all dimensions of the tensor.
         l = i.numel()
         print("该层的结构:" + str(list(i.size())))
         print("该层参数和:" + str(l))
         k = k + l
     print("总参数数量和:" + str(k))
    

    打印如下:

    该层参数和:256
    该层的结构:[256, 2048, 3, 3]
    该层参数和:4718592
    该层的结构:[256]
    该层参数和:256
    该层的结构:[256]
    该层参数和:256
    该层的结构:[256, 2048, 1, 1]
    该层参数和:524288
    该层的结构:[256]
    该层参数和:256
    该层的结构:[256]
    该层参数和:256
    该层的结构:[256, 1280, 1, 1]
    该层参数和:327680
    该层的结构:[256]
    该层参数和:256
    该层的结构:[256]
    该层参数和:256
    该层的结构:[256, 304, 3, 3]
    该层参数和:700416
    该层的结构:[256]
    该层参数和:256
    该层的结构:[256]
    该层参数和:256
    该层的结构:[26, 256, 1, 1]
    该层参数和:6656
    该层的结构:[26]
    该层参数和:26
    总参数数量和:58755258
    

    net.parameters() net.named_parameters() 显示网络参数

    # Despite the plural name, `parameters` is a single Parameter per iteration;
    # this prints the full values of each one.
    for parameters in net.parameters():
        print(parameters)
    

    输出如下:

    Parameter containing:
    tensor([[[[-0.0104, -0.0555,  0.1417],
              [-0.3281, -0.0367,  0.0208],
              [-0.0894, -0.0511, -0.1253]]],
    
    
            [[[-0.1724,  0.2141, -0.0895],
              [ 0.0116,  0.1661, -0.1853],
              [-0.1190,  0.1292, -0.2451]]],
    

    2

    # named_parameters() yields (dotted name, Parameter) pairs; print only the
    # name and shape instead of the full values.
    for name,parameters in net.named_parameters():
        print(name,':',parameters.size())
    

    输出如下:

    module.backbone.conv1.weight : torch.Size([64, 3, 7, 7])
    module.backbone.bn1.weight : torch.Size([64])
    module.backbone.bn1.bias : torch.Size([64])
    module.backbone.layer1.0.conv1.weight : torch.Size([64, 64, 1, 1])
    module.backbone.layer1.0.bn1.weight : torch.Size([64])
    module.backbone.layer1.0.bn1.bias : torch.Size([64])
    module.backbone.layer1.0.conv2.weight : torch.Size([64, 64, 3, 3])
    module.backbone.layer1.0.bn2.weight : torch.Size([64])
    module.backbone.layer1.0.bn2.bias : torch.Size([64])
    module.backbone.layer1.0.conv3.weight : torch.Size([256, 64, 1, 1])
    module.backbone.layer1.0.bn3.weight : torch.Size([256])
    module.backbone.layer1.0.bn3.bias : torch.Size([256])
    module.backbone.layer1.0.downsample.0.weight : torch.Size([256, 64, 1, 1])
    module.backbone.layer1.0.downsample.1.weight : torch.Size([256])
    module.backbone.layer1.0.downsample.1.bias : torch.Size([256])
    module.backbone.layer1.1.conv1.weight : torch.Size([64, 256, 1, 1])
    module.backbone.layer1.1.bn1.weight : torch.Size([64])
    module.backbone.layer1.1.bn1.bias : torch.Size([64])
    module.backbone.layer1.1.conv2.weight : torch.Size([64, 64, 3, 3])
    module.backbone.layer1.1.bn2.weight : torch.Size([64])
    module.backbone.layer1.1.bn2.bias : torch.Size([64])
    module.backbone.layer1.1.conv3.weight : torch.Size([256, 64, 1, 1])
    module.backbone.layer1.1.bn3.weight : torch.Size([256])
    module.backbone.layer1.1.bn3.bias : torch.Size([256])
    module.backbone.layer1.2.conv1.weight : torch.Size([64, 256, 1, 1])
    module.backbone.layer1.2.bn1.weight : torch.Size([64])
    module.backbone.layer1.2.bn1.bias : torch.Size([64])
    module.backbone.layer1.2.conv2.weight : torch.Size([64, 64, 3, 3])
    module.backbone.layer1.2.bn2.weight : torch.Size([64])
    module.backbone.layer1.2.bn2.bias : torch.Size([64])
    module.backbone.layer1.2.conv3.weight : torch.Size([256, 64, 1, 1])
    module.backbone.layer1.2.bn3.weight : torch.Size([256])
    module.backbone.layer1.2.bn3.bias : torch.Size([256])
    module.backbone.layer2.0.conv1.weight : torch.Size([128, 256, 1, 1])
    module.backbone.layer2.0.bn1.weight : torch.Size([128])
    module.backbone.layer2.0.bn1.bias : torch.Size([128])
    module.backbone.layer2.0.conv2.weight : torch.Size([128, 128, 3, 3])
    module.backbone.layer2.0.bn2.weight : torch.Size([128])
    module.backbone.layer2.0.bn2.bias : torch.Size([128])
    module.backbone.layer2.0.conv3.weight : torch.Size([512, 128, 1, 1])
    module.backbone.layer2.0.bn3.weight : torch.Size([512])
    module.backbone.layer2.0.bn3.bias : torch.Size([512])
    

    加载权重时建议使用 checkpoint = torch.load(model_path, map_location='cpu'):不指定 map_location='cpu' 会导致显存占用变成约 2 倍!

    # Load the checkpoint onto the CPU (rather than a plain torch.load(model_path)),
    # then copy the weights into the model.
    checkpoint = torch.load(model_path,map_location='cpu')
    # strict=False: missing or unexpected keys are tolerated instead of raising.
    model.load_state_dict(checkpoint['state_dict'],strict=False)
    

    修改类别数之后还需要继续 finetune(微调)模型。通常只有最后一层因为类别数量不同而对不上,此时不加载与类别数有关的层即可:

    例子1

    model = DeepLabV2_ResNet101_MSC(n_classes=CONFIG.DATASET.N_CLASSES)
    # Load onto the CPU first so the checkpoint tensors do not take extra GPU
    # memory next to the model's own weights.
    state_dict = torch.load(path_model, map_location='cpu')

    import collections
    # Rewrite the checkpoint keys so they line up with model.base.
    new_state_dict = collections.OrderedDict()
    for k, v in state_dict.items():
        name = k.replace('base.', '')
        if 'aspp' in name:
            # Renamed keys no longer match any key in the target model, so the
            # non-strict load below skips them.
            name = name + '_2'
        new_state_dict[name] = v

    print("    Init:", CONFIG.MODEL.INIT_MODEL)
    # Report every target parameter that will not be initialised from the checkpoint.
    for m in model.base.state_dict():
        if m not in new_state_dict:
            print("    Skip init:", m)
    model.base.load_state_dict(new_state_dict, strict=False)
    

    例子2

    
    
    # Load onto the CPU first so the checkpoint tensors do not take extra GPU
    # memory next to the model's own weights.
    pretrained_model = torch.load(os.path.join(model_dir, '{}.pth'.format(pth)),
                                  map_location='cpu')

    print("#######################################################################################################")
    # Show the parameter names the network expects, for comparison with the checkpoint.
    for name, parameters in net.named_parameters():
        print(name, ':', parameters.size())

    # Checkpoint keys carry a "module.net." prefix that the bare network does
    # not use. Strip it only where it is actually present, so keys without the
    # prefix are not truncated.
    prefix = 'module.net.'
    d = OrderedDict()
    for key, value in pretrained_model['net'].items():
        tmp = key[len(prefix):] if key.startswith(prefix) else key
        d[tmp] = value

    net.load_state_dict(d, strict=strict)
    print("#######################################################################################################")
    
  • 相关阅读:
    【一些思路】web和app测试的区别
    【Python】I/O和比赛的其他一些问题
    【Python】迭代器和生成器的个人理解,再讲一讲协程
    【TCP/IP】如果打不开一个网页,需要如何处理?
    DOM事件
    GASP动画的基本使用
    Velocity的使用方法
    Swiper和Swiper Animate使用方法
    DOM操作
    JavaScript函数
  • 原文地址:https://www.cnblogs.com/yanghailin/p/13535343.html
Copyright © 2011-2022 走看看