  • net_py_add_conv5_conv6

    ./flyai train -p=1 -b=64 -e=6000

    score : 82.21
    
    cnn = Net().to(device)
    optimizer = Adam(cnn.parameters(), lr=0.0005, betas=(0.99999999, 0.999999999999))  # use the Adam optimizer
    loss_fn = nn.CrossEntropyLoss()  # define the loss function
    

    ./flyai train -p=1 -b=64 -e=5000

    score : 81.8
    

    net.py

    # build CNN
    from torch import nn

    
    class Net(nn.Module):
        # def __init__(self,num_classes=10):
        def __init__(self):
            super(Net, self).__init__()   
            self.conv1 = nn.Conv2d(3, 32, 5, stride=1, padding=2)       
            self.relu1 = nn.ReLU(True)
            self.bn1 = nn.BatchNorm2d(32)
            self.pool1 = nn.MaxPool2d(2, 2)        
            self.conv2 = nn.Conv2d(32, 64, 3, stride=1, padding=1)
            self.relu2 = nn.ReLU(True)
            self.bn2 = nn.BatchNorm2d(64)
            self.pool2 = nn.MaxPool2d(2, 2)   
            self.conv3 = nn.Conv2d(64, 128, 3, stride=1, padding=1)
            self.relu3 = nn.ReLU(True)
            self.bn3 = nn.BatchNorm2d(128)
            self.pool3 = nn.MaxPool2d(2, 2)    
            self.conv4 = nn.Conv2d(128, 128, 3, stride=1, padding=1)
            self.relu4 = nn.ReLU(True)
            self.bn4 = nn.BatchNorm2d(128)
            self.pool4 = nn.MaxPool2d(2, 2)  
    
    # deepen the network: extra conv blocks added below
            self.conv5 = nn.Conv2d(128, 256, 3, stride=1, padding=1)
            """
    class torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)
    参数:
    
        in_channels (-) – 输入信号的通道数.(最初输入的图片样本的 channels ,取决于图片类型,比如RGB;)
        out_channels (-) – 卷积后输出结果的通道数.( 卷积操作完成后输出的 out_channels ,取决于卷积核的数量。此时的 out_channels 也会作为下一次卷积时的卷积核的 in_channels)
    
        最初输入的图片样本的 channels ,取决于图片类型,比如RGB
        卷积操作完成后输出的 out_channels ,取决于卷积核的数量。此时的 out_channels 也会作为下一次卷积时的卷积核的 in_channels;
        卷积核中的 in_channels ,刚刚2中已经说了,就是上一次卷积的 out_channels ,如果是第一次做卷积,就是1中样本图片的 channels 。
    
        kernel_size (-) – 卷积核的形状.
        stride (-) – 卷积每次移动的步长, 默认为1.
        padding (-) – 处理边界时填充0的数量, 默认为0(不填充).
        dilation (-) – 采样间隔数量, 默认为1, 无间隔采样.
        groups (-) – 输入与输出通道的分组数量. 当不为1时, 默认为1(全连接).(group的作用之终极版:可以通过设置group,将某层的单路卷积分为多个并行的卷积支路。)
        参数group的作用为:将输入数据按通道顺序分组, 每组有in_channel/group个通道.(例:group为2时,输入数据前一半通道为一组)
        简而言之, group参数的目的就是将原本的大卷积分成多个并联(side by side)的小卷积
        另: 在in_channel不变的情况下, 当group>1时, kernel总数不变, 而filter总数缩小group倍.
        而在filter、kernel总数不变的情况下, group增大, 需要的in_channel按同样比例增大.
        
        group的作用之终极版:可以通过设置group,将某层的单路卷积分为多个并行的卷积支路。 
    
        参数dilation的作用为: 控制卷积核元素的间隔大小.具体可搜索“空洞卷积”
        bias (-) – 为 True 时, 添加偏置.
    Examples:
    >>> # With square kernels and equal stride
    >>> m = nn.Conv2d(16, 33, 3, stride=2)
    >>> # non-square kernels and unequal stride and with padding
    >>> m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
    >>> # non-square kernels and unequal stride and with padding and dilation
    >>> m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
    >>> input = autograd.Variable(torch.randn(20, 16, 50, 100))
    >>> output = m(input)
            """
            self.relu5 = nn.ReLU(True)
            self.bn5 = nn.BatchNorm2d(256)
            """
    class torch.nn.BatchNorm2d(num_features, eps=1e-05, momentum=0.1, affine=True)
    参数:
    
        num_features – 预期输入的特征数,大小为 ‘batch_size x num_features x height x width’
        eps – 给分母加上的值,保证数值稳定(分母不能趋近0或取0),默认为 1e-5
        momentum – 动态均值和动态方差使用的移动动量值,默认为 0.1
        affine – 布尔值,设为 True 时,表示该层添加可学习,可改变的仿射参数,即 gamma 和 beta,默认为 True
    
    形状:
    
        输入:(N, C, H, W)
        输出:(N, C, H, W) (same shape as input)
    示例:
    
    >>> # With Learnable Parameters
    >>> m = nn.BatchNorm2d(100)
    >>> # Without Learnable Parameters
    >>> m = nn.BatchNorm2d(100, affine=False)
    >>> input = autograd.Variable(torch.randn(20, 100, 35, 45))
    >>> output = m(input)
            """
            self.pool5 = nn.MaxPool2d(2, 2)  
            self.conv6 = nn.Conv2d(256, 256, 3, stride=1, padding=1)
            """
    Traceback (most recent call last):
      File "main.py", line 68, in <module>
        outputs = cnn(x_train)
      File "/home/hugeng/.conda/envs/pytorch/lib/python3.7/site-packages/torch/nn/modules/module.py", line 477, in __call__
        result = self.forward(*input, **kwargs)
      File "/home/hugeng/FlyAI_wang/intelSceneClassification_FlyAI/net.py", line 93, in forward
        output = self.conv6(output)
      File "/home/hugeng/.conda/envs/pytorch/lib/python3.7/site-packages/torch/nn/modules/module.py", line 477, in __call__
        result = self.forward(*input, **kwargs)
      File "/home/hugeng/.conda/envs/pytorch/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 301, in forward
        self.padding, self.dilation, self.groups)
    
    
    RuntimeError: Given groups=1, weight of size [256, 256, 3, 3], expected input[32, 128, 4, 4] to have 256 channels, but got 128 channels instead
    
        For the first convolution, in_channels is the channel count of the input image (e.g. 3
        for RGB); each convolution's out_channels, set by its number of kernels, becomes the
        in_channels of the next convolution. Hence conv6 must accept the 256 channels that the
        layer feeding it actually outputs.
            """
    
            self.relu6 = nn.ReLU(True)
            self.bn6 = nn.BatchNorm2d(256)
            self.pool6 = nn.MaxPool2d(2, 2)
    #   MaxPool2d
            """
    class torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=F
        kernel_size – 最大池化操作时的窗口大小
        stride – 最大池化操作时窗口移动的步长, 默认值是 kernel_size
        padding – 输入的每条边隐式补0的数量
        dilation – 用于控制窗口中元素的步长的参数
        return_indices – 如果等于 True, 在返回 max pooling 结果的同时返回最大值的索引 这在之后的 Unpooling 时很有用
        ceil_mode – 如果等于 True, 在计算输出大小时,将采用向上取整来代替默认的向下取整的方式
    >>> # pool of square window of size=3, stride=2
    >>> m = nn.MaxPool2d(3, stride=2)
    >>> # pool of non-square window
    >>> m = nn.MaxPool2d((3, 2), stride=(2, 1))
    >>> input = autograd.Variable(torch.randn(20, 16, 50, 32))
    >>> output = m(input)
    
            """
    # end of the deepened-network additions
    
    
    #        self.fc1 = nn.Linear(128*8*8, 1024)
            self.fc1 = nn.Linear(1024, 1024)
            self.relu_fc = nn.ReLU(True)  # renamed from relu5, which the conv5 block above already uses
            self.fc2 = nn.Linear(1024, 6)
            """
    >>> m = nn.Linear(20, 30)
    >>> input = torch.randn(128, 20)
    >>> output = m(input)
    >>> print(output.size())
    torch.Size([128, 30])
            """
    
        def forward(self, input):
                output = self.conv1(input)
                output = self.relu1(output)
                output = self.bn1(output)
                output = self.pool1(output)
                
                output = self.conv2(output)
                output = self.relu2(output)
                output = self.bn2(output)
                output = self.pool2(output)
    
                output = self.conv3(output)
                output = self.relu3(output)
                output = self.bn3(output)
                output = self.pool3(output)
    
                output = self.conv4(output)
                output = self.relu4(output)
                output = self.bn4(output)
                output = self.pool4(output)
                # print("size after pool4 : {}"
                #       .format(output.shape))
                
    # 加深网络
    
                output = self.conv5(output)
                output = self.relu5(output)
                output = self.bn5(output)
                output = self.pool5(output)
                output = self.conv6(output)
                output = self.relu6(output)
                output = self.bn6(output)
                output = self.pool6(output)
                # print("size after pool6 is : {}"
                #       "type of output is : {}"
                #       .format(output.shape,
                #               type(output))
                #       )
    # 加深网络
    
                """
    Traceback (most recent call last):
      File "main.py", line 68, in <module>
        outputs = cnn(x_train)
      File "/home/hugeng/.conda/envs/pytorch/lib/python3.7/site-packages/torch/nn/modules/module.py", line 477, in __call__
        result = self.forward(*input, **kwargs)
      File "/home/hugeng/.local/share/Trash/files/intelSceneClassification_FlyAI.3/net.py", line 164, in forward
        output = output.view(-1, 128*8*8)
    RuntimeError: invalid argument 2: size '[-1 x 8192]' is invalid for input with 102400 elements at /opt/conda/conda-bld/pytorch_1535493744281/work/aten/src/TH/THStorage.cpp:80
    
                """
    #            output = output.view(-1, 128*8*8)
                output = output.view(-1, 1024)  # flatten; returns a new tensor with the same data but a different size
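                # Editor's sketch (not in the original): hard-coding the size is what broke in
                # the traceback above; flattening per sample is robust to input-size changes:
                # output = output.view(output.size(0), -1)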
                """
    view(*args) → Tensor
    
    返回一个有相同数据但大小不同的新的 tensor.
    
    返回的 tensor 与原 tensor 共享相同的数据, 一定有相同数目的元素, 但大小不同. 一个 tensor 必须是连续的 ( contiguous() ) 才能被查看.
    
    参数:args (torch.Size 或 int...) – 期望的大小
    
    示例:
    
    >>> x = torch.randn(4, 4)
    >>> x.size()
    torch.Size([4, 4])
    >>> y = x.view(16)
    >>> y.size()
    torch.Size([16])
    >>> z = x.view(-1, 8)  # the size -1 is inferred from other dimensions ( -1 的意思是从其他维度进行infer推断)
    >>> z.size()
    torch.Size([2, 8])
    
    view_as(tensor)
    
    将该 tensor 作为指定的 tensor 返回查看.
    
    这相当于:
    
    self.view(tensor.size())
    
    zero_()
    
    用0填充该 tensor.
    
    class torch.ByteTensor
    
    下面这些函数方法只存在于 torch.ByteTensor.
    
    all() → bool
    
    如果 tensor 里的所有元素都是非零的, 则返回 True, 否在返回 False.
    
    any() → bool
    
    如果 tensor 里的存在元素是非零的, 则返回 True, 否在返回 False.
                """
                output = self.fc1(output)
                output = self.relu_fc(output)
                output = self.fc2(output)
                
                return output
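
    A quick shape sanity check (an editor's sketch, not part of the original project;
    it assumes 128x128 RGB inputs, which is exactly what makes the view(-1, 1024)
    above come out to 256 x 2 x 2). Pushing a dummy batch through the net like this
    would have surfaced both RuntimeErrors quoted earlier before any training run:

    import torch
    from net import Net

    net = Net()
    x = torch.randn(2, 3, 128, 128)  # dummy batch: 2 RGB images, 128x128
    y = net(x)
    print(y.shape)  # expected: torch.Size([2, 6]) -- 6 scene classes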
    

    main.py

    # -*- coding: utf-8 -*-
    import argparse
    import torch
    import torch.nn as nn
    from flyai.dataset import Dataset
    from torch.optim import Adam
    
    from model import Model
    from net import Net
    from path import MODEL_PATH
    
    # data-loading helper class
    dataset = Dataset()
    
    # model-handling helper class
    model = Model(dataset)
    
    # hyperparameters
    parser = argparse.ArgumentParser()
    parser.add_argument("-e", "--EPOCHS", default=10, type=int, help="train epochs")
    parser.add_argument("-b", "--BATCH", default=1, type=int, help="batch size")
    args = parser.parse_args()
    
    # use the GPU if one is available
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    device = torch.device(device)
    
    
    def evaluate(model, x_test, y_test):  # renamed from eval, which shadows the Python builtin
        cnn.eval()
        batch_eval = model.batch_iter(x_test, y_test)
        total_acc = 0.0
        data_len = len(x_test)
        for x_batch, y_batch in batch_eval:
            batch_len = len(x_batch)
            outputs = cnn(x_batch)
            _, prediction = torch.max(outputs.data, 1)
            correct = (prediction == y_batch).sum().item()
            acc = correct / batch_len
            total_acc += acc * batch_len
        return total_acc / data_len
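    # Editor's note (sketch, not in the original): wrapping the batch loop in evaluate()
    # in `with torch.no_grad():` would skip autograd bookkeeping and reduce memory use
    # during evaluation.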
    
    
    cnn = Net().to(device)
    optimizer = Adam(cnn.parameters(), lr=0.001, betas=(0.99, 0.9999))  # use the Adam optimizer
    loss_fn = nn.CrossEntropyLoss()  # define the loss function
    
    # train and evaluate the model
    
    best_accuracy = 0
    for i in range(args.EPOCHS):
        cnn.train()
        x_train, y_train, x_test, y_test = dataset.next_batch(args.BATCH)  # fetch a batch of data
    
        x_train = torch.from_numpy(x_train)
        y_train = torch.from_numpy(y_train)
        x_train = x_train.float().to(device)
        y_train = y_train.long().to(device)
    
        x_test = torch.from_numpy(x_test)
        y_test = torch.from_numpy(y_test)
        x_test = x_test.float().to(device)
        y_test = y_test.long().to(device)
    
        outputs = cnn(x_train)
        _, prediction = torch.max(outputs.data, 1)
    
        optimizer.zero_grad()
    
        loss = loss_fn(outputs, y_train)
        loss.backward()
        optimizer.step()
        # if the test accuracy exceeds the best so far, save the model
        test_accuracy = evaluate(model, x_test, y_test)  # accuracy on the test split
        if test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            model.save_model(cnn, MODEL_PATH, overwrite=True)
            print("step %d, best accuracy %g" % (i, best_accuracy))
    
        print(str(i) + "/" + str(args.EPOCHS))
    
  • Original post: https://www.cnblogs.com/hugeng007/p/10629760.html