zoukankan      html  css  js  c++  java
  • net_py_add_conv5_conv6

    ./flyai train -p=1 -b=64 -e=6000

    score : 82.21
    
    cnn = Net().to(device)
    optimizer = Adam(cnn.parameters(), lr=0.0005, betas=(0.99999999, 0.999999999999))  # 选用AdamOptimizer
    loss_fn = nn.CrossEntropyLoss()  # 定义损失函数
    

    ./flyai train -p=1 -b=64 -e=5000

    score : 81.8
    

    net.py

    #   build CNN
    from torch import nn
    
    # build CNN
    
    
    class Net(nn.Module):
        """Six-stage convolutional classifier that produces 6 class logits.

        Every stage is Conv2d -> ReLU -> BatchNorm2d -> MaxPool2d(2, 2). All
        convolutions use stride 1 with padding chosen to keep height and width
        unchanged, so only the pooling layers shrink the feature map (each one
        halves both spatial dimensions). After the six stages the map has 256
        channels at 1/64 of the input resolution.

        The classifier head expects exactly 1024 flattened features per sample,
        which corresponds to a 256 x 2 x 2 map, e.g. a 3 x 128 x 128 input.
        """

        def __init__(self):
            super().__init__()

            # Stage 1: 3 -> 32 channels, 5x5 kernel.
            self.conv1 = nn.Conv2d(3, 32, 5, stride=1, padding=2)
            self.relu1 = nn.ReLU(True)
            self.bn1 = nn.BatchNorm2d(32)
            self.pool1 = nn.MaxPool2d(2, 2)

            # Stage 2: 32 -> 64 channels.
            self.conv2 = nn.Conv2d(32, 64, 3, stride=1, padding=1)
            self.relu2 = nn.ReLU(True)
            self.bn2 = nn.BatchNorm2d(64)
            self.pool2 = nn.MaxPool2d(2, 2)

            # Stage 3: 64 -> 128 channels.
            self.conv3 = nn.Conv2d(64, 128, 3, stride=1, padding=1)
            self.relu3 = nn.ReLU(True)
            self.bn3 = nn.BatchNorm2d(128)
            self.pool3 = nn.MaxPool2d(2, 2)

            # Stage 4: 128 -> 128 channels.
            self.conv4 = nn.Conv2d(128, 128, 3, stride=1, padding=1)
            self.relu4 = nn.ReLU(True)
            self.bn4 = nn.BatchNorm2d(128)
            self.pool4 = nn.MaxPool2d(2, 2)

            # Stages 5 and 6 deepen the network.
            # The in_channels of a convolution must equal the out_channels of
            # the layer feeding it; a mismatch fails at run time with
            # "expected input[...] to have 256 channels, but got 128 channels".
            self.conv5 = nn.Conv2d(128, 256, 3, stride=1, padding=1)
            self.relu5 = nn.ReLU(True)
            # BatchNorm2d's num_features is the channel count of its input.
            self.bn5 = nn.BatchNorm2d(256)
            self.pool5 = nn.MaxPool2d(2, 2)

            self.conv6 = nn.Conv2d(256, 256, 3, stride=1, padding=1)
            self.relu6 = nn.ReLU(True)
            self.bn6 = nn.BatchNorm2d(256)
            # MaxPool2d's stride defaults to kernel_size; it is given explicitly
            # here, matching the other stages.
            self.pool6 = nn.MaxPool2d(2, 2)

            # Classifier head. The input width must match the flattened size of
            # the last pooled map (256 * 2 * 2 = 1024); an earlier attempt with
            # 128 * 8 * 8 failed because the element count did not divide.
            self.fc1 = nn.Linear(1024, 1024)
            # Own name for the head activation so it no longer overwrites the
            # stage-5 `relu5` attribute (ReLU holds no parameters, so the
            # state_dict is unaffected).
            self.relu_fc = nn.ReLU(True)
            self.fc2 = nn.Linear(1024, 6)

        def forward(self, input):
            """Return class logits of shape (batch, 6) for a batch of images.

            Args:
                input: image batch of shape (batch, 3, H, W). H and W must
                    reduce to 2 x 2 after six 2x2 poolings (e.g. 128 x 128).
                    The parameter keeps its original name for keyword callers
                    even though it shadows the builtin.

            Raises:
                RuntimeError: if the flattened feature count per sample is not
                    1024 (raised by the first linear layer).
            """
            output = self.pool1(self.bn1(self.relu1(self.conv1(input))))
            output = self.pool2(self.bn2(self.relu2(self.conv2(output))))
            output = self.pool3(self.bn3(self.relu3(self.conv3(output))))
            output = self.pool4(self.bn4(self.relu4(self.conv4(output))))

            # Deeper stages.
            output = self.pool5(self.bn5(self.relu5(self.conv5(output))))
            output = self.pool6(self.bn6(self.relu6(self.conv6(output))))

            # Flatten per sample. Keeping the batch dimension fixed (instead of
            # view(-1, 1024)) means a wrong input resolution fails loudly in
            # fc1 rather than silently regrouping samples into a different
            # batch size.
            output = output.view(output.size(0), -1)

            output = self.fc1(output)
            output = self.relu_fc(output)
            output = self.fc2(output)

            return output
    

    main.py

    # -*- coding: utf-8 -*-
    import argparse
    import torch
    import torch.nn as nn
    from flyai.dataset import Dataset
    from torch.optim import Adam
    
    from model import Model
    from net import Net
    from path import MODEL_PATH
    
    # Helper class that fetches the data
    dataset = Dataset()

    # Helper class for model operations, built on top of the dataset
    model = Model(dataset)

    # Hyper-parameters, read from the command line
    parser = argparse.ArgumentParser()
    parser.add_argument("-e", "--EPOCHS", type=int, default=10, help="train epochs")
    parser.add_argument("-b", "--BATCH", type=int, default=1, help="batch size")
    args = parser.parse_args()

    # Use the GPU when one is available, otherwise fall back to the CPU
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    
    def eval(model, x_test, y_test):
        """Return the classification accuracy of the global ``cnn`` on a test set.

        Note: the function name shadows the builtin ``eval``; it is kept so that
        existing callers continue to work.

        Args:
            model: helper object whose ``batch_iter(x, y)`` yields
                ``(x_batch, y_batch)`` pairs.
            x_test: test inputs; ``len(x_test)`` is used as the sample count.
            y_test: labels matching ``x_test``.

        Returns:
            Fraction of correctly classified samples as a float, or 0.0 when
            the test set is empty.

        Side effects:
            Switches the module-level ``cnn`` to evaluation mode and leaves it
            there; the caller is expected to call ``cnn.train()`` again.
        """
        cnn.eval()
        data_len = len(x_test)
        if data_len == 0:
            # Nothing to score; avoids dividing by zero below.
            return 0.0

        correct = 0
        # Gradients are not needed for scoring, so skip building the graph.
        with torch.no_grad():
            for x_batch, y_batch in model.batch_iter(x_test, y_test):
                outputs = cnn(x_batch)
                _, prediction = torch.max(outputs, 1)
                # Count hits directly instead of round-tripping through a
                # per-batch accuracy multiplied back by the batch length.
                correct += (prediction == y_batch).sum().item()
        return correct / data_len
    
    
    cnn = Net().to(device)
    # Adam optimizer with non-default betas (PyTorch's defaults are (0.9, 0.999)).
    optimizer = Adam(cnn.parameters(), lr=0.001, betas=(0.99, 0.9999))
    # Cross-entropy loss over the class logits.
    loss_fn = nn.CrossEntropyLoss()

    # Train and evaluate the model.
    # Each iteration performs ONE optimizer step on one batch, so --EPOCHS is
    # effectively the number of training steps.
    best_accuracy = 0
    for i in range(args.EPOCHS):
        cnn.train()  # eval() below leaves the network in evaluation mode
        x_train, y_train, x_test, y_test = dataset.next_batch(args.BATCH)  # read data

        # numpy arrays -> tensors on the selected device
        # (float inputs, integer class labels).
        x_train = torch.from_numpy(x_train).float().to(device)
        y_train = torch.from_numpy(y_train).long().to(device)
        x_test = torch.from_numpy(x_test).float().to(device)
        y_test = torch.from_numpy(y_test).long().to(device)

        optimizer.zero_grad()
        outputs = cnn(x_train)
        loss = loss_fn(outputs, y_train)
        loss.backward()
        optimizer.step()

        # Save the model whenever the accuracy on the held-out batch beats the
        # best accuracy seen so far.
        test_accuracy = eval(model, x_test, y_test)
        if test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            model.save_model(cnn, MODEL_PATH, overwrite=True)
            print("step %d, best accuracy %g" % (i, best_accuracy))

        print(str(i) + "/" + str(args.EPOCHS))
    
  • 相关阅读:
    sqlplus时报Linux-x86_64 Error: 13: Permission denied
    thrift之TTransport层的缓存传输类TBufferedTransport和缓冲基类TBufferBase
    Java实现 蓝桥杯 算法提高 新建Microsoft world文档
    Java实现 蓝桥杯 算法提高 新建Microsoft world文档
    Java实现 蓝桥杯 算法提高 快乐司机
    Java实现 蓝桥杯 算法提高 快乐司机
    Java实现 蓝桥杯 算法提高 队列操作
    Java实现 蓝桥杯 算法提高 队列操作
    Java实现 蓝桥杯 算法提高 文本加密
    Java实现 蓝桥杯 算法提高 合并石子
  • 原文地址:https://www.cnblogs.com/hugeng007/p/10629760.html
Copyright © 2011-2022 走看看