  • PyTorch study notes

    Basic data types

    Updated May 16

    ---------------------------------------------

    • Saving a model
            print("Saving state, iter:", str(epoch))
            torch.save(model.state_dict(), f'logs/Epoch{epoch}-acc{acc}.pth')
    
    • Loading a model / for pretraining
        # ----------------------------#
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = ResNet18().to(device)    # the device must be defined before the model is moved onto it
        model_path = r"logs/Epoch2-acc0.6816.pth"
        print('Loading weights into state dict...')
        state_dict = torch.load(model_path, map_location=device)
        model.load_state_dict(state_dict, strict=True)
    
    • Using partial pretrained weights (updated May 21)
        # ----- load partial pretrained weights ------------------#
        model_path = r"logs/Epoch2-acc0.6831.pth"
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model_dict = model.state_dict()
        pretrained_dict = torch.load(model_path, map_location=device)
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict and np.shape(model_dict[k]) == np.shape(v)}    # requires numpy imported as np; keep only weights whose shapes match
        # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}  # fall back to this if the line above cannot be used
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
        print('Finished!')
        # ---------------------------------------#
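
    A common extension of the snippets above (a minimal sketch, not from the original notes) is to save the optimizer state and epoch together with the weights so training can resume; the file name logs/checkpoint.pth is illustrative and `model`, `optimizer`, `epoch`, `device` are assumed to exist:

        # Hypothetical checkpoint example: save model + optimizer + epoch together
        checkpoint = {
            'epoch': epoch,
            'model_state': model.state_dict(),
            'optimizer_state': optimizer.state_dict(),
        }
        torch.save(checkpoint, 'logs/checkpoint.pth')

        # ...later, to resume training:
        checkpoint = torch.load('logs/checkpoint.pth', map_location=device)
        model.load_state_dict(checkpoint['model_state'])
        optimizer.load_state_dict(checkpoint['optimizer_state'])
        start_epoch = checkpoint['epoch'] + 1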
    

    ---------------------------------------------

    Data type

    type check

    a = torch.randn(2, 3)    # randomly generate a 2x3 matrix
    print(a.shape)
    print(a.size(1)) # returns the 2nd element of the shape
    print(a.shape[1])    # 3
    
    # on CPU
    print(a.type())    # torch.FloatTensor
    print(type(a))
    print(isinstance(a, torch.FloatTensor))
    
    # on GPU
    data = a.cuda()
    print(isinstance(data, torch.cuda.FloatTensor))
    
    """
    In PyTorch 0.3 a 0-dimensional tensor did not exist; a scalar such as 0.3 was returned as [0.3].
    In later versions a scalar is returned as 0.3, i.e. a 0-dim tensor (for clearer semantics).
    
    Distinguish dim / size / shape / tensor, e.g. for shape [2, 2]:
    dim: 2  (the rank)
    size/shape: [2, 2]
    tensor: the actual values, e.g. [1, 3]
                                    [2, 4]
    """
    

    Dimension 0/1/2

    # Dim=0, used for loss values
    a = torch.tensor(2.2)
    print(a.shape)    # torch.Size([])
    print(len(a.shape))    # 0
    print(a.size())    # torch.Size([])
    
    # Dim=1, used for bias / Linear input
    b = torch.tensor([2])    # written like this, the dtype follows the data you pass in
    print(b)
    print(b.type())
    c = torch.tensor([1.1, 2.2])
    print(c)
    print(c.type())
    d = torch.FloatTensor(2)
    print(d)
    e = torch.IntTensor([2.2])
    print(e)
    
    data = np.ones(3)
    print(data)
    f = torch.from_numpy(data)    # convert the numpy array into a tensor
    print(f)
    
    # Dim=2, Linear input / batch
    g = torch.randn(2, 3)    # random normal distribution
    print(g)
    print(g.shape)
    print(g.size())
    print(g.size(0))
    print(g.size(1))
    print(g.shape[1])
    
    
    # Dim=3, RNN input / batch
    h = torch.rand(3, 2, 3)    # random uniform distribution
    print(h)
    print(h.shape)
    print(h[0])
    print(h[1])
    print(list(h.shape))
    
    
    # Dim=4 CNN: [b, c, h, w]
    # below: 2 images, each with 3 channels, height and width 28×28
    i = torch.rand(2, 3, 28, 28)    # number of images, channels (3 for colour), height, width
    print(i)
    

    Creating tensors

    import from numpy

    import torch
    import numpy as np
    
    # import from numpy
    a = np.array([2, 3.3])
    data = torch.from_numpy(a)
    print(data)
    b = np.ones([3, 4])
    dd = torch.from_numpy(b)
    print(dd)
    

    import from list

    # import from a list
    # Uppercase Tensor() behaves like FloatTensor: it takes a shape as argument; lowercase tensor() takes existing data
    c = torch.tensor([2., 3.2])
    d = torch.FloatTensor([2., 3.2])    # can also take existing data, but it must be wrapped in a list; bare numbers are treated as a shape, e.g. FloatTensor(2, 3)
    e = torch.tensor([[2., 3.2], [1., 22.3]])
    print(c)
    print(d)
    print(e)
    

    uninitialized

    # create uninitialized data: just a container, the real values are written in later
    # torch.empty(): takes a shape
    # torch.FloatTensor(d1, d2, d3)
    # torch.IntTensor(d1, d2, d3)
    
    f = torch.empty(2, 3)
    print(f)
    print(torch.Tensor(2, 3))    # uninitialized values can be extreme; remember to overwrite them, otherwise you may see NaN or Inf
    print(torch.IntTensor(2, 3))
    print(torch.FloatTensor(2, 3))
    

    set default type

    # set default type: the default tensor type in torch is torch.FloatTensor
    print(torch.tensor([1.2, 3]).type())
    torch.set_default_tensor_type(torch.DoubleTensor)
    print(torch.tensor([1.2, 3]).type())
    

    rand/rand_like, randint

    # rand/rand_like, randint
    # rand: uniform distribution on [0, 1)
    # rand_like: takes a tensor and samples a new tensor of the same shape
    # randint: integers in [min, max), the maximum excluded; *_like variants also exist
    
    print(torch.rand(3, 3))    # samples fairly uniformly
    a = torch.rand(3, 3)
    print(torch.rand_like(a))    # rand_like takes a tensor: it reads a.shape and feeds it to rand
    
    print(torch.randint(1, 10, [3, 3]))
    

    randn

    # randn: normal distribution
    # N(0, 1), used a lot for biases
    # N(u, std)
    print(torch.randn(3, 3))
    #                   full creates a length-10 tensor of means (all 0); the std decreases gradually from 1 towards 0
    print(torch.normal(mean=torch.full([10], 0.), std=torch.arange(1, 0, -0.1)))    # note the float 0. so that normal() gets a floating-point mean
    

    full

    # full
    print(torch.full([2, 3], 7))
    
    print(torch.full([], 7))    # dim=0
    
    print(torch.full([1], 7))    # dim=1
    

    arange/range

    print(torch.arange(0, 10))    # 10 excluded
    print(torch.arange(0, 10, 2))
    

    linspace/logspace

    print(torch.linspace(0, 10, steps=4))    # evenly spaced points, 10 included
    print(torch.logspace(0, 1, steps=10))    # 10 points; the values are 10**x for x from 0 to 1
    

    Ones/zeros/eye

    # ones: all ones, given a shape directly
    # zeros: all zeros
    # eye: ones on the diagonal; takes one or two arguments
    print(torch.ones(3, 3))
    print(torch.zeros(3, 3))
    print(torch.eye(3, 4))
    data = torch.ones(3, 3)
    print(torch.ones_like(data))
    

    randperm: random permutation

    # randperm: random permutation (shuffle)
    print(torch.randperm(10))
    
    a = torch.rand(2, 3)
    b = torch.rand(2, 2)
    idx = torch.randperm(2)
    print(idx)
    print(a)
    print(b)
    print(a[idx])    # shuffles a and b in a coordinated way
    print(b[idx])
    

    Indexing and slicing

    indexing

    a = torch.rand(4, 3, 28, 28)
    print(a[0])
    print(a[0].shape)    # torch.Size([3, 28, 28]): index the first dimension: take image 0
    
    print(a[0, 0].shape)    # torch.Size([28, 28]): second dimension: channel 0 of image 0
    
    print(a[0, 0, 2])
    print(a[0, 0, 2, 4])    # tensor(0.9441): image 0, channel 0, row 2, column 4
    

    select first/last N

    # select first/last N
    a = torch.rand(4, 3, 28, 28)
    print(a.shape)    # torch.Size([4, 3, 28, 28])
    print(a[:2].shape)    # torch.Size([2, 3, 28, 28])
    print(a[:2, :1, :, :].shape)    # torch.Size([2, 1, 28, 28])
    print(a[:2, 1:, :, :].shape)    # torch.Size([2, 2, 28, 28])
    print(a[:2, -1:, :, :].shape)    # torch.Size([2, 1, 28, 28])
    

    select by steps

    # select by steps
    a = torch.rand(4, 3, 28, 28)
    print(a[:, :, 0:28:2, 0:28:2].shape)    # torch.Size([4, 3, 14, 14])
    
    print(a[:, :, ::2, ::2].shape)    # torch.Size([4, 3, 14, 14])
    

    select by specific index

    # select by specific index
    a = torch.rand(4, 3, 28, 28)
    print(a)
    print(a.index_select(0, torch.tensor([0, 2])).shape)    # select indices 0 and 2 along dim 0
    print(a.index_select(2, torch.arange(20)).shape)
    

    ... matches any number of dimensions

    # ... matches any number of dimensions
    a = torch.rand(4, 3, 28, 28)
    print(a[...].shape)    # torch.Size([4, 3, 28, 28])
    print(a[:, 1, ...].shape)    # torch.Size([4, 28, 28])
    print(a[..., :2].shape)    # torch.Size([4, 3, 28, 2])
    

    select by mask

    # select by mask
    x = torch.randn(3, 4)
    y = torch.randn(3, 4)
    print(x)
    mask = x.ge(0.5)    # True where the value is >= 0.5
    print(mask)
    print(torch.masked_select(y, mask))    # select the elements where the mask is True
    

    select by flatten index

    # select by flatten index
    src = torch.IntTensor(3, 4)
    print(src)
    print(torch.take(src, torch.tensor([0, 2, 5])))
    

    Tensor dimension transforms

    1. view # reshape one shape into another
    2. squeeze (remove size-1 dims) / unsqueeze (insert a dim)
    3. transpose (swap two dims) / t (2-D transpose) / permute (reorder several dims at once)
    4. expand (changes only the way the data is viewed) / repeat (actually copies the data, memory copied)

    view: lost dim information

    # view: lost dim information
    a = torch.rand(4, 1, 28, 28)
    print(a)
    print(a.shape)
    print(a.view(4, 28 * 28).shape)
    print(a.view(4 * 28, 28).shape)
    print(a.view(4*1, 28, 28).shape)
    b = a.view(4, 784)
    b.view(4, 28, 28, 1)    # logic bug
    
    # flexible but error-prone: the dimensions must match
    print(a.view(4, 783))    # RuntimeError: shape '[4, 783]' is invalid for input of size 3136
    

    squeeze / unsqueeze

    unsqueeze

    """
    Valid range of the dim argument:
        [-a.dim()-1, a.dim()], i.e. for a 4-dim tensor [-5, 4]
        (equivalently [-5, 5))
    """
    a = torch.rand(4, 1, 28, 28)
    print(a.shape)
    print(a.unsqueeze(0).shape)
    print(a.unsqueeze(-1).shape)
    print(a.unsqueeze(4).shape)
    print(a.unsqueeze(-5).shape)
    # print(a.unsqueeze(5).shape)    # IndexError: Dimension out of range (expected to be in range of [-5, 4], but got 5)
    
    a = torch.tensor([1.2, 2.3])
    print(a)
    print(a.unsqueeze(-1))
    print(a.unsqueeze(0))
    
    # example:
    b = torch.rand(32)
    f = torch.rand(4, 32, 14, 14)
    b = b.unsqueeze(1).unsqueeze(2).unsqueeze(0)
    print(b.shape)
    

    squeeze

    # squeeze
    b = torch.rand(1, 32, 1, 1)
    print(b.squeeze())    # squeeze every dim that can be squeezed (all size-1 dims)
    print(b.squeeze(0).shape)    # squeeze dim 0
    print(b.squeeze(-1).shape)
    print(b.squeeze(1).shape)    # dim 1 has size 32, so it cannot be squeezed and is left unchanged
    print(b.squeeze(-4).shape)
    

    expand/repeat

    # expand/repeat
    # expand: broadcasting, only changes the way the data is viewed
    # repeat: memory copied, actually duplicates the data
    a = torch.rand(4, 32, 14, 14)
    
    b = torch.rand(1, 32, 1, 1)
    print(b)
    print(b.expand(4, 32, 14, 14))    # torch.Size([4, 32, 14, 14])
    
    print(b.expand(-1, 32, -1, -1).shape)    # -1 means "keep this dim unchanged"
    # print(b.expand(-1, 32, -1, -4).shape)    # -4 is not allowed here: RuntimeError: invalid shape dimension
    
    
    # repeat: not recommended (copies memory)
    print(b.repeat(4, 32, 1, 1).shape)    # dim 1 is copied 32 times: [4, 1024, 1, 1]
    print(b.repeat(4, 1, 1, 1).shape)
    print(b.repeat(4, 1, 32, 32).shape)
    

    t(): transpose, only for 2-D tensors

    # t(): transpose, only for 2-D tensors
    a = torch.randn(3, 4)
    print(a)
    print(a.t())
    

    transpose: swap two dimensions

    # transpose: swap two dimensions
    a = torch.rand(4, 3, 32, 32)
    print(a.shape)
    """
    RuntimeError: view size is not compatible with input tensor's size and stride
    (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
    """
    a1 = a.transpose(1, 3).contiguous().view(4, 3*32*32).view(4, 3, 32, 32)    # contiguous() is required before view
    a2 = a.transpose(1, 3).contiguous().view(4, 3*32*32).view(4, 3, 32, 32).transpose(1, 3)
    print(a1.shape)
    print(a2.shape)
    

    permute: place the dims in any order directly; the same result could be reached with repeated transposes

    # permute: place the dims in any order directly; the same result could be reached with repeated transposes
    a = torch.rand(4, 3, 28, 28)
    print(a.transpose(1, 3).shape)    # torch.Size([4, 28, 28, 3])
    b = torch.rand(4, 3, 28, 32)
    print(b.transpose(1, 3).shape)    # torch.Size([4, 32, 28, 3])
    print(b.transpose(1, 3).transpose(1, 3).shape)    # torch.Size([4, 3, 28, 32])
    print(b.permute(0, 2, 3, 1).shape)    # torch.Size([4, 28, 32, 3])
    

    Broadcast: automatic expansion

    """
    expand
    without copying data
    
    insert 1 dim ahead
    expand dims with size 1 to same size
    feature maps:[4, 32, 14, 14]
    bias: [32, 1, 1] => [1, 32, 1, 1] => [4, 32, 14, 14]    the bias gets expanded
    """
    


    broadcast

    # situation 1
    # [4, 32, 14, 14]
    # [1, 32, 1, 1] => [4, 32, 14, 14]
    
    # situation2
    # [4, 32, 14, 14]
    # [14, 14] => [1, 1, 14, 14] => [4, 32, 14, 14]    # unsqueeze first, then expand
    
    # situation 3 (not broadcastable)
    # [4, 32, 14, 14]
    # [2, 32, 14, 14]
    
    # a = torch.tensor([2, 32, 14, 14])
    # # print(a)
    # # print(a[:])
    
    # a = torch.IntTensor(4, 3)
    # b = torch.IntTensor(3)
    # print(a)
    # print(b)
    """
    match from last dim
    1. no dim
    2. dim of size 1
    """
    

    Concatenation and splitting

    """
    Merge or split
    merge:
    cat
    stack
    split:
    split
    chunk
    """
    

    cat

    # cat
    a = torch.rand(4, 32, 8)
    b = torch.rand(5, 32, 8)
    print(torch.cat([a, b], dim=0).shape)    # torch.Size([9, 32, 8])
    
    a1 = torch.rand(4, 3, 32, 32)
    a2 = torch.rand(4, 1, 32, 32)
    # print(torch.cat([a1, a2], dim=0).shape)    # RuntimeError: invalid argument 0: all dims except the cat dim must match
    print(torch.cat([a1, a2], dim=1).shape)    # torch.Size([4, 4, 32, 32])
    

    stack: creates a new dim; requires the shapes to be exactly the same

    # stack: creates a new dim; requires the shapes to be exactly the same
    a1 = torch.rand(4, 3, 16, 32)
    a2 = torch.rand(4, 3, 16, 32)
    print(torch.cat([a1, a2], dim=2).shape)    # torch.Size([4, 3, 32, 32])
    print(torch.stack([a1, a2], dim=2).shape)    # torch.Size([4, 3, 2, 16, 32])
    a = torch.rand(32, 8)
    b = torch.rand(32, 8)
    print(torch.stack([a, b], dim=0).shape)    # torch.Size([2, 32, 8])
    

    split: by length

    # split: by length (give the length of each piece)
    b = torch.rand(32, 8)
    a = torch.rand(32, 8)
    # print(a.shape)    # torch.Size([32, 8])
    c = torch.stack([a, b], dim=0)
    # print(c.shape)    # torch.Size([2, 32, 8])
    aa, bb = c.split([4, 4], dim=2)
    print(aa.shape, bb.shape)    # torch.Size([2, 32, 4]) torch.Size([2, 32, 4])
    
    # aa, bb = c.split(2, dim=0)    # ValueError: not enough values to unpack (expected 2, got 1)
    
    print(c.shape)    # torch.Size([2, 32, 8])
    

    chunk: by number of chunks

    # chunk: split by the number of chunks
    aa, bb = c.chunk(2, dim=2)    # c is torch.Size([2, 32, 8])
    print(aa.shape, bb.shape)    # torch.Size([2, 32, 4]) torch.Size([2, 32, 4])
    

    Math operations

    """
    Math operation
    1. add/minus/multiply/divide
    2. matmul
    3. pow
    4. sqrt/rsqrt
    5. round
    """
    

    Basics

    # basics
    a = torch.rand(3, 4)
    b = torch.rand(4)
    
    print(a)
    print(b)
    print(a + b)    # b is broadcast
    # all(): returns True only if every element of the tensor is True, otherwise False
    b = torch.tensor([1, 1, 1, 1])
    print(torch.all(torch.eq(a-b, torch.sub(a, b))))
    

    matmul

    # matmul
    # torch.mm
    #     only for 2d
    # torch.matmul
    # @
    a = torch.tensor([[3., 3.],
                     [3., 3.]])
    print(a)
    b = torch.ones(2, 2)
    print(b)
    
    print(torch.mm(a, b))    # only for 2-D matrices
    
    print(torch.matmul(a, b))
    
    print(a@b)
    
    # example:
    # == 2-D tensor operations
    a = torch.rand(4, 784)
    x = torch.rand(4, 784)
    w = torch.rand(512, 784)    # stored as (ch_out, ch_in)
    
    print((x @ w.t()).shape)    # torch.Size([4, 512]); w is (ch_out, ch_in), so it must be transposed before multiplying
    
    print(torch.matmul(x, w.t()).shape)    # torch.Size([4, 512])
    
    # tensor operations with more than 2 dims
    a = torch.rand(4, 3, 28, 64)
    b = torch.rand(4, 3, 64, 32)
    print(torch.matmul(a, b).shape)    # torch.Size([4, 3, 28, 32])
    b = torch.rand(4, 1, 64, 32)
    print(torch.matmul(a, b).shape)    # torch.Size([4, 3, 28, 32]); here broadcasting is applied first, then batched matrix multiplication
    

    power

    # power
    a = torch.full([2, 2], 3)
    print(a.pow(2))
    print(a**2)
    aa = a**2
    print(aa.sqrt())
    print(aa.rsqrt())
    print(aa**(0.5))
    

    exp/log

    # exp/log
    a = torch.exp(torch.ones(2, 2))
    print(a)
    print(torch.log(a))
    

    approximation

    # approximation
    a = torch.tensor(3.14)
    print(a.floor(), a.ceil(), a.trunc(), a.frac())    # tensor(3.) tensor(4.) tensor(3.) tensor(0.1400)
    #      floor: round down; ceil: round up; trunc: keep the integer part; frac: keep the fractional part
    
    a = torch.tensor(3.499)
    print(a.round())    # tensor(3.)  round to nearest
    a = torch.tensor(3.5)
    print(a.round())    # tensor(4.)
    

    clamp: clipping

    # clamp: clipping
    """
    gradient clipping
    (min)
    (min, max)
    """
    grad = torch.rand(2, 3)*15
    print(grad)
    print(grad.max())
    print(grad.median())
    print(grad.clamp(10))    # every element smaller than 10 becomes 10
    print(grad.clamp(2, 10))    # elements below 2 are clipped to 2, elements above 10 are clipped to 10
    

    Tensor statistics

    """
    statistics
    norm
    mean, sum
    prod
    max, min, argmin (position of the minimum), argmax
    kthvalue (k-th smallest value by default, e.g. the 8th smallest), topk (top-k values)
    """
    

    norm:

    # norm:
    a = torch.full([8], 1.)    # float fill value so that norm() works
    b = a.view(2, 4)
    c = a.view(2, 2, 2)
    print(a)
    print(b)
    print(c)
    print(a.norm(1), b.norm(1), c.norm(1))    # tensor(8.) tensor(8.) tensor(8.)
    print(a.norm(2), b.norm(2), c.norm(2))    # tensor(2.8284) tensor(2.8284) tensor(2.8284)
    
    print(b.norm(1, dim=1))    # take the norm along dim=1; the 2-D tensor becomes 1-D: tensor([4., 4.])
    print(b.norm(2, dim=1))    # tensor([2., 2.])
    
    print(c.norm(1, dim=0))
    print(c.norm(2, dim=0))
    

    mean sum min max prod (product of all elements)

    # mean sum min max prod (product of all elements, not factorial)
    a = torch.arange(8).view(2, 4).float()
    print(a)
    """
    tensor([[0., 1., 2., 3.],
            [4., 5., 6., 7.]])
    """
    print(a.min(), a.max(), a.mean(), a.prod())    # tensor(0.) tensor(7.) tensor(3.5000) tensor(0.)
    print(a.sum())    # tensor(28.)
    print(a.argmin(), a.argmax())    # tensor(0) tensor(7)
    

    argmin/argmax along a given dimension

    # argmin/argmax along a given dimension
    a = torch.rand(4, 5)
    print(a)
    print(a.argmax())
    print(a.argmax(dim=1))    # argmax along dim=1: the position of the maximum within each row
    

    keepdim

    # keepdim
    a = torch.rand(4, 10)
    print(a)
    # print(a.max(dim=1))
    print(a.argmax(dim=1))
    print(a.max(dim=1, keepdim=True))    # returns both the max values along dim=1 and their indices, keeping the dim
    

    top-k or k-th

    # top-k or k-th
    a = torch.rand(4, 10)
    print(a.topk(3, dim=1))
    print(a.topk(3, dim=1, largest=False))
    
    print(a.kthvalue(8, dim=1))    # returns the 8th smallest value along dim=1
    """
    torch.return_types.kthvalue(
    values=tensor([0.7363, 0.8011, 0.6856, 0.6297]),
    indices=tensor([4, 0, 7, 8]))
    """
    

    compare

    # compare
    """
    >  >= <  <=  !=  ==
    
    torch.eq(a, b)
    """
    a = torch.rand(4, 10)
    print(a > 5)    # every element is compared
    print(torch.gt(a, 0))
    print(a != 0)
    
    a = torch.ones(2, 3)
    b = torch.randn(2, 3)
    
    """
    Question: what is the difference between torch.rand() and torch.randn()?
    Answer: rand() samples from a uniform distribution, randn() from the standard normal distribution
    """
    print(a)
    print(b)
    print(torch.eq(a,b))
    
    print(torch.eq(a, a))    # returns an element-wise result
    """
    tensor([[True, True, True],
            [True, True, True]])
    """
    print(torch.equal(a, a))     # True: only True if every element matches
    

    Advanced tensor operations

    """
    advanced tensor operations
    where
    gather: gathers values, much like a table lookup; designed so the lookup can run on the GPU instead of the CPU
    """
    


    where

    # where
    # torch.where(condition,x,y) --> Tensor
    # example:
    cond = torch.tensor([[0.6769, 0.7271],
                        [0.8884, 0.4163]])
    print(cond)
    a = torch.zeros(2, 2)
    print(a)
    b = torch.ones(2, 2)
    print(b)
    
    print(torch.where(cond > 0.5, a, b))    # where the condition holds take the element from a, otherwise take it from b
    

    gather

    # example: retrieve labels
    prob = torch.randn(4, 10)
    # print(prob)
    
    idx = prob.topk(dim=1, k=3)
    # print(idx)
    idx = idx[1]
    # print(idx)
    label = torch.arange(10) + 100
    # print(label)
    label_expand = label.expand(4, 10)
    print(label_expand)
    print(idx)    # these are the indices
    print('------------------')
    # print(idx.long())    # convert to LongTensor
    print(torch.gather(label_expand, dim=1, index=idx.long()))    # gather values according to index
    

    Gradients

    """
    1. length: the magnitude of the change
    2. direction: the direction of the change
    """
    

    Activation functions

    Sigmoid / Logistic gradient derivation

    # activation functions
    z = torch.linspace(-100, 100, 10)
    # sigmoid activation
    print(z)
    print(torch.sigmoid(z))    # range (0, 1)
    

    Tanh

    # tanh activation: widely used in RNNs; range (-1, 1)
    a = torch.linspace(-1, 1, 10)
    print(torch.tanh(a))
    

    Relu

    # ReLU activation
    # two ways to call it in PyTorch: 1. from torch.nn (nn.ReLU / F.relu)  2. torch.relu
    from torch.nn import functional as F
    a = torch.linspace(-1, 1, 10)
    print(torch.relu(a))
    print(F.relu(a))
    

    Loss and its gradient

    """
    1. Mean Squared Error
    2. Cross Entropy Loss
        1. binary
        2. multi-class
    """
    

    MSE

    Method 1: autograd.grad

    # Method 1: autograd.grad
    
    # Mean Squared Error
    # note: unlike the 2-norm, MSE does not take the square root
    
    # simple differentiation with pytorch
    # here pred = w * x + b
    from torch.nn import functional as F
    x = torch.ones(1)
    w = torch.full([1], 2.)    # float fill value so that requires_grad_() below works
    mse = F.mse_loss(torch.ones(1), x*w)    # first argument: pred, second argument: label
    # print(torch.autograd.grad(mse, [w]))    # first arg: the loss; second arg: the parameters [w1, w2, ...] -- raises the error below
    """
    RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
    w was created without requires_grad, so when pytorch built the graph it marked w as not needing gradients
    """
    # fix: tell pytorch that w needs gradient information
    w.requires_grad_()
    # print(torch.autograd.grad(mse, [w]))    # still raises the same error
    """
    RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
    it still fails after the update because pytorch uses a dynamic graph:
    w has been updated, but the graph has not been rebuilt yet;
    pytorch builds the graph one computation at a time
    """
    # the forward computation must be run again to rebuild the graph
    mse = F.mse_loss(torch.ones(1), x*w)    # rebuild the dynamic graph
    print(torch.autograd.grad(mse, [w]))    # (tensor([2.]),): once the graph is rebuilt the gradient can be computed
    print(mse)
    

    Method 2: loss.backward

    # Method 2: loss.backward
    from torch.nn import functional as F
    x = torch.ones(1)
    w = torch.full([1], 2.)    # float fill value so that requires_grad_() works
    mse = F.mse_loss(torch.ones(1), x*w)
    
    # torch.autograd.grad(mse, [w])
    
    w.requires_grad_()    # make w track gradients
    
    mse = F.mse_loss(torch.ones(1), x*w)    # recompute to rebuild the dynamic graph
    # torch.autograd.grad(mse, [w])    # option 1: compute the gradient explicitly
    
    mse.backward()    # option 2: backward; w.grad becomes tensor([2.])
    print(w.grad)
    

    Summary

    """
    Gradient API
        1. manual: torch.autograd.grad(loss, [w1, w2, ...])
            returns [w1 grad, w2 grad, ...]
        
        2. automatic: loss.backward()    # the gradients are not returned; they are attached to each parameter's .grad
            w1.grad
            w2.grad
    """
    

    Softmax



    """
    softmax derivative:
        p_i * (1 - p_j)    if i == j
        -p_j * p_i         if i != j
    
        (the delta term: 1 if i == j, 0 if i != j)
    """
    import torch
    from torch.nn import functional as F
    a = torch.rand(3)    # e.g. tensor([0.4207, 0.2955, 0.8440])
    print(a.requires_grad_())    # after this, gradients w.r.t. a can be computed: tensor([...], requires_grad=True)
    
    p = F.softmax(a, dim=0)    # builds the graph automatically: tensor([...], grad_fn=<SoftmaxBackward>)
    
    # calling backward once does the backprop and also frees the graph's buffers (hence retain_graph=True below)
    
    print(torch.autograd.grad(p[1], [a], retain_graph=True))    # (tensor([-0.0755,  0.1879, -0.1125]),)    entry i=1 is positive, the others negative
    print(torch.autograd.grad(p[2], [a]))    # (tensor([-0.1349, -0.1125,  0.2473]),)    entry i=2 is positive, the others negative
    

    Perceptron

    Single-output perceptron

    import torch
    from torch.nn import functional as F
    x = torch.randn(1, 10)
    w = torch.randn(1, 10, requires_grad=True)
    print(x)
    print(w)
    o = torch.sigmoid(x@w.t())    # bias omitted here
    print(o)
    print(torch.ones(1, 1))
    loss = F.mse_loss(torch.ones(1, 1), o)
    print(loss)
    loss.backward()
    print(w.grad)
    

    Multi-output perceptron

    import torch
    from torch.nn import functional as F
    x = torch.randn(1, 10)
    w = torch.randn(2, 10, requires_grad=True)
    print(x)
    print(w)
    o = torch.sigmoid(x@w.t())
    loss = F.mse_loss(torch.ones(1, 2), o)
    loss.backward()
    print(w.grad)
    

    Chain rule

    import torch
    
    x = torch.tensor(1.)
    w1 = torch.tensor(2., requires_grad=True)
    b1 = torch.tensor(1.)
    w2 = torch.tensor(2., requires_grad=True)
    b2 = torch.tensor(1.)
    
    y1 = x*w1 + b1
    y2 = y1*w2 + b2
    
    dy2_dy1 = torch.autograd.grad(y2, [y1], retain_graph=True)[0]
    dy1_dw1 = torch.autograd.grad(y1, [w1], retain_graph=True)[0]
    
    dy2_dw1 = torch.autograd.grad(y2, w1, retain_graph=True)[0]    # w1 works with or without the surrounding []; grad always returns a tuple
    
    print(dy2_dy1*dy1_dw1)
    
    print(dy2_dw1)
    

    Optimization example

    import numpy as np
    from mpl_toolkits.mplot3d import Axes3D
    from matplotlib import pyplot as plt
    import torch
    def himmelblau(x):
        return (x[0] ** 2 + x[1] - 11) ** 2 + (x[0] + x[1] ** 2 - 7) ** 2
    
    # plot the surface
    x = np.arange(-6, 6, 0.1)
    y = np.arange(-6, 6, 0.1)
    print('x,y range:', x.shape, y.shape)
    X, Y = np.meshgrid(x, y)    # build the 2-D coordinate grid from x and y
    print('X,Y maps:', X.shape, Y.shape)
    Z = himmelblau([X, Y])
    
    fig = plt.figure('himmelblau')
    ax = fig.add_subplot(projection='3d')    # fig.gca(projection=...) is no longer supported in recent matplotlib
    ax.plot_surface(X, Y, Z)    # Z was obtained by feeding the X, Y coordinates into himmelblau
    ax.view_init(60, -30)
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    plt.show()
    
    
    # find a minimum -- different starting points converge to different minima
    # try [1., 0.], [-4, 0.], [4, 0.]
    x = torch.tensor([4., 0.], requires_grad=True)    # different initialisations give different update speeds and different final results, so initialisation matters for gradient descent
    optimizer = torch.optim.Adam([x], lr=1e-3)
    for step in range(20000):
    
        pred = himmelblau(x)    # evaluate at x; the goal is to minimise this value
    
        optimizer.zero_grad()    # clear the gradients
        pred.backward()    # compute x.grad, i.e. the gradient w.r.t. both coordinates of x
        optimizer.step()    # apply the update to x
    
        if step % 2000 == 0:
            print('step {}: x = {}, f(x) = {}'
                   .format(step, x.tolist(), pred.item()))
    

    Logistic Regression


    Cross Entropy



    import torch
    a = torch.full([4], 1/4)
    print(a)
    print(a*torch.log2(a))
    print(-(a*torch.log2(a)).sum())    # tensor(2.): maximum entropy, no surprise
    b = torch.tensor([0.1, 0.1, 0.1, 0.7])
    print(-(b*torch.log2(b)).sum())    # tensor(1.3568): lower entropy, higher uncertainty/surprise
    c = torch.tensor([0.001, 0.001, 0.001, 0.999])
    print(-(c*torch.log2(c)).sum())    # tensor(0.0313): very low entropy, extremely "unstable" (one outcome dominates)
    

    numerical stability

    import torch
    from torch.nn import functional as F
    x = torch.randn(1, 784)
    w = torch.randn(10, 784)
    logits = x@w.t()
    print(logits.shape)
    pred = F.softmax(logits, dim=1)
    print(pred)
    pred_log = torch.log(pred)
    loss1 = F.nll_loss(pred_log, torch.tensor([3]))
    print(loss1)
    loss2 = F.cross_entropy(logits, torch.tensor([3]))    # pass the logits here, because cross_entropy = softmax + log + nll_loss combined
    print(loss2)
    

    Fully connected layers

    import torch
    import  torch.nn as nn
    
    x = torch.randn(1, 784)    # torch.Size([1, 784])
    print(x.shape)
    
    layer1 = nn.Linear(784, 200)    # first argument is ch_in, second is ch_out
    layer2 = nn.Linear(200, 200)
    layer3 = nn.Linear(200, 10)
    
    x = layer1(x)
    print(x.shape)    # torch.Size([1, 200])
    
    x = layer2(x)
    print(x.shape)    # torch.Size([1, 200])
    
    x = layer3(x)
    print(x.shape)    # torch.Size([1, 10])
    print(x)
    

    nn.Relu vs F.relu

    import torch
    import torch.nn as nn
    from torch.nn import functional as F
    
    x = torch.randn(1, 10)
    print(x.shape)
    
    # calling style 1: the class (module) API
    class ML(nn.Module):
        def __init__(self):
            super(ML, self).__init__()
    
            self.model = nn.Sequential(  # build the model
                nn.Linear(784, 200),
                nn.ReLU(inplace=True),
                nn.Linear(200, 200),
                nn.ReLU(inplace=True),
                nn.Linear(200, 10),
                nn.ReLU(inplace=True),
            )
    
        def forward(self, x):
            return self.model(x)
    
    # calling style 2: the functional API
    x = F.relu(x, inplace=True)
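    
    One way to read the difference (my summary, not from the original notes): nn.ReLU is a Module, so it can live inside nn.Sequential and shows up among the model's children, while F.relu is just a function called inside forward. A minimal sketch:
    
    # Module style: the activation is part of the model definition
    net = nn.Sequential(nn.Linear(10, 10), nn.ReLU(inplace=True))
    print(net(torch.randn(1, 10)).shape)    # torch.Size([1, 10])
    
    # Functional style: the activation is just a call inside forward()
    out = F.relu(torch.randn(1, 10))
    print(out.shape)                        # torch.Size([1, 10])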
    

    GPU acceleration

    device = torch.device('cuda:0')    # choose the device; whatever needs computing can be moved onto it
    
    # move the computation that needs accelerating onto the GPU
    criteon = nn.CrossEntropyLoss().to(device)    # .to() returns a reference; what you get back depends on the type of the original object (module vs tensor)
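    
    A minimal sketch of the usual pattern (names net, x, y are illustrative, not from the original notes): model and loss are moved once, the data is moved every batch:
    
    import torch
    from torch import nn
    
    # Typical device pattern: module .to() moves in place, tensor .to() returns a new tensor
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = nn.Linear(784, 10).to(device)
    criteon = nn.CrossEntropyLoss().to(device)
    
    x = torch.randn(32, 784).to(device)
    y = torch.randint(0, 10, (32,)).to(device)
    loss = criteon(net(x), y)
    print(loss.item())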
    

    Computing accuracy

    """
    code for computing accuracy
    """
    
    import torch
    from torch.nn import functional as F
    logits = torch.rand(4, 10)
    
    pred = F.softmax(logits, dim=1)
    print(pred)
    
    pred_label = pred.argmax(dim=1)    # index of the maximum value
    
    print(pred_label)
    
    label = torch.tensor([9, 3, 2, 9])
    correct = torch.eq(pred_label, label)
    print(correct)
    print(correct.sum().float().item()/4)    # item() extracts the Python number inside the tensor
    

    Visdom visualization

    """
    visualisation options for pytorch:
    Method 1:
    pip install tensorboardX
    1. requires starting a listening process
    
    Method 2: Visdom
    1. pip install visdom
    2. python -m visdom.server  (this starts a web server that renders the data to a web page)
        possible problem: ERROR:root:Error 404 while downloading https://unpkg.com/layout-bin-packer@1.4.0
    
    fix: install from source (download facebookresearch/visdom from GitHub)
        step 1: pip uninstall visdom
        step 2: download the source, cd into the directory (visdom-master), then run pip install -e .
        step 3: go back to the user directory and run python -m visdom.server again
        step 4: open the browser at the address it prints
    """
    
    
    # test:
    from visdom import Visdom
    viz = Visdom()
    
    """
    {Y value, X value}; win can be thought of as an ID (there is also env, which defaults to main); opts holds extra configuration
    
    non-image data is passed as numpy values; image data is passed as tensors
    """
    # viz.line([0.], [0.], win='train_loss', opts=dict(title='train loss'))
    # viz.line([loss.item()], [global_step], win='train_loss', update='append')
    

    During training

    global_step += 1
    viz.line([loss.item()], [global_step], win='train_loss', update='append')
    

    During testing

    # visualise with viz
    viz.line([[test_loss, correct / len(test_loader.dataset)]],
                [global_step], win='test', update='append')
    viz.images(data.view(-1, 1, 28, 28), win='x')
    viz.text(str(pred.detach().cpu().numpy()), win='pred',
                opts=dict(title='pred'))
    

    Regularization

    optimizer = optim.SGD(net.parameters(), lr=learning_rate, weight_decay=0.01)    # weight_decay=0.01 adds L2 regularisation (the 2-norm penalty)
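    
    weight_decay only gives the L2 penalty; if an L1 penalty is wanted it has to be added to the loss by hand. A rough, self-contained sketch (illustrative names and value for lambda_l1, not from the original notes):
    
    import torch
    from torch import nn
    
    # Manual L1 regularisation sketch: add lambda * sum(|w|) to the loss
    net = nn.Linear(10, 2)
    criteon = nn.CrossEntropyLoss()
    x, target = torch.randn(4, 10), torch.randint(0, 2, (4,))
    
    lambda_l1 = 0.01                                   # illustrative regularisation strength
    l1_penalty = sum(p.abs().sum() for p in net.parameters())
    loss = criteon(net(x), target) + lambda_l1 * l1_penalty
    loss.backward()
    print(loss.item())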
    

    Dropout

    import torch
    
    
    net_dropped = torch.nn.Sequential(
        torch.nn.Linear(784, 200),
        torch.nn.Dropout(0.5),    # drop 50% of the connections between these two layers
        torch.nn.ReLU(),
        torch.nn.Linear(200, 200),
        torch.nn.Dropout(0.5),    # drop 50% of the neurons
        torch.nn.ReLU(),
        torch.nn.Linear(200, 10),
    )
    
    """
    Dropout() is active during training,
    
    but must be switched off during test/val.
    For example:
    for epoch in range(epochs):
        # train
        net_dropped.train()
        for batch_idx, (data, target) in enumerate(train_loader):
        ...
        
        net_dropped.eval()    # call this before testing so that dropout is disabled
        test_loss = 0
        correct = 0
        for data, target in test_loader:
        
    """
    

    Convolutional neural networks

    import torch.nn as nn
    import torch
    from torch.nn import functional as F
    
    # first argument: number of input channels; second: number of kernels (output channels); kernel_size=3x3; output will be [1, 3, 26, 26]
    layer = nn.Conv2d(1, 3, kernel_size=3, stride=1, padding=0)
    x = torch.rand(1, 1, 28, 28)
    
    out = layer.forward(x)
    print(out.shape)    # torch.Size([1, 3, 26, 26])    # 26 = (28-3)/1 + 1
    
    
    layer = nn.Conv2d(1, 3, kernel_size=3, stride=1, padding=1)
    out = layer.forward(x)
    print(out.shape)    # torch.Size([1, 3, 28, 28])
    
    layer = nn.Conv2d(1, 3, kernel_size=3, stride=2, padding=1)
    out = layer.forward(x)
    print(out.shape)    # torch.Size([1, 3, 14, 14])
    
    out = layer(x)    # preferred: calling the layer uses Python's __call__ magic method, which then runs forward
    print(out.shape)    # torch.Size([1, 3, 14, 14])
    
    print(layer.weight)    # inspect the layer's weights
    print(layer.weight.shape)    # torch.Size([3, 1, 3, 3])
    
    print(layer.bias.shape)    # torch.Size([3])
    
    
    # F.conv2D
    
    # x above was torch.rand(1, 1, 28, 28)
    w = torch.rand(16, 3, 5, 5)
    b = torch.rand(16)
    
    # out = F.conv2d(x, w, b, stride=1, padding=1)
    # print(out)    # errors, because the channel counts of x and w do not match
    """
    RuntimeError: Given groups=1, weight of size 16 3 5 5, expected input[1, 1, 28, 28] to have 3 channels,
     but got 1 channels instead
    """
    x = torch.randn(1, 3, 28, 28)
    out = F.conv2d(x, w, b, stride=1, padding=1)
    print(out.shape)    # torch.Size([1, 16, 26, 26])
    
    out = F.conv2d(x, w, b, stride=2, padding=2)
    print(out.shape)    # torch.Size([1, 16, 14, 14])
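    
    The output sizes in the comments above all follow the same formula, out = floor((in + 2*padding - kernel) / stride) + 1; a tiny helper (not from the original notes) to check them:
    
    # Illustrative helper for the convolution output-size formula
    def conv_out(size, kernel, stride=1, padding=0):
        return (size + 2 * padding - kernel) // stride + 1
    
    print(conv_out(28, 3, stride=1, padding=0))    # 26
    print(conv_out(28, 3, stride=2, padding=1))    # 14
    print(conv_out(28, 5, stride=2, padding=2))    # 14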
    

    Pooling layers

    """
    outline:
        Pooling
        upsample
        Relu
    """
    import torch
    import torch.nn as nn
    from torch.nn import functional as F
    x = torch.randn(1, 16, 14, 14)
    print(x.shape)    # torch.Size([1, 16, 14, 14])
    
    # max pooling via the module API
    layer = nn.MaxPool2d(2, stride=2)
    out = layer(x)
    print(out.shape)    # torch.Size([1, 16, 7, 7])    (14-2)/2 + 1 = 7
    
    
    # average pooling via the functional API
    out = F.avg_pool2d(x, 2, stride=2)    # torch.Size([1, 16, 7, 7])
    print(out.shape)
    
    # ++++++++++++++++++++++++++++++++++++++++++++++++++++++#
    # upsample
    # use F.interpolate
    # interpolate means interpolation
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++#
    x = out
    out = F.interpolate(x, scale_factor=2, mode='nearest')    # nearest-neighbour upsampling
    print(out.shape)    # torch.Size([1, 16, 14, 14])
    
    out = F.interpolate(x, scale_factor=3, mode='nearest')
    print(out.shape)    # torch.Size([1, 16, 21, 21])
    
    #------------------------------------------------#
    #  ReLU activation
    #
    # ------------------------------------------------#
    
    x = torch.randn(1, 16, 7, 7)
    print(x.shape)    # torch.Size([1, 16, 7, 7])
    
    # method 1: the module API
    layer = nn.ReLU(inplace=True)    # inplace=True: x ---> x', where x' reuses x's memory
    out = layer(x)
    print(out.shape)    # torch.Size([1, 16, 7, 7])
    
    # method 2: the functional API
    out = F.relu(x)
    print(out.shape)    # torch.Size([1, 16, 7, 7])
    

    BatchNorm

    import torch
    import torch.nn as nn
    
    # ----------------------------#
    # BatchNorm1d
    # ----------------------------#
    x = torch.randn(100, 16) + 0.5
    print(x.shape)
    
    layer = torch.nn.BatchNorm1d(16)    # the number of features must match the input's feature dimension, otherwise it errors
    
    print(layer.running_mean, layer.running_var)
    """
    tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]) 
    tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
    """
    out = layer(x)
    print(layer.running_mean, layer.running_var)
    """
    tensor([0.0452, 0.0446, 0.0516, 0.0671, 0.0644, 0.0622, 0.0514, 0.0449, 0.0520,
            0.0546, 0.0461, 0.0620, 0.0332, 0.0450, 0.0384, 0.0580]) 
    tensor([0.9868, 0.9935, 1.0214, 1.0137, 1.0009, 0.9895, 1.0065, 1.0319, 0.9841,
            1.0051, 0.9967, 0.9968, 1.0045, 0.9877, 1.0011, 1.0031])
    """
    #----------------------------------------#
    # here the data follows N(0.5, 1): randn shifted by 0.5
    #
    # ---------------------------------------#
    
    x = torch.randn(100, 16) + 0.5
    layer = torch.nn.BatchNorm1d(16)
    
    for i in range(5):    # each iteration passes x through batchnorm once, so running_mean/var move further towards the batch statistics
        out = layer(x)
    
    print(layer.running_mean, layer.running_var)
    
    
    # ---------------------------#
    # nn.BatchNorm2d
    # ---------------------------#
    x = torch.rand(1, 16, 7, 7)
    print(x.shape)
    
    layer = nn.BatchNorm2d(16)
    out = layer(x)
    print(out.shape)    # torch.Size([1, 16, 7, 7])
    
    print(layer.weight)
    """
    this weight/bias pair (gamma/beta) is different from a layer's ordinary weight and bias
    """
    print(layer.weight.shape)    # torch.Size([16])
    
    print(layer.bias.shape)    # torch.Size([16])
    
    # -----------------------------------#
    #  class variables
    # -----------------------------------#
    print(vars(layer))
    
    
    # ------------------------------------#
    # Test
    # ------------------------------------#
    layer.eval()    # switch the layer to test/eval mode
    out = layer(x)
    print(vars(layer))
    

    nn.Module

    import torch
    from torch import nn
    from torch import optim
    
    # -----------------------------------#
    # Benefits of using nn.Module
    # 1. all the common building blocks are available: Linear / ReLU / Sigmoid etc.
    # 2. the nn.Sequential() container (sequential = run in series); both nn.Module layers and your own can go inside
    # 3. nn.Module manages parameters automatically
    # 4. modules: all nodes / children: direct children
    # 5. to(device) (line 84)
    # 6. save and load (line 90)
    # 7. convenient switching between train and test (line 87)
    # 8. implement your own layer as a class (lines 31 / 41); only classes can be put into nn.Sequential (line 48)
    # -----------------------------------#
    
    class MyLinear(nn.Module):
    
        def __init__(self, inp, outp):
            super(MyLinear, self).__init__()
    
            # requires_grad = True
            self.w = nn.Parameter(torch.randn(outp, inp))    # nn.Parameter registers the tensor so that it is returned by the module's parameters()
            self.b = nn.Parameter(torch.randn(outp))
    
        def forward(self, x):
            x = x @ self.w.t() + self.b
            return x
    
    
    class Flatten(nn.Module):    # flattens everything after the batch dimension
    
        def __init__(self):
            super(Flatten, self).__init__()
    
        def forward(self, input):
            return input.view(input.size(0), -1)    # -1 flattens all the remaining dims
    
    
    
    class TestNet(nn.Module):
    
        def __init__(self):
            super(TestNet, self).__init__()
    
            self.net = nn.Sequential(nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1),    # kernel_size is required; 3 keeps 28x28 with padding=1
                                     nn.MaxPool2d(2, 2),
                                     Flatten(),    # your own layer; only nn.Module classes can go inside Sequential
                                     nn.Linear(16*14*14, 10))    # 16 channels * 14 * 14 after the conv and pooling
    
        def forward(self, x):
            return self.net(x)
    
    
    class BasicNet(nn.Module):
    
        def __init__(self):
            super(BasicNet, self).__init__()
    
            self.net = nn.Linear(4, 3)
    
        def forward(self, x):
            return self.net(x)
    
    
    
    class Net(nn.Module):
    
        def __init__(self):
            super(Net, self).__init__()
            #  use the nn.Sequential() container (sequential = in series); both built-in nn.Module layers and your own can go inside
            self.net = nn.Sequential(BasicNet(),
                                     nn.ReLU(),
                                     nn.Linear(3, 2))
    
        def forward(self, x):
            return self.net(x)
    
    
    
    
    
    def main():
        device = torch.device('cuda')
        net = Net()
        net.to(device)    # .to() returns the same net reference (modules are moved in place) ---> this is not the case for tensor operations
    
        # train
        net.train()
        # test
        net.eval()
    
        # net.load_state_dict(torch.load('ckpt.mdl'))    # load the saved model at start-up
        #
        #
        # torch.save(net.state_dict(), 'ckpt.mdl')    # save the current state, e.g. in case training is interrupted
    
        for name, t in net.named_parameters():
            print('parameters:', name, t.shape)    # print the parameters: weights and biases
    
        for name, m in net.named_children():    # print the direct children (the net Sequential)
            print('children:', name, m)
    
    
        for name, m in net.named_modules():
            print('modules:', name, m)
    
    
    
    if __name__ == '__main__':
        main()
    

    Data augmentation

    # Data augmentation
    # ---------------------------------------#
    # these operations live in the torchvision package
    # 1. Flip
    # 2. Rotate
    # 3. Random Move & Crop
    # 4. GAN: generate more samples
    # 5. Noise: add Gaussian white noise N(0, 0.001)
    # ---------------------------------------#
    
    batch_size=200
    learning_rate=0.01
    epochs=10
    
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([    # Compose works much like nn.Sequential
                           transforms.RandomHorizontalFlip(),    # horizontal flip (random: it may or may not flip)
                           transforms.RandomVerticalFlip(),    # vertical flip
                           transforms.RandomRotation(15),    # rotation; the argument is the rotation range in degrees
                           transforms.RandomRotation([90, 180, 270]),    # randomly pick an angle from 90 / 180 / 270 degrees
                           transforms.Resize([32, 32]),    # the argument is a list
                           transforms.RandomCrop([28, 28]),    # crop
                           transforms.ToTensor(),
                           # transforms.Normalize((0.1307,), (0.3081,))
                       ])),    # x is transformed into x'
        batch_size=batch_size, shuffle=True)
    

    CIFAR-10 and ResNet18 in practice

    resnet.py

    import torch
    from torch import nn
    from torch.nn import functional as F    # F and nn are often used interchangeably
    
    
    class ResBlk(nn.Module):
        """
        resnet block: the basic building block of ResNet
        """
    
        def __init__(self, ch_in, ch_out, stride=1):
            """
    
            :param ch_in:
            :param ch_out:
            """
            super(ResBlk, self).__init__()
    
            # we add stride support for ResBlk, which is distinct from the tutorial.
            self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=stride, padding=1)
            self.bn1 = nn.BatchNorm2d(ch_out)
            self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1)
            self.bn2 = nn.BatchNorm2d(ch_out)
    
            self.extra = nn.Sequential()    # an empty nn.Sequential by default
            if ch_out != ch_in:    # if the channel counts differ, map ch_in to ch_out: this is the shortcut (skip) branch of the residual block
                # [b, ch_in, h, w] => [b, ch_out, h, w]
                self.extra = nn.Sequential(
                    nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=stride),
                    nn.BatchNorm2d(ch_out)
                )
        # -------------------------------#
        # Question: why is forward never called explicitly when the module instance is used?
        # Because nn.Module implements __call__, which calls forward.
        # Calling the instance invokes the parent class's __call__, and since the subclass
        # overrides forward, the subclass's forward is the one that runs.
        # -------------------------------#
        def forward(self, x):
            """
    
            :param x: [b, ch, h, w]
            :return:
            """
            out = F.relu(self.bn1(self.conv1(x)))    # conv layer, BN layer, then relu
            out = self.bn2(self.conv2(out))    # conv layer, BN layer
    
            # short cut    # this is the skip connection
            # extra module: [b, ch_in, h, w] => [b, ch_out, h, w]
            out = self.extra(x) + out    # element-wise add
            out = F.relu(out)    # final relu before returning
            print('debug - out shape:', out.shape)
            return out
    
    
    
    
    class ResNet18(nn.Module):
    
        def __init__(self):
            super(ResNet18, self).__init__()
    
            self.conv1 = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=3, stride=3, padding=0),
                nn.BatchNorm2d(64)
            )
            # followed 4 blocks
            # [b, 64, h, w] => [b, 128, h, w]    # note: h and w also change here
            self.blk1 = ResBlk(64, 128, stride=2)
            # [b, 128, h, w] => [b, 256, h, w]
            self.blk2 = ResBlk(128, 256, stride=2)
            # # [b, 256, h, w] => [b, 512, h, w]
            self.blk3 = ResBlk(256, 512, stride=2)
            # # [b, 512, h, w] => [b, 1024, h, w]
            self.blk4 = ResBlk(512, 512, stride=2)    # in the video this was self.blk4 = ResBlk(512, 1024)
    
            self.outlayer = nn.Linear(512*1*1, 10)    # final fully connected layer
    
        def forward(self, x):
            """
    
            :param x:
            :return:
            """
            x = F.relu(self.conv1(x))    # first a conv layer followed by relu; afterwards x.shape = [128, 64, 10, 10]
            # [b, 64, h, w] => [b, 1024, h, w]
            x = self.blk1(x)    # after this layer x.shape = torch.Size([128, 128, 5, 5])
            x = self.blk2(x)    # after this layer x.shape = torch.Size([128, 256, 3, 3])
            x = self.blk3(x)    # after this layer x.shape = torch.Size([128, 512, 2, 2])
            x = self.blk4(x)    # after this layer x.shape = torch.Size([128, 512, 2, 2])
    
    
            # print('after conv:', x.shape) #[b, 512, 2, 2]
            # [b, 512, h, w] => [b, 512, 1, 1]
            x = F.adaptive_avg_pool2d(x, [1, 1])
            # print('after pool:', x.shape)
            x = x.view(x.size(0), -1)    # after this x.shape = torch.Size([128, 512]); x.size(0) = 128
            x = self.outlayer(x)    # fully connected layer; after this x.shape = torch.Size([128, 10])
    
    
            return x
    
    
    
    def main():
        # ResBlk
        blk = ResBlk(64, 128, stride=4)
        tmp = torch.randn(2, 64, 32, 32)
        out = blk(tmp)
        print('block:', out.shape)    # block: torch.Size([2, 128, 8, 8])
    
        # ResNet18
        x = torch.randn(2, 3, 32, 32)
        model = ResNet18()
        out = model(x)
        print('resnet:', out.shape)    # resnet: torch.Size([2, 10])
    
    
    
    
    if __name__ == '__main__':
        main()
    
    
    
    # ---------------ResNet18 model----------------------------#
    """
    ResNet18(
      (conv1): Sequential(
        (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(3, 3))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (blk1): ResBlk(
        (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (extra): Sequential(
          (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2))
          (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (blk2): ResBlk(
        (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (extra): Sequential(
          (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2))
          (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (blk3): ResBlk(
        (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (extra): Sequential(
          (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2))
          (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (blk4): ResBlk(
        (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (extra): Sequential()
      )
      (outlayer): Linear(in_features=512, out_features=10, bias=True)
    )
    """
    # ----------------------------------------------------------------------#
    

    main.py

    import torch
    from torch.utils.data import DataLoader    # DataLoader loads the data in batches
    from torchvision import datasets    # import the datasets from torchvision
    from torchvision import transforms
    from torch import nn, optim
    
    from lenet5 import Lenet5
    from resnet import ResNet18
    
    
    def main():
        batchsz = 128    # this is the batch size
    
        # torchvision ships some standard datasets #  first arg: the target directory; second: train=True; transform: transformations applied to the data
        cifar_train = datasets.CIFAR10('cifar', True, transform=transforms.Compose([
            transforms.Resize((32, 32)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]), download=False)    # download=True downloads the data automatically
        cifar_train = DataLoader(cifar_train, batch_size=batchsz, shuffle=True)    # DataLoader: loads several samples at once; shuffle: randomise the order while loading
    
        cifar_test = datasets.CIFAR10('cifar', False, transform=transforms.Compose([
            transforms.Resize((32, 32)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]), download=False)
        cifar_test = DataLoader(cifar_test, batch_size=batchsz, shuffle=True)
    
    
        x, label = next(iter(cifar_train))    # .next() does not exist in Python 3; use next(iter(...))
        print('x:', x.shape, 'label:', label.shape)    # x: torch.Size([128, 3, 32, 32]) label: torch.Size([128])
    
        device = torch.device('cuda')    # so that the GPU can be used later
        # model = Lenet5().to(device)
        model = ResNet18().to(device)
    
        criteon = nn.CrossEntropyLoss().to(device)    # this loss already includes softmax; cross-entropy is used because this is a classification task
        optimizer = optim.Adam(model.parameters(), lr=1e-3)    # the optimizer is given the network's parameters
        print(model)
    
        for epoch in range(1000):
    
            model.train()    # switch the model to train mode
            for batchidx, (x, label) in enumerate(cifar_train):    # iterate over the batches of this epoch
                # [b, 3, 32, 32]
                # [b]
                x, label = x.to(device), label.to(device)    # move them onto cuda
    
    
                logits = model(x)    # logits differ from a prediction in that softmax has not been applied yet
                # logits: [b, 10]
                # label:  [b]    # the label does not need probabilities
                # loss: tensor scalar    # a 0-dim scalar tensor
                loss = criteon(logits, label)    # label plays the role of y
    
                # backprop
                optimizer.zero_grad()    # without zeroing, gradients would accumulate
                loss.backward()
                optimizer.step()    # update the weights (the optimizer holds references to them)
    
    
            print(epoch, 'loss:', loss.item())    # for a scalar tensor, item() converts it to a Python number
    
            # test
            model.eval()    # switch the model to test mode
            with torch.no_grad():    # tell pytorch not to build the graph / track gradients here
                # test
                total_correct = 0    # number of correct predictions
                total_num = 0    # total number of samples
                for x, label in cifar_test:
                    # [b, 3, 32, 32]
                    # [b]
                    x, label = x.to(device), label.to(device)
    
                    # [b, 10]
                    logits = model(x)
                    # [b]
                    pred = logits.argmax(dim=1)
                    # [b] vs [b] => scalar tensor
                    correct = torch.eq(pred, label).float().sum().item()
                    total_correct += correct
                    total_num += x.size(0)
                    # print(correct)
    
                acc = total_correct / total_num
                print(epoch, 'test acc:', acc)
    
    
    
    if __name__ == '__main__':
        main()
    

    Dataset directory layout

    -- cifar
        --cifar-10-batches-py
            --batches.meta
            --data_batch_1
            --data_batch_2
            --data_batch_3
            --data_batch_4
            --data_batch_5
            --readme.html
            --test_batch
    
  • Original article: https://www.cnblogs.com/zranguai/p/14560666.html