Basic data types
Update (May 16)
---------------------------------------------
- Save the model
print("Saving state, iter:", str(epoch))
torch.save(model.state_dict(), f'logs/Epoch{epoch}-acc{acc}.pth')
- Load the model / use as pretrained weights
model = ResNet18().to(device)
# ----------------------------#
model_path = r"logs/Epoch2-acc0.6816.pth"
print('Loading weights into state dict...')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
state_dict = torch.load(model_path, map_location=device)
model.load_state_dict(state_dict, strict=True)
- Use only part of the pretrained weights (update May 21)
# -----使用部分预训练权重------------------#
model_path = r"logs/Epoch2-acc0.6831.pth"
print('Loading weights into state dict...')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_dict = model.state_dict()
pretrained_dict = torch.load(model_path, map_location=device)
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict and model_dict[k].shape == v.shape}
# pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}  # fall back to key matching only, if the shape check is not wanted
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)
print('Finished!')
# ---------------------------------------#
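As a small extension (not from the original notes), the two filters above can be bundled into one reusable helper; the function name and signature below are my own:
import torch

def load_partial_weights(model, model_path):
    # keep only keys that also exist in the current model and whose shapes match
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_dict = model.state_dict()
    pretrained_dict = torch.load(model_path, map_location=device)
    pretrained_dict = {k: v for k, v in pretrained_dict.items()
                       if k in model_dict and model_dict[k].shape == v.shape}
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    return model
# usage (assuming the ResNet18 and checkpoint path from above):
# model = load_partial_weights(ResNet18().to(device), r"logs/Epoch2-acc0.6831.pth")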
---------------------------------------------
Data type
type check
a = torch.randn(2, 3) # 随机生成2行3列的矩阵
print(a.shape)
print(a.size(1)) # 返回shape的第2个元素
print(a.shape[1]) # 3
# cpu上
print(a.type()) # torch.FloatTensor
print(type(a))
print(isinstance(a, torch.FloatTensor))
# Gpu上
data = a.cuda()
print(isinstance(data, torch.cuda.FloatTensor))
"""
In PyTorch 0.3 there was no 0-dimensional tensor, so a scalar was returned as a 1-element tensor such as [0.3].
In later versions a scalar is returned as 0.3 itself, i.e. a 0-dimensional tensor (semantically clearer).
Distinguish dim / size / shape / tensor, e.g. for a 2x2 tensor:
dim: 2 (the rank)
size/shape: [2, 2]
tensor: the actual values, e.g. [[1, 3], [2, 4]]
"""
Dimension 0/1/2
# Dim=0,用于loss
a = torch.tensor(2.2)
print(a.shape) # torch.Size([])
print(len(a.shape)) # 0
print(a.size()) # torch.Size([])
# Dim=1,用于Bias/Linear input
b = torch.tensor([2]) # 直接这样写,里面的数据类型跟着里面数据变化
print(b)
print(b.type())
c = torch.tensor([1.1, 2.2])
print(c)
print(c.type())
d = torch.FloatTensor(2)
print(d)
e = torch.IntTensor([2.2])
print(e)
data = np.ones(3)
print(data)
f = torch.from_numpy(data) # 将numpy转换成tensor
print(f)
# Dim=2,Linear input/batch
g = torch.randn(2, 3) # sampled from the standard normal distribution N(0, 1)
print(g)
print(g.shape)
print(g.size())
print(g.size(0))
print(g.size(1))
print(g.shape[1])
# Dim=3 RNN input/Batch
h = torch.rand(3, 2, 3) # 随机均匀分布
print(h)
print(h.shape)
print(h[0])
print(h[1])
print(list(h.shape))
# Dim=4 CNN:[b,c,h,w]
# 下面解释为2张照片,每张照片通道数为3,长宽为28×28
i = torch.rand(2, 3, 28, 28) # 照片数 通道数(彩色图片为3) 图片长 图片宽
print(i)
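Besides .shape, the rank and the total element count can be read with .dim() and .numel(); a quick check on the 4-dim tensor above:
print(i.dim())   # 4: the rank, i.e. number of dimensions
print(i.numel()) # 2*3*28*28 = 4704: total number of elements
print(list(i.shape)) # [2, 3, 28, 28]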
创建Tensor
import from numpy
import torch
import numpy as np
# 从numpy中导入
a = np.array([2, 3.3])
data = torch.from_numpy(a)
print(data)
b = np.ones([3, 4])
dd = torch.from_numpy(b)
print(dd)
import from list
# 从list中导入
# 大写的Tensor():与FloatTensor类似,接受shape作为参数,小写的接受现有的数据
c = torch.tensor([2., 3.2])
d = torch.FloatTensor([2., 3.2]) # 也可接受现有数据,但是数据必须用一个list来表示。如果接受shape:(2, 3)
e = torch.tensor([[2., 3.2], [1., 22.3]])
print(c)
print(d)
print(e)
uninitialized
# 生成未初始化数据:只是作为一个容器,后面会把数据写进来
# torch.empty() : 给shape
# torch.FloatTensor(d1, d2, d3)
# torch.IntTensor(d1, d2, d3)
f = torch.empty(2, 3)
print(f)
print(torch.Tensor(2, 3)) # uninitialized values can be arbitrary (even nan or inf), so remember to overwrite them before use
print(torch.IntTensor(2, 3))
print(torch.FloatTensor(2, 3))
set default type
# set default type: torch中默认的类型是torch.FloatTensor
print(torch.tensor([1.2, 3]).type())
torch.set_default_tensor_type(torch.DoubleTensor)
print(torch.tensor([1.2, 3]).type())
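Note that set_default_tensor_type is a global switch, so it is usually restored afterwards; a minimal sketch:
torch.set_default_tensor_type(torch.FloatTensor) # back to the default
print(torch.tensor([1.2, 3]).type()) # torch.FloatTensor again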
rand/rand_like, randint
# rand/rand_like, randint
# rand: uniform samples from [0, 1)
# rand_like: takes a tensor and reuses its shape (still uniform on [0, 1))
# randint(min, max, shape): integers in [min, max), max excluded; *_like variants exist as well
print(torch.rand(3, 3)) # 比较均匀的采样出来
a = torch.rand(3, 3)
print(torch.rand_like(a)) # rand_like接受的参数是一个tensor,相当于把a.shape读出来再送给rand函数
print(torch.randint(1, 10, [3, 3]))
randn
# randn: 正态分布
# N(0, 1) 用在bias比较多
# N(u, std)
print(torch.randn(3, 3))
# means: ten zeros via torch.full; stds: torch.arange(1, 0, -0.1) decreases from 1.0 down to 0.1
print(torch.normal(mean=torch.full([10], 0.), std=torch.arange(1, 0, -0.1)))
full
# full
print(torch.full([2, 3], 7))
print(torch.full([], 7)) # dim=0
print(torch.full([1], 7)) # dim=1
arange/range
print(torch.arange(0, 10)) # 不包括10
print(torch.arange(0, 10, 2))
linspace/logspace
print(torch.linspace(0, 10, steps=4)) # 4 evenly spaced points from 0 to 10, endpoints included
print(torch.logspace(0, 1, steps=10)) # 10 points 10**x for x evenly spaced in [0, 1], i.e. from 1 to 10
Ones/zeros/eye
# ones: all ones, takes a shape directly
# zeros: all zeros
# eye: ones on the diagonal, takes one or two size arguments
print(torch.ones(3, 3))
print(torch.zeros(3, 3))
print(torch.eye(3, 4))
data = torch.ones(3, 3)
print(torch.ones_like(data))
randperm:随机打散
# randperm:随机打散
print(torch.randperm(10))
a = torch.rand(2, 3)
b = torch.rand(2, 2)
idx = torch.randperm(2)
print(idx)
print(a)
print(b)
print(a[idx]) # 达到协同shuffle的功能
print(b[idx])
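The same trick shuffles a feature tensor and its label tensor consistently; a minimal sketch (the variable names are my own):
features = torch.rand(5, 3)
labels = torch.arange(5)
perm = torch.randperm(features.size(0))
features, labels = features[perm], labels[perm] # rows and labels stay aligned
print(features.shape, labels)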
索引与切片
indexing
a = torch.rand(4, 3, 28, 28)
print(a[0])
print(a[0].shape) # torch.Size([3, 28, 28]) :索引第一个维度 :取第0张图片
print(a[0, 0].shape) # torch.Size([28, 28]):第二个维度:第0张图片的第0个通道
print(a[0, 0, 2])
print(a[0, 0, 2, 4]) # tensor(0.9441) : 第0张图片第0个通道第二行第4列
select first/last N
# select first/last N
a = torch.rand(4, 3, 28, 28)
print(a.shape) # torch.Size([4, 3, 28, 28])
print(a[:2].shape) # torch.Size([2, 3, 28, 28])
print(a[:2, :1, :, :].shape) # torch.Size([2, 1, 28, 28])
print(a[:2, 1:, :, :].shape) # torch.Size([2, 2, 28, 28])
print(a[:2, -1:, :, :].shape) # torch.Size([2, 1, 28, 28])
select by steps
# select by steps
a = torch.rand(4, 3, 28, 28)
print(a[:, :, 0:28:2, 0:28:2].shape) # torch.Size([4, 3, 14, 14])
print(a[:, :, ::2, ::2].shape) # torch.Size([4, 3, 14, 14])
select by specific index
# select by specific index
a = torch.rand(4, 3, 28, 28)
print(a)
print(a.index_select(0, torch.tensor([0, 2])).shape) # pick indices 0 and 2 along dim 0 -> torch.Size([2, 3, 28, 28])
print(a.index_select(2, torch.arange(20)).shape)
... 任意多的维度
# ... 任意多的维度
a = torch.rand(4, 3, 28, 28)
print(a[...].shape) # torch.Size([4, 3, 28, 28])
print(a[:, 1, ...].shape) # torch.Size([4, 28, 28])
print(a[..., :2].shape) # torch.Size([4, 3, 28, 2])
select by mask
# select by mask
x = torch.randn(3, 4)
y = torch.randn(3, 4)
print(x)
mask = x.ge(0.5) # >=0.5的位置为True
print(mask)
print(torch.masked_select(y, mask)) # 为True的位置选出来
select by flatten index
# select by flatten index
src = torch.IntTensor(3, 4)
print(src)
print(torch.take(src, torch.tensor([0, 2, 5])))
Tensor维度变换
- view: reshape one shape into another
- squeeze (remove size-1 dims) / unsqueeze (insert a new dim)
- transpose (swap two dims) / t (2D transpose) / permute (reorder all dims at once)
- expand (only changes how the data is viewed) / repeat (actually copies the data, memory copied)
view: lost dim information
# view: lost dim information
a = torch.rand(4, 1, 28, 28)
print(a)
print(a.shape)
print(a.view(4, 28 * 28).shape)
print(a.view(4 * 28, 28).shape)
print(a.view(4*1, 28, 28).shape)
b = a.view(4, 784)
b.view(4, 28, 28, 1) # logic bug
# flexible but error-prone: the total element count must match
# print(a.view(4, 783)) # RuntimeError: shape '[4, 783]' is invalid for input of size 3136
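When the underlying storage is no longer contiguous (e.g. after a transpose), view() raises the error quoted in the Transpose section below, while reshape() copies the data if necessary; a small sketch:
aa = torch.rand(4, 1, 28, 28)
bb = aa.transpose(1, 3) # no longer contiguous
# bb.view(4, 784) # would raise: view size is not compatible with input tensor's size and stride
print(bb.reshape(4, 784).shape) # torch.Size([4, 784]); reshape copies when it has to
print(bb.contiguous().view(4, 784).shape) # the explicit equivalent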
squeeze / unsqueeze
unsqueeze
"""
valid range for the dim argument of unsqueeze:
[-a.dim()-1, a.dim()+1), i.e. [-5, 5) for a 4-dim tensor
"""
a = torch.rand(4, 1, 28, 28)
print(a.shape)
print(a.unsqueeze(0).shape)
print(a.unsqueeze(-1).shape)
print(a.unsqueeze(4).shape)
print(a.unsqueeze(-5).shape)
# print(a.unsqueeze(5).shape) # IndexError: Dimension out of range (expected to be in range of [-5, 4], but got 5)
a = torch.tensor([1.2, 2.3])
print(a)
print(a.unsqueeze(-1))
print(a.unsqueeze(0))
# 案例:
b = torch.rand(32)
f = torch.rand(4, 32, 14, 14)
b = b.unsqueeze(1).unsqueeze(2).unsqueeze(0)
print(b.shape)
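To actually apply this bias to the feature map f, expand it explicitly or just add it and let broadcasting do the expansion; a small check:
print(b.expand(4, 32, 14, 14).shape) # torch.Size([4, 32, 14, 14])
print((f + b).shape) # broadcasting gives the same shape: torch.Size([4, 32, 14, 14])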
squeeze
# squeeze
b = torch.rand(1, 32, 1, 1)
print(b.squeeze()) # 能压缩的都压缩
print(b.squeeze(0).shape) # 压缩第0个元素
print(b.squeeze(-1).shape)
print(b.squeeze(1).shape) # 32不能压缩就不压缩
print(b.squeeze(-4).shape)
expand/repeat
# expand/repeat
# expand: broadcasting 改变理解方式
# repeat: memory copied 实实在在的增加数据
a = torch.rand(4, 32, 14, 14)
b = torch.rand(1, 32, 1, 1)
print(b)
print(b.expand(4, 32, 14, 14)) # torch.Size([4, 32, 14, 14])
print(b.expand(-1, 32, -1, -1).shape) # -1表示该维度不变
# print(b.expand(-1, 32, -1, -4).shape) # only -1 is allowed as a placeholder; -4 raises RuntimeError: invalid shape dimension -128
# repeat is generally not recommended (it really copies memory)
print(b.repeat(4, 32, 1, 1).shape) # each dim is tiled that many times: torch.Size([4, 1024, 1, 1])
print(b.repeat(4, 1, 1, 1).shape) # torch.Size([4, 32, 1, 1])
print(b.repeat(4, 1, 32, 32).shape) # torch.Size([4, 32, 32, 32])
t():转置 只适合2D tensor
# t():转置 只适合2D tensor
a = torch.randn(3, 4)
print(a)
print(a.t())
Transpose: 维度变换
# Transpose: 维度变换
a = torch.rand(4, 3, 32, 32)
print(a.shape)
"""
RuntimeError: view size is not compatible with input tensor's size and stride
(at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
"""
a1 = a.transpose(1, 3).contiguous().view(4, 3*32*32).view(4, 3, 32, 32) # needs .contiguous(); the dim order is now wrong (logic bug)
a2 = a.transpose(1, 3).contiguous().view(4, 3*32*32).view(4, 3, 32, 32).transpose(1, 3) # transposing back restores the correct layout
print(a1.shape)
print(a2.shape)
permute: reorder all dims in one call (equivalent to composing several transpose calls)
# permute: reorder all dims in one call (equivalent to composing several transpose calls)
a = torch.rand(4, 3, 28, 28)
print(a.transpose(1, 3).shape) # torch.Size([4, 28, 28, 3])
b = torch.rand(4, 3, 28, 32)
print(b.transpose(1, 3).shape) # torch.Size([4, 32, 28, 3])
print(b.transpose(1, 3).transpose(1, 3).shape) # torch.Size([4, 3, 28, 32])
print(b.permute(0, 2, 3, 1).shape) # torch.Size([4, 28, 32, 3])
Broadcast自动扩展
"""
expand
without copying data
insert 1 dim ahead
expand dims with size 1 to same size
feature maps:[4, 32, 14, 14]
bias:[32, 1, 1] => [1, 32, 1, 1] => [4, 32, 14, 14] bias的扩张
"""
broadcast
# situation 1
# [4, 32, 14, 14]
# [1, 32, 1, 1] => [4, 32, 14, 14]
# situation2
# [4, 32, 14, 14]
# [14, 14] => [1, 1, 14, 14] => [4, 32, 14, 14] # 可以先unsqueeze再expand
# situation3(不符合)
# [4, 32, 14, 14]
# [2, 32, 14, 14]
# a = torch.tensor([2, 32, 14, 14])
# # print(a)
# # print(a[:])
# a = torch.IntTensor(4, 3)
# b = torch.IntTensor(3)
# print(a)
# print(b)
"""
match from last dim
1. no dim
2. dim of size 1
"""
拼接与拆分
"""
Merge or split
合并:
cat
stack
分割:
split
chunk
"""
cat
# cat
a = torch.rand(4, 32, 8)
b = torch.rand(5, 32, 8)
print(torch.cat([a, b], dim=0).shape) # torch.Size([9, 32, 8])
a1 = torch.rand(4, 3, 32, 32)
a2 = torch.rand(4, 1, 32, 32)
# print(torch.cat([a1, a2], dim=0).shape) # RuntimeError: all dims other than the cat dim must match
print(torch.cat([a1, a2], dim=1).shape) # torch.Size([4, 4, 32, 32])
stack: creates a new dim; requires the tensors to have exactly the same shape
# stack: creates a new dim; requires the tensors to have exactly the same shape
a1 = torch.rand(4, 3, 16, 32)
a2 = torch.rand(4, 3, 16, 32)
print(torch.cat([a1, a2], dim=2).shape) # torch.Size([4, 3, 32, 32])
print(torch.stack([a1, a2], dim=2).shape) # torch.Size([4, 3, 2, 16, 32])
a = torch.rand(32, 8)
b = torch.rand(32, 8)
print(torch.stack([a, b], dim=0).shape) # torch.Size([2, 32, 8])
split: by len: 根据长度来分
# split: by len: 根据长度来分
b = torch.rand(32, 8)
a = torch.rand(32, 8)
# print(a.shape) # torch.Size([32, 8])
c = torch.stack([a, b], dim=0)
# print(c.shape) # torch.Size([2, 32, 8])
aa, bb = c.split([4, 4], dim=2) # split the last dim (size 8) into lengths 4 and 4
print(aa.shape, bb.shape) # torch.Size([2, 32, 4]) torch.Size([2, 32, 4])
# aa, bb = c.split(2, dim=0) # ValueError: not enough values to unpack (splitting a size-2 dim by 2 gives only one chunk)
print(c.shape) # torch.Size([2, 32, 8])
chunk: by num: 根据数量来分
# chunk: by num: 根据数量来分
aa, bb = c.chunk(2, dim=2) # cut the last dim (size 8) into 2 chunks
print(aa.shape, bb.shape) # torch.Size([2, 32, 4]) torch.Size([2, 32, 4])
数学运算
"""
Math operation
1. add/minus/multiply/divide
2. matmul
3. pow
4. sqrt/rsqrt
5. round
"""
基础部分
# 基础部分
a = torch.rand(3, 4)
b = torch.rand(4)
print(a)
print(b)
print(a + b) # b会被广播
# all()函数的功能: 如果张量tensor中所有元素都是True, 才返回True; 否则返回False
b = torch.tensor([1, 1, 1, 1])
print(torch.all(torch.eq(a-b, torch.sub(a, b))))
matmul
# matmul
# torch.mm
# only for 2d
# torch.matmul
# @
a = torch.tensor([[3., 3.],
[3., 3.]])
print(a)
b = torch.ones(2, 2)
print(b)
print(torch.mm(a, b)) # 只针对2d矩阵
print(torch.matmul(a, b))
print(a@b)
# 案例:
# ==2d的tensor运算
x = torch.rand(4, 784)
w = torch.rand(512, 784) # [ch_out, ch_in]
print((x @ w.t()).shape) # torch.Size([4, 512]); w stores ch_out first, so it must be transposed for the product
print(torch.matmul(x, w.t()).shape) # torch.Size([4, 512])
# >2d的tensor运算
a = torch.rand(4, 3, 28, 64)
b = torch.rand(4, 3, 64, 32)
print(torch.matmul(a, b).shape) # torch.Size([4, 3, 28, 32])
b = torch.rand(4, 1, 64, 32)
print(torch.matmul(a, b).shape) # torch.Size([4, 3, 28, 32]), 这种情况会先使用broadcast,再使用矩阵相乘
power
# power
a = torch.full([2, 2], 3.) # float fill value, so that sqrt/rsqrt below work
print(a.pow(2))
print(a**2)
aa = a**2
print(aa.sqrt())
print(aa.rsqrt())
print(aa**(0.5))
exp/log
# exp/log
a = torch.exp(torch.ones(2, 2))
print(a)
print(torch.log(a))
approximation
# approximation
a = torch.tensor(3.14)
print(a.floor(), a.ceil(), a.trunc(), a.frac()) # tensor(3.) tensor(4.) tensor(3.) tensor(0.1400)
# 往下取整 往上取整 截取,保留整数 截取,保留小数
a = torch.tensor(3.499)
print(a.round()) # tensor(3.) 四舍五入
a = torch.tensor(3.5)
print(a.round()) # tensor(4.)
clamp:裁剪
# clamp:裁剪
"""
gradient clipping
(min)
(min, max)
"""
grad = torch.rand(2, 3)*15
print(grad)
print(grad.max())
print(grad.median())
print(grad.clamp(10)) # 里面的元素小于10的全部变成10
print(grad.clamp(2, 10)) # 小于2的裁剪成2, 大于10的裁剪成10
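In training code the same clamp idea is applied to parameter gradients; a minimal self-contained sketch (the tiny Linear model and the threshold 10 are just for illustration; clip_grad_norm_ is the built-in norm-based alternative):
import torch
from torch import nn

net = nn.Linear(4, 2)
loss = net(torch.randn(8, 4)).pow(2).mean()
loss.backward()
for p in net.parameters(): # element-wise gradient clipping with clamp_
    p.grad.clamp_(-10, 10)
nn.utils.clip_grad_norm_(net.parameters(), max_norm=10) # rescales by the total gradient norm instead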
Tensor统计
"""
statistics
norm:范数
mean sum
prod
max, min, argmin(最小值的位置), argmax
kthvalue(第几个值 默认是小的: 比如第8个小的), topk(top多少)
"""
norm:
# norm:
a = torch.full([8], 1.) # float fill value, so that norm() works
b = a.view(2, 4)
c = a.view(2, 2, 2)
print(a)
print(b)
print(c)
print(a.norm(1), b.norm(1), c.norm(1)) # tensor(8.) tensor(8.) tensor(8.)
print(a.norm(2), b.norm(2), c.norm(2)) # tensor(2.8284) tensor(2.8284) tensor(2.8284)
print(b.norm(1, dim=1)) # dim=1:将dim=1的部分取范数,同时二维向量变成一维向量 tensor([4., 4.])
print(b.norm(2, dim=1)) # tensor([2., 2.])
print(c.norm(1, dim=0))
print(c.norm(2, dim=0))
mean / sum / min / max / prod (product of all elements)
# mean sum min max prod (product of all elements, not factorial)
a = torch.arange(8).view(2, 4).float()
print(a)
"""
tensor([[0., 1., 2., 3.],
[4., 5., 6., 7.]])
"""
print(a.min(), a.max(), a.mean(), a.prod()) # tensor(0.) tensor(7.) tensor(3.5000) tensor(0.)
print(a.sum()) # tensor(28.)
print(a.argmin(), a.argmax()) # tensor(0) tensor(7)
argmin/argmax在指定维度的表示
# argmin/argmax在指定维度的表示
a = torch.rand(4, 5)
print(a)
print(a.argmax())
print(a.argmax(dim=1)) # index of the maximum within each row (along dim=1)
keepdim
# keepdim
a = torch.rand(4, 10)
print(a)
# print(a.max(dim=1))
print(a.argmax(dim=1))
print(a.max(dim=1, keepdim=True)) # 这个会返回他在dim=1的最大值和最大值的位置
top-k or k-th
# top-k or k-th
a = torch.rand(4, 10)
print(a.topk(3, dim=1))
print(a.topk(3, dim=1, largest=False))
print(a.kthvalue(8, dim=1)) # the 8th smallest value along dim=1 (kthvalue always counts from the smallest)
"""
torch.return_types.kthvalue(
values=tensor([0.7363, 0.8011, 0.6856, 0.6297]),
indices=tensor([4, 0, 7, 8]))
"""
compare
# compare
"""
> >= < <= != ==
torch.eq(a, b)
"""
a = torch.rand(4, 10)
print(a > 5) # 里面的每个元素都要比较
print(torch.gt(a, 0))
print(a != 0)
a = torch.ones(2, 3)
b = torch.randn(2, 3)
"""
Q: what is the difference between torch.rand() and torch.randn()?
A: rand() samples uniformly from [0, 1); randn() samples from the standard normal distribution N(0, 1)
"""
print(a)
print(b)
print(torch.eq(a,b))
print(torch.eq(a, a)) # 返回每个元素
"""
tensor([[True, True, True],
[True, True, True]])
"""
print(torch.equal(a, a)) # True 所有都为True才为True
Tensor高阶
"""
tensor 高级操作
where
    gather: a table-lookup style operation, designed so the lookup can stay on the GPU instead of falling back to the CPU
"""
where
# where
# torch.where(condition,x,y) --> Tensor
# 案例:
cond = torch.tensor([[0.6769, 0.7271],
[0.8884, 0.4163]])
print(cond)
a = torch.zeros(2, 2)
print(a)
b = torch.ones(2, 2)
print(b)
print(torch.where(cond > 0.5, a, b)) # 如果cond成立,选取a中的元素,否则选择b中的元素
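torch.where is equivalent to selecting with the mask arithmetic itself, which is why it can replace a per-element Python if/else on the GPU; a small check:
mask = (cond > 0.5).float()
print(mask * a + (1 - mask) * b) # same result as torch.where(cond > 0.5, a, b)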
gather
# 案例:检索 retrieve label
prob = torch.randn(4, 10)
# print(prob)
idx = prob.topk(dim=1, k=3)
# print(idx)
idx = idx[1]
# print(idx)
label = torch.arange(10) + 100
# print(label)
label_expand = label.expand(4, 10)
print(label_expand)
print(idx) # 这是索引
print('------------------')
# print(idx.long()) # 转换成Longtensor数据格式
print(torch.gather(label_expand, dim=1, index=idx.long())) # 按照index索引进行取数据
Gradients
"""
A gradient is a vector:
1. its length (magnitude) indicates how fast the function value changes
2. its direction points in the direction of steepest change
"""
激活函数
Sigmoid / Logistic梯度推导
# 激活函数
z = torch.linspace(-100, 100, 10)
# sigmoid激活函数
print(z)
print(torch.sigmoid(z)) # 范围在0-1
Tanh
# tanh activation: common in RNNs, output range (-1, 1)
a = torch.linspace(-1, 1, 10)
print(torch.tanh(a))
Relu
# ReLU activation
# two ways to call it in PyTorch: torch.relu / F.relu (functional), or nn.ReLU (module)
from torch.nn import functional as F
a = torch.linspace(-1, 1, 10)
print(torch.relu(a))
print(F.relu(a))
LOSS及其梯度
"""
1. Mean Squared Error
2. Cross Entropy Loss
1. binary
2. multi-class
"""
MSE
一:autograd.grad
# 一:autograd.grad
# Mean Squared Error
# note: compared with the L2 norm, MSE has no square root
# simple differentiation with PyTorch
# here pred = w * x + b
from torch.nn import functional as F
x = torch.ones(1)
w = torch.full([1], 2.) # float, so that it can later require gradients
mse = F.mse_loss(torch.ones(1), x*w) # F.mse_loss(input, target); for MSE the argument order does not change the value
print(torch.autograd.grad(mse, [w])) # 第一个参数loss 第二个参数w1, w2, w3
"""
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn?
w was created without requires_grad, so when building the graph PyTorch marked it as not requiring a gradient
"""
# 改变如下:告诉pytorch w需要梯度信息
w.requires_grad_()
print(torch.autograd.grad(mse, [w]))
"""
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
更新之后还是会报错,因为pytorch是一个动态图
这里更新了w但是图还没有更新
因为pytorch是做一步计算一次图
"""
# 必须经过计算图的过程重新更新一遍
mse = F.mse_loss(torch.ones(1), x*w) # 动态图的建图
print(torch.autograd.grad(mse, [w])) # (tensor([2.]),) 图重新更新后可以计算出结果
print(mse)
二:loss.backward
# 二:loss.backward
from torch.nn import functional as F
x = torch.ones(1)
w = torch.full([1], 2.)
mse = F.mse_loss(torch.ones(1), x*w)
# torch.autograd.grad(mse, [w])
w.requires_grad_() # 使w获取梯度
mse = F.mse_loss(torch.ones(1), x*w) # 再次计算获取动态图
# torch.autograd.grad(mse, [w]) # option 1: ask autograd.grad for the gradient explicitly
mse.backward() # option 2: backward() accumulates the gradient into w.grad -> tensor([2.])
print(w.grad)
总结
"""
Gradient API
1. torch.autograd.grad(loss, [w1, w2, ...])  # returns the gradients explicitly
   [w1 grad, w2 grad, ...]
2. loss.backward()  # returns nothing; the gradients are accumulated into each parameter's .grad
   w1.grad
   w2.grad
"""
Softmax
"""
derivative of softmax:
dp_i/da_j = p_i * (1 - p_j)   if i == j
dp_i/da_j = -p_j * p_i        if i != j
i.e. dp_i/da_j = p_i * (delta_ij - p_j), where delta_ij = 1 if i == j else 0
"""
import torch
from torch.nn import functional as F
a = torch.rand(3) # tensor([0.4207, 0.2955, 0.8440])
print(a.requires_grad_()) # 这样之后就可以求梯度 tensor([0.5424, 0.1913, 0.9416], requires_grad=True)
p = F.softmax(a, dim=0) # 自动完成建图操作 tensor([0.2489, 0.3556, 0.3954], grad_fn=<SoftmaxBackward>)
# 当你调用一次backward之后除了完成一次反向传播以外,还会把这个图的梯度信息清除掉
print(torch.autograd.grad(p[1], [a],retain_graph=True)) # (tensor([-0.0755, 0.1879, -0.1125]),) i=1为正的其他的为负的
print(torch.autograd.grad(p[2], [a])) # (tensor([-0.1349, -0.1125, 0.2473]),) # i=2为正的其他的为负的
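The formula p_i * (delta_ij - p_j) can be checked against autograd in one call, reusing a and p from above; a small sketch (torch.autograd.functional.jacobian and torch.outer need a reasonably recent PyTorch version):
jac = torch.autograd.functional.jacobian(lambda t: F.softmax(t, dim=0), a)
print(jac) # full 3x3 Jacobian computed by autograd
pd = p.detach()
print(torch.diag(pd) - torch.outer(pd, pd)) # p_i * (delta_ij - p_j); matches jac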
感知机
单一输出感知机
import torch
from torch.nn import functional as F
x = torch.randn(1, 10)
w = torch.randn(1, 10, requires_grad=True)
print(x)
print(w)
o = torch.sigmoid(x@w.t()) # 这里没有写bias
print(o)
print(torch.ones(1, 1))
loss = F.mse_loss(torch.ones(1, 1), o)
print(loss)
loss.backward()
print(w.grad)
多输出感知机
import torch
from torch.nn import functional as F
x = torch.randn(1, 10)
w = torch.randn(2, 10, requires_grad=True)
print(x)
print(w)
o = torch.sigmoid(x@w.t())
loss = F.mse_loss(torch.ones(1, 2), o)
loss.backward()
print(w.grad)
链式法则
import torch
x = torch.tensor(1.)
w1 = torch.tensor(2., requires_grad=True)
b1 = torch.tensor(1.)
w2 = torch.tensor(2., requires_grad=True)
b2 = torch.tensor(1.)
y1 = x*w1 + b1
y2 = y1*w2 + b2
dy2_dy1 = torch.autograd.grad(y2, [y1], retain_graph=True)[0]
dy1_dw1 = torch.autograd.grad(y1, [w1], retain_graph=True)[0]
dy2_dw1 = torch.autograd.grad(y2, w1, retain_graph=True)[0] # passing w1 with or without [] both work; grad always returns a tuple
print(dy2_dy1*dy1_dw1)
print(dy2_dw1)
优化实例
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import pyplot as plt
import torch
def himmelblau(x):
return (x[0] ** 2 + x[1] - 11) ** 2 + (x[0] + x[1] ** 2 - 7) ** 2
# 画图
x = np.arange(-6, 6, 0.1)
y = np.arange(-6, 6, 0.1)
print('x,y range:', x.shape, y.shape)
X, Y = np.meshgrid(x, y) # build the 2-D coordinate grids from the two 1-D ranges
print('X,Y maps:', X.shape, Y.shape)
Z = himmelblau([X, Y])
fig = plt.figure('himmelblau')
ax = fig.add_subplot(projection='3d') # fig.gca(projection='3d') was removed in newer matplotlib versions
ax.plot_surface(X, Y, Z) # 把x, y的坐标送入Z函数里面得到z的坐标
ax.view_init(60, -30)
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.show()
# 找最小值--初始点不同找的也不同
# [1., 0.], [-4, 0.], [4, 0.]
x = torch.tensor([4., 0.], requires_grad=True) # 在这里不同的初始化权重更新的速率和最后得到的结果都不太同。所以说梯度下降法的初始化很关键
optimizer = torch.optim.Adam([x], lr=1e-3)
for step in range(20000):
pred = himmelblau(x) # x送进来得到预测值,目的是min这个预测值
optimizer.zero_grad() # 将梯度信息进行清零
pred.backward() # 生成x.grad和y.grad即:x和y的梯度信息
optimizer.step() # 将x,y的梯度进行更新
if step % 2000 == 0:
print('step {}: x = {}, f(x) = {}'
.format(step, x.tolist(), pred.item()))
Logistic Regression
Cross Entropy
熵
import torch
a = torch.full([4], 1/4)
print(a)
print(a*torch.log2(a))
print(-(a*torch.log2(a)).sum()) # tensor(2.): the uniform distribution has the maximum entropy, i.e. the most uncertainty
b = torch.tensor([0.1, 0.1, 0.1, 0.7])
print(-(b*torch.log2(b)).sum()) # tensor(1.3568): more peaked distribution, lower entropy, less uncertainty
c = torch.tensor([0.001, 0.001, 0.001, 0.999])
print(-(c*torch.log2(c)).sum()) # tensor(0.0313): almost deterministic, entropy close to 0
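Cross entropy H(p, q) = -sum(p * log q) is the same computation with a second distribution; a small numerical sketch reusing the 4-class example (q here is a uniform prediction):
p = torch.tensor([0.1, 0.1, 0.1, 0.7]) # "true" distribution
q = torch.full([4], 1/4) # uniform prediction
H_p = -(p * torch.log2(p)).sum() # entropy H(p) ~= 1.3568
H_pq = -(p * torch.log2(q)).sum() # cross entropy H(p, q) = 2.0
print(H_p, H_pq, H_pq - H_p) # the gap H(p, q) - H(p) is the KL divergence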
numerical stability
import torch
from torch.nn import functional as F
x = torch.randn(1, 784)
w = torch.randn(10, 784)
logits = x@w.t()
print(logits.shape)
pred = F.softmax(logits, dim=1)
print(pred)
pred_log = torch.log(pred)
loss1 = F.nll_loss(pred_log, torch.tensor([3]))
print(loss1)
loss2 = F.cross_entropy(logits, torch.tensor([3])) # 这里使用logits, 因为cross_entropy = softmax + log + nll_loss (这三个操作一起)
print(loss2)
全连接层
import torch
import torch.nn as nn
x = torch.randn(1, 784) # torch.Size([1, 784])
print(x.shape)
layer1 = nn.Linear(784, 200) # the first argument is ch_in, the second is ch_out
layer2 = nn.Linear(200, 200)
layer3 = nn.Linear(200, 10)
x = layer1(x)
print(x.shape) # torch.Size([1, 200])
x = layer2(x)
print(x.shape) # torch.Size([1, 200])
x = layer3(x)
print(x.shape) # torch.Size([1, 10])
print(x)
nn.Relu vs F.relu
import torch
import torch.nn as nn
from torch.nn import functional as F
x = torch.randn(1, 10)
print(x.shape)
# class-style API: use nn.ReLU as a layer inside an nn.Module
class ML(nn.Module):  # note: nn.Module, with a capital M
    def __init__(self):
        super(ML, self).__init__()
        self.model = nn.Sequential(  # build the model
            nn.Linear(784, 200),
            nn.ReLU(inplace=True),
            nn.Linear(200, 200),
            nn.ReLU(inplace=True),
            nn.Linear(200, 10),
            nn.ReLU(inplace=True),
        )
    def forward(self, x):
        return self.model(x)
# functional-style API
x = F.relu(x, inplace=True)
GPU加速
device = torch.device('cuda:0') # 使用设备, 可以选择将需要运算的搬到你需要的设备。
# 将需要加速的运算送进GPU
criteon = nn.CrossEntropyLoss().to(device) # .to() returns a reference; for an nn.Module it is the same module, moved in place
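A minimal end-to-end sketch of the same .to(device) pattern (the tiny Linear model and the fake batch are placeholders, just to show that modules are moved in place while tensors get a new copy on the device):
import torch
from torch import nn

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = nn.Linear(10, 2).to(device) # module: parameters moved in place, same object returned
criteon = nn.CrossEntropyLoss().to(device)
data, target = torch.randn(4, 10), torch.tensor([0, 1, 0, 1])
data, target = data.to(device), target.to(device) # tensors: .to() returns new tensors on the device
print(criteon(net(data), target).item())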
Computing accuracy
"""
Code for computing classification accuracy
"""
import torch
from torch.nn import functional as F
logits = torch.rand(4, 10)
pred = F.softmax(logits, dim=1)
print(pred)
pred_label = pred.argmax(dim=1) # 取最大值的下标
print(pred_label)
label = torch.tensor([9, 3, 2, 9])
correct = torch.eq(pred_label, label)
print(correct)
print(correct.sum().float().item()/4) # item()作用是得到里面的元素
Visdom可视化
"""
pytorch可视化需要:
方法一:
pip install tensorboardX
1. 需要开启一个监听的进程
方法二:Visdom
1. pip install visdom
2. python -m visdom.server (相当于开启了一个web服务器,web服务器会把数据渲染到网页上去)
可能会遇到的问题: ERROR:root:Error 404 while downloading https://unpkg.com/layout-bin-packer@1.4.0
解决方法: install form source(从github的facebookresearch/visdom下载)
步骤1: pip uninstall visdom
步骤2: 官网下载源代码,之后cd进去目录(进去visdom-master),之后运行pip install -e .
步骤3: 退回用户目录后再python -m visdom.server
步骤4:打开浏览器,输入他给的地址
"""
# 测试:
from visdom import Visdom
viz = Visdom()
"""
viz.line([Y value], [X value]): win is the window ID (there is also env, which defaults to 'main'); opts carries extra options such as the title
non-image data is passed as numpy-style values, image data as tensors
"""
# viz.line([0.], [0.], win='train_loss', opts=dict(title='train loss'))
# viz.line([loss.item()], [global_step], win='train_loss', update='append')
在训练中
global_step += 1
viz.line([loss.item()], [global_step], win='train_loss', update='append')
在test中
# viz进行可视化
viz.line([[test_loss, correct / len(test_loader.dataset)]],
[global_step], win='test', update='append')
viz.images(data.view(-1, 1, 28, 28), win='x')
viz.text(str(pred.detach().cpu().numpy()), win='pred',
opts=dict(title='pred'))
正则化
optimizer = optim.SGD(net.parameters(), lr=learning_rate, weight_decay=0.01) # weight_decay=0.01 adds L2 regularization (squared-norm penalty)
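weight_decay only covers the L2 penalty; an L1 penalty is usually added to the loss by hand. A minimal self-contained sketch (the Linear model, the fake batch and the 0.01 factor are placeholders):
import torch
from torch import nn, optim
from torch.nn import functional as F

net = nn.Linear(10, 2)
optimizer = optim.SGD(net.parameters(), lr=0.01, weight_decay=0.01) # L2 via weight_decay
logits = net(torch.randn(4, 10))
loss = F.cross_entropy(logits, torch.tensor([0, 1, 0, 1]))
loss = loss + 0.01 * sum(p.abs().sum() for p in net.parameters()) # L1 added manually
optimizer.zero_grad()
loss.backward()
optimizer.step()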
Dropout
import torch
net_droped = torch.nn.Sequential(
torch.nn.Linear(784, 200),
    torch.nn.Dropout(0.5), # drop 50% of the neurons (randomly cuts connections between the two layers)
torch.nn.ReLU(),
torch.nn.Linear(200, 200),
torch.nn.Dropout(0.5), # drop 50% of the neuron
torch.nn.ReLU(),
torch.nn.Linear(200, 10),
)
"""
Dropout() should be active during training,
but must be switched off at test/val time,
例如:
for epoch in range(epochs):
# train
net_dropped.train()
for batch_idx, (data, targt) in enumerate(train_loader):
...
    net_dropped.eval() # call eval() before testing so that dropout is disabled
test_loss = 0
correct = 0
for data, target in test_loader:
"""
卷积神经网络
import torch.nn as nn
import torch
from torch.nn import functional as F
# first argument: input channels; second: number of kernels (output channels); kernel_size=3x3 -> output [1, 3, 26, 26]
layer = nn.Conv2d(1, 3, kernel_size=3, stride=1, padding=0)
x = torch.rand(1, 1, 28, 28)
out = layer.forward(x)
print(out.shape) # torch.Size([1, 3, 26, 26]) # 26 = (28-3)/1 + 1
layer = nn.Conv2d(1, 3, kernel_size=3, stride=1, padding=1)
out = layer.forward(x)
print(out.shape) # torch.Size([1, 3, 28, 28])
layer = nn.Conv2d(1, 3, kernel_size=3, stride=2, padding=1)
out = layer.forward(x)
print(out.shape) # torch.Size([1, 3, 14, 14])
out = layer(x) # 会自动进行,运用了python的魔术方法 __call__
print(out.shape) # torch.Size([1, 3, 14, 14])
print(layer.weight) # 查看layer的权重
print(layer.weight.shape) # torch.Size([3, 1, 3, 3])
print(layer.bias.shape) # torch.Size([3])
# F.conv2D
# 上面x = torch.rand(1, 1, 28, 28)
w = torch.rand(16, 3, 5, 5)
b = torch.rand(16)
# out = F.conv2d(x, w, b, stride=1, padding=1)
# print(out) # errors, because the channel counts of x and w do not match
"""
RuntimeError: Given groups=1, weight of size 16 3 5 5, expected input[1, 1, 28, 28] to have 3 channels,
but got 1 channels instead
"""
x = torch.randn(1, 3, 28, 28)
out = F.conv2d(x, w, b, stride=1, padding=1)
print(out.shape) # torch.Size([1, 16, 26, 26])
out = F.conv2d(x, w, b, stride=2, padding=2)
print(out.shape) # torch.Size([1, 16, 14, 14])
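All the output sizes in the comments above follow out = floor((H + 2*padding - kernel) / stride) + 1; a tiny helper (my own, not part of the notes) to check them:
def conv_out(h, kernel, stride=1, padding=0):
    return (h + 2 * padding - kernel) // stride + 1

print(conv_out(28, 3, stride=1, padding=0)) # 26
print(conv_out(28, 3, stride=2, padding=1)) # 14
print(conv_out(28, 5, stride=2, padding=2)) # 14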
池化层
"""
outline:
Pooling
upsample
Relu
"""
import torch
import torch.nn as nn
from torch.nn import functional as F
x = torch.randn(1, 16, 14, 14)
print(x.shape) # torch.Size([1, 16, 14, 14])
# 从nn中导入最大池化
layer = nn.MaxPool2d(2, stride=2)
out = layer(x)
print(out.shape) # torch.Size([1, 16, 7, 7]) (14-2)/2 + 1 = 7
# 使用F.的方式平均池化
out = F.avg_pool2d(x, 2, stride=2) # torch.Size([1, 16, 7, 7])
print(out.shape)
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++#
# upsample
# 采用F.interpolate
# interpolate: 是插值的意思
# +++++++++++++++++++++++++++++++++++++++++++++++++++++#
x = out
out = F.interpolate(x, scale_factor=2, mode='nearest') # 采用最近邻采样
print(out.shape) # torch.Size([1, 16, 14, 14])
out = F.interpolate(x, scale_factor=3, mode='nearest')
print(out.shape) # torch.Size([1, 16, 21, 21])
#------------------------------------------------#
# Relu激活函数
#
# ------------------------------------------------#
x = torch.randn(1, 16, 7, 7)
print(x.shape) # torch.Size([1, 16, 7, 7])
# 方法1:采用nn.的方式
layer = nn.ReLU(inplace=True) # inplace=True x--->x'(x'使用x内存空间)
out = layer(x)
print(out.shape) # torch.Size([1, 16, 7, 7])
# 方法2:采用F.的方式
out = F.relu(x)
print(out.shape) # torch.Size([1, 16, 7, 7])
BatchNorm
import torch
import torch.nn as nn
# ----------------------------#
# BatchNorm1d
# ----------------------------#
x = torch.randn(100, 16) + 0.5
print(x.shape)
layer = torch.nn.BatchNorm1d(16) # 这个必须与前面的匹配起来否则会报错
print(layer.running_mean, layer.running_var)
"""
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
"""
out = layer(x)
print(layer.running_mean, layer.running_var)
"""
tensor([0.0452, 0.0446, 0.0516, 0.0671, 0.0644, 0.0622, 0.0514, 0.0449, 0.0520,
0.0546, 0.0461, 0.0620, 0.0332, 0.0450, 0.0384, 0.0580])
tensor([0.9868, 0.9935, 1.0214, 1.0137, 1.0009, 0.9895, 1.0065, 1.0319, 0.9841,
1.0051, 0.9967, 0.9968, 1.0045, 0.9877, 1.0011, 1.0031])
"""
#----------------------------------------#
# here x follows N(0.5, 1), a normal distribution shifted to mean 0.5 (not a uniform distribution)
# the running statistics printed below drift towards that mean/variance over repeated forward passes
# ---------------------------------------#
x = torch.randn(100, 16) + 0.5
layer = torch.nn.BatchNorm1d(16)
for i in range(5): # each forward pass updates running_mean / running_var with momentum, so they move towards the batch statistics
out = layer(x)
print(layer.running_mean, layer.running_var)
# ---------------------------#
# nn.BatchNorm2d
# ---------------------------#
x = torch.rand(1, 16, 7, 7)
print(x.shape)
layer = nn.BatchNorm2d(16)
out = layer(x)
print(out.shape) # torch.Size([1, 16, 7, 7])
print(layer.weight)
"""
the weight/bias here are BatchNorm's affine parameters gamma/beta, not the weights of a linear/conv layer
"""
print(layer.weight.shape) # torch.Size([16])
print(layer.bias.shape) # torch.Size([16])
# -----------------------------------#
# class variables
# -----------------------------------#
print(vars(layer))
# ------------------------------------#
# Test
# ------------------------------------#
layer.eval() # 加这行表示现在是在test阶段
out = layer(x)
print(vars(layer))
nn.Module
import torch
from torch import nn
from torch import optim
# -----------------------------------#
# advantages of using nn.Module
# 1. all the common building blocks are provided: Linear / ReLU / Sigmoid, etc.
# 2. the nn.Sequential() container (sequential execution); both built-in modules and your own can go inside
# 3. nn.Module manages parameters automatically
# 4. modules: all nodes / children: direct children
# 5. to(device)
# 6. save and load
# 7. convenient switching between train and test mode
# 8. implement your own layer as a class; only nn.Module subclasses can be placed inside nn.Sequential
# -----------------------------------#
class MyLinear(nn.Module):
def __init__(self, inp, outp):
super(MyLinear, self).__init__()
# requires_grad = True
        self.w = nn.Parameter(torch.randn(outp, inp)) # nn.Parameter wraps the tensor and registers it, so it shows up in net.parameters() with requires_grad=True
self.b = nn.Parameter(torch.randn(outp))
def forward(self, x):
x = x @ self.w.t() + self.b
return x
class Flatten(nn.Module): # 将所有的打平
def __init__(self):
super(Flatten, self).__init__()
def forward(self, input):
return input.view(input.size(0), -1) # -1表示将其他所有的打平
class TestNet(nn.Module):
def __init__(self):
super(TestNet, self).__init__()
        self.net = nn.Sequential(nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1), # kernel_size added, otherwise Conv2d raises a TypeError
                                 nn.MaxPool2d(2, 2),
                                 Flatten(), # our own class; only classes can be placed here
                                 nn.Linear(16*14*14, 10)) # 16 channels x 14 x 14 after the 2x2 max-pool on a 28x28 input
def forward(self, x):
return self.net(x)
class BasicNet(nn.Module):
def __init__(self):
super(BasicNet, self).__init__()
self.net = nn.Linear(4, 3)
def forward(self, x):
return self.net(x)
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
# 使用nn.Sequential()容器[sequential是串行的意思], 不管是nn.Module中的还是自己写的都可以在这里使用
self.net = nn.Sequential(BasicNet(),
nn.ReLU(),
nn.Linear(3, 2))
def forward(self, x):
return self.net(x)
def main():
device = torch.device('cuda')
net = Net()
net.to(device) # .to()会返回net引用(和原来的net引用一样) --->但是对于tensor操作来说不是这样的
# train
net.train()
# test
net.eval()
# net.load_state_dict(torch.load('ckpt.mdl')) # 在开始的时候要加载模型
#
#
# torch.save(net.state_dict(), 'ckpt.mdl') # 在模型断电或者中断保存模型的当前状态
for name, t in net.named_parameters():
print('parameters:', name, t.shape) # 打印里面地parameters:权重和bias
for name, m in net.named_children(): # 打印net Sequential的类
print('children:', name, m)
for name, m in net.named_modules():
print('modules:', name, m)
if __name__ == '__main__':
main()
数据增强
# Data augmentation
# ---------------------------------------#
# 这些操作在torchvision包里面
# 1. Flip:翻转
# 2. Rotate
# 3. Random Move & Crop
# 4. GAN : 生成更多的样本
# 5. Noise: N(0, 0.001)加高斯白噪声
# ---------------------------------------#
batch_size=200
learning_rate=0.01
epochs=10
train_loader = torch.utils.data.DataLoader(
datasets.MNIST('../data', train=True, download=True,
transform=transforms.Compose([ # Compose的操作类似于nn.Sequential里面
transforms.RandomHorizontalFlip(), # 水平角度的翻转 (随机翻转-可能翻转也有可能不翻转)
transforms.RandomVerticalFlip(), # 垂直方向
transforms.RandomRotation(15), # 旋转方向,参数为旋转的度数
                           # transforms.RandomRotation([90, 180, 270]), # note: RandomRotation only accepts a single degree value or a (min, max) pair, so a 3-element list raises an error
transforms.Resize([32, 32]), # 传入的参数为list
transforms.RandomCrop([28, 28]), # 裁剪
transforms.ToTensor(),
# transforms.Normalize((0.1307,), (0.3081,))
])), # x 转换成x'
batch_size=batch_size, shuffle=True)
Cifar-10与ResNet18实战
resnet.py
import torch
from torch import nn
from torch.nn import functional as F # 这里F和nn经常是交叉使用的
class ResBlk(nn.Module):
"""
resnet block:这里是resnet的一个基本模块
"""
def __init__(self, ch_in, ch_out, stride=1):
"""
:param ch_in:
:param ch_out:
"""
super(ResBlk, self).__init__()
# we add stride support for resbok, which is distinct from tutorials.
self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=stride, padding=1)
self.bn1 = nn.BatchNorm2d(ch_out)
self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1)
self.bn2 = nn.BatchNorm2d(ch_out)
self.extra = nn.Sequential() # nn.Sequential()本来是空的
        if ch_out != ch_in: # the shortcut branch needs a 1x1 conv to map ch_in to ch_out (and to match the stride)
# [b, ch_in, h, w] => [b, ch_out, h, w]
self.extra = nn.Sequential(
nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=stride),
nn.BatchNorm2d(ch_out)
)
# -------------------------------#
    # Q: why do we never call forward() explicitly?
    # A: nn.Module implements __call__, which internally calls forward(); so calling the module
    #    instance like a function (e.g. blk(x)) dispatches to the subclass's forward().
# -------------------------------#
def forward(self, x):
"""
:param x: [b, ch, h, w]
:return:
"""
out = F.relu(self.bn1(self.conv1(x))) # 这里经过卷积层,BN层, 然后经过relu层
out = self.bn2(self.conv2(out)) # 这里经过卷积层,BN层
# short cut. # 这里是短接
# extra module: [b, ch_in, h, w] => [b, ch_out, h, w]
out = self.extra(x) + out # element-wise add:
out = F.relu(out) # 最后再经过relu层输出
        # print('debug, out shape:', out.shape)
return out
class ResNet18(nn.Module):
def __init__(self):
super(ResNet18, self).__init__()
self.conv1 = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=3, stride=3, padding=0),
nn.BatchNorm2d(64)
)
# followed 4 blocks
# [b, 64, h, w] => [b, 128, h ,w] # 注意这里h,w是变化的
self.blk1 = ResBlk(64, 128, stride=2)
# [b, 128, h, w] => [b, 256, h, w]
self.blk2 = ResBlk(128, 256, stride=2)
# # [b, 256, h, w] => [b, 512, h, w]
self.blk3 = ResBlk(256, 512, stride=2)
# # [b, 512, h, w] => [b, 1024, h, w]
self.blk4 = ResBlk(512, 512, stride=2) # 这里视频是self.blk4 = ResBlk(512, 1024)
self.outlayer = nn.Linear(512*1*1, 10) # 最后再跟一个全连接层
def forward(self, x):
"""
:param x:
:return:
"""
x = F.relu(self.conv1(x)) # 先经过一个卷积层,后面再跟一个relu函数, 经过后x.shape = [128, 64, 10, 10]
# [b, 64, h, w] => [b, 1024, h, w]
x = self.blk1(x) # 经过这层后x.shape = torch.Size([128, 128, 5, 5])
x = self.blk2(x) # 经过这层后x.shape = torch.Size([128, 256, 3, 3])
x = self.blk3(x) # 经过这层后x.shape = torch.Size([128, 512, 2, 2])
x = self.blk4(x) # 经过这层后x.shape = torch.Size([128, 512, 2, 2])
# print('after conv:', x.shape) #[b, 512, 2, 2]
# [b, 512, h, w] => [b, 512, 1, 1]
x = F.adaptive_avg_pool2d(x, [1, 1])
# print('after pool:', x.shape)
x = x.view(x.size(0), -1) # 经过这层后x.shape = torch.Size([128, 512]) x.size(0) = 128
x = self.outlayer(x) # 经过一个全连接层 经过这层后x.shape = torch.Size([128, 10])
return x
def main():
# ResBlk
blk = ResBlk(64, 128, stride=4)
tmp = torch.randn(2, 64, 32, 32)
out = blk(tmp)
print('block:', out.shape) # block: torch.Size([2, 128, 8, 8])
# ResNet18
x = torch.randn(2, 3, 32, 32)
model = ResNet18()
out = model(x)
print('resnet:', out.shape) # resnet: torch.Size([2, 10])
if __name__ == '__main__':
main()
# ---------------ResNet18模型----------------------------#
"""
ResNet18(
(conv1): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(3, 3))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(blk1): ResBlk(
(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(extra): Sequential(
(0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2))
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(blk2): ResBlk(
(conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(extra): Sequential(
(0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2))
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(blk3): ResBlk(
(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(extra): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2))
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(blk4): ResBlk(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(extra): Sequential()
)
(outlayer): Linear(in_features=512, out_features=10, bias=True)
)
"""
# ----------------------------------------------------------------------#
main.py
import torch
from torch.utils.data import DataLoader # DataLoader是为了能够批量加载数据
from torchvision import datasets # 从torchvision中导入数据集
from torchvision import transforms
from torch import nn, optim
from lenet5 import Lenet5
from resnet import ResNet18
def main():
batchsz = 128 # 这里是batch-size
# torchvision中提供一些已有的数据集 # 第一个参数:自定目录,第二个参数:Train=True, transform:对数据做些变换
cifar_train = datasets.CIFAR10('cifar', True, transform=transforms.Compose([
transforms.Resize((32, 32)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
]), download=False) # download=True:可以自动的download
cifar_train = DataLoader(cifar_train, batch_size=batchsz, shuffle=True) # Dataloader:方便一次加载多个. shuffle:加载的时候随机换一下
cifar_test = datasets.CIFAR10('cifar', False, transform=transforms.Compose([
transforms.Resize((32, 32)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
]), download=False)
cifar_test = DataLoader(cifar_test, batch_size=batchsz, shuffle=True)
    x, label = next(iter(cifar_train)) # Python 3: use next(iterator) instead of the old .next() method
print('x:', x.shape, 'label:', label.shape) # x: torch.Size([128, 3, 32, 32]) label: torch.Size([128])
device = torch.device('cuda') # 后面可以使用GPU计算
# model = Lenet5().to(device)
model = ResNet18().to(device)
criteon = nn.CrossEntropyLoss().to(device) # loss函数他包含softmax, 因为是分类任务所以采用crossentropy
optimizer = optim.Adam(model.parameters(), lr=1e-3) # 优化器把网络里的参数传给他
print(model)
for epoch in range(1000):
model.train() # 模型为train模式
for batchidx, (x, label) in enumerate(cifar_train): # 从每个epoch里的batch_size
# [b, 3, 32, 32]
# [b]
x, label = x.to(device), label.to(device) # 转换到cuda上面来
logits = model(x) # 他与predict的区别是是否经过softmax操作
# logits: [b, 10]
# label: [b] # label不需要probality
# loss: tensor scalar # 长度为0的标量
loss = criteon(logits, label) # 这个label是y
# backprop
optimizer.zero_grad() # 如果不清0就是累加的效果
loss.backward()
optimizer.step() # 更新weight,更新的weight写进optimizer里面
        print(epoch, 'loss:', loss.item()) # item() converts a scalar tensor into a plain Python number
# test
model.eval() # 模型为test模式
with torch.no_grad(): # 这一步是告诉不需要构建梯度(不需要构建图)
# test
total_correct = 0 # 正确的数量
total_num = 0 # 总的数量
for x, label in cifar_test:
# [b, 3, 32, 32]
# [b]
x, label = x.to(device), label.to(device)
# [b, 10]
logits = model(x)
# [b]
pred = logits.argmax(dim=1)
# [b] vs [b] => scalar tensor
correct = torch.eq(pred, label).float().sum().item()
total_correct += correct
total_num += x.size(0)
# print(correct)
acc = total_correct / total_num
print(epoch, 'test acc:', acc)
if __name__ == '__main__':
main()
数据集格式
-- cifar
--cifar-10-batches-py
--batches.meta
--data_batch_1
--data_batch_2
--data_batch_3
--data_batch_4
--data_batch_5
--readme.html
--test_batch