1. Activation Functions

import torch

# activation functions
z1 = torch.linspace(-100, 100, 10)
print(z1)
# tensor([-100.0000, -77.7778, -55.5556, -33.3333, -11.1111, 11.1111, 33.3333, 55.5556, 77.7778, 100.0000])
print(torch.sigmoid(z1))   # sigmoid squashes inputs into (0, 1); large |z| saturates towards 0 or 1
# tensor([0.0000e+00, 1.6655e-34, 7.4564e-25, 3.3382e-15, 1.4945e-05, 9.9999e-01, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00])
z2 = torch.linspace(-1, 1, 10)
print(torch.tanh(z2))      # tanh squashes inputs into (-1, 1)
# tensor([-0.7616, -0.6514, -0.5047, -0.3215, -0.1107, 0.1107, 0.3215, 0.5047, 0.6514, 0.7616])
print(torch.relu(z2))      # relu zeroes out negative inputs and keeps positive ones unchanged
# tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1111, 0.3333, 0.5556, 0.7778, 1.0000])
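As a quick sanity check (my own sketch, not part of the original note), the built-in activations match the textbook formulas $sigmoid(z) = \frac{1}{1+e^{-z}}$, $tanh(z) = \frac{e^z - e^{-z}}{e^z + e^{-z}}$ and $relu(z) = \max(0, z)$:

import torch

z = torch.linspace(-1, 1, 10)
# manual formulas vs. the built-in ops; they should agree up to floating-point error
print(torch.allclose(torch.sigmoid(z), 1 / (1 + torch.exp(-z))))                                       # True
print(torch.allclose(torch.tanh(z), (torch.exp(z) - torch.exp(-z)) / (torch.exp(z) + torch.exp(-z))))  # True
print(torch.allclose(torch.relu(z), torch.maximum(z, torch.zeros_like(z))))                            # True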
2. Loss and Its Gradient
2.1 Mean Squared Error (MSE)
- Used for regression, where the output is a continuous real value.
- $loss = \sum |y - (xw + b)|^2$, where the prediction is $\hat{y} = xw + b$.
- Equivalently, $loss = \|y - (xw + b)\|_2^2$, the squared L2 norm of the residual.
- L2 norm: $\|y - (xw + b)\|_2$.
MSE loss function: torch.nn.functional.mse_loss(pred, label) (imported below as F.mse_loss)
import torch
from torch.nn import functional as F
# y = x*w (with b = 0)
x = torch.ones(1)
w = torch.full([1], 2.)   # use a float value; an integer tensor cannot require gradients later
# compute the MSE
# method 1: F.mse_loss
mse2 = F.mse_loss(x*w, torch.ones(1))
print(mse2) # tensor(1.)
# method 2: squared L2 norm of the residual
mse1 = torch.norm(torch.ones(1) - x*w, 2)**2
print(mse1) # tensor(1.)
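One caveat (my own note, not from the original): with a single element the two methods agree, but F.mse_loss averages over elements by default (reduction='mean'), while the squared L2 norm sums them, so for vectors with more than one element the results differ:

import torch
from torch.nn import functional as F

pred = torch.tensor([1., 2., 3.])
label = torch.zeros(3)
print(F.mse_loss(pred, label))                   # tensor(4.6667)  mean of the squared errors
print(torch.norm(pred - label, 2) ** 2)          # tensor(14.)     sum of the squared errors
print(F.mse_loss(pred, label, reduction='sum'))  # tensor(14.)     matches the squared L2 norm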
2.2 Cross Entropy Loss
- Used for classification; the output is a conditional probability distribution.
- binary classification
- multi-class classification
- usually combined with softmax
- Details are left to the Logistic Regression part; a minimal usage sketch follows below.
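A minimal sketch of how multi-class cross entropy is called in PyTorch (my own example; the class count and logits are made up). F.cross_entropy expects raw logits and applies log-softmax internally, so softmax should not be applied manually beforehand:

import torch
from torch.nn import functional as F

logits = torch.randn(4, 10)              # batch of 4 samples, 10 classes (raw scores, no softmax)
target = torch.tensor([1, 0, 3, 9])      # ground-truth class index for each sample
loss = F.cross_entropy(logits, target)   # internally: log_softmax followed by nll_loss
print(loss)                              # a scalar tensor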
2.3 Computing Gradients
- torch.autograd.grad(loss, [w1, w2, ...])
- The first argument is the loss, and the second is the list of parameters whose gradients with respect to that loss are wanted (a multi-parameter sketch follows the example below).
- To differentiate with respect to w, w must first be marked as needing gradient information: w.requires_grad_()
- The returned result grad_val is a tuple of gradients, one entry per tensor in the list: $[\frac{\partial Loss}{\partial w_1}, \frac{\partial Loss}{\partial w_2}, \dots]$
import torch
from torch.nn import functional as F
# y = x*w (with b = 0)
x = torch.ones(1)
w = torch.full([1], 2.)   # float value, so that it can require gradients
# to differentiate w.r.t. w, first mark w as requiring gradient information
w.requires_grad_()
print(w) # tensor([2.], requires_grad=True)
mse = F.mse_loss(x*w, torch.ones(1))
print(torch.autograd.grad(mse, [w])) # (tensor([2.]),)
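The parameter list can hold more than one tensor; a small sketch of my own using both a weight and a bias in the same y = x*w + b setup:

import torch
from torch.nn import functional as F

x = torch.ones(1)
w = torch.full([1], 2., requires_grad=True)
b = torch.zeros(1, requires_grad=True)
mse = F.mse_loss(x * w + b, torch.ones(1))
dw, db = torch.autograd.grad(mse, [w, b])   # one gradient per parameter, in the same order as the list
print(dw, db)  # tensor([2.]) tensor([2.])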
- loss.backward()
- Back-propagates through the graph and automatically computes, for every parameter on the graph that requires a gradient, its gradient with respect to this MSE loss.
- The gradient information is attached to each tensor's .grad attribute instead of being returned: w1.grad, w2.grad, ...
- Note: backward() can only be called on a scalar (a single value), not on a tensor with several components.
import torch
from torch.nn import functional as F
# y = x*w (with b = 0)
x = torch.ones(1)
w = torch.full([1], 2.)   # float value, so that it can require gradients
print(w)
w.requires_grad_()
print(w) # tensor([2.], requires_grad=True)
mse = F.mse_loss(x*w, torch.ones(1))
mse.backward()
print(w.grad) # tensor([2.])
Tip:
- By default the computation graph is freed as soon as backward() finishes, so calling .backward() on the same loss object a second time raises an error (the gradients themselves are not overwritten; each call accumulates into .grad).
- To be able to call it again and generate new gradient information, pass the argument .backward(retain_graph=True).
- retain_graph=True means the graph is kept instead of being freed; see the sketch below.
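A small sketch of my own illustrating both points: retain_graph keeps the graph alive for a second backward call, and the gradients accumulate rather than being replaced:

import torch
from torch.nn import functional as F

x = torch.ones(1)
w = torch.full([1], 2., requires_grad=True)
mse = F.mse_loss(x * w, torch.ones(1))

mse.backward(retain_graph=True)   # keep the graph so backward can run again
print(w.grad)                     # tensor([2.])
mse.backward()                    # second call works because the graph was retained
print(w.grad)                     # tensor([4.])  gradients accumulate across calls
w.grad.zero_()                    # in a training loop the gradient is usually zeroed between steps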
3. Softmax
- Softmax converts multiple output values into probabilities, so that each value satisfies the definition of a probability: it lies in [0, 1] and all the values sum to 1. It is used for multi-class classification.
import torch
from torch.nn import functional as F
a = torch.rand(3)
a.requires_grad_()
print(a)
# tensor([0.8197, 0.9687, 0.9991], requires_grad=True)
p = F.softmax(a, dim=0)   # normalize over dimension 0
print(p)
# tensor([0.2979, 0.3457, 0.3564], grad_fn=<SoftmaxBackward>)  sums to 1
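In practice the logits usually carry a batch dimension, and dim selects the axis over which the probabilities are normalized; a small sketch of my own:

import torch
from torch.nn import functional as F

logits = torch.rand(4, 3)        # 4 samples, 3 classes
p = F.softmax(logits, dim=1)     # normalize across the class dimension
print(p.sum(dim=1))              # tensor([1., 1., 1., 1.])  each row sums to 1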

import torch
from torch.nn import functional as F
a = torch.rand(3)
a.requires_grad_()
print(a)
# tensor([0.8225, 0.9540, 0.2114], requires_grad=True)
p = F.softmax(a, dim=0)
print(p)
# tensor([0.3727, 0.4251, 0.2023], grad_fn=<SoftmaxBackward>)  sums to 1
p[0].backward(retain_graph=True) # note: backward can only be called on a scalar, e.g. p[0], p[1], ...
print(a.grad)
# tensor([ 0.2338, -0.1584, -0.0754])
print(torch.autograd.grad(p[0], [a], retain_graph=True))
# (tensor([ 0.2338, -0.1584, -0.0754]),)
print(torch.autograd.grad(p[1], [a], retain_graph=True))
# (tensor([-0.1584, 0.2444, -0.0860]),)
print(torch.autograd.grad(p[2], [a]))
# (tensor([-0.0754, -0.0860, 0.1614]),)
Tip:
- When computing a derivative, the loss must be a single value, not several components, so the "losses" p[0], p[1] and p[2] are differentiated separately.
- A single loss can take partial derivatives with respect to several inputs, e.g. p0 with respect to a0, a1 and a2.
- When i == j the partial derivative is positive; when i != j it is negative (this follows from the softmax Jacobian, sketched below).
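A sketch of my own of the softmax Jacobian behind that sign pattern: $\frac{\partial p_i}{\partial a_j} = p_i(\delta_{ij} - p_j)$, which is positive when i == j (since 0 < p_i < 1) and negative otherwise. The formula can be checked numerically against autograd:

import torch
from torch.nn import functional as F

a = torch.rand(3, requires_grad=True)
p = F.softmax(a, dim=0)

# Jacobian from the formula: diag(p) - p p^T
jacobian_formula = torch.diag(p) - torch.outer(p, p)

# Jacobian row by row from autograd (each p[i] differentiated separately, as above)
rows = [torch.autograd.grad(p[i], [a], retain_graph=True)[0] for i in range(3)]
jacobian_autograd = torch.stack(rows)

print(torch.allclose(jacobian_formula, jacobian_autograd))  # True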