3.3 线性回归的简洁实现
3.3.1 生成数据集
%matplotlib inline
import torch
from IPython import display
from matplotlib import pyplot as plt
import numpy as np
import random
# Synthetic linear-regression data: labels = features . true_w + true_b + noise.
num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2

# Standard-normal features drawn with NumPy, then wrapped as a tensor.
feature_array = np.random.normal(0, 1, (num_examples, num_inputs))
features = torch.from_numpy(feature_array)

# Noise-free targets computed column by column.
w_first, w_second = true_w
labels = w_first * features[:, 0] + w_second * features[:, 1] + true_b

# Perturb the targets in place with zero-mean Gaussian noise (std 0.01).
noise = torch.from_numpy(np.random.normal(0, 0.01, size=labels.size()))
labels += noise
3.3.2 读取数据
import torch.utils.data as Data
batch_size = 10
# Pair every feature row with its label so both are indexed together.
dataset = Data.TensorDataset(features, labels)
# Serve the dataset as shuffled mini-batches of batch_size examples.
data_iter = Data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)
这里的data_iter与上一节一样,根据设定的batch_size读取小批量数据
# Show only the first mini-batch; nothing is printed if the loader is empty.
first_batch = next(iter(data_iter), None)
if first_batch is not None:
    x, y = first_batch
    print(x, y)
tensor([[ 0.4889, 0.8471],
[ 1.0232, 1.2690],
[ 0.0906, -0.6728],
[-0.6681, -0.1366],
[-0.4193, 0.0632],
[ 0.9439, -0.0119],
[ 1.0156, -0.0104],
[-0.2202, -1.1330],
[-0.0229, 0.8879],
[ 1.2394, -0.2916]], dtype=torch.float64) tensor([2.2978, 1.9403, 6.6585, 3.3204, 3.1487, 6.1291, 6.2743, 7.6209, 1.1419,
7.6899], dtype=torch.float64)
3.3.3 定义模型
- torch.nn 模块
import torch.nn as nn
class LinearNet(nn.Module):
    """Single-layer linear model mapping n_feature inputs to one output."""

    def __init__(self, n_feature):
        """Create the underlying nn.Linear layer with n_feature inputs."""
        super().__init__()
        self.linear = nn.Linear(in_features=n_feature, out_features=1)

    def forward(self, x):
        """Return the linear layer's output for the input x."""
        return self.linear(x)
net = LinearNet(n_feature=num_inputs)
# Print the module tree of the network.
print(net)
LinearNet(
(linear): Linear(in_features=2, out_features=1, bias=True)
)
也可以使用 nn.Sequential 来更加方便地搭建网络,Sequential 是一个有序的容器。
# Option 1: pass the layers positionally; they are registered by index.
net = nn.Sequential(nn.Linear(in_features=num_inputs, out_features=1))
print(net)
print(net[0])

# Option 2: start from an empty container and register named layers one by one.
net = nn.Sequential()
net.add_module('linear', nn.Linear(in_features=num_inputs, out_features=1))
# Further net.add_module(...) calls would append more layers.
print(net)
print(net[0])

# Option 3: supply an OrderedDict that maps layer names to modules.
from collections import OrderedDict
named_layers = OrderedDict()
named_layers['linear'] = nn.Linear(in_features=num_inputs, out_features=1)
# Further entries would append more layers.
net = nn.Sequential(named_layers)
print(net)
print(net[0])
Sequential(
(0): Linear(in_features=2, out_features=1, bias=True)
)
Linear(in_features=2, out_features=1, bias=True)
Sequential(
(linear): Linear(in_features=2, out_features=1, bias=True)
)
Linear(in_features=2, out_features=1, bias=True)
Sequential(
(linear): Linear(in_features=2, out_features=1, bias=True)
)
Linear(in_features=2, out_features=1, bias=True)
通过net.parameters()查看模型所有的可学习参数,返回一个生成器
# net.parameters() is a generator; for this network it produces the weight
# followed by the bias.
learnable = net.parameters()
for param in learnable:
    print(param)
Parameter containing:
tensor([[-0.3807, 0.6131]], requires_grad=True)
Parameter containing:
tensor([0.4925], requires_grad=True)
3.3.4 初始化模型参数
在使用 net 前需要初始化模型参数,即权重和偏差。PyTorch 在 torch.nn.init 模块中提供了参数初始化方法。
from torch.nn import init
# Initialise the first layer in place: weights from a normal distribution
# (mean 0, std 0.01), bias set to the constant 0.
linear_layer = net[0]
init.normal_(linear_layer.weight, mean=0, std=0.01)
init.constant_(linear_layer.bias, val=0)
Parameter containing:
tensor([0.], requires_grad=True)
3.3.5 定义损失函数
# Mean squared error, averaged over all elements (the default reduction).
loss = nn.MSELoss(reduction='mean')
3.3.6 定义优化算法
- torch.optim 模块提供了常用的优化算法,
如 SGD、Adam、RMSProp 等。
import torch.optim as optim
# Plain SGD over every learnable parameter of net, with learning rate 0.03.
optimizer = optim.SGD(params=net.parameters(), lr=0.03)
print(optimizer)
SGD (
Parameter Group 0
dampening: 0
lr: 0.03
momentum: 0
nesterov: False
weight_decay: 0
)
有时候需要为子网络设置不同的学习率
学习率(learning rate)是指导我们该如何通过损失函数的梯度调整网络权重的超参数。学习率越低,损失函数的变化速度就越慢。虽然使用低学习率可以确保我们不会错过任何局部极小值,但也意味着我们将花费更长的时间来进行收敛,特别是在被困在高原区域的情况下。
# optimizer = optim.SGD([
# # 如果对某个参数不指定学习率,就使⽤最外层的默认学习率
# {'params': net.subnet1.parameters()}, # lr=0.03
# {'params': net.subnet2.parameters(), 'lr': 0.01}
# ], lr=0.03)
# 如何调整学习率
# for param_group in optimizer.param_groups:
# param_group['lr'] *= 0.1 # 学习率为之前的0.1倍
3.3.7 训练模型
# Train for a fixed number of passes over the data, printing the loss of the
# last mini-batch at the end of every epoch.
num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for x, y in data_iter:
        # Cast each batch to float32 before the forward pass.
        # Tensor.to() is used rather than torch.tensor(existing_tensor),
        # which emits a copy-construct UserWarning on every batch.
        x = x.to(torch.float32)
        y = y.to(torch.float32)
        output = net(x)
        # Reshape y to a column so it lines up with the network output
        # instead of relying on broadcasting inside the loss.
        l = loss(output, y.view(-1, 1))
        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
    print('epoch %d, loss:%f' % (epoch, l.item()))
epoch 1, loss:0.000840
epoch 2, loss:0.000063
epoch 3, loss:0.000052
# Print each ground-truth value next to the parameter learned by the first layer.
dense = net[0]
for expected, learned in ((true_w, dense.weight), (true_b, dense.bias)):
    print(expected, learned)
[2, -3.4] Parameter containing:
tensor([[ 2.0001, -3.3998]], requires_grad=True)
4.2 Parameter containing:
tensor([4.2004], requires_grad=True)