zoukankan html css js c++ java

莫烦pytorch学习记录

感谢莫烦大神Pytorch B站视频：https://www.bilibili.com/video/av15997678?p=11

一个博主的笔记：https://blog.csdn.net/Will_Ye/article/details/104516423

一、PyTorch是什么？

它是一个基于Python的科学计算包，其主要是为了解决两类场景：

1、一种是可以替代Numpy进行科学计算，同时还可以使用张量在GPU上进行加速运算。
2、一个深度学习的研究平台，提供最大的灵活性和速度。

二、Numpy与Torch之间的转换

import torch
import numpy as np
from torch.autograd import Variable # torch 中 Variable 模块

### Torch 自称为神经网络界的 Numpy,
# 因为他能将 torch 产生的 tensor 放在 GPU 中加速运算
np_data = np.arange(6).reshape((2,3))
torch_data = torch.from_numpy(np_data) #torch形式
tensor2array = torch_data.numpy() # numpy形式
#  torch 做的和 numpy 能很好的兼容.
#  比如这样就能自由地转换 numpy array 和 torch tensor 了
print(
    '
numpy array:', np_data,  # [[0 1 2], [3 4 5]]
    '
torch tensor:', torch_data,  # 0  1  2 
 3  4  5    [torch.LongTensor of size 2x3]
    '
tensor to array:', tensor2array,  # [[0 1 2], [3 4 5]]
)

numpy array: [[0 1 2]
              [3 4 5]] 
torch tensor: tensor([[0, 1, 2],
                      [3, 4, 5]], dtype=torch.int32) 
tensor to array: [[0 1 2]
                  [3 4 5]]

三、Torch中的数学运算与numpy的对比

API手册

常用计算：注意！！！！所有在pytorch里的计算，都要先转换为tensor的形式，不然就报错，切记！！！

#abs绝对值运算
data = [-1, -2, 1, 2]
tensor = torch.FloatTensor(data) # 转换成32位浮点tensor
print(
    '
abs',
    '
numpy: ', np.abs(data),  # [1 2 1 2]
    '
torch: ', torch.abs(tensor)  # [1 2 1 2]
)

#sin 三角函数 sin
print(
    '
sin',
    '
numpy: ', np.sin(data),  # [-0.84147098 -0.90929743  0.84147098  0.90929743]
    '
torch: ', torch.sin(tensor)  # [-0.8415 -0.9093  0.8415  0.9093]
)

#mean 均值
print(
    '
mean',
    '
numpy: ', np.mean(data),         # 0.0
    '
torch: ', torch.mean(tensor)     # 0.0
)

矩阵计算：注意，有些numpy的封装的函数跟pytorch的不一样，这一点一定要区分清楚，也是很容易出问题的一个地方。

# matrix multiplication 矩阵点乘
data = [[1,2], [3,4]]
tensor = torch.FloatTensor(data)  # 转换成32位浮点 tensor
# correct method
print(
    '
matrix multiplication (matmul)',
    '
numpy: ', np.matmul(data, data),     # [[7, 10], [15, 22]]
    '
torch: ', torch.mm(tensor, tensor)   # [[7, 10], [15, 22]]
)

# !!!!  下面是错误的方法 !!!!
data = np.array(data)
print(
    '
matrix multiplication (dot)',
    '
numpy: ', data.dot(data),        # [[7, 10], [15, 22]] 在numpy 中可行
    # 关于 tensor.dot() 有了新的改变, 它只能针对于一维的数组. 所以上面的有所改变.
    # '
torch: ', tensor.dot(tensor)     # torch 会转换成 [1,2,3,4].dot([1,2,3,4) = 30.0
    # 变为
    # '
torch: ', torch.dot(tensor.dot(tensor))#
)

四、Variable

tensor = torch.FloatTensor([[1,2],[3,4]])
# 里面的值会不停的变化. 就像一个裝鸡蛋的篮子, 鸡蛋数会不停变动.
# 那谁是里面的鸡蛋呢, 自然就是 Torch 的 Tensor 咯.
# 如果用一个 Variable 进行计算, 那返回的也是一个同类型的 Variable.
# 把鸡蛋放到篮子里,
variable = Variable(tensor, requires_grad=True)#requires_grad是参不参与误差反向传播, 要不要计算梯度

print('
',tensor)
"""
 1  2
 3  4
[torch.FloatTensor of size 2x2]
"""

print(variable)
"""
Variable containing:
 1  2
 3  4
[torch.FloatTensor of size 2x2]
"""

variable的计算

模仿一个计算梯度的情况

比较tensor的计算和variable的计算，在正向传播它们是看不出有什么不同的，而且variable和tensor有个很大的区别，variable是存储变量的，是会改变的，而tensor是不会改变的，是我们输入时就设定好的参数，variable会在反向传播后修正自己的数值。这是我觉得他们最大的不同。

t_out = torch.mean(tensor*tensor)       # x^2
v_out = torch.mean(variable*variable)   # x^2
print('
',t_out)
print('
',v_out)    # 7.5

假设mean的均值做为结果的误差，对误差反向传播得到各项梯度。利用这个例子去看，在反向传播中它们之间的不同。

v_out = torch.mean(variable*variable)就是给各个variable搭建一个运算的步骤，搭建的网络也是其中一种运算的步骤。

v_out.backward()    # 模拟 v_out 的误差反向传递,在背景计算图中加速运算
# 下面两步看不懂没关系, 只要知道 Variable 是计算图的一部分, 可以用来传递误差就好.
# v_out = 1/4 * sum(variable*variable) 这是计算图中的 v_out 计算步骤
# 针对于 v_out 的梯度就是, d(v_out)/d(variable) = 1/4*2*variable = variable/2
print('
',variable.grad)    # 初始 Variable 的梯度
'''
 0.5000  1.0000
 1.5000  2.0000
'''

可以看到，在backward中已经计算好梯度了，利用*.grad将背景中计算好的variable的各项梯度print出来。

这样如果是个网络的运算步骤也可以在backward中将各个梯度计算好。

获取Variable里面的数据

直接print(variable)只会输出 Variable形式的数据, 在很多时候是用不了的(比如想要用 plt 画图), 需要转换成tensor形式.

## 获取 Variable 里面的数据
print(variable)     #  Variable 形式
"""
Variable containing:
 1  2
 3  4
[torch.FloatTensor of size 2x2]
"""
print(variable.data)    # tensor 形式
"""
 1  2
 3  4
[torch.FloatTensor of size 2x2]
"""
print(variable.data.numpy())    # numpy 形式
"""
[[ 1.  2.]
 [ 3.  4.]]
"""

五、常用几种激励函数及图像

常用几种激励函数：relu, sigmoid, tanh, softplus

# 做一些假数据来观看图像
x = torch.linspace(-5, 5, 200)  # x data (tensor), shape=(100, 1)
x = Variable(x)
# Torch 中的激励函数有很多, 不过我们平时要用到的就这几个.
# relu, sigmoid, tanh, softplus. 那我们就看看他们各自长什么样啦.
x_np = x.data.numpy()   # 换成 numpy array, 出图时用

莫烦大神那时的版本用的是torch.nn.relu，但后来版本改了，直接用torch.relu就可以，其他激励函数也一样。

# 几种常用的 激励函数
y_relu = F.relu(x).data.numpy()
y_sigmoid = torch.sigmoid(x).data.numpy()
y_tanh = torch.tanh(x).data.numpy()
y_softplus = F.softplus(x).data.numpy()
# y_softmax = F.softmax(x)  softmax 比较特殊, 不能直接显示, 不过他是关于概率的, 用于分类

#用pit画
plt.figure(1,figsize=(8,6))
plt.subplot(221)
plt.plot(x_np,y_relu, c='red', label='relu')
plt.ylim(-1,5)
plt.legend(loc='best')

plt.subplot(222)
plt.plot(x_np,y_sigmoid, c='red',label='sigmoid')
plt.ylim(-0.2,1.2)
plt.legend(loc='best')

plt.subplot(223)
plt.plot(x_np,y_tanh, c='red',label='tanh')
plt.ylim(-1.2,1.2)
plt.legend(loc='best')

plt.subplot(224)
plt.plot(x_np,y_softplus, c='red',label='softplus')
plt.ylim(-0.2,6)
plt.legend(loc='best')


#用ax画
fig, ax = plt.subplots(2,2,figsize=(8,6))
ax[0,0].plot(x_np,y_relu,c='red', label='relu')
ax[0,0].set_title('relu',fontsize=18)
ax[0,0].set_ylim(-1,5)
ax[0,0].legend()

ax[0,1].plot(x_np,y_sigmoid)
ax[1,0].plot(x_np,y_tanh)
ax[1,1].plot(x_np,y_softplus)
plt.show()

六、线性拟合回归

import torch
import matplotlib.pyplot as plt
import torch.nn.functional as F
#######捏造数据#######
x = torch.unsqueeze(torch.linspace(-1,1,500),dim=1)#x的数据，shape=(500,1)
y = x.pow(2) + 0.2*torch.rand(x.size())
# 画图看看捏的数据咋样
fig,ax = plt.subplots(2,1)
ax[0].scatter(x.data.numpy(),y.data.numpy())
ax[0].set_title("Pinched data",fontsize=18)

搭建网络

# 建立一个神经网络我们可以直接运用 torch 中的体系. 先定义所有的层属性(__init__()),
# 然后再一层层搭建(forward(x))层于层的关系链接. 建立关系的时候, 我们会用到激励函数.
class Net(torch.nn.Module): #继承torch中的Module
    def __init__(self,n_feature,n_hidden,n_output):
        super(Net, self).__init__() #继承__init__功能
        # 定义每层用什么样的形式
        self.hidden = torch.nn.Linear(n_feature,n_hidden) # 隐藏层线性输出
        self.predict = torch.nn.Linear(n_hidden,n_output)  # 输出层线性输出

    def forward(self,x): # 这同时也是Module中的forward功能
        # 正向传播输入值，神经网络分析出输出值
        x = F.relu(self.hidden(x))  # 激励函数（隐藏层的线性值）
        x = self.predict(x)         # 输出值
        return x

net = Net(n_feature=1, n_hidden=10, n_output=1)
print(net)
"""
Net (
  (hidden): Linear (1 -> 10)
  (predict): Linear (10 -> 1)
)
"""

开始训练

#optimizer训练工具
optimizer = torch.optim.SGD(net.parameters(), lr=0.2) # 传入net的所有参数，学习率
loss_func = torch.nn.MSELoss() # 预测值和真实值的误差计算公式（均方差）

plt.ion()
for t in range(10000):
    prediction = net(x) # 喂给net训练数据x，输出预测值
    loss = loss_func(prediction, y) # 计算两者的误差,#要预测值在前，label在后

    optimizer.zero_grad() # 清空上一步的残余更新参数值,#net.parameters()所有参数梯度变为0
    loss.backward()       # 误差反向传播，计算参数更新
    optimizer.step()      # 将参数更新施加到net的parameters上,#optimizr优化parameters

可视化训练过程

    if t % 5 == 0:
        # plot and show learning process
        ax[1].cla()
        ax[1].scatter(x.data.numpy(), y.data.numpy())
        ax[1].plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
        ax[1].text(0.5, 0, 'Loss=%.4f' % loss.data.numpy(), fontdict={'size': 20, 'color': 'red'})
        plt.pause(0.01)
#如果在脚本中使用ion()命令开启了交互模式，没有使用ioff()关闭的话，
# 则图像会一闪而过，并不会常留。要想防止这种情况，需要在plt.show()之前加上ioff()命令。
plt.ioff()
plt.show()

七、区分类型 (分类)

捏个数据

import torch
import matplotlib.pyplot as plt

# 假数据
n_data = torch.ones(100, 2)         # 数据的基本形态
x0 = torch.normal(2*n_data, 1)      # 类型0 x data (tensor), shape=(100, 2)
y0 = torch.zeros(100)               # 类型0 y data (tensor), shape=(100, )
x1 = torch.normal(-2*n_data, 1)     # 类型1 x data (tensor), shape=(100, 1)
y1 = torch.ones(100)                # 类型1 y data (tensor), shape=(100, )

# 注意 x, y 数据的数据形式是一定要像下面一样 (torch.cat 是在合并数据)
x = torch.cat((x0, x1), 0).type(torch.FloatTensor)  # FloatTensor = 32-bit floating
y = torch.cat((y0, y1), ).type(torch.LongTensor)    # LongTensor = 64-bit integer


# 画图 会出错：会报错，因为画图x和y的数量不相同，x矩阵的形状是（200,2）的，而y矩阵的形状是（200），
# 所以需要把x分成两部分来画图才可以的。
# plt.scatter(x.data.numpy(), y.data.numpy())
# plt.show()
# 画图
plt.scatter(x.data.numpy()[:, 0], x.data.numpy()[:, 1], c=y.data.numpy(), s=100, lw=0, cmap='RdYlGn')
plt.show()

搭个网络

class Net(torch.nn.Module):
    def __init__(self,n_feature,n_hidden,n_outpot):
        super(Net,self).__init__()  #继承__init__的功能
        self.hidden = torch.nn.Linear(n_feature,n_hidden)
        self.out = torch.nn.Linear(n_hidden,n_outpot)

    def forward(self,x):
        x = torch.relu(self.hidden(x))
        x = self.out(x)
        return x

net = Net(n_feature=2,n_hidden=10,n_outpot=2)

print(net)

训练网络

optimizer = torch.optim.SGD(net.parameters(), lr=0.02)  # 传入 net 的所有参数, 学习率
# 算误差的时候, 注意真实值!不是! one-hot 形式的, 而是1D Tensor, (batch,)
# 但是预测值是2D tensor (batch, n_classes)
loss_func = torch.nn.CrossEntropyLoss()

for t in range(100):
    out = net(x)     # 喂给 net 训练数据 x, 输出分析值

    loss = loss_func(out, y)     # 计算两者的误差

    optimizer.zero_grad()   # 清空上一步的残余更新参数值
    loss.backward()         # 误差反向传播, 计算参数更新值
    optimizer.step()        # 将参数更新值施加到 net 的 parameters 上

可视化

plt.ion()
for t in range(100):
    out = net(x)     # 喂给 net 训练数据 x, 输出分析值

    loss = loss_func(out, y)     # 计算两者的误差

    optimizer.zero_grad()   # 清空上一步的残余更新参数值
    loss.backward()         # 误差反向传播, 计算参数更新值
    optimizer.step()        # 将参数更新值施加到 net 的 parameters 上
    # 接着上面来
    if t % 2 == 0:
        plt.cla()
        # 过了一道 softmax 的激励函数后的最大概率才是预测值
        prediction = torch.max(F.softmax(out), 1)[1]
        #prediction=torch.max(F.softmax(out), 1) 中的1，表示【0,0,1】预测结果中，结果为1的结果的位置。
        pred_y = prediction.data.numpy().squeeze()#利用squeeze（）函数将表示向量的数组转换为秩为1的数组，这样利用matplotlib库函数画图时，就可以正常的显示结果了
        target_y = y.data.numpy()
        plt.scatter(x.data.numpy()[:, 0], x.data.numpy()[:, 1], c=pred_y, s=100, lw=0, cmap='RdYlGn')
        accuracy = sum(pred_y == target_y) / 200.  # 预测中有多少和真实值一样
        plt.text(1.5, -4, 'Accuracy=%.2f' % accuracy, fontdict={'size': 20, 'color': 'red'})
        plt.pause(0.1)

plt.ioff()
plt.show()

八、快速搭建法

　　Torch 中提供了很多方便的途径, 同样是神经网络, 能快则快, 我们看看如何用更简单的方式搭建同样的回归神经网络.

上一节用的方法更加底层，其实有更加快速的方法，对比一下就很清楚了：

Method 1

我们用 class 继承了一个 torch 中的神经网络结构, 然后对其进行了修改

class Net(torch.nn.Module): #我们用 class 继承了一个 torch 中的神经网络结构, 然后对其进行了修改
    def __init__(self,n_feature,n_hidden,n_outpot):
        super(Net,self).__init__()  #继承__init__的功能,
        self.hidden = torch.nn.Linear(n_feature,n_hidden)
        self.out = torch.nn.Linear(n_hidden,n_outpot)

    def forward(self,x):
        x = torch.relu(self.hidden(x))
        x = self.out(x)
        return x

net1 = Net(n_feature=2,n_hidden=10,n_outpot=2)

Method 2

用nn库里一个函数就能快速搭建了，注意ReLU也算做一层加入到网络序列中

net2 = torch.nn.Sequential(
    torch.nn.Linear(2,10),
    torch.nn.ReLU(),
    torch.nn.Linear(10,2)
)
print(net1)
print(net2)

结果是类似的：就是net2中将ReLU也做为一个神经层

print(net1)
"""
Net(
  (hidden): Linear(in_features=2, out_features=10, bias=True)
  (out): Linear(in_features=10, out_features=2, bias=True)
)
"""
print(net2)
"""
Sequential(
  (0): Linear(in_features=2, out_features=10, bias=True)
  (1): ReLU()
  (2): Linear(in_features=10, out_features=2, bias=True)
)
"""

九、保存与提取网络

保存

#######捏造数据#######
torch.manual_seed(1)    # reproducible 使得每次随机初始化的随机数是一致的
x = torch.unsqueeze(torch.linspace(-1,1,500),dim=1)#x的数据，shape=(500,1)
y = x.pow(2) + 0.2*torch.rand(x.size())

def save():
    # 搭建网络
    net1 = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1),
    )
    optimizer = torch.optim.SGD(net1.parameters(), lr=0.5)
    loss_func = torch.nn.MSELoss()
    # 训练
    for t in range(100):
        prediction = net1(x)
        loss = loss_func(prediction, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    torch.save(net1, 'net.pkl')  # 保存整个网络
    torch.save(net1.state_dict(), 'net_params.pkl')  # 只保存网络中的参数 (速度快, 占内存少)

提取网络

# 这种方式将会提取整个神经网络, 网络大的时候可能会比较慢.
def restore_net():
    # restore entire net1 to net2
    net2 = torch.load('net.pkl')
    prediction = net2(x)

只提取网络参数

# 这种方式将会提取所有的参数, 然后再放到你的新建网络中.
def restore_params():
    # 新建 net3
    net3 = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1)
    )

    # 将保存的参数复制到 net3
    net3.load_state_dict(torch.load('net_params.pkl'))
    prediction = net3(x)

完整代码并查看三个网络模型的结果

import torch
import matplotlib.pyplot as plt
import torch.nn.functional as F
from matplotlib import animation
#######捏造数据#######
torch.manual_seed(1)    # reproducible 使得每次随机初始化的随机数是一致的
x = torch.unsqueeze(torch.linspace(-1,1,100),dim=1)#x的数据，shape=(500,1)
y = x.pow(2) + 0.2*torch.rand(x.size())



def save():
    # 搭建网络
    net1 = torch.nn.Sequential(
        torch.nn.Linear(1, 100),
        torch.nn.ReLU(),
        torch.nn.Linear(100, 1),
    )
    optimizer = torch.optim.SGD(net1.parameters(), lr=0.3)
    loss_func = torch.nn.MSELoss()
    # 训练
    for t in range(1000):
        prediction = net1(x)
        loss = loss_func(prediction, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    torch.save(net1, 'net.pkl')  # 保存整个网络
    torch.save(net1.state_dict(), 'net_params.pkl')  # 只保存网络中的参数 (速度快, 占内存少)

    # plot result
    plt.figure(1, figsize=(10,3))
    plt.subplot(131)
    plt.title('Net1')
    plt.scatter(x.data.numpy(), y.data.numpy())
    plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)


# 这种方式将会提取整个神经网络, 网络大的时候可能会比较慢.
def restore_net():
    # restore entire net1 to net2
    net2 = torch.load('net.pkl')
    prediction = net2(x)

    # plot result
    plt.figure(1, figsize=(10,3))
    plt.subplot(132)
    plt.title('Net2')
    plt.scatter(x.data.numpy(), y.data.numpy())
    plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)

# 这种方式将会提取所有的参数, 然后再放到你的新建网络中.
def restore_params():
    # 新建 net3
    net3 = torch.nn.Sequential(
        torch.nn.Linear(1, 100),
        torch.nn.ReLU(),
        torch.nn.Linear(100, 1)
    )

    # 将保存的参数复制到 net3
    net3.load_state_dict(torch.load('net_params.pkl'))
    prediction = net3(x)
    # plot result
    plt.figure(1, figsize=(10,3))
    plt.subplot(133)
    plt.title('Net3')
    plt.scatter(x.data.numpy(), y.data.numpy())
    plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)



# 保存 net1 (1. 整个网络, 2. 只有参数)
save()

# 提取整个网络
restore_net()

# 提取网络参数, 复制到新网络
restore_params()

plt.show()

十、批训练

　　进行批量训练需要用到一个很好用的工具DataLoader 。

　　注意，莫烦大神用的版本跟现在新版还是有些出入的，用Data.TensorDataset这个函数，不要指定data_tensor和target_tensor会报错的，因为新版的库改了，直接输入x和y就行了。

数据分批

import torch
import torch.utils.data as tud
torch.manual_seed(1)

BATCH_SIZE = 20  #批训练的数据个数，每批20个
#######捏造数据#######
x = torch.unsqueeze(torch.linspace(-1,1,100),dim=1)#x的数据，shape=(100,1)
y = x.pow(2) + 0.2*torch.rand(x.size())

# 先转换成 torch 能识别的 Dataset
# torch_dataset = DL.TensorDataset(data_tensor=x, target_tensor=y)
# 新版的库改了，直接输入x和y就行了。
torch_dataset = tud.TensorDataset(x,y)

# 把dataset放入DataLoader
loader = tud.DataLoader(
    dataset=torch_dataset, # 转换好的torch能识别的dataset导入DataLoader
    batch_size=BATCH_SIZE, # 每批的大小是多大
    shuffle=True,          # 要不要打乱数据顺序（一般打乱比较好）
    num_workers=2          # 多线程来读取数据

)

分批训练

epoch表示整体数据训练次数，step则是每一批数据里面有几组数据

def show_batch():
    for epoch in range(3):     # 训练所有！整套！数据3次
        for step,(batch_x, batch_y) in enumerate(loader): #每一步loader释放一批数据来学习
            # 训练的地方

            # 打出来一些数据
            print('Epoch:',epoch,'|Step:',step,'|batch x:',batch_x.numpy(),'|batch y:',batch_y.numpy())

if __name__ == '__main__':
    show_batch()

"""
Epoch: 0 |Step: 0 |batch x: [ 5.  7. 10.  3.  4.] |batch y: [6. 4. 1. 8. 7.]
Epoch: 0 |Step: 1 |batch x: [2. 1. 8. 9. 6.] |batch y: [ 9. 10.  3.  2.  5.]
Epoch: 1 |Step: 0 |batch x: [ 4.  6.  7. 10.  8.] |batch y: [7. 5. 4. 1. 3.]
Epoch: 1 |Step: 1 |batch x: [5. 3. 2. 1. 9.] |batch y: [ 6.  8.  9. 10.  2.]
Epoch: 2 |Step: 0 |batch x: [ 4.  2.  5.  6. 10.] |batch y: [7. 9. 6. 5. 1.]
Epoch: 2 |Step: 1 |batch x: [3. 9. 1. 8. 7.] |batch y: [ 8.  2. 10.  3.  4.]
"""

　　可以看出, 每步都导出了5个数据进行学习. 然后每个 epoch 的导出数据都是先打乱了以后再导出。

　　改变一下 BATCH_SIZE = 8, 这样我们就知道, step=0 会导出8个数据, 但是, step=1 时数据库中的数据不够 8个, 这时怎么办呢:这时, 在 step=1 就只给你返回这个 epoch 中剩下的数据就好了.

Epoch: 0 |Step: 0 |batch x: [ 5.  7. 10.  3.  4.  2.  1.  8.] |batch y: [ 6.  4.  1.  8.  7.  9. 10.  3.]
Epoch: 0 |Step: 1 |batch x: [9. 6.] |batch y: [2. 5.]
Epoch: 1 |Step: 0 |batch x: [ 4.  6.  7. 10.  8.  5.  3.  2.] |batch y: [7. 5. 4. 1. 3. 6. 8. 9.]
Epoch: 1 |Step: 1 |batch x: [1. 9.] |batch y: [10.  2.]
Epoch: 2 |Step: 0 |batch x: [ 4.  2.  5.  6. 10.  3.  9.  1.] |batch y: [ 7.  9.  6.  5.  1.  8.  2. 10.]
Epoch: 2 |Step: 1 |batch x: [8. 7.] |batch y: [3. 4.]

十一、Optimizer 优化器

各种不同的优化器在同一个网路上的对比

SGD 是最普通的优化器, 也可以说没有加速效果, 而 Momentum 是 SGD 的改良版, 它加入了动量原则. 后面的 RMSprop 又是 Momentum 的升级版. 而 Adam 又是 RMSprop 的升级版. 不过从这个结果中我们看到, Adam 的效果似乎比 RMSprop 要差一点. 所以说并不是越先进的优化器, 结果越佳.

1.捏一个数据

import torch
import torch.utils.data as tud
import matplotlib.pyplot as plt

torch.manual_seed(1)

LR = 0.01
BATCH_SIZE = 32
EPOCH = 12

######捏一个数据######
x = torch.unsqueeze(torch.linspace(-1,1,1000),dim=1)
y = x.pow(2) + 0.1*torch.normal(torch.zeros(*x.size()))

plt.scatter(x.numpy(),y.numpy())
plt.show()

torch_dataset = tud.TensorDataset(x,y)
loader = tud.DataLoader(
    dataset=torch_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2
)

2.建立同样的网络

# 搭建同样的网络Net
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(1,20)
        self.predict = torch.nn.Linear(20,1)

    def forward(self,x):
        x = torch.ReLU(self.hidden(x))
        x = self.predict(x)
        return  x

# 为每个优化器创建一个net
net_SGD = Net()
net_Momentum = Net()
net_RMSprop = Net()
net_Adam = Net()
nets = [net_SGD,net_Momentum,net_RMSprop,net_Adam]

3.不同的optimizer

　　接下来在创建不同的优化器, 用来训练不同的网络. 并创建一个 loss_func 用来计算误差. 我们用几种常见的优化器, SGD, Momentum, RMSprop, Adam.

# different optimizers
opt_SGD         = torch.optim.SGD(net_SGD.parameters(), lr=LR)
opt_Momentum    = torch.optim.SGD(net_Momentum.parameters(), lr=LR, momentum=0.8)
opt_RMSprop     = torch.optim.RMSprop(net_RMSprop.parameters(), lr=LR, alpha=0.9)
opt_Adam        = torch.optim.Adam(net_Adam.parameters(), lr=LR, betas=(0.9, 0.99))
optimizers = [opt_SGD, opt_Momentum, opt_RMSprop, opt_Adam]

loss_func = torch.nn.MSELoss()
losses_his = [[], [], [], []]   # 记录 training 时不同神经网络的 loss

4.训练出图

    for epoch in range(EPOCH):
        print('Epoch',epoch)
        for step,(b_x,b_y) in enumerate(loader):
            # 对每个优化器, 优化属于他的神经网络
            for net, opt, l_his in zip(nets, optimizers, losses_his):
                output = net(b_x)  # 获得每个网络的输出
                loss = loss_func(output, b_y)  # compute loss for every net
                opt.zero_grad()  # clear gradients for next train
                loss.backward()  # backpropagation, compute gradients
                opt.step()  # apply gradients
                l_his.append(loss.data.numpy())  # loss recoder

    labels = ['SGD', 'MoMENTUM', 'RMSPROP', 'Adam']
    for i, l_his in enumerate(losses_his):
        plt.plot(l_his, label=labels[i])
    plt.legend(loc='best')
    plt.xlabel('Steps')
    plt.ylabel('Loss')
    plt.ylim((0, 0.2))
    plt.show()

完整代码

import torch
import torch.utils.data as tud
import matplotlib.pyplot as plt
import torch.nn.functional as F
torch.manual_seed(1)

LR = 0.01
BATCH_SIZE = 32
EPOCH = 12

######捏一个数据######
x = torch.unsqueeze(torch.linspace(-1,1,1000),dim=1)
y = x.pow(2) + 0.1*torch.normal(torch.zeros(*x.size()))

# plt.scatter(x.numpy(),y.numpy())
# plt.show()

torch_dataset = tud.TensorDataset(x,y)
loader = tud.DataLoader(
    dataset=torch_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2
)

# 搭建同样的网络Net
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(1,20)
        self.predict = torch.nn.Linear(20,1)

    def forward(self,x):
        x = F.relu(self.hidden(x))
        x = self.predict(x)
        return  x

if __name__== '__main__':
    # 为每个优化器创建一个net
    net_SGD = Net()
    net_Momentum = Net()
    net_RMSprop = Net()
    net_Adam = Net()
    nets = [net_SGD,net_Momentum,net_RMSprop,net_Adam]

    # different optimizers
    opt_SGD         = torch.optim.SGD(net_SGD.parameters(), lr=LR)
    opt_Momentum    = torch.optim.SGD(net_Momentum.parameters(), lr=LR, momentum=0.8)
    opt_RMSprop     = torch.optim.RMSprop(net_RMSprop.parameters(), lr=LR, alpha=0.9)
    opt_Adam        = torch.optim.Adam(net_Adam.parameters(), lr=LR, betas=(0.9, 0.99))
    optimizers = [opt_SGD, opt_Momentum, opt_RMSprop, opt_Adam]

    loss_func = torch.nn.MSELoss()
    losses_his = [[], [], [], []]   # 记录 training 时不同神经网络的 loss

    for epoch in range(EPOCH):
        print('Epoch',epoch)
        for step,(b_x,b_y) in enumerate(loader):
            # 对每个优化器, 优化属于他的神经网络
            for net, opt, l_his in zip(nets, optimizers, losses_his):
                output = net(b_x)  # 获得每个网络的输出
                loss = loss_func(output, b_y)  # compute loss for every net
                opt.zero_grad()  # clear gradients for next train
                loss.backward()  # backpropagation, compute gradients
                opt.step()  # apply gradients
                l_his.append(loss.data.numpy())  # loss recoder

    labels = ['SGD', 'MoMENTUM', 'RMSPROP', 'Adam']
    for i, l_his in enumerate(losses_his):
        plt.plot(l_his, label=labels[i])
    plt.legend(loc='best')
    plt.xlabel('Steps')
    plt.ylabel('Loss')
    plt.ylim((0, 0.2))
    plt.show()

十二、CNN_classification

1、MINIST手写数据

import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision      # 数据库模块
import matplotlib.pyplot as plt

torch.manual_seed(1)    # reproducible

# Hyper Parameters
EPOCH = 1           # 训练整批数据多少次, 为了节约时间, 我们只训练一次
BATCH_SIZE = 50
LR = 0.001          # 学习率
DOWNLOAD_MNIST = True  # 如果你已经下载好了mnist数据就写上 False


# Mnist 手写数字
train_data = torchvision.datasets.MNIST(
    root='./mnist/',    # 保存或者提取位置
    train=True,  # this is training data
    transform=torchvision.transforms.ToTensor(),    # 转换 PIL.Image or numpy.ndarray 成
                                                    # torch.FloatTensor (C x H x W), 训练的时候 normalize 成 [0.0, 1.0] 区间
    download=DOWNLOAD_MNIST,          # 没下载就下载, 下载了就不用再下了
)

test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)

# 批训练 50samples, 1 channel, 28x28 (50, 1, 28, 28)
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

# 为了节约时间, 我们测试时只测试前2000个
test_x = torch.unsqueeze(test_data.test_data, dim=1).type(torch.FloatTensor)[:2000]/255.   # shape from (2000, 28, 28) to (2000, 1, 28, 28), value in range(0,1)
test_y = test_data.test_labels[:2000]

2、CNN模型

和以前一样, 我们用一个 class 来建立 CNN 模型. 这个 CNN 整体流程是

卷积(Conv2d) -> 激励函数(ReLU) -> 池化, 向下采样 (MaxPooling)

-> 再来一遍 -> 展平多维的卷积成的特征图 -> 接入全连接层 (Linear) -> 输出

# CNN模型搭建
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(  # input shape (1,28,28)
            nn.Conv2d(
                in_channels=1,    # 输入高度
                out_channels=16,  # n_filters 输出高度
                kernel_size=5,    # 卷积核大小  filter size
                stride=1,         # 卷积核步进或者说步长，filter movement/step
                padding=2,        # 如果想要 con2d 出来的图片长宽没有变化, padding=(kernel_size-1)/2 当 stride=1

            ), ## output shape (16, 28, 28)
            nn.ReLU(),            # activation
            nn.MaxPool2d(kernel_size=2), #在2*2 空间里向下采样，output shape (16, 14, 14)
        )
        self.conv2 = nn.Sequential(   # input shape (16,14,14)
            nn.Conv2d(16,32,5,1,2),   # output shape(32,14,14)
            nn.ReLU(),
            nn.MaxPool2d(2),          # output size (32,7,7)
        )
        self.out = nn.Linear(32*7*7,10)  # 全连接层输出10个类
    def forword(self,x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1) # 展平多维的卷积图成 (batch_size, 32 * 7 * 7)
        output = self.out(x)
        return output

cnn = CNN()
print(cnn)

CNN(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (out): Linear(in_features=1568, out_features=10, bias=True)
)

3、训练，全部代码

下面我们开始训练, 将 x y 都用 Variable 包起来, 然后放入 cnn 中计算 output, 最后再计算误差.下面代码省略了计算精确度 accuracy 的部分

import os
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision      # 数据库模块
import matplotlib.pyplot as plt
from matplotlib import cm

torch.manual_seed(1)    # reproducible

# Hyper Parameters
EPOCH = 1           # 训练整批数据多少次, 为了节约时间, 我们只训练一次
BATCH_SIZE = 50
LR = 0.001          # 学习率
DOWNLOAD_MNIST = False  # 如果你已经下载好了mnist数据就写上 False

if not(os.path.exists('./mnist/')) or not os.listdir('./mnist/'):
    # not mnist dir or mnist is empyt dir
    DOWNLOAD_MNIST = True


# Mnist 手写数字
train_data = torchvision.datasets.MNIST(
    root='./mnist/',    # 保存或者提取位置
    train=True,  # this is training data
    transform=torchvision.transforms.ToTensor(),    # 转换 PIL.Image or numpy.ndarray 成
                                                    # torch.FloatTensor (C x H x W), 训练的时候 normalize 成 [0.0, 1.0] 区间
    download=DOWNLOAD_MNIST,          # 没下载就下载, 下载了就不用再下了
)

test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)

# plot one example
print(train_data.data.size())                 # (60000, 28, 28)
print(train_data.targets.size())               # (60000)
plt.imshow(train_data.data[0].numpy(), cmap='gray')
plt.title('%i' % train_data.targets[0])
plt.show()


# 批训练 50samples, 1 channel, 28x28 (50, 1, 28, 28)
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

# 为了节约时间, 我们测试时只测试前2000个
test_x = torch.unsqueeze(test_data.data, dim=1).type(torch.FloatTensor)[:2000]/255.   # shape from (2000, 28, 28) to (2000, 1, 28, 28), value in range(0,1)
test_y = test_data.targets[:2000]

# CNN模型搭建
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(  # input shape (1,28,28)
            nn.Conv2d(
                in_channels=1,    # 输入高度
                out_channels=16,  # n_filters 输出高度
                kernel_size=5,    # 卷积核大小  filter size
                stride=1,         # 卷积核步进或者说步长，filter movement/step
                padding=2,        # 如果想要 con2d 出来的图片长宽没有变化, padding=(kernel_size-1)/2 当 stride=1

            ), ## output shape (16, 28, 28)
            nn.ReLU(),            # activation
            nn.MaxPool2d(kernel_size=2), #在2*2 空间里向下采样，output shape (16, 14, 14)
        )
        self.conv2 = nn.Sequential(   # input shape (16,14,14)
            nn.Conv2d(16,32,5,1,2),   # output shape(32,14,14)
            nn.ReLU(),
            nn.MaxPool2d(2),          # output size (32,7,7)
        )
        self.out = nn.Linear(32*7*7,10)  # 全连接层输出10个类

    def forward(self,x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1) # 展平多维的卷积图成 (batch_size, 32 * 7 * 7)
        output = self.out(x)
        return output, x


def train_save():
    cnn = CNN()
    print(cnn)

    optimizer = torch.optim.Adam(cnn.parameters(), lr=LR) # 优化整一个CNN的参数
    loss_func = nn.CrossEntropyLoss()

    try:
        from sklearn.manifold import TSNE;HAS_SK = True
    except:
        HAS_SK = False;
        print('Please install sklearn for layer visualization')

    def plot_with_labels(lowDWeights, labels):
        plt.cla()
        X, Y = lowDWeights[:, 0], lowDWeights[:, 1]
        for x, y, s in zip(X, Y, labels):
            c = cm.rainbow(int(255 * s / 9));
            plt.text(x, y, s, backgroundcolor=c, fontsize=9)
        plt.xlim(X.min(), X.max());
        plt.ylim(Y.min(), Y.max());
        plt.title('Visualize last layer');
        plt.show();
        plt.pause(0.01)

    plt.ion()

    # training and testing
    for epoch in range(EPOCH):
        for step, (b_x, b_y) in enumerate(train_loader):   # gives batch data, normalize x when iterate train_loader

            output = cnn(b_x)[0]            # cnn output
            loss = loss_func(output, b_y)   # cross entropy loss
            optimizer.zero_grad()           # clear gradients for this training step
            loss.backward()                 # backpropagation, compute gradients
            optimizer.step()                # apply gradients

            if step % 50 == 0:
                test_output, last_layer = cnn(test_x)
                pred_y = torch.max(test_output, 1)[1].data.numpy()
                accuracy = float((pred_y == test_y.data.numpy()).astype(int).sum()) / float(test_y.size(0))
                print('Epoch: ', epoch, '| train loss: %.4f' % loss.data.numpy(), '| test accuracy: %.2f' % accuracy)
                if HAS_SK:
                    # Visualization of trained flatten layer (T-SNE)
                    tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
                    plot_only = 500
                    low_dim_embs = tsne.fit_transform(last_layer.data.numpy()[:plot_only, :])
                    labels = test_y.numpy()[:plot_only]
                    plot_with_labels(low_dim_embs, labels)
    plt.ioff()

    torch.save(cnn, './mnist_net/cnn_classification_net.pkl')  # 保存整个网络
    torch.save(cnn.state_dict(), './mnist_net/cnn_classification_net_params.pkl')  # 只保存网络中的参数 (速度快, 占内存少)

    """
    ...
    Epoch:  0 | train loss: 0.0306 | test accuracy: 0.97
    Epoch:  0 | train loss: 0.0147 | test accuracy: 0.98
    Epoch:  0 | train loss: 0.0427 | test accuracy: 0.98
    Epoch:  0 | train loss: 0.0078 | test accuracy: 0.98
    """

    # print 10 predictions from test data
    test_output, _ = cnn(test_x[:10])
    pred_y = torch.max(test_output, 1)[1].data.numpy()
    print(pred_y, 'prediction number')
    print(test_y[:10].numpy(), 'real number')
    """
    [7 2 1 0 4 1 4 9 5 9] prediction number
    [7 2 1 0 4 1 4 9 5 9] real number
    """


# 这种方式将会提取整个神经网络, 网络大的时候可能会比较慢.
def restore_net():
    # restore entire net1 to net2
    net2 = torch.load('./mnist_net/cnn_classification_net.pkl')
    test_output, _ = net2(test_x[:20])
    pred_y = torch.max(test_output, 1)[1].data.numpy()
    print(pred_y, 'prediction number')
    print(test_y[:20].numpy(), 'real number')

if __name__ == '__main__':
    train = True
    if train:
        train_save()
    else:
        restore_net()

十三、RNN_classification

1、MINIST手写数据

import os
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision      # 数据库模块
import matplotlib.pyplot as plt
from matplotlib import cm
import torchvision.transforms as transforms
torch.manual_seed(1)    # reproducible

# Hyper Parameters
EPOCH = 1           # 训练整批数据多少次, 为了节约时间, 我们只训练一次
BATCH_SIZE = 64
TIME_STEP = 28
INPUT_SIZE = 28
LR = 0.01          # 学习率
DOWNLOAD_MNIST = False  # 如果你已经下载好了mnist数据就写上 False

if not(os.path.exists('./mnist/')) or not os.listdir('./mnist/'):
    # not mnist dir or mnist is empyt dir
    DOWNLOAD_MNIST = True


# Mnist 手写数字
train_data = torchvision.datasets.MNIST(
    root='./mnist/',    # 保存或者提取位置
    train=True,  # this is training data
    transform=torchvision.transforms.ToTensor(),    # 转换 PIL.Image or numpy.ndarray 成
                                                    # torch.FloatTensor (C x H x W), 训练的时候 normalize 成 [0.0, 1.0] 区间
    download=DOWNLOAD_MNIST,          # 没下载就下载, 下载了就不用再下了
)

test_data = torchvision.datasets.MNIST(root='./mnist/', train=False, transform=transforms.ToTensor())

# 批训练 50samples, 1 channel, 28x28 (50, 1, 28, 28)
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

# 为了节约时间, 我们测试时只测试前2000个
test_x = torch.unsqueeze(test_data.data, dim=1).type(torch.FloatTensor)[:2000]/255.   # shape from (2000, 28, 28) to (2000, 1, 28, 28), value in range(0,1)
# test_x = test_data.test_data.type(torch.FloatTensor)[:2000]/255. 
test_y = test_data.targets[:2000]

2.RNN模型

和以前一样, 我们用一个 class 来建立 RNN 模型. 这个 RNN 整体流程是

(input0, state0) -> LSTM -> (output0, state1);
(input1, state1) -> LSTM -> (output1, state2);
…
(inputN, stateN)-> LSTM -> (outputN, stateN+1);
outputN -> Linear -> prediction. 通过LSTM分析每一时刻的值, 并且将这一时刻和前面时刻的理解合并在一起, 生成当前时刻对前面数据的理解或记忆. 传递这种理解给下一时刻分析.

class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()
        self.rnn = nn.LSTM(  # input shape (1,28,28)LSTM 效果要比 nn.RNN() 好多了
            input_size=28,      # 图片每行的数据像素点
            hidden_size=64,     # rnn hidden unit
            num_layers=1,       # 有几层 RNN layers
            batch_first=True,   # input & output 会是以 batch size 为第一维度的特征集 e.g. (batch, time_step, input_size)
        )
        self.out = nn.Linear(64, 10)  # 输出层

    def forward(self,x):
        # x shape (batch, time_step, input_size)
        # r_out shape (batch, time_step, output_size)
        # h_n shape (n_layers, batch, hidden_size)   LSTM 有两个 hidden states, h_n 是分线, h_c 是主线
        # h_c shape (n_layers, batch, hidden_size)
        r_out, (h_n, h_c) = self.rnn(x, None) ## None 表示 hidden state 会用全0的 state
        # 选取最后一个时间点的 r_out 输出
        # 这里 r_out[:, -1, :] 的值也是 h_n 的值
        out = self.out(r_out[:, -1, :])
        return out

3、训练&完整代码

我们将图片数据看成一个时间上的连续数据, 每一行的像素点都是这个时刻的输入, 读完整张图片就是从上而下的读完了每行的像素点. 然后我们就可以拿出 RNN 在最后一步的分析值判断图片是哪一类了.

import os
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision      # 数据库模块
import matplotlib.pyplot as plt
from matplotlib import cm
import torchvision.transforms as transforms
torch.manual_seed(1)    # reproducible

# Hyper Parameters
EPOCH = 1           # 训练整批数据多少次, 为了节约时间, 我们只训练一次
BATCH_SIZE = 64
TIME_STEP = 28
INPUT_SIZE = 28
LR = 0.01          # 学习率
DOWNLOAD_MNIST = False  # 如果你已经下载好了mnist数据就写上 False

if not(os.path.exists('./mnist/')) or not os.listdir('./mnist/'):
    # not mnist dir or mnist is empyt dir
    DOWNLOAD_MNIST = True


# Mnist 手写数字
train_data = torchvision.datasets.MNIST(
    root='./mnist/',    # 保存或者提取位置
    train=True,  # this is training data
    transform=torchvision.transforms.ToTensor(),    # 转换 PIL.Image or numpy.ndarray 成
                                                    # torch.FloatTensor (C x H x W), 训练的时候 normalize 成 [0.0, 1.0] 区间
    download=DOWNLOAD_MNIST,          # 没下载就下载, 下载了就不用再下了
)

test_data = torchvision.datasets.MNIST(root='./mnist/', train=False, transform=transforms.ToTensor())

# 批训练 50samples, 1 channel, 28x28 (50, 1, 28, 28)
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

# 为了节约时间, 我们测试时只测试前2000个
test_x = test_data.data.type(torch.FloatTensor)[:2000]/255. # shape from (2000, 28, 28) to (2000, 1, 28, 28), value in range(0,1)
test_y = test_data.targets.numpy()[:2000]

# CNN模型搭建
class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()
        self.rnn = nn.LSTM(  # input shape (1,28,28)LSTM 效果要比 nn.RNN() 好多了
            input_size=28,      # 图片每行的数据像素点
            hidden_size=64,     # rnn hidden unit
            num_layers=1,       # 有几层 RNN layers
            batch_first=True,   # input & output 会是以 batch size 为第一维度的特征集 e.g. (batch, time_step, input_size)
        )
        self.out = nn.Linear(64, 10)  # 输出层

    def forward(self,x):
        # x shape (batch, time_step, input_size)
        # r_out shape (batch, time_step, output_size)
        # h_n shape (n_layers, batch, hidden_size)   LSTM 有两个 hidden states, h_n 是分线, h_c 是主线
        # h_c shape (n_layers, batch, hidden_size)
        r_out, (h_n, h_c) = self.rnn(x, None) ## None 表示 hidden state 会用全0的 state
        # 选取最后一个时间点的 r_out 输出
        # 这里 r_out[:, -1, :] 的值也是 h_n 的值
        out = self.out(r_out[:, -1, :])
        return out

def train_save():
    rnn = RNN()
    print(rnn)

    optimizer = torch.optim.Adam(rnn.parameters(), lr=LR) # 优化整一个CNN的参数
    loss_func = nn.CrossEntropyLoss()

    # training and testing
    for epoch in range(EPOCH):
        for step, (b_x, b_y) in enumerate(train_loader):   # gives batch data, normalize x when iterate train_loader

            b_x = b_x.view(-1, 28, 28)      # reshape x to (batch, time_step, input_size)

            output = rnn(b_x)           # cnn output
            loss = loss_func(output, b_y)   # cross entropy loss
            optimizer.zero_grad()           # clear gradients for this training step
            loss.backward()                 # backpropagation, compute gradients
            optimizer.step()                # apply gradients

            if step % 50 == 0:
                test_output = rnn(test_x)   # (samples, time_step, input_size)
                pred_y = torch.max(test_output, 1)[1].data.numpy()

                accuracy = float((pred_y == test_y).astype(int).sum()) / float(test_y.size)
                print('Epoch: ', epoch, '| train loss: %.4f' % loss.data.numpy(), '| test accuracy: %.2f' % accuracy)


    torch.save(rnn, './mnist_net/rnn_classification_net.pkl')  # 保存整个网络
    torch.save(rnn.state_dict(), './mnist_net/rnn_classification_net_params.pkl')  # 只保存网络中的参数 (速度快, 占内存少)

    """
    ...
    Epoch:  0 | train loss: 0.0306 | test accuracy: 0.97
    Epoch:  0 | train loss: 0.0147 | test accuracy: 0.98
    Epoch:  0 | train loss: 0.0427 | test accuracy: 0.98
    Epoch:  0 | train loss: 0.0078 | test accuracy: 0.98
    """

    # print 10 predictions from test data
    test_output = rnn(test_x[:10].view(-1, 28, 28))
    pred_y = torch.max(test_output, 1)[1].data.numpy()
    print(pred_y, 'prediction number')
    print(test_y[:10], 'real number')
    """
    [7 2 1 0 4 1 4 9 5 9] prediction number
    [7 2 1 0 4 1 4 9 5 9] real number
    """


# 这种方式将会提取整个神经网络, 网络大的时候可能会比较慢.
def restore_net():
    # restore entire net1 to net2
    net2 = torch.load('./mnist_net/rnn_classification_net.pkl')
    test_output = net2(test_x[:100].view(-1, 28, 28))
    pred_y = torch.max(test_output, 1)[1].data.numpy()
    accuracy = float((pred_y == test_y[:100]).astype(int).sum()) / 100
    print(pred_y, 'prediction number')
    print(test_y[:100], 'real number')
    print('accuracy: ',accuracy)


if __name__ == '__main__':
    GO_train = False    # False表示不训练直接调用训练好的模型，True表示训练
    if GO_train:
        train_save()
    else:
        restore_net()

十四、RNN_regression

用 RNN 来及时预测时间序列

1、训练数据

用 sin 的曲线预测出 cos 的曲线

import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt

torch.manual_seed(1)

# 超参数
TIME_STEP = 10      # rnn时间步长/图像高度
INPUT_SIZE = 1      # rnn输入大小/图像宽度
LR = 0.02           # 学习率

# 显示数据
steps = np.linspace(0, np.pi*2, 100, dtype=np.float32)  # float32 for converting torch FloatTensor
x_np = np.sin(steps)
y_np = np.cos(steps)
plt.plot(steps, y_np, 'r-', label = 'target(cos)')
plt.plot(steps, x_np, 'b-', label = 'input(sin)')
plt.legend(loc='best')
plt.show()

2、RNN网络

class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()
        self.rnn = nn.RNN(   # 一个普通的RNN就能胜任
            input_size=INPUT_SIZE,
            hidden_size=32,
            num_layers=1,
            batch_first=True,
        )
        self.out = nn.Linear(32, 1)

    def forward(self, x, h_state):    # 因为 hidden state 是连续的, 所以我们要一直传递这一个 state
        # x (batch, time_step, input_size)
        # h_state (n_layers, batch, hidden_size)
        # r_out (batch, time_step, output_size)
        r_out, h_state = self.rnn(x, h_state)  # h_state 也要做为RNN的输入  这次具有时间序列特征
        outs = []  # 保存所有时间点的预测值
        for time_step in range(r_out.size(1)):  # 对每一个时间点计算 output
            outs.append(self.out(r_out[:, time_step, :]))
        return torch.stack(outs, dim=1), h_state

    # 其实熟悉RNN的朋友应该知道, forward过程中的对每个时间点求输出还有一招使得计算量比较小的.不过上面的内容主要是为了呈现
    # PyTorch在动态构图上的优势, 所以我用了一个for loop 来搭建那套输出系统.下面介绍一个替换方式.使用 reshape 的方式整批计算.
    # def forward(self, x, h_state):
    #     r_out, h_state = self.rnn(x, h_state)
    #     r_out = r_out.view(-1, 32)
    #     outs = self.out(r_out)
    #     return outs.view(-1, 32, TIME_STEP), h_state

rnn = RNN()
print(rnn)
"""
RNN(
  (rnn): RNN(1, 32, batch_first=True)
  (out): Linear(in_features=32, out_features=1, bias=True)
)
"""

3、训练

可以看出, 我们使用 x 作为输入的 sin 值, 然后 y 作为想要拟合的输出, cos 值. 因为他们两条曲线是存在某种关系的, 所以我们就能用 sin 来预测 cos. rnn 会理解他们的关系, 并用里面的参数分析出来这个时刻 sin 曲线上的点如何对应上 cos 曲线上的点.

optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)   # optimize all rnn parameters
loss_func = nn.MSELoss()

h_state = None   # 要使用初始 hidden state, 可以设成 None

for step in range(100):
    start, end = step * np.pi, (step+1)*np.pi   # time steps
    # sin 预测 cos
    steps = np.linspace(start, end, TIME_STEP, dtype=np.float32, endpoint=False)  # float32 for converting torch FloatTensor
    x_np = np.sin(steps)    # float32 for converting torch FloatTensor
    y_np = np.cos(steps)

    # 原来只有一维的数据，利用np.newaxis，np.newaxis的作用就是在这一位置增加一个一维，这一位置指的是np.newaxis所在的位置
    x = torch.from_numpy(x_np[np.newaxis, :, np.newaxis])    # shape (batch, time_step, input_size)  （1，10，1）
    y = torch.from_numpy(y_np[np.newaxis, :, np.newaxis])

    prediction, h_state = rnn(x, h_state)   # rnn 对于每个 step 的 prediction, 还有最后一个 step 的 h_state
    # !!  下一步十分重要 !!
    h_state = h_state.data  # 要把 h_state 重新包装一下才能放入下一个 iteration, 不然会报错

    loss = loss_func(prediction, y)     # cross entropy loss
    optimizer.zero_grad()               # clear gradients for this training step
    loss.backward()                     # backpropagation, compute gradients
    optimizer.step()                    # apply gradients
    # plotting
    plt.plot(steps, y_np.flatten(), 'r-')
    plt.plot(steps, prediction.data.numpy().flatten(), 'b-')
    plt.draw(); plt.pause(0.05)

plt.ioff()
plt.show()

4、完整代码

import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt

torch.manual_seed(1)

# 超参数
TIME_STEP = 10      # rnn时间步长/图像高度
INPUT_SIZE = 1      # rnn输入大小/图像宽度
LR = 0.02           # 学习率

# 显示数据
steps = np.linspace(0, np.pi*2, 100, dtype=np.float32)  # float32 for converting torch FloatTensor
x_np = np.sin(steps)
y_np = np.cos(steps)

# plt.plot(steps, y_np, 'r-', label = 'target(cos)')
# plt.plot(steps, x_np, 'b-', label = 'input(sin)')
# plt.legend(loc='best')
# plt.show()

class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()
        self.rnn = nn.RNN(   # 一个普通的RNN就能胜任
            input_size=INPUT_SIZE,
            hidden_size=32,
            num_layers=1,
            batch_first=True,
        )
        self.out = nn.Linear(32, 1)

    def forward(self, x, h_state):    # 因为 hidden state 是连续的, 所以我们要一直传递这一个 state
        # x (batch, time_step, input_size)
        # h_state (n_layers, batch, hidden_size)
        # r_out (batch, time_step, output_size)
        r_out, h_state = self.rnn(x, h_state)  # h_state 也要做为RNN的输入  这次具有时间序列特征
        outs = []  # 保存所有时间点的预测值
        for time_step in range(r_out.size(1)):  # 对每一个时间点计算 output
            outs.append(self.out(r_out[:, time_step, :]))
        return torch.stack(outs, dim=1), h_state

    # 其实熟悉RNN的朋友应该知道, forward过程中的对每个时间点求输出还有一招使得计算量比较小的.不过上面的内容主要是为了呈现
    # PyTorch在动态构图上的优势, 所以我用了一个for loop 来搭建那套输出系统.下面介绍一个替换方式.使用 reshape 的方式整批计算.
    # def forward(self, x, h_state):
    #     r_out, h_state = self.rnn(x, h_state)
    #     r_out = r_out.view(-1, 32)
    #     outs = self.out(r_out)
    #     return outs.view(-1, 32, TIME_STEP), h_state

rnn = RNN()
print(rnn)
"""
RNN(
  (rnn): RNN(1, 32, batch_first=True)
  (out): Linear(in_features=32, out_features=1, bias=True)
)
"""

optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)   # optimize all rnn parameters
loss_func = nn.MSELoss()

h_state = None   # 要使用初始 hidden state, 可以设成 None

for step in range(100):
    start, end = step * np.pi, (step+1)*np.pi   # time steps
    # sin 预测 cos
    steps = np.linspace(start, end, TIME_STEP, dtype=np.float32, endpoint=False)  # float32 for converting torch FloatTensor
    x_np = np.sin(steps)    # float32 for converting torch FloatTensor
    y_np = np.cos(steps)

    # 原来只有一维的数据，利用np.newaxis，np.newaxis的作用就是在这一位置增加一个一维，这一位置指的是np.newaxis所在的位置
    x = torch.from_numpy(x_np[np.newaxis, :, np.newaxis])    # shape (batch, time_step, input_size)  （1，10，1）
    y = torch.from_numpy(y_np[np.newaxis, :, np.newaxis])

    prediction, h_state = rnn(x, h_state)   # rnn 对于每个 step 的 prediction, 还有最后一个 step 的 h_state
    # !!  下一步十分重要 !!
    h_state = h_state.data  # 要把 h_state 重新包装一下才能放入下一个 iteration, 不然会报错

    loss = loss_func(prediction, y)     # cross entropy loss
    optimizer.zero_grad()               # clear gradients for this training step
    loss.backward()                     # backpropagation, compute gradients
    optimizer.step()                    # apply gradients
    # plotting
    plt.plot(steps, y_np.flatten(), 'r-')
    plt.plot(steps, prediction.data.numpy().flatten(), 'b-')
    plt.draw(); plt.pause(0.05)

plt.ioff()
plt.show()

十五、GAN生成对抗网络

https://mofanpy.com/tutorials/machine-learning/torch//intro-GAN/

https://mofanpy.com/tutorials/machine-learning/torch/GAN/

　　我的一句话介绍 GAN 就是: Generator 是新手画家, Discriminator 是新手鉴赏家, 你是高级鉴赏家. 你将著名画家的品和新手画家的作品都给新手鉴赏家评定, 并告诉新手鉴赏家哪些是新手画家画的, 哪些是著名画家画的, 新手鉴赏家就慢慢学习怎么区分新手画家和著名画家的画, 但是新手画家和新手鉴赏家是好朋友, 新手鉴赏家会告诉新手画家要怎么样画得更像著名画家, 新手画家就能将自己的突然来的灵感 (random noise) 画得更像著名画家。

　　下面是本节内容的效果, 绿线的变化是新手画家慢慢学习如何踏上画家之路的过程. 而能被认定为著名的画作在 upper bound 和 lower bound 之间.。

1、超参数设定

　　新手画家 (Generator) 在作画的时候需要有一些灵感 (random noise), 我们这些灵感的个数定义为 N_IDEAS. 而一幅画需要有一些规格, 我们将这幅画的画笔数定义一下, N_COMPONENTS 就是一条一元二次曲线(这幅画画)上的点个数. 为了进行批训练, 我们将一整批话的点都规定一下(PAINT_POINTS).

import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

torch.manual_seed(1)
np.random.seed(1)

# 超参数
BATCH_SIZE = 64
LR_G = 0.0001
LR_D = 0.0001
N_IDEAS = 5 # think of this as number of ideas for generating an art work (Generator)
ART_COMPONENTS = 15   # 一条一元二次曲线(这幅画画)上的点个数 it could be total point G can draw in the canvas
PAINT_POINTS = np.vstack([np.linspace(-1,1,ART_COMPONENTS) for  _ in range(BATCH_SIZE)])

2、来自著名艺术家的绘画（真实目标）

def artist_works():   # 来自著名艺术家的绘画（真实目标）
    a = np.random.uniform(1, 2, size=BATCH_SIZE)[:, np.newaxis]
    paintings = a * np.power(PAINT_POINTS, 2) + (a-1)
    paintings = torch.from_numpy(paintings).float()
    return paintings

3、GAN网络

这里会创建两个神经网络, 分别是 Generator (新手画家), Discriminator(新手鉴赏家). G 会拿着自己的一些灵感当做输入, 输出一元二次曲线上的点 (G 的画).

D 会接收一幅画作 (一元二次曲线), 输出这幅画作到底是不是著名画家的画(是著名画家的画的概率).

G = nn.Sequential(                      # Generator
    nn.Linear(N_IDEAS, 128),            # random ideas (could from normal distribution)
    nn.ReLU(),
    nn.Linear(128, ART_COMPONENTS),     # making a painting from these random ideas
)

D = nn.Sequential(                      # Discriminator
    nn.Linear(ART_COMPONENTS, 128),     # receive art work either from the famous artist or a newbie like G
    nn.ReLU(),
    nn.Linear(128, 1),
    nn.Sigmoid(),                       # tell the probability that the art work is made by artist
)

opt_D = torch.optim.Adam(D.parameters(), lr=LR_D)
opt_G = torch.optim.Adam(G.parameters(), lr=LR_G)
# 有弹幕说 RMSprop 比较好

4、训练

接着我们来同时训练 D 和 G. 训练之前, 我们来看看G作画的原理. G 首先会有些灵感, G_ideas 就会拿到这些随机灵感 (可以是正态分布的随机数), 然后 G 会根据这些灵感画画. 接着我们拿着著名画家的画和 G 的画, 让 D 来判定这两批画作是著名画家画的概率

for step in range(10000):
    artist_paintings = artist_works()  # real painting from artist
    G_ideas = torch.randn(BATCH_SIZE, N_IDEAS, requires_grad=True)  # random ideas

    G_paintings = G(G_ideas)  # fake painting from G (random ideas)
    prob_artist1 = D(G_paintings)  # D try to reduce this prob
    G_loss = torch.mean(torch.log(1. - prob_artist1))
    opt_G.zero_grad()
    G_loss.backward()
    opt_G.step()

    prob_artist0 = D(artist_paintings)  # D try to increase this prob   D尝试增加这个概率
    prob_artist1 = D(G_paintings.detach())  # D try to reduce this prob D尝试减少这个概率

然后计算有多少来之画家的画猜对了, 有多少来自 G 的画猜对了, 我们想最大化这些猜对的次数. 这也就是 log(D(x)) + log(1-D(G(z)) 在论文中的形式. 而因为 torch 中提升参数的形式是最小化误差, 那我们把最大化 score 转换成最小化 loss, 在两个 score 的合的地方加一个符号就好. 而 G 的提升就是要减小 D 猜测 G 生成数据的正确率, 也就是减小 D_score1.

    D_loss = - torch.mean(torch.log(prob_artist0) + torch.log(1. - prob_artist1))
    G_loss = torch.mean(torch.log(1. - prob_artist1))

最后我们在根据 loss 提升神经网络就好了.

    opt_D.zero_grad()
    D_loss.backward(retain_graph=True)      # retain_graph 这个参数是为了再次使用计算图纸
    opt_D.step()

    opt_G.zero_grad()
    G_loss.backward()
    opt_G.step()

5、完整代码

import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

torch.manual_seed(1)
np.random.seed(1)

# 超参数
BATCH_SIZE = 64
LR_G = 0.0001
LR_D = 0.0001
N_IDEAS = 5 # think of this as number of ideas for generating an art work (Generator)
ART_COMPONENTS = 15   # 一条一元二次曲线(这幅画画)上的点个数 it could be total point G can draw in the canvas
PAINT_POINTS = np.vstack([np.linspace(-1,1,ART_COMPONENTS) for  _ in range(BATCH_SIZE)])

# show our beautiful painting range
# plt.plot(PAINT_POINTS[0], 2 * np.power(PAINT_POINTS[0], 2) + 1, c='#74BCFF', lw=3, label='upper bound')
# plt.plot(PAINT_POINTS[0], 1 * np.power(PAINT_POINTS[0], 2) + 0, c='#FF9359', lw=3, label='lower bound')
# plt.legend(loc='upper right')
# plt.show()

def artist_works():   # 来自著名艺术家的绘画（真实目标）
    a = np.random.uniform(1, 2, size=BATCH_SIZE)[:, np.newaxis]
    paintings = a * np.power(PAINT_POINTS, 2) + (a-1)
    paintings = torch.from_numpy(paintings).float()
    return paintings


G = nn.Sequential(                      # Generator
    nn.Linear(N_IDEAS, 128),            # random ideas (could from normal distribution)
    nn.ReLU(),
    nn.Linear(128, ART_COMPONENTS),     # making a painting from these random ideas
)

D = nn.Sequential(                      # Discriminator
    nn.Linear(ART_COMPONENTS, 128),     # receive art work either from the famous artist or a newbie like G
    nn.ReLU(),
    nn.Linear(128, 1),
    nn.Sigmoid(),                       # tell the probability that the art work is made by artist
)

opt_D = torch.optim.Adam(D.parameters(), lr=LR_D)
opt_G = torch.optim.Adam(G.parameters(), lr=LR_G)
# 有弹幕说 RMSprop 比较好

plt.ion()

for step in range(10000):
    artist_paintings = artist_works()  # real painting from artist
    G_ideas = torch.randn(BATCH_SIZE, N_IDEAS, requires_grad=True)  # random ideas

    G_paintings = G(G_ideas)  # fake painting from G (random ideas)
    prob_artist1 = D(G_paintings)  # D try to reduce this prob
    G_loss = torch.mean(torch.log(1. - prob_artist1))
    opt_G.zero_grad()
    G_loss.backward()
    opt_G.step()

    prob_artist0 = D(artist_paintings)  # D try to increase this prob   D尝试增加这个概率
    prob_artist1 = D(G_paintings.detach())  # D try to reduce this prob D尝试减少这个概率
    D_loss = - torch.mean(torch.log(prob_artist0) + torch.log(1. - prob_artist1))
    opt_D.zero_grad()
    D_loss.backward(retain_graph=True)  # reusing computational graph
    opt_D.step()

    if step % 50 == 0:  # plotting
        plt.cla()
        plt.plot(PAINT_POINTS[0], G_paintings.data.numpy()[0], c='#4AD631', lw=3, label='Generated painting', )
        plt.plot(PAINT_POINTS[0], 2 * np.power(PAINT_POINTS[0], 2) + 1, c='#74BCFF', lw=3, label='upper bound')
        plt.plot(PAINT_POINTS[0], 1 * np.power(PAINT_POINTS[0], 2) + 0, c='#FF9359', lw=3, label='lower bound')
        plt.text(-.5, 2.3, 'D accuracy=%.2f (0.5 for D to converge)' % prob_artist0.data.numpy().mean(),
                 fontdict={'size': 13})
        plt.text(-.5, 2, 'D score= %.2f (-1.38 for G to converge)' % -D_loss.data.numpy(), fontdict={'size': 13})
        plt.ylim((0, 3));
        plt.legend(loc='upper right', fontsize=10);
        plt.draw();
        plt.pause(0.01)

plt.ioff()
plt.show()

十六、GPU 加速运算

import os
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision      # 数据库模块

torch.manual_seed(1)    # reproducible

os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # 使用编号为1，2号的GPU

# Hyper Parameters
EPOCH = 1           # 训练整批数据多少次, 为了节约时间, 我们只训练一次
BATCH_SIZE = 50
LR = 0.001          # 学习率
DOWNLOAD_MNIST = False  # 如果你已经下载好了mnist数据就写上 False

if not(os.path.exists('./mnist/')) or not os.listdir('./mnist/'):
    # not mnist dir or mnist is empyt dir
    DOWNLOAD_MNIST = True


# Mnist 手写数字
train_data = torchvision.datasets.MNIST(
    root='./mnist/',    # 保存或者提取位置
    train=True,  # this is training data
    transform=torchvision.transforms.ToTensor(),    # 转换 PIL.Image or numpy.ndarray 成
                                                    # torch.FloatTensor (C x H x W), 训练的时候 normalize 成 [0.0, 1.0] 区间
    download=DOWNLOAD_MNIST,          # 没下载就下载, 下载了就不用再下了
)

test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)


# 批训练 50samples, 1 channel, 28x28 (50, 1, 28, 28)
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

# 为了节约时间, 我们测试时只测试前2000个

# ############# 这里加cuda ###############
test_x = torch.unsqueeze(test_data.data, dim=1).type(torch.FloatTensor)[:2000].cuda()/255.   # shape from (2000, 28, 28) to (2000, 1, 28, 28), value in range(0,1)
test_y = test_data.targets[:2000].cuda()

# CNN模型搭建
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(  # input shape (1,28,28)
            nn.Conv2d(
                in_channels=1,    # 输入高度
                out_channels=16,  # n_filters 输出高度
                kernel_size=5,    # 卷积核大小  filter size
                stride=1,         # 卷积核步进或者说步长，filter movement/step
                padding=2,        # 如果想要 con2d 出来的图片长宽没有变化, padding=(kernel_size-1)/2 当 stride=1

            ), ## output shape (16, 28, 28)
            nn.ReLU(),            # activation
            nn.MaxPool2d(kernel_size=2), #在2*2 空间里向下采样，output shape (16, 14, 14)
        )
        self.conv2 = nn.Sequential(   # input shape (16,14,14)
            nn.Conv2d(16,32,5,1,2),   # output shape(32,14,14)
            nn.ReLU(),
            nn.MaxPool2d(2),          # output size (32,7,7)
        )
        self.out = nn.Linear(32*7*7,10)  # 全连接层输出10个类

    def forward(self,x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1) # 展平多维的卷积图成 (batch_size, 32 * 7 * 7)
        output = self.out(x)
        return output, x


def train_save():
    cnn = CNN()
    # ############# 这里加cuda ###############
    cnn.cuda()###将所有模型参数和缓冲区转移到GPU

    print(cnn)

    optimizer = torch.optim.Adam(cnn.parameters(), lr=LR) # 优化整一个CNN的参数
    loss_func = nn.CrossEntropyLoss()



    # training and testing
    for epoch in range(EPOCH):
        for step, (b_x, b_y) in enumerate(train_loader):   # gives batch data, normalize x when iterate train_loader
            # !!!!!!!! 这里有修改 !!!!!!!!! #
            b_x = b_x.cuda()   # Tensor on GPU
            b_y = b_y.cuda()   # Tensor on GPU

            output = cnn(b_x)[0]            # cnn output
            loss = loss_func(output, b_y)   # cross entropy loss
            optimizer.zero_grad()           # clear gradients for this training step
            loss.backward()                 # backpropagation, compute gradients
            optimizer.step()                # apply gradients

            if step % 50 == 0:
                test_output, last_layer = cnn(test_x)

                # !!!!!!!! 这里有修改 !!!!!!!!! #
                pred_y = torch.max(test_output, 1)[1].cuda().data   # 将操作放去 GPU
                accuracy =  torch.sum(pred_y == test_y).type(torch.FloatTensor) / test_y.size(0)
                print('Epoch: ', epoch, '| train loss: %.4f' % loss.data.cpu().numpy(), '| test accuracy: %.2f' % accuracy)


    torch.save(cnn, './mnist_net/cnn_classification_net.pkl')  # 保存整个网络
    torch.save(cnn.state_dict(), './mnist_net/cnn_classification_net_params.pkl')  # 只保存网络中的参数 (速度快, 占内存少)

    """
    ...
    Epoch:  0 | train loss: 0.0306 | test accuracy: 0.97
    Epoch:  0 | train loss: 0.0147 | test accuracy: 0.98
    Epoch:  0 | train loss: 0.0427 | test accuracy: 0.98
    Epoch:  0 | train loss: 0.0078 | test accuracy: 0.98
    """

    # print 10 predictions from test data
    test_output, _ = cnn(test_x[:10])

    # !!!!!!!! 这里有修改 !!!!!!!!! #
    pred_y = torch.max(test_output, 1)[1].cuda().data
    print(pred_y, 'prediction number')
    print(test_y[:10], 'real number')
    """
    [7 2 1 0 4 1 4 9 5 9] prediction number
    [7 2 1 0 4 1 4 9 5 9] real number
    """


# 这种方式将会提取整个神经网络, 网络大的时候可能会比较慢.
def restore_net():
    # restore entire net1 to net2
    net2 = torch.load('./mnist_net/cnn_classification_net.pkl')
    net2.cuda()
    test_output, _ = net2(test_x[:20])
    pred_y = torch.max(test_output, 1)[1].cuda().data
    print(pred_y, 'prediction number')
    print(test_y[:20], 'real number')

if __name__ == '__main__':
    GO_train = False
    if GO_train:
        train_save()
    else:
        restore_net()

查看全文

相关阅读:
Python 包的概念
 EXCEL基础篇（二）
EXCEL基础篇（一）
JavaScript(四)Bom
JavaScript(三)Dom查找、设置标签属性及内容、绑定事件
 erlang并发编程（二）
OTP&ETS
erlang中http请求
 erlang证书加密
 erlang并发编程

原文地址：https://www.cnblogs.com/aimoboshu/p/13846744.html