目录
Pytorch_模型转Caffe(二)解析Pytorch模型*.pth
1. Pytorch模型保存与读取
a. 保存、加载权重
# Save the model: only the weights (the state_dict) are written to disk.
torch.save(model_object.state_dict(), './weights.pth')
# Load the model: construct the network first, then load the saved weights into it.
model = AlexNet(**kwargs)
model.load_state_dict(torch.load('./weights.pth'))
b.保存、加载网络和权重
# Save the model: the whole model object (network structure and weights) is written to disk.
torch.save(model_object, './model.pth')
# Load the model: torch.load returns the saved model object directly,
# so there is no separate "create the network, then load weights" step here.
model = torch.load('./model.pth')
2. Pytorch模型结构
Pytorch生成的文件为.pth或.pt
1). summary查看网络整体结构
- 首先安装torchsummary
pip install torchsummary
- 以AlexNet为例,加载预训练模型,查看网络结构
import torch
from torch.autograd import Variable
from torchvision.models.alexnet import alexnet
from torchsummary import summary
if __name__ == '__main__':
    # Print a per-layer summary (output shape and parameter count) of a
    # pretrained AlexNet using torchsummary.

    # Load AlexNet with pretrained weights; the keyword makes the meaning of
    # the boolean explicit.
    net = alexnet(pretrained=True)
    print(type(net))  # <class 'torchvision.models.alexnet.AlexNet'>

    # Use the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = net.to(device)

    # The input size is (channels, height, width); the batch dimension is
    # added by torchsummary and shown as -1 in the printed table.
    summary(model, (3, 227, 227))
"""
# 网络结构
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 56, 56] 23,296
ReLU-2 [-1, 64, 56, 56] 0
MaxPool2d-3 [-1, 64, 27, 27] 0
Conv2d-4 [-1, 192, 27, 27] 307,392
ReLU-5 [-1, 192, 27, 27] 0
MaxPool2d-6 [-1, 192, 13, 13] 0
Conv2d-7 [-1, 384, 13, 13] 663,936
ReLU-8 [-1, 384, 13, 13] 0
Conv2d-9 [-1, 256, 13, 13] 884,992
ReLU-10 [-1, 256, 13, 13] 0
Conv2d-11 [-1, 256, 13, 13] 590,080
ReLU-12 [-1, 256, 13, 13] 0
MaxPool2d-13 [-1, 256, 6, 6] 0
AdaptiveAvgPool2d-14 [-1, 256, 6, 6] 0
Dropout-15 [-1, 9216] 0
Linear-16 [-1, 4096] 37,752,832
ReLU-17 [-1, 4096] 0
Dropout-18 [-1, 4096] 0
Linear-19 [-1, 4096] 16,781,312
ReLU-20 [-1, 4096] 0
Linear-21 [-1, 1000] 4,097,000
================================================================
Total params: 61,100,840
Trainable params: 61,100,840
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.59
Forward/backward pass size (MB): 8.49
Params size (MB): 233.08
Estimated Total Size (MB): 242.16
----------------------------------------------------------------
"""
2). net.state_dict()解析权重值
net.state_dict()返回有序字典(OrderedDict),key为参数名称(如features.0.weight、features.0.bias),value为对应的权重(weight)或偏置(bias)张量
- 只有带有参数(或buffer)的layer才会出现在模型的state_dict中,例如卷积层、线性层、BN层;池化层、ReLU、Dropout等没有参数的层不会出现在state_dict中
import torch
from torch.autograd import Variable
from torchvision.models.alexnet import alexnet
from torchsummary import summary
if __name__ == '__main__':
    # Print the name and shape of every entry in a pretrained AlexNet's
    # state_dict.

    # Load AlexNet with pretrained weights.
    net = alexnet(pretrained=True)

    # Build the state dict once and reuse it instead of calling
    # net.state_dict() again on every loop iteration.
    state = net.state_dict()
    print(type(state))  # <class 'collections.OrderedDict'>

    # state_dict() contains parameters and persistent buffers. Layers that own
    # neither (pooling, ReLU, Dropout) contribute no entries, which is why only
    # the convolution and linear layers show up for AlexNet. BatchNorm layers
    # do have parameters and buffers, so they would appear in networks that
    # use them.
    for param_name, tensor in state.items():
        # param_name is the dictionary key, e.g. "features.0.weight".
        print(param_name, ' ', tensor.size())
"""
features.0.weight torch.Size([64, 3, 11, 11])
features.0.bias torch.Size([64])
features.3.weight torch.Size([192, 64, 5, 5])
features.3.bias torch.Size([192])
features.6.weight torch.Size([384, 192, 3, 3])
features.6.bias torch.Size([384])
features.8.weight torch.Size([256, 384, 3, 3])
features.8.bias torch.Size([256])
features.10.weight torch.Size([256, 256, 3, 3])
features.10.bias torch.Size([256])
classifier.1.weight torch.Size([4096, 9216])
classifier.1.bias torch.Size([4096])
classifier.4.weight torch.Size([4096, 4096])
classifier.4.bias torch.Size([4096])
classifier.6.weight torch.Size([1000, 4096])
classifier.6.bias torch.Size([1000])
"""
3). net.named_parameters()获取layer和weight
import torch
from torch.autograd import Variable
from torchvision.models.alexnet import alexnet
from torchsummary import summary
if __name__ == '__main__':
    # Print the name and shape of every parameter of a pretrained AlexNet.

    # Load AlexNet with pretrained weights.
    net = alexnet(pretrained=True)

    # named_parameters() yields (name, parameter) pairs; unpack them directly
    # instead of indexing into the tuple.
    for layer_name, param in net.named_parameters():
        print(layer_name, ' ', param.size())
"""
features.0.weight torch.Size([64, 3, 11, 11])
features.0.bias torch.Size([64])
features.3.weight torch.Size([192, 64, 5, 5])
features.3.bias torch.Size([192])
features.6.weight torch.Size([384, 192, 3, 3])
features.6.bias torch.Size([384])
features.8.weight torch.Size([256, 384, 3, 3])
features.8.bias torch.Size([256])
features.10.weight torch.Size([256, 256, 3, 3])
features.10.bias torch.Size([256])
classifier.1.weight torch.Size([4096, 9216])
classifier.1.bias torch.Size([4096])
classifier.4.weight torch.Size([4096, 4096])
classifier.4.bias torch.Size([4096])
classifier.6.weight torch.Size([1000, 4096])
classifier.6.bias torch.Size([1000])
"""
4). net.named_modules()
import torch
from torch.autograd import Variable
from torchvision.models.alexnet import alexnet
from torchsummary import summary
if __name__ == '__main__':
    # Print every module of a pretrained AlexNet together with its dotted name.

    # Load AlexNet with pretrained weights.
    net = alexnet(pretrained=True)

    # named_modules() walks the module tree: the first item is the root network
    # itself with an empty name, followed by each container and each leaf
    # layer (e.g. "features", "features.0", ...).
    for name, layer in net.named_modules():
        print(name, '-->', layer)
"""
--> AlexNet(
(features): Sequential(
(0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
(1): ReLU(inplace=True)
(2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(4): ReLU(inplace=True)
(5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(7): ReLU(inplace=True)
(8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): ReLU(inplace=True)
(10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace=True)
(12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
(classifier): Sequential(
(0): Dropout(p=0.5, inplace=False)
(1): Linear(in_features=9216, out_features=4096, bias=True)
(2): ReLU(inplace=True)
(3): Dropout(p=0.5, inplace=False)
(4): Linear(in_features=4096, out_features=4096, bias=True)
(5): ReLU(inplace=True)
(6): Linear(in_features=4096, out_features=1000, bias=True)
)
)
features --> Sequential(
(0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
(1): ReLU(inplace=True)
(2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(4): ReLU(inplace=True)
(5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
(6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(7): ReLU(inplace=True)
(8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): ReLU(inplace=True)
(10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace=True)
(12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
)
features.0 --> Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
features.1 --> ReLU(inplace=True)
features.2 --> MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
features.3 --> Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
features.4 --> ReLU(inplace=True)
features.5 --> MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
features.6 --> Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
features.7 --> ReLU(inplace=True)
features.8 --> Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
features.9 --> ReLU(inplace=True)
features.10 --> Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
features.11 --> ReLU(inplace=True)
features.12 --> MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
avgpool --> AdaptiveAvgPool2d(output_size=(6, 6))
classifier --> Sequential(
(0): Dropout(p=0.5, inplace=False)
(1): Linear(in_features=9216, out_features=4096, bias=True)
(2): ReLU(inplace=True)
(3): Dropout(p=0.5, inplace=False)
(4): Linear(in_features=4096, out_features=4096, bias=True)
(5): ReLU(inplace=True)
(6): Linear(in_features=4096, out_features=1000, bias=True)
)
classifier.0 --> Dropout(p=0.5, inplace=False)
classifier.1 --> Linear(in_features=9216, out_features=4096, bias=True)
classifier.2 --> ReLU(inplace=True)
classifier.3 --> Dropout(p=0.5, inplace=False)
classifier.4 --> Linear(in_features=4096, out_features=4096, bias=True)
classifier.5 --> ReLU(inplace=True)
classifier.6 --> Linear(in_features=4096, out_features=1000, bias=True)
"""