  • PyTorch Transfer Learning: Building Networks from Pretrained VGG16, ResNet18, and MobileNet V2

    This transfer-learning walkthrough trains a classifier for 685 fish species, using the WildFish dataset:

    Baidu Cloud link: https://pan.baidu.com/s/1_kHg87LghgWT9_mVawGdYQ

    Extraction code: a9pl

    Import the required packages:

    import torch
    import torch.nn as nn
    import torch.optim as optim
    import torchvision
    import torchvision.transforms as transforms
    import numpy as np
    from torchvision import datasets, models, transforms
    import os
    import matplotlib.pyplot as plt
    import pandas as pd
    from PIL import Image
    from torch.utils.data import Dataset

    Split the dataset into Training and Testing parts:

    # Build a readable file list
    # The dataset ships with Training and Testing parts split 5:5; here they are re-split 8:2
    tb1 = pd.read_table('./train.txt', sep=' ', names=['path', 'label'])
    tb1['path'] = 'E:\\data\\wildfish\\' + tb1['path']
    tb2 = pd.read_table('./val.txt', sep=' ', names=['path', 'label'])
    tb2['path'] = 'E:\\data\\wildfish\\' + tb2['path']
    # Merge the two lists into one
    tb = pd.concat([tb1, tb2], sort=True).reset_index(drop=True)
    tb = tb.loc[:, ['path', 'label']]
    # Rows whose index ends in 9 or 0 become Testing, the rest become Training
    train_rows = [i for i in range(tb.shape[0]) if (i % 10 != 9 and i % 10 != 0)]
    test_rows = [i for i in range(tb.shape[0]) if (i % 10 == 9 or i % 10 == 0)]
    train_data = tb.iloc[train_rows]
    test_data = tb.iloc[test_rows]
    # Save the two splits
    train_data.to_csv(r'./train_path.txt', sep=' ', header=None, index=False)
    test_data.to_csv(r'./test_path.txt', sep=' ', header=None, index=False)

    The resulting files, which PyTorch will read, contain one image file name (as an absolute path) and one class label per line.
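
    As a quick sanity check, the split file can be read back with pandas (a minimal optional sketch; the column names are only for display):

    # Optional check: peek at the first few rows of the generated split file
    check = pd.read_table('./train_path.txt', sep=' ', names=['path', 'label'])
    print(check.head())
    print('training samples:', len(check))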

    Define some hyperparameters:

    # Use the GPU if one is available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    EPOCH = 10         # number of passes over the dataset
    pre_epoch = 0      # number of epochs already completed
    BATCH_SIZE = 128   # batch size
    LR = 0.0001        # learning rate

    Preprocess the data

    # Prepare the dataset transforms
    transform_train = transforms.Compose([
        transforms.Resize((150, 150)),
        transforms.RandomHorizontalFlip(0.5),  # flip horizontally with probability 0.5
        transforms.RandomVerticalFlip(0.5),    # flip vertically with probability 0.5
        transforms.RandomRotation(30),
        transforms.RandomCrop(128, padding=4),
    #     transforms.ColorJitter(brightness=0.5),
    #     transforms.ColorJitter(contrast=0),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # per-channel (R, G, B) mean and std for normalization
    ])
    
    transform_test = transforms.Compose([
        transforms.Resize((128, 128)),  # resize to the training crop size
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
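
    To confirm the transforms produce the expected tensor, a minimal sketch like the following can be run on a single image (it assumes the train_data DataFrame from the split step is still in scope):

    # Optional: apply the training transform to one image and inspect the result
    sample = Image.open(train_data.iloc[0]['path']).convert('RGB')
    x = transform_train(sample)
    print(x.shape)                          # expected: torch.Size([3, 128, 128])
    print(x.min().item(), x.max().item())   # values roughly in [-1, 1] after Normalize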

    Put the data into a train loader and a test loader

    class MyDataset(Dataset):
        def __init__(self, txt_path, transform=None, target_transform=None):
            # read "path label" pairs, one per line
            imgs = []
            with open(txt_path, 'r', encoding='utf-8') as fh:
                for line in fh:
                    line = line.rstrip()
                    words = line.split()
                    imgs.append((words[0], int(words[1])))
            self.imgs = imgs
            self.transform = transform
            self.target_transform = target_transform

        def __getitem__(self, index):
            fn, label = self.imgs[index]
            img = Image.open(fn).convert('RGB')
            if self.transform is not None:
                img = self.transform(img)
            return img, label

        def __len__(self):
            return len(self.imgs)
    
    train_datasets = MyDataset(r'./train_path.txt', transform=transform_train)
    test_datasets = MyDataset(r'./test_path.txt', transform=transform_test)
    # On Windows 10 num_workers has to stay at 0; on other systems it can be increased to speed up loading
    trainloader = torch.utils.data.DataLoader(train_datasets, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
    testloader = torch.utils.data.DataLoader(test_datasets, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
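
    Before training, it can help to confirm the split sizes and the shape of one batch; this is a minimal optional check:

    # Optional: dataset sizes (roughly 8:2) and one batch's tensor shapes
    print(len(train_datasets), len(test_datasets))
    images, labels = next(iter(trainloader))
    print(images.shape, labels.shape)   # expected: torch.Size([128, 3, 128, 128]) torch.Size([128])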

    Code for viewing a few images; skipping it does not affect the rest of the training

    # View a few images
    to_pil_image = transforms.ToPILImage()
    cnt = 0
    for image, label in trainloader:
        if cnt >= 3:  # only show 3 images
            break
        print(label)  # print the labels
     
        img = image[0]  # plt.imshow() expects a 3-D tensor, so take image[0] to drop the batch dimension
        img = img.numpy()  # convert the FloatTensor to an ndarray
        img = np.transpose(img, (1, 2, 0))  # move the channel dimension to the end
     
        # show the image
        plt.imshow(img)
        plt.show()
        cnt += 1

    Use the pretrained VGG16 model

    class VGGNet(nn.Module):
        def __init__(self, num_classes=685):   # 685 fish classes; change this for other tasks
            super(VGGNet, self).__init__()
            net = models.vgg16(pretrained=True)   # load the pretrained VGG16 weights
            net.classifier = nn.Sequential()  # empty the original classifier; a new one is defined below
            self.features = net  # keep VGG16's feature extractor
            self.classifier = nn.Sequential(    # define our own classification head
                    nn.Linear(512 * 7 * 7, 1024),  # 512*7*7 is fixed by VGG16's pooled feature map; the 1024 hidden units can be tuned
                    nn.ReLU(True),
                    nn.Dropout(0.3),
                    nn.Linear(1024, 1024),
                    nn.ReLU(True),
                    nn.Dropout(0.3),
                    nn.Linear(1024, num_classes),
            )
    
        def forward(self, x):
            x = self.features(x)  # pretrained feature extraction
            x = x.view(x.size(0), -1)
            x = self.classifier(x)  # custom classification head
            return x
    net = VGGNet().to(device)
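
    A dummy forward pass is an easy way to confirm that the new head lines up with VGG16's pooled feature size (512*7*7 = 25088, kept fixed by its adaptive average pooling); this is just a sanity-check sketch:

    # Optional: dummy forward pass to verify the output dimension
    with torch.no_grad():
        dummy = torch.randn(2, 3, 128, 128).to(device)
        print(net(dummy).shape)   # expected: torch.Size([2, 685])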

    Use the pretrained ResNet18 model

    class ResNet(nn.Module):
        def __init__(self, num_classes=685):   # 685 fish classes; change this for other tasks
            super(ResNet, self).__init__()
            net = models.resnet18(pretrained=True)   # load the pretrained ResNet18 weights
            # Note: ResNet18's classification layer is called `fc`, so the line below does not
            # remove it; the pretrained 1000-way output is kept and the new head is stacked on top.
            net.classifier = nn.Sequential()
            self.features = net  # pretrained ResNet18 (including its 1000-way fc layer)
            self.classifier = nn.Sequential(    # define our own classification head
                    nn.Linear(1000, 1000),  # 1000 matches ResNet18's output; the hidden size can be tuned
                    nn.ReLU(True),
                    nn.Dropout(0.5),
    #                 nn.Linear(1024, 1024),
    #                 nn.ReLU(True),
    #                 nn.Dropout(0.3),
                    nn.Linear(1000, num_classes),
            )
    
        def forward(self, x):
            x = self.features(x)
            x = x.view(x.size(0), -1)
            x = self.classifier(x)
            return x
    net = ResNet().to(device)
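
    Because torchvision's ResNet18 keeps its pretrained 1000-way fc layer here, the new head sits on top of class logits rather than the 512-dimensional backbone features. An alternative sketch (a variant of mine, assuming a PyTorch version that provides nn.Identity) replaces fc directly:

    # Hypothetical alternative: drop ResNet18's fc layer and classify the 512-d features
    class ResNetFC(nn.Module):
        def __init__(self, num_classes=685):
            super(ResNetFC, self).__init__()
            net = models.resnet18(pretrained=True)
            net.fc = nn.Identity()          # keep the 512-d pooled features
            self.features = net
            self.classifier = nn.Sequential(
                    nn.Linear(512, 512),
                    nn.ReLU(True),
                    nn.Dropout(0.5),
                    nn.Linear(512, num_classes),
            )
    
        def forward(self, x):
            x = self.features(x)
            x = self.classifier(x)
            return x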

    Use the pretrained MobileNet V2 model

    class MobileNet(nn.Module):
        def __init__(self, num_classes=685):   # 685 fish classes; change this for other tasks
            super(MobileNet, self).__init__()
            net = models.mobilenet_v2(pretrained=True)   # load the pretrained MobileNet V2 weights
            net.classifier = nn.Sequential()  # empty the original classifier; a new one is defined below
            self.features = net  # keep MobileNet V2's feature extractor
            self.classifier = nn.Sequential(    # define our own classification head
                    nn.Linear(1280, 1000),  # 1280 is fixed by MobileNet V2's pooled feature size; the 1000 hidden units can be tuned
                    nn.ReLU(True),
                    nn.Dropout(0.5),
    #                 nn.Linear(1024, 1024),
    #                 nn.ReLU(True),
    #                 nn.Dropout(0.3),
                    nn.Linear(1000, num_classes),
            )
    
        def forward(self, x):
            x = self.features(x)
            x = x.view(x.size(0), -1)
            x = self.classifier(x)
            return x
    net = MobileNet().to(device)
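
    A small helper (an addition for illustration, not part of the original code) makes it easy to compare the trainable parameter counts of the three models:

    # Count the trainable parameters of the current model
    def count_params(model):
        return sum(p.numel() for p in model.parameters() if p.requires_grad)
    
    print('trainable parameters:', count_params(net))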

    Choose the optimizer and the loss function

    optimizer = optim.Adam(net.parameters(), lr=LR)
    criterion = nn.CrossEntropyLoss()
    criterion.to(device=device)

    Define two helpers: one freezes the feature layers so that only the fully connected head is trained, the other unfreezes them so that all parameters are trained

    from collections.abc import Iterable
    def set_freeze_by_names(model, layer_names, freeze=True):
        if not isinstance(layer_names, Iterable):
            layer_names = [layer_names]
        for name, child in model.named_children():
            if name not in layer_names:
                continue
            for param in child.parameters():
                param.requires_grad = not freeze
    
    def freeze_by_names(model, layer_names):
        set_freeze_by_names(model, layer_names, True)
    
    def unfreeze_by_names(model, layer_names):
        set_freeze_by_names(model, layer_names, False)
    # Freeze the features layer (note the trailing comma: pass a tuple, not a bare string)
    freeze_by_names(net, ('features',))
    # Unfreeze the features layer again when fine-tuning
    unfreeze_by_names(net, ('features',))
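
    After calling either helper, a quick check like this sketch shows how many parameter tensors will actually receive gradients:

    # Optional: count frozen vs. trainable parameter tensors
    frozen = sum(1 for p in net.features.parameters() if not p.requires_grad)
    trainable = sum(1 for p in net.parameters() if p.requires_grad)
    print('frozen feature tensors:', frozen, '| trainable tensors:', trainable)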

    Define two lists to store the predicted and the true labels

    y_predict = []
    y_true = []
    # Without this, PIL raises an error on some truncated images in the dataset
    from PIL import ImageFile
    ImageFile.LOAD_TRUNCATED_IMAGES = True

    Training loop

    # Training
    print("Start Training!")
    for epoch in range(pre_epoch, EPOCH):
        print('\nEpoch: %d' % (epoch + 1))
        net.train()
        sum_loss = 0.0
        correct = 0.0
        total = 0.0
        for i, data in enumerate(trainloader, 0):
            # prepare the data
            length = len(trainloader)
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
    
            # forward + backward
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
    
            # print loss and accuracy after every batch
            sum_loss += loss.item()
            # top-5 classification accuracy
            maxk = max((1, 5))
            label_resize = labels.view(-1, 1)
            _, predicted = outputs.topk(maxk, 1, True, True)
            total += labels.size(0)
            correct += torch.eq(predicted, label_resize).cpu().sum().float().item()
            print('[epoch:%d, iter:%d] Loss: %.03f | Acc: %.3f%% '
                  % (epoch + 1, (i + 1 + epoch * length), sum_loss / (i + 1), 100. * correct / total))
        
        # evaluate on the test set after every epoch
        print("Waiting Test!")
        with torch.no_grad():
            net.eval()
            correct = 0
            total = 0
            for data in testloader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)
                outputs = net(images)
                # take the 5 highest-scoring classes (indices of outputs.data)
                maxk = max((1, 5))
                label_resize = labels.view(-1, 1)
                _, predicted = outputs.topk(maxk, 1, True, True)
                total += labels.size(0)
                correct += torch.eq(predicted, label_resize).cpu().sum().float().item()
                
                y_predict.append(predicted)
                y_true.append(labels)
            print('Test accuracy: %.3f%%' % (100 * correct / total))
            acc = 100. * correct / total
    print("Training Finished, TotalEPOCH=%d" % EPOCH)

    Save the model

    torch.save(net, './model/mobileNet freeze.pth')

    Load a saved model

    net = torch.load('./model/VGG16-2 freeze.pth')
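
    Since torch.save(net, ...) pickles the whole module, loading it later requires the model class (e.g. VGGNet or MobileNet) to be defined in the loading script. A more portable alternative, sketched here with a placeholder file name, is to save only the state_dict:

    # Alternative: save/load only the weights (the model must be constructed first)
    torch.save(net.state_dict(), './model/mobileNet_freeze_state.pth')
    
    net = MobileNet().to(device)
    net.load_state_dict(torch.load('./model/mobileNet_freeze_state.pth', map_location=device))
    net.eval()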

    Training procedure and results

    I first train for 10 epochs with the feature layers frozen, then unfreeze them and train for another 20 epochs. All three models reach roughly 98% accuracy on the training set and about 88% on the test set.
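
    As a rough sketch, that two-stage schedule can be driven like this (the epoch counts match the description above; the lower fine-tuning learning rate is an assumption, not the original setting):

    # Stage 1: train only the classifier head for 10 epochs
    freeze_by_names(net, ('features',))
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, net.parameters()), lr=1e-4)
    # ... run the training loop above with EPOCH = 10 ...
    
    # Stage 2: unfreeze the backbone and fine-tune everything for 20 more epochs
    unfreeze_by_names(net, ('features',))
    optimizer = optim.Adam(net.parameters(), lr=1e-5)   # smaller LR for fine-tuning (assumption)
    # ... run the training loop above with EPOCH = 20 ...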
