zoukankan      html  css  js  c++  java
  • MxNet 迁移学习实现深度学习分类

    利用MxNet实现图像分类任务

    在这里插入图片描述

    这篇文章将利用MxNet以及其前端gluon 实现一个完整的图像分类任务,其中主要包括以下几个方面:

    • 图像I/O
    • 搭建网络
    • 进行训练
    • 验证算法
    • 输出结果
    定义辅助函数
    损失函数
    验证
    数据I/O
    定义网络模型
    训练
    测试
    生成结果

    1. 训练数据I/O

    将处理好的训练数据读入,进行训练。

    训练数据基本按照每个类别对应一个子文件夹的形式组织,具体可以参考MXNet的数据I/O文档。

    1.1 程序的第一步,首先导入相关的包
    #import some packages
    import sys
    import collections
    import datetime			#用于计时
    import gluonbook as gb		#用于导入一些功能函数
    import math
    import numpy as np
    import mxnet as mx    		#mxnet
    from mxnet import autograd, gluon, init, nd, image    #导入自动梯度,gluon前端,图像等模块
    from mxnet.gluon import data as gdata, loss as gloss, model_zoo, nn   #导入模型相关模块
    import os
    import shutil     		#用于预处理复制文件
    import zipfile
    import matplotlib.pyplot as plt	#绘图工具导入
    

    在这里插入图片描述

    1.2 随后定义精度计算函数、图像增广函数等辅助函数
    # 图像增广和辅助函数
    # 计算 Average Precision
    def calculate_ap(labels, outputs):
        """Accumulate the reciprocal rank of the true class over several batches.

        Args:
            labels: iterable of label batches. Each batch exposes ``asnumpy()``
                and yields one class index per sample.
            outputs: iterable of score batches aligned with ``labels``. Each
                batch exposes ``asnumpy()`` and yields one score row per sample.

        Returns:
            A tuple ``(ap, cnt)``: ``ap`` is the sum over all samples of
            ``1 / (1 + position)``, where ``position`` is the 0-based position
            of the true class when the scores are sorted in descending order;
            ``cnt`` is the number of samples seen. ``ap / cnt`` is the average.

        Raises:
            ValueError: if a label is not a valid index into its score row.
        """
        cnt = 0
        ap = 0.
        for label, output in zip(labels, outputs):
            # The ``np.int`` alias was removed in NumPy 1.24; the builtin
            # ``int`` selects the same integer dtype.
            for lb, op in zip(label.asnumpy().astype(int), output.asnumpy()):
                # Class indices ordered from highest to lowest score.
                ranking = np.argsort(op)[::-1]
                position = list(ranking).index(int(lb))
                ap += 1.0 / (1 + position)
                cnt += 1
        return ((ap, cnt))
    
    # 训练集图片增广
    def transform_train(data, label):
        """Augment one training sample and return it in channel-first layout.

        The augmenter list is built with a 256 resize, a random crop to
        224x224, random mirroring, and mean/std normalization using the
        ImageNet statistics. The pixel values are scaled by 1/255 before the
        augmenters run.

        Returns:
            ``(image, label)`` where the image has been transposed with axes
            ``(2, 0, 1)`` and the label has been converted to a scalar.
        """
        augmenters = image.CreateAugmenter(
            data_shape=(3, 224, 224),
            resize=256,
            rand_crop=True,
            rand_mirror=True,
            mean=np.array([0.485, 0.456, 0.406]),
            std=np.array([0.229, 0.224, 0.225]),
        )
        # Scale the pixel values by 1/255 before the augmenters are applied.
        sample = data.astype('float32') / 255
        for augment in augmenters:
            sample = augment(sample)
        # Move the last axis (channels) to the front.
        sample = nd.transpose(sample, (2, 0, 1))
        scalar_label = nd.array([label]).asscalar()
        return (sample, scalar_label)
    
    # 验证集图片增广,没有随机裁剪和翻转
    def transform_val(data, label):
        """Preprocess one validation sample without random augmentation.

        Same pipeline as the training transform except that no random crop or
        random mirror is requested: scale by 1/255, build the augmenter list
        with a 256 resize, a 224x224 data shape and ImageNet mean/std
        normalization, then apply each augmenter in order.

        Returns:
            ``(image, label)`` where the image has been transposed with axes
            ``(2, 0, 1)`` (channel, height, width) and the label has been
            converted to a scalar.
        """
        augmenters = image.CreateAugmenter(
            data_shape=(3, 224, 224),
            resize=256,
            mean=np.array([0.485, 0.456, 0.406]),
            std=np.array([0.229, 0.224, 0.225]),
        )
        sample = data.astype('float32') / 255
        for augment in augmenters:
            sample = augment(sample)
        # Move the last axis (channels) to the front.
        sample = nd.transpose(sample, (2, 0, 1))
        scalar_label = nd.array([label]).asscalar()
        return (sample, scalar_label)
    
    # 在验证集上预测并评估
    def validate(net, val_data, ctx):
        """Run the network over the validation data and collect three metrics.

        Args:
            net: callable network applied to each data slice.
            val_data: iterable of ``(data, label)`` batches that supports
                ``len()``.
            ctx: passed as ``ctx_list`` to ``gluon.utils.split_and_load``.

        Returns:
            ``(accuracy, mean reciprocal-rank score, mean loss per batch)``.
        """
        accuracy = mx.metric.Accuracy()
        criterion = gluon.loss.SoftmaxCrossEntropyLoss()
        rank_total = 0.
        sample_total = 0
        loss_total = 0
        for batch in val_data:
            # Split the batch across the devices in ctx (uneven splits allowed).
            inputs = gluon.utils.split_and_load(
                batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
            targets = gluon.utils.split_and_load(
                batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
            predictions = [net(part) for part in inputs]
            accuracy.update(targets, predictions)
            # Mean loss of each slice, averaged over the slices of this batch.
            slice_losses = [criterion(pred, target).mean().asscalar()
                            for pred, target in zip(predictions, targets)]
            loss_total += sum(slice_losses) / len(slice_losses)
            batch_rank, batch_count = calculate_ap(targets, predictions)
            rank_total += batch_rank
            sample_total += batch_count
        val_acc = accuracy.get()[1]
        return (val_acc, rank_total / sample_total, loss_total / len(val_data))
    

    在这里插入图片描述

    1.3 读取训练和验证数据

    这时候可以利用gluon的内置函数来对数据进行读取了,只需要输入对应数据的文件夹即可,参考MXNet I/O

    # Read the data folders (the article describes one sub-folder per class)
    train_set = gdata.vision.ImageFolderDataset('./train_dis/',flag=1)
    valid_set = gdata.vision.ImageFolderDataset('./valid_dis/',flag=1)
    # Check the datasets and their classes
    print(train_set)  # prints the dataset object itself (see the captured output below), not the number of images
    print(train_set.synsets)  # class names (the dataset also has an `items` attribute); check that the classes are as expected
    print(valid_set)
    print(valid_set.synsets)  # class names of the validation set (also has an `items` attribute)
    
    <mxnet.gluon.data.vision.datasets.ImageFolderDataset object at 0x7fb3d6e06710>
    ['0', '1', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '2', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '3', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '4', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '5', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '6', '60', '7', '8', '9']
    <mxnet.gluon.data.vision.datasets.ImageFolderDataset object at 0x7fb3d6e06668>
    ['0', '1', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '2', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '3', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '4', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '5', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '6', '60', '7', '8', '9']
    

    得到输入序列后,将图像读入迭代器中,根据显存设置批量的大小。

    # Wrap the datasets in DataLoaders; the transform functions perform the augmentation
    batch_size = 64  # original note: "32--2821M could be 64" (presumably memory used at batch size 32 -- TODO confirm)
    
    # last_batch='keep' means the final batch may hold fewer than batch_size samples
    train_iter = gdata.DataLoader(train_set.transform(transform_train),
                                  batch_size, shuffle=True, last_batch='keep', num_workers=4)
    # NOTE(review): the validation loader is shuffled as well; confirm whether that is intended
    valid_iter = gdata.DataLoader(valid_set.transform(transform_val),
                                  batch_size, shuffle=True, last_batch='keep', num_workers=4)
    

    读入后检查迭代器中的数据,并显示其中的图像进行目测。

    # Sanity-check the training iterator: report how many batches it holds,
    # then pull a single batch and inspect its labels and tensor shape.
    print("trainiter lenght is: %d"%len(train_iter))
    import matplotlib.pyplot as plt
    for imgs, labels in train_iter:
        print(labels)        # class label of every sample in the batch
        print(imgs.shape)    # shape of the image batch
        break                # one batch is enough for the check
    # Preview some images. nor_parms holds the [mean, std] per channel that
    # the transform functions pass to the augmenter for normalization.
    nor_parms = [[0.485, 0.456, 0.406],[0.229, 0.224, 0.225]]
    norm_mean = np.array(nor_parms[0])
    norm_std = np.array(nor_parms[1])
    for i in range(8):
        for j in range(4):
            # Back to channel-last layout, which is what imshow expects.
            x = nd.transpose(imgs[i*4+j,:,:,:],(1,2,0)).asnumpy()
            print(x.shape,type(x))    # shape and type of one image
            # Undo the (x - mean) / std normalization and clamp to [0, 1];
            # showing the normalized tensor directly makes imshow clip it and
            # distorts the colours.
            x = np.clip(x * norm_std + norm_mean, 0.0, 1.0)
            plt.imshow(x)
            plt.show()
            # Only the first image of each group of four is shown.
            break
    
    trainiter lenght is: 512    #总共有512个batch,每个batch有64个训练数据
    
    Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
    
    [35.  0. 31. 19. 38. 33. 35. 33. 19. 25. 16. 26. 36. 52. 18. 16. 27. 23.
     19.  4. 19. 38. 38. 11. 41. 36. 22. 36. 29. 57. 26. 55. 18. 55. 55. 16.
     27. 26. 55. 10. 19. 21. 23. 19. 50. 56. 31. 14. 20. 19.  8. 54. 57.  8.
     52. 19. 56. 57. 17. 42. 18.  0. 23. 55.]
    <NDArray 64 @cpu_shared(0)>
    (64, 3, 224, 224)
    (224, 224, 3) <class 'numpy.ndarray'>
    

    在这里插入图片描述

    2.定义模型

    这里主要使用迁移学习的方式,利用预训练模型抽取图像的基本特征,而后只需要训练最后的输出层来进行分类。

    #define the net work by pre-train
    def get_net(ctx, num_classes=61, hidden_units=256):
        """Build a pretrained ResNet-50 v2 with a new, trainable output head.

        The pretrained model is loaded from the model zoo and a new
        ``output_new`` block (a ReLU dense layer followed by a linear dense
        layer) is attached to it. Callers are expected to feed the result of
        ``net.features`` into ``net.output_new``.

        Args:
            ctx: device on which the new head is initialized and to which all
                parameters of the network are moved.
            num_classes: size of the final dense layer. Defaults to 61, the
                number of classes in the article's dataset.
            hidden_units: size of the intermediate dense layer. Defaults to 256.

        Returns:
            The ResNet model with the additional ``output_new`` attribute.
        """
        resnet = model_zoo.vision.resnet50_v2(pretrained=True)
        # New head for fine-tuning; the model's own output layer is left as is.
        resnet.output_new = nn.HybridSequential(prefix='')
        resnet.output_new.add(nn.Dense(hidden_units, activation='relu'))
        resnet.output_new.add(nn.Dense(num_classes))
        # Only the new head needs fresh initialization.
        resnet.output_new.initialize(init.Xavier(), ctx=ctx)
        # Move every parameter of the network onto ctx.
        resnet.collect_params().reset_ctx(ctx)
        return resnet
    

    定义损失函数,这里主要使用分类的softmax交叉熵来作为损失。

    # Classification loss: softmax cross-entropy
    loss = gloss.SoftmaxCrossEntropyLoss()
    def get_loss(data,net,ctx):
        """Return the mean per-batch loss of ``net`` over ``data`` on ``ctx``.

        Each batch is passed through ``net.features`` and then through
        ``net.output_new``; the batch losses are averaged over ``len(data)``.
        """
        running_total = 0.0
        for batch_x, batch_y in data:
            batch_y = batch_y.as_in_context(ctx)
            # Features from the pretrained part, then the new output head.
            features = net.features(batch_x.as_in_context(ctx))
            predictions = net.output_new(features)
            batch_loss = loss(predictions, batch_y)
            running_total += batch_loss.mean().asscalar()
        return running_total / len(data)
    
    2.1定义训练过程

    完成了以上的准备工作,读入了数据、定义好了网络和损失,我们可以开始进行训练了,训练函数定义如下,输入为网络模型、数据、训练epochs、学习率、衰减等:

    #def trainning  process, trainer, epochscircles, lossback,  valide
    def train(net,train_iter,valid_iter,num_epochs, lr, wd, ctx, lr_period, lr_decay):
        """Fine-tune ``net.output_new`` on features produced by ``net.features``.

        Only the parameters of ``net.output_new`` are handed to the trainer;
        the feature extractor runs outside ``autograd.record()`` and therefore
        receives no gradient.

        Args:
            net: network exposing ``features`` and ``output_new``.
            train_iter: iterable of ``(X, y)`` training batches supporting ``len()``.
            valid_iter: iterable of validation batches, or ``None`` to skip
                validation.
            num_epochs: number of passes over ``train_iter``.
            lr: initial learning rate.
            wd: weight decay.
            ctx: device the batches are copied to.
            lr_period: currently unused; the periodic-decay condition is
                disabled and the rate is decayed at the start of every epoch.
            lr_decay: factor the learning rate is multiplied by each epoch.

        Side effects:
            Prints progress and per-epoch statistics and rewrites
            ``./training_loss.png`` after every epoch.
        """
        trainer = gluon.Trainer(net.output_new.collect_params(), 'sgd',
                               {'learning_rate':lr, 'momentum':0.9, 'wd': wd})
        plot_loss = []  # mean training loss of every finished epoch
        tic = datetime.datetime.now()
        print('Traing is begining, please waiting......')
        for epoch in range(num_epochs):
            train_l = 0.0    # accumulated training loss of this epoch
            counter = 0      # number of batches processed in this epoch
            seen = 0         # number of samples processed in this epoch
            # Decay the learning rate every epoch, the first one included.
            trainer.set_learning_rate(trainer.learning_rate*lr_decay)
            for X,y in train_iter:
                counter +=1
                # Use the real batch size: the last batch may be smaller, and
                # this avoids depending on a module-level batch_size.
                n_samples = X.shape[0]
                seen += n_samples
                if counter % 256 ==0:
                    print('processd %d images'%seen)

                y = y.astype('float32').as_in_context(ctx)
                # Forward through the pretrained part without recording, so
                # no gradient flows into it.
                out_features = net.features(X.as_in_context(ctx))
                with autograd.record():
                    # Only the new output head is recorded and trained.
                    outputs = net.output_new(out_features)
                    l = loss(outputs, y)
                l.backward()

                # Normalize the gradient by the number of samples in this batch.
                trainer.step(n_samples)
                train_l += l.mean().asscalar()
            # Elapsed time of this epoch; total_seconds() also covers
            # durations longer than one day.
            toc = datetime.datetime.now()
            h, remainder = divmod(int((toc - tic).total_seconds()), 3600)
            m, s = divmod(remainder, 60)
            time_s = "time %02d:%02d:%02d" % (h, m, s)
            if valid_iter is not None:
                valid_loss = get_loss(valid_iter, net, ctx)
                epoch_s = ("epoch %d, train loss is %f, valid loss is %f :D "
                           %(epoch+1, train_l/len(train_iter),valid_loss))
            else:
                epoch_s = ("epoch %d, train loss is %f  :D"
                           %(epoch+1, train_l/len(train_iter)))
            tic = toc
            print(epoch_s + time_s + ', lr ' + str(trainer.learning_rate))
            plot_loss.append(train_l/len(train_iter))
            # Clear the figure first so one curve is drawn per save instead of
            # stacking a new line on top of the old ones every epoch.
            plt.clf()
            plt.plot(plot_loss)
            plt.savefig("./training_loss.png")
    

    在这里插入图片描述

    2.2 开始训练
    # Device and hyper-parameters. lr_period is passed to train(), but the
    # periodic-decay condition inside train() is commented out, so it has no effect.
    ctx = gb.try_gpu();num_epochs = 1000;lr = 0.01;wd = 1e-4;lr_period = 10;lr_decay = 0.99;
    net = get_net(ctx)    # build the network with its parameters on ctx
    train(net,train_iter,valid_iter,num_epochs, lr, wd, ctx, lr_period, lr_decay)    # run training
    net.output_new.collect_params().save('./output_new_2_1000.params')       # save the new head's parameters once training has finished
    #net.output_new.save_params('./output_new_50.params')
    
    Traing is begining, please waiting......
    processd xxxxx images
    processd xxxxx images
    epoch 1, train loss is 1.234988, valid loss is 0.776764 :Dtime 00:04:10, lr 0.0099
    

    在这里插入图片描述

    3.测试

    在训练完成得到模型后,我们需要对数据进行测试。同样需要读入数据,并利用网络进行分类。

    # Prepare the test data and report how many entries the dataset holds
    test_set = gdata.vision.ImageFolderDataset('./test_dis/',flag=1)
    print("There are %d test imgs"%len(test_set))
    
    There are xxxx test imgs
    

    定义图像读入函数

    def plot_image(img_path):
        """Read the file at ``img_path`` and return it decoded by ``image.imdecode``.

        Despite the name, nothing is plotted; the decoded image is only returned.
        """
        with open(img_path, 'rb') as handle:
            raw_bytes = handle.read()
        decoded = image.imdecode(raw_bytes)
        return decoded
    

    接下来就是测试过程了:

    # Prediction: classify every image listed in test_set.items and collect
    # the per-class probabilities in preds (one row per image, in list order).
    preds = []
    count_p=0
    for count_p, (img_path, _) in enumerate(test_set.items, start=1):
        img = plot_image(img_path)
        # Same preprocessing as validation; the label argument is a dummy.
        data, _ = transform_val(img, 0)
        # Add a leading batch axis of size one.
        data = data.expand_dims(axis=0)

        # Run on the device the network was placed on. A hard-coded mx.gpu()
        # fails whenever ctx is the CPU or a different GPU.
        output_features = net.features(data.as_in_context(ctx))
        # Feed the features to the new output head and turn scores into
        # probabilities.
        output = nd.softmax(net.output_new(output_features))

        preds.extend(output.asnumpy())
        if count_p%100==0:
            print("processed %d imgs"%count_p)
    
    processed 100 imgs
    

    在这里插入图片描述


    可以根据需要将生成的预测结果preds保存为json文件:

    # Write one record per prediction, pairing the test file name with the
    # index of the highest-scoring class. The json module does the quoting
    # and comma placement, so the file is always valid JSON.
    import json
    submission = [
        {
            # Keep only the file name, not the directory part of the path.
            "image_id": os.path.basename(item[0]),
            # Convert the NumPy integer to a builtin int so json can encode it.
            "xxxx_class": int(pred.argmax()),
        }
        for item, pred in zip(test_set.items, preds)
    ]
    with open('submission.json', 'w') as f:
        json.dump(submission, f)
    

    最后检查生成的数据长度,是否和测试集数据长度相同,然后就大功告成啦~~~~

    # Check the format: the file must parse as JSON. The length of the parsed
    # list is the value to compare against the number of test images.
    import json
    # Use a context manager so the file handle is closed after loading.
    with open('./submission.json', encoding='utf-8') as f:
        user_result_list = json.load(f)
    len(user_result_list)
    

    代码:
    1.gluon
    2.论坛
    3.代码1代码2
    4. Logo from zcool.com

  • 相关阅读:
    PS转换图片——我教你
    通过Ajax——异步获取相关问题解答
    Spring的线程安全
    Spring MVC的工作机制
    Annotation的语法和使用
    Spring Bean的生命周期
    浅谈Spring
    Spring的事务管理
    行为型模式
    结构型模式
  • 原文地址:https://www.cnblogs.com/Tom-Ren/p/9897808.html
Copyright © 2011-2022 走看看