zoukankan      html  css  js  c++  java
  • 基于深度学习和迁移学习的识花实践

    基于深度学习和迁移学习的识花实践

    transfer_learning.py代码如下

    #coding=utf8
    import os
    import numpy as np
    import tensorflow as tf
    import csv
    
    from sklearn.preprocessing import LabelBinarizer
    
    from tensorflow_vgg import vgg16
    from tensorflow_vgg import utils
    
    
    # Root folder of the dataset; every sub-directory is treated as one class.
    data_dir = 'flower_photos/'
    contents = os.listdir(data_dir)
    classes = list(filter(lambda name: os.path.isdir(data_dir + name), contents))


    # Number of images run through the network at once; platforms with more
    # memory can afford a larger value.
    batch_size = 10
    # Image arrays queued for the batch currently being assembled.
    batch = []
    # Class name of every image loaded so far.
    labels = []
    # List reserved for storing feature values.
    codes_list = []

    # Feature matrix; None until features have been computed.
    codes = None
    
    with tf.Session() as sess:
        # Build the VGG16 model object.
        vgg = vgg16.Vgg16()
        input_ = tf.placeholder(tf.float32, [None, 224, 224, 3])
        with tf.name_scope("content_vgg"):
            # Attach the VGG16 network to the input placeholder.
            vgg.build(input_)

        # Run the images of every class through VGG16 and keep the relu6
        # activations as feature vectors.
        for each in classes:
            print("Starting {} images".format(each))
            class_path = data_dir + each
            files = os.listdir(class_path)
            for ii, file_name in enumerate(files, 1):
                # Load the image and queue it for the current batch.
                img = utils.load_image(os.path.join(class_path, file_name))
                batch.append(img.reshape((1, 224, 224, 3)))
                labels.append(each)

                # Flush once the batch is full or the class has no files left.
                if ii % batch_size == 0 or ii == len(files):
                    images = np.concatenate(batch)

                    # Compute the feature values for this batch.
                    codes_batch = sess.run(vgg.relu6,
                                           feed_dict={input_: images})

                    # Collect per-batch results; they are joined once after
                    # the loop instead of re-copying the matrix every batch.
                    codes_list.append(codes_batch)

                    # Reset the queue for the next batch.
                    batch = []
                    print('{} images processed'.format(ii))

        if not codes_list:
            raise ValueError(
                "no images found under {!r}".format(data_dir))
        codes = np.concatenate(codes_list)

        # tofile() dumps raw bytes with no shape or dtype header, so the file
        # must be opened in binary mode.
        with open('codes', 'wb') as f:
            codes.tofile(f)

        # One label per line, in the same order as the feature rows.
        with open('labels', 'w') as f:
            writer = csv.writer(f, delimiter='\n')
            writer.writerow(labels)

    training.py代码如下

    #coding=utf8
    import numpy as np
    import tensorflow as tf
    
    from sklearn.preprocessing import LabelBinarizer
    from sklearn.model_selection import StratifiedShuffleSplit
    
    # Class name of every image, one per line, in the same order as the
    # feature rows stored in 'codes'.
    with open('labels', 'r') as f:
        labels = [line.strip() for line in f if line.strip()]

    # 'codes' is a raw binary dump without a header, so dtype and shape have
    # to be supplied here. The features are network activations computed from
    # a tf.float32 input, so they are read back as float32; reading them as
    # int32 would reinterpret the float bits as integers.
    codes = np.fromfile("codes", dtype=np.float32)

    # One feature row per label; the row width is inferred from the file size.
    codes = codes.reshape(len(labels), -1)
    print(len(codes))

    print(len(labels))
    
    
    # Turn the class names into one-hot label vectors.
    lb = LabelBinarizer()
    labels_vecs = lb.fit(labels).transform(labels)

    # One stratified shuffle split that holds out 20% of the samples.
    ss = StratifiedShuffleSplit(n_splits=1, test_size=0.2)
    train_idx, val_idx = next(ss.split(codes, labels))

    # The held-out indices are halved: first half validation, second half test.
    half_val_len = len(val_idx) // 2
    test_idx = val_idx[half_val_len:]
    val_idx = val_idx[:half_val_len]

    train_x = codes[train_idx]
    train_y = labels_vecs[train_idx]
    val_x = codes[val_idx]
    val_y = labels_vecs[val_idx]
    test_x = codes[test_idx]
    test_y = labels_vecs[test_idx]

    for title, xs, ys in (("Train shapes (x, y):", train_x, train_y),
                          ("Validation shapes (x, y):", val_x, val_y),
                          ("Test shapes (x, y):", test_x, test_y)):
        print(title, xs.shape, ys.shape)
    
    # Classifier graph trained on top of the pre-computed feature vectors.
    # Statement order is kept as is: later code refers to these names.

    # Input placeholder: one feature row per sample, as wide as the loaded codes.
    inputs_ = tf.placeholder(tf.float32, shape=[None, codes.shape[1]])
    # Label placeholder: one one-hot row per sample, one column per class.
    labels_ = tf.placeholder(tf.int64, shape=[None, labels_vecs.shape[1]])
    
    # Fully connected hidden layer with 256 units.
    fc = tf.contrib.layers.fully_connected(inputs_, 256)
    
    # Fully connected output layer with one unit per class; activation_fn=None
    # leaves the outputs as raw logits.
    logits = tf.contrib.layers.fully_connected(fc, labels_vecs.shape[1], activation_fn=None)
    
    # Per-sample softmax cross entropy between the labels and the logits.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=labels_, logits=logits)
    
    # Loss: mean cross entropy over the batch.
    cost = tf.reduce_mean(cross_entropy)
    
    # Training op: Adam optimizer, constructed with its default arguments.
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    
    # Predicted class distribution.
    predicted = tf.nn.softmax(logits)
    
    # Accuracy: fraction of samples whose arg-max prediction equals the
    # arg-max of the label row.
    correct_pred = tf.equal(tf.argmax(predicted, 1), tf.argmax(labels_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    
    
    def get_batches(x, y, n_batches=10):
        """Yield aligned ``(X, Y)`` slices that split the data into batches.

        The batch size is ``len(x) // n_batches``. Every batch except the
        last holds exactly that many samples; the last one also takes the
        remainder, so no sample is dropped.

        Args:
            x: Sliceable sequence of samples (list, numpy array, ...).
            y: Sliceable sequence of targets aligned with ``x``.
            n_batches: Number of batches to produce.

        Yields:
            Tuples ``(X, Y)`` of matching slices of ``x`` and ``y``.
            When there are fewer samples than ``n_batches``, all data is
            yielded as one batch; empty input yields nothing.
        """
        batch_size = len(x) // n_batches

        if batch_size == 0:
            # A zero step would make range() raise, so fall back to a
            # single batch holding whatever data there is.
            if len(x):
                yield x, y
            return

        last_start = (n_batches - 1) * batch_size
        for start in range(0, n_batches * batch_size, batch_size):
            # The final batch is open-ended so it absorbs the remainder.
            stop = None if start == last_start else start + batch_size
            yield x[start:stop], y[start:stop]
    
    
    # Number of passes over the training set.
    epochs = 20
    # Running count of training steps; validation is reported every 5 steps.
    iteration = 0
    # Saver used to write the trained variables to a checkpoint.
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for e in range(epochs):
            for x, y in get_batches(train_x, train_y):
                feed = {inputs_: x,
                        labels_: y}
                # Run one optimisation step on this batch.
                loss, _ = sess.run([cost, optimizer], feed_dict=feed)
                print("Epoch: {}/{}".format(e+1, epochs),
                      "Iteration: {}".format(iteration),
                      "Training loss: {:.5f}".format(loss))
                iteration += 1

                if iteration % 5 == 0:
                    feed = {inputs_: val_x,
                            labels_: val_y}
                    val_acc = sess.run(accuracy, feed_dict=feed)
                    # Report progress on the validation set, using the same
                    # 1-based epoch number as the training message.
                    print("Epoch: {}/{}".format(e+1, epochs),
                          "Iteration: {}".format(iteration),
                          "Validation Acc: {:.4f}".format(val_acc))
        # Save the trained model.
        saver.save(sess, "checkpoints/flowers.ckpt")
    
    
    # New session: restore the most recent checkpoint and score the test split.
    with tf.Session() as sess:
        ckpt_path = tf.train.latest_checkpoint('checkpoints')
        saver.restore(sess, ckpt_path)

        feed = {inputs_: test_x, labels_: test_y}
        test_acc = sess.run(accuracy, feed_dict=feed)
        print("Test accuracy: {:.4f}".format(test_acc))

    结果:

  • 相关阅读:
    【转】C++ 实现线程池
    【转】C++ 实现reactor 模式
    【转】C++ 单例模式
    高并发的内存池——TLS MEMORY POOL
    爬取某东商品数据
    查看解放号的一些Python岗
    查看前程无忧Python岗及一些可视化
    爬某牙小姐姐视频
    爬取某云音乐热歌榜
    键盘及鼠标记录程序
  • 原文地址:https://www.cnblogs.com/TMatrix52/p/9664209.html
Copyright © 2011-2022 走看看