zoukankan      html  css  js  c++  java
  • Keras之 cifar10数据集使用keras generator读取、模型训练、预测

      本文将介绍:

      使用keras实现resnet50模型

      实现迁移学习-finetune

      一,下载kaggle-cifar10数据

      下载dataset到本地目录cifar10中

      二,实现tensorflow动态按需分配GPU

      import matplotlib as mpl

      import matplotlib.pyplot as plt

      import numpy as np

      import os

      import pandas as pd

      import sklearn

      import sys

      import tensorflow as tf

      import time

      from tensorflow import keras

      print(tf.__version__)
      print(sys.version_info)
      # Report the version of every library the script relies on.
      for module in mpl, np, pd, sklearn, tf, keras:
          print(module.__name__, module.__version__)

      # Step 2: let TensorFlow allocate GPU memory on demand.
      from tensorflow.compat.v1 import ConfigProto
      from tensorflow.compat.v1 import InteractiveSession

      config = ConfigProto()
      # allow_growth is the GPU option this tutorial relies on for on-demand
      # memory allocation.
      config.gpu_options.allow_growth = True
      session = InteractiveSession(config=config)

      三,读取训练集、测试集的csv文件数据和训练集、测试集数据对应关系

      # Step 3: class labels plus the CSV files and image folders of the dataset.
      # The position of a name in class_names is the integer label used by the
      # model (the data generators are given this list as `classes`).
      class_names = [
          'airplane', 'automobile', 'bird', 'cat', 'deer',
          'dog', 'frog', 'horse', 'ship', 'truck',
      ]

      # Image folders.
      train_folder = './cifar10/train/'
      test_folder = './cifar10/test'

      # CSV files mapping image ids to labels.
      train_lables_file = './cifar10/trainLabels.csv'
      test_csv_file = './cifar10/sampleSubmission.csv'

      def parse_csv_file(filepath, folder):
          """Parse an ``id,label`` CSV file into (image path, label) pairs.

          The first line of the file is treated as a header and skipped.

          Args:
              filepath: Path of the CSV file to read.
              folder: Directory holding the images; each id becomes
                  ``<folder>/<id>.png``.

          Returns:
              A list of ``(image_full_path, label_str)`` tuples in file order.
          """
          results = []
          with open(filepath, 'r') as f:
              next(f, None)  # skip the header row (no-op on an empty file)
              for line in f:
                  # Strip the trailing newline so it does not end up in the
                  # label, and ignore blank lines such as a trailing one.
                  line = line.strip()
                  if not line:
                      continue
                  image_id, label_str = line.split(',')
                  image_full_path = os.path.join(folder, image_id + '.png')
                  results.append((image_full_path, label_str))
          return results

      import pprint

      # Build the (image path, label) lists for the training and test sets.
      train_labels_info = parse_csv_file(train_lables_file, train_folder)
      test_csv_info = parse_csv_file(test_csv_file, test_folder)

      # Show the first five entries of each list, then both list sizes.
      pprint.pprint(train_labels_info[:5])
      pprint.pprint(test_csv_info[:5])
      print(len(train_labels_info), len(test_csv_info))

      四,将对应关系转换为dataframe类型

      # Step 4: turn the (path, label) pairs into DataFrames.
      # The first 45000 labelled images are used for training and the rest for
      # validation; pass the whole list to train_df to train on everything.
      # Column names are given at construction time so that an empty slice
      # still yields a correctly labelled (empty) frame.
      df_columns = ['filepath', 'class']
      train_df = pd.DataFrame(train_labels_info[0:45000], columns=df_columns)
      valid_df = pd.DataFrame(train_labels_info[45000:], columns=df_columns)
      test_df = pd.DataFrame(test_csv_info, columns=df_columns)

      print(train_df.head())
      print(valid_df.head())
      print(test_df.head())

      五,使用ImageDataGenerator加载数据并做数据增强

      # Step 5: load images with ImageDataGenerator and augment the training set.
      height = 32
      width = 32
      channels = 3
      batch_size = 32
      num_classes = 10

      # Arguments shared by the training and validation iterators.
      flow_options = dict(
          directory='./',
          x_col='filepath',
          y_col='class',
          classes=class_names,
          target_size=(height, width),
          batch_size=batch_size,
          seed=7,
          class_mode='sparse',
      )

      # Training images are scaled by 1/255 and randomly rotated, shifted,
      # sheared, zoomed and flipped.
      train_datagen = keras.preprocessing.image.ImageDataGenerator(
          rescale=1./255,
          rotation_range=40,
          width_shift_range=0.2,
          height_shift_range=0.2,
          shear_range=0.2,
          zoom_range=0.2,
          horizontal_flip=True,
          fill_mode='nearest',
      )
      train_generator = train_datagen.flow_from_dataframe(
          train_df, shuffle=True, **flow_options)

      # Validation images are only scaled, and are not shuffled.
      valid_datagen = keras.preprocessing.image.ImageDataGenerator(
          rescale=1./255)
      valid_generator = valid_datagen.flow_from_dataframe(
          valid_df, shuffle=False, **flow_options)

      train_num = train_generator.samples
      valid_num = valid_generator.samples
      print(train_num, valid_num)

      六,查看generator数据

      # Pull two batches from the training iterator and show their shapes and
      # labels as a quick sanity check.
      for i in range(2):
          x, y = train_generator.next()
          print(x.shape, y.shape)
          print(y)

      七,构建模型

      # Step 7: build the model.
      # Three stages of two (Conv2D + BatchNormalization) pairs followed by
      # max-pooling, with 128, 256 and 512 filters, then a dense classifier.
      model = keras.models.Sequential([
          # Keras image inputs are (height, width, channels).
          keras.layers.Conv2D(filters=128, kernel_size=3, padding='same',
                              activation='relu',
                              input_shape=[height, width, channels]),
          keras.layers.BatchNormalization(),
          keras.layers.Conv2D(filters=128, kernel_size=3, padding='same',
                              activation='relu'),
          keras.layers.BatchNormalization(),
          keras.layers.MaxPool2D(pool_size=2),

          keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',
                              activation='relu'),
          keras.layers.BatchNormalization(),
          keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',
                              activation='relu'),
          keras.layers.BatchNormalization(),
          keras.layers.MaxPool2D(pool_size=2),

          keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',
                              activation='relu'),
          keras.layers.BatchNormalization(),
          keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',
                              activation='relu'),
          keras.layers.BatchNormalization(),
          keras.layers.MaxPool2D(pool_size=2),

          keras.layers.Flatten(),
          keras.layers.Dense(512, activation='relu'),
          keras.layers.Dense(num_classes, activation='softmax'),
      ])

      # Sparse loss: the generators are configured with class_mode='sparse'.
      model.compile(loss="sparse_categorical_crossentropy",
                    optimizer="adam", metrics=['accuracy'])
      model.summary()

      八,训练模型

      # Step 8: train the model.
      epochs = 20

      # Whole batches per pass over the training / validation data.
      steps_per_epoch = train_num // batch_size
      validation_steps = valid_num // batch_size

      history = model.fit_generator(
          train_generator,
          steps_per_epoch=steps_per_epoch,
          epochs=epochs,
          validation_data=valid_generator,
          validation_steps=validation_steps,
      )

      九,打印模型训练曲线

      # Step 9: plot the training curves.
      def plot_learning_curves(history, label, epcohs, min_value, max_value):
          """Plot one metric and its validation counterpart against the epoch.

          Args:
              history: Object whose ``history`` dict holds the per-epoch values
                  under ``label`` and ``'val_' + label``.
              label: Name of the metric to plot, e.g. ``'accuracy'``.
              epcohs: Upper bound of the x axis (number of epochs). The
                  misspelled name is kept so existing callers keep working.
              min_value: Lower bound of the y axis.
              max_value: Upper bound of the y axis.
          """
          val_label = 'val_' + label
          data = {
              label: history.history[label],
              val_label: history.history[val_label],
          }
          pd.DataFrame(data).plot(figsize=(8, 5))
          plt.grid(True)
          # Use the argument rather than a module-level `epochs` variable so
          # the function works for any caller-supplied epoch count.
          plt.axis([0, epcohs, min_value, max_value])
          plt.show()

      # Accuracy is drawn on a 0..1 axis, loss on a 0..2 axis.
      plot_learning_curves(history, 'accuracy', epochs,
                           min_value=0, max_value=1)
      plot_learning_curves(history, 'loss', epochs,
                           min_value=0, max_value=2)

      十,使用keras.ImageDataGenerator加载测试集数据

      # Step 10: load the test set with keras ImageDataGenerator.
      # Test images are only scaled by 1/255, with no augmentation.
      test_datagen = keras.preprocessing.image.ImageDataGenerator(
          rescale=1./255)

      # Use the test data generator created above (the original reused
      # valid_datagen and left test_datagen unused).
      test_generator = test_datagen.flow_from_dataframe(
          test_df,
          directory='./',
          x_col='filepath',
          y_col='class',
          classes=class_names,
          target_size=(height, width),
          batch_size=batch_size,
          seed=7,
          # Not shuffled; the submission step numbers predictions by position.
          shuffle=False,
          class_mode="sparse")

      test_num = test_generator.samples
      print(test_num)

      十一,使用测试集预测模型结果

      # Step 11: predict on the test set.
      test_predict = model.predict_generator(test_generator,
                                             workers=10,
                                             use_multiprocessing=True)

      # 1. Shape of the prediction array.
      print(test_predict.shape)

      # 2. Look at the first five rows.
      print(test_predict[0:5])

      # 3. The index of the largest value in each row is the predicted class.
      test_predict_class_indices = np.argmax(test_predict, axis=1)

      # 4. Look at the first five predicted indices.
      print(test_predict_class_indices[0:5])

      # 5. Convert the indices into class names.
      test_predict_class = [class_names[index]
                            for index in test_predict_class_indices]

      # Look at the first five predicted names.
      print(test_predict_class[0:5])

      十二,将预测结果写入到submission.csv文件中,并在kaggle上提交

      # Step 12: write the predictions to submission.csv for upload to Kaggle.
      def generate_submissions(filename, predict_class):
          """Write predicted labels to a CSV file with an ``id,label`` header.

          Args:
              filename: Path of the CSV file to create (overwritten if present).
              predict_class: Sequence of predicted label strings; the item at
                  position ``i`` is written with id ``i + 1``.
          """
          with open(filename, 'w') as f:
              # Every record must end with a newline, otherwise the whole file
              # collapses into a single line.
              f.write('id,label\n')
              for image_id, label in enumerate(predict_class, start=1):
                  f.write('%d,%s\n' % (image_id, label))

      # Destination of the submission file.
      output_file = "./cifar10/submission.csv"
      generate_submissions(filename=output_file,
                           predict_class=test_predict_class)

      十三,总结代码

      #!/usr/bin/env python3

      # -*- coding: utf-8 -*-

      import matplotlib as mpl

      import matplotlib.pyplot as plt

      import numpy as np

      import os

      import pandas as pd

      import sklearn

      import sys

      import tensorflow as tf

      import time

      from tensorflow import keras

      print(tf.__version__)
      print(sys.version_info)
      # Report the version of every library the script relies on.
      for module in mpl, np, pd, sklearn, tf, keras:
          print(module.__name__, module.__version__)

      # Step 2: let TensorFlow allocate GPU memory on demand.
      from tensorflow.compat.v1 import ConfigProto
      from tensorflow.compat.v1 import InteractiveSession

      config = ConfigProto()
      # allow_growth is the GPU option this tutorial relies on for on-demand
      # memory allocation.
      config.gpu_options.allow_growth = True
      session = InteractiveSession(config=config)

      # Step 3: class labels plus the CSV files and image folders of the dataset.
      # The position of a name in class_names is the integer label used by the
      # model (the data generators are given this list as `classes`).
      class_names = [
          'airplane', 'automobile', 'bird', 'cat', 'deer',
          'dog', 'frog', 'horse', 'ship', 'truck',
      ]

      # Image folders.
      train_folder = './cifar10/train/'
      test_folder = './cifar10/test'

      # CSV files mapping image ids to labels.
      train_lables_file = './cifar10/trainLabels.csv'
      test_csv_file = './cifar10/sampleSubmission.csv'

      def parse_csv_file(filepath, folder):
          """Parse an ``id,label`` CSV file into (image path, label) pairs.

          The first line of the file is treated as a header and skipped.

          Args:
              filepath: Path of the CSV file to read.
              folder: Directory holding the images; each id becomes
                  ``<folder>/<id>.png``.

          Returns:
              A list of ``(image_full_path, label_str)`` tuples in file order.
          """
          results = []
          with open(filepath, 'r') as f:
              next(f, None)  # skip the header row (no-op on an empty file)
              for line in f:
                  # Strip the trailing newline so it does not end up in the
                  # label, and ignore blank lines such as a trailing one.
                  line = line.strip()
                  if not line:
                      continue
                  image_id, label_str = line.split(',')
                  image_full_path = os.path.join(folder, image_id + '.png')
                  results.append((image_full_path, label_str))
          return results

      import pprint

      # Build the (image path, label) lists for the training and test sets.
      train_labels_info = parse_csv_file(train_lables_file, train_folder)
      test_csv_info = parse_csv_file(test_csv_file, test_folder)

      # Show the first five entries of each list, then both list sizes.
      pprint.pprint(train_labels_info[:5])
      pprint.pprint(test_csv_info[:5])
      print(len(train_labels_info), len(test_csv_info))

      # Step 4: turn the (path, label) pairs into DataFrames.
      # The first 45000 labelled images are used for training and the rest for
      # validation; pass the whole list to train_df to train on everything.
      # Column names are given at construction time so that an empty slice
      # still yields a correctly labelled (empty) frame.
      df_columns = ['filepath', 'class']
      train_df = pd.DataFrame(train_labels_info[0:45000], columns=df_columns)
      valid_df = pd.DataFrame(train_labels_info[45000:], columns=df_columns)
      test_df = pd.DataFrame(test_csv_info, columns=df_columns)

      print(train_df.head())
      print(valid_df.head())
      print(test_df.head())

      # Step 5: load images with ImageDataGenerator and augment the training set.
      height = 32
      width = 32
      channels = 3
      batch_size = 32
      num_classes = 10

      # Training images are scaled by 1/255 and randomly rotated, shifted,
      # sheared, zoomed and flipped.
      train_datagen = keras.preprocessing.image.ImageDataGenerator(
          rescale=1./255,
          rotation_range=40,
          width_shift_range=0.2,
          height_shift_range=0.2,
          shear_range=0.2,
          zoom_range=0.2,
          horizontal_flip=True,
          fill_mode='nearest',
      )

      train_generator = train_datagen.flow_from_dataframe(
          train_df,
          directory='./',
          x_col='filepath',
          y_col='class',
          classes=class_names,
          target_size=(height, width),
          batch_size=batch_size,
          seed=7,
          shuffle=True,
          class_mode='sparse',
      )

      # Validation images are only scaled, and are not shuffled.
      valid_datagen = keras.preprocessing.image.ImageDataGenerator(
          rescale=1./255)

      valid_generator = valid_datagen.flow_from_dataframe(
          valid_df,
          directory='./',
          x_col='filepath',
          y_col='class',
          classes=class_names,
          target_size=(height, width),
          batch_size=batch_size,
          seed=7,
          shuffle=False,
          class_mode="sparse")

      train_num = train_generator.samples
      valid_num = valid_generator.samples
      print(train_num, valid_num)

      # Step 6: inspect the generator output.
      # Pull two batches from the training iterator and show their shapes and
      # labels as a quick sanity check.
      for i in range(2):
          x, y = train_generator.next()
          print(x.shape, y.shape)
          print(y)

      # Step 7: build the model.
      # Three stages of two (Conv2D + BatchNormalization) pairs followed by
      # max-pooling, with 128, 256 and 512 filters, then a dense classifier.
      model = keras.models.Sequential([
          # Keras image inputs are (height, width, channels).
          keras.layers.Conv2D(filters=128, kernel_size=3, padding='same',
                              activation='relu',
                              input_shape=[height, width, channels]),
          keras.layers.BatchNormalization(),
          keras.layers.Conv2D(filters=128, kernel_size=3, padding='same',
                              activation='relu'),
          keras.layers.BatchNormalization(),
          keras.layers.MaxPool2D(pool_size=2),

          keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',
                              activation='relu'),
          keras.layers.BatchNormalization(),
          keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',
                              activation='relu'),
          keras.layers.BatchNormalization(),
          keras.layers.MaxPool2D(pool_size=2),

          keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',
                              activation='relu'),
          keras.layers.BatchNormalization(),
          keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',
                              activation='relu'),
          keras.layers.BatchNormalization(),
          keras.layers.MaxPool2D(pool_size=2),

          keras.layers.Flatten(),
          keras.layers.Dense(512, activation='relu'),
          keras.layers.Dense(num_classes, activation='softmax'),
      ])

      # Sparse loss: the generators are configured with class_mode='sparse'.
      model.compile(loss="sparse_categorical_crossentropy",
                    optimizer="adam", metrics=['accuracy'])
      model.summary()

      # Step 8: train the model.
      epochs = 20

      # Whole batches per pass over the training / validation data.
      steps_per_epoch = train_num // batch_size
      validation_steps = valid_num // batch_size

      history = model.fit_generator(
          train_generator,
          steps_per_epoch=steps_per_epoch,
          epochs=epochs,
          validation_data=valid_generator,
          validation_steps=validation_steps,
      )

      # Step 9: plot the training curves.
      def plot_learning_curves(history, label, epcohs, min_value, max_value):
          """Plot one metric and its validation counterpart against the epoch.

          Args:
              history: Object whose ``history`` dict holds the per-epoch values
                  under ``label`` and ``'val_' + label``.
              label: Name of the metric to plot, e.g. ``'accuracy'``.
              epcohs: Upper bound of the x axis (number of epochs). The
                  misspelled name is kept so existing callers keep working.
              min_value: Lower bound of the y axis.
              max_value: Upper bound of the y axis.
          """
          val_label = 'val_' + label
          data = {
              label: history.history[label],
              val_label: history.history[val_label],
          }
          pd.DataFrame(data).plot(figsize=(8, 5))
          plt.grid(True)
          # Use the argument rather than a module-level `epochs` variable so
          # the function works for any caller-supplied epoch count.
          plt.axis([0, epcohs, min_value, max_value])
          plt.show()

      # Accuracy is drawn on a 0..1 axis, loss on a 0..2 axis.
      plot_learning_curves(history, 'accuracy', epochs,
                           min_value=0, max_value=1)
      plot_learning_curves(history, 'loss', epochs,
                           min_value=0, max_value=2)

      # Step 10: load the test set with keras ImageDataGenerator.
      # Test images are only scaled by 1/255, with no augmentation.
      test_datagen = keras.preprocessing.image.ImageDataGenerator(
          rescale=1./255)

      # Use the test data generator created above (the original reused
      # valid_datagen and left test_datagen unused).
      test_generator = test_datagen.flow_from_dataframe(
          test_df,
          directory='./',
          x_col='filepath',
          y_col='class',
          classes=class_names,
          target_size=(height, width),
          batch_size=batch_size,
          seed=7,
          # Not shuffled; the submission step numbers predictions by position.
          shuffle=False,
          class_mode="sparse")

      test_num = test_generator.samples
      print(test_num)

      # Step 11: predict on the test set.
      test_predict = model.predict_generator(
          test_generator,
          workers=10,
          use_multiprocessing=True,
      )

      # 1. Shape of the prediction array.
      print(test_predict.shape)

      # 2. Look at the first five rows.
      print(test_predict[:5])

      # 3. The index of the largest value in each row is the predicted class.
      test_predict_class_indices = np.argmax(test_predict, axis=1)

      # 4. Look at the first five predicted indices.
      print(test_predict_class_indices[:5])

      # 5. Convert the indices into class names.
      test_predict_class = [
          class_names[index] for index in test_predict_class_indices
      ]

      # Look at the first five predicted names.
      print(test_predict_class[:5])

      # Step 12: write the predictions to submission.csv for upload to Kaggle.
      def generate_submissions(filename, predict_class):
          """Write predicted labels to a CSV file with an ``id,label`` header.

          Args:
              filename: Path of the CSV file to create (overwritten if present).
              predict_class: Sequence of predicted label strings; the item at
                  position ``i`` is written with id ``i + 1``.
          """
          with open(filename, 'w') as f:
              # Every record must end with a newline, otherwise the whole file
              # collapses into a single line.
              f.write('id,label\n')
              for image_id, label in enumerate(predict_class, start=1):
                  f.write('%d,%s\n' % (image_id, label))

      # Destination of the submission file.
      output_file = "./cifar10/submission.csv"
      generate_submissions(filename=output_file,
                           predict_class=test_predict_class)

  • 相关阅读:
    spring2.5 mvc使用注解upload上传文件
    从5点来分析搜索引擎算法
    搜索引擎算法研究专题六:HITS算法
    搜索引擎算法研究专题五:TFIDF详解
    搜索引擎算法研究专题二:HITS算法及其衍生算法分析
    搜索引擎算法研究专题一:基于页面分块的搜索引擎排序算法改进
    搜索引擎算法研究专题三:聚集索引与非聚集索引介绍
    Spring最佳实践9.1 集成邮件服务
    搜索引擎算法研究专题四:随机冲浪模型介绍
    搜索引擎算法研究专题七:Hilltop算法
  • 原文地址:https://www.cnblogs.com/djw12333/p/14469220.html
Copyright © 2011-2022 走看看