zoukankan      html  css  js  c++  java
  • 基于TensorFlow Object Detection API进行迁移学习训练自己的人脸检测模型(一)

    前言

    已完成TensorFlow Object Detection API环境搭建,具体搭建过程请参照:

    安装运行谷歌开源的TensorFlow Object Detection API视频物体识别系统

    Ubuntu系统安装配置tensorflow开发环境

     

    准备工作

    下载训练数据和验证数据

    香港中文大学(Chinese University of Hong Kong)有大量的标注图像数据集。WIDER FACE数据集是一个人脸检测基准数据集。我用labelImg(https://github.com/tzutalin/labelImg)来显示边框。所选的文本是人脸检测注释。

    数据集下载地址:http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/

    labelImg安装命令

    pip install labelImg

    其他安装方式请参照上面提供的地址自行尝试

    labelImg使用命令

    labelImg

    效果

    数据集中的图片与XML标注文件一一对应

    下载模型

    模型下载地址:https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md

    我选择的模型是faster_rcnn_inception_v2_coco,下载地址是:

    http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz

    编写代码进行相关操作001_down_data.py

    # -*- coding: utf-8 -*-
    '''
    结果集下载与模型下载
    '''
    import requests
    import os
    import shutil
    # unzip the files
    import zipfile
    import tarfile
    
    
    def download_file_from_google_drive(id, destination):
        """Download a shared Google Drive file and save it to ``destination``.

        The file is requested from the ``uc?export=download`` endpoint. If the
        first response carries a cookie whose name starts with
        ``download_warning``, the request is repeated with that cookie's value
        as the ``confirm`` parameter, and the second response is the one saved.

        Args:
            id: Google Drive file id. The name shadows the builtin but is kept
                so existing keyword callers continue to work.
            destination: Path of the local file to write (opened in "wb" mode,
                so an existing file is overwritten).

        Raises:
            requests.HTTPError: If the final response has a 4xx/5xx status.
                Without this check the error page body would be written to
                ``destination`` as if it were the requested archive.
        """
        URL = "https://docs.google.com/uc?export=download"
        CHUNK_SIZE = 32768

        def get_confirm_token(response):
            # Return the value of the first "download_warning*" cookie, if any.
            for key, value in response.cookies.items():
                if key.startswith('download_warning'):
                    return value
            return None

        def save_response_content(response, destination):
            # Stream the body to disk so the whole file is never held in memory.
            with open(destination, "wb") as f:
                for chunk in response.iter_content(CHUNK_SIZE):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)

        # The session keeps the cookies of the first request for the second one;
        # the context manager releases its connections when we are done.
        with requests.Session() as session:
            response = session.get(URL, params={'id': id}, stream=True)
            token = get_confirm_token(response)

            if token:
                # Discard the first (unread) streamed response before retrying.
                response.close()
                params = {'id': id, 'confirm': token}
                response = session.get(URL, params=params, stream=True)

            try:
                response.raise_for_status()
                save_response_content(response, destination)
            finally:
                response.close()
    
    
    # The script
    curr_path = os.getcwd()
    models_path = os.path.join(curr_path, "data")

    # Create the ./data working directory. Only the "directory already exists"
    # case is tolerated; any other failure (permissions, a regular file named
    # "data", ...) is re-raised instead of being silently ignored, because
    # every later step writes into this directory.
    try:
        os.makedirs(models_path)
    except OSError:
        if not os.path.isdir(models_path):
            raise
    
    # Locations of the two WIDER FACE image archives and of the folders they
    # unpack to, all inside models_path.
    train_zip = os.path.join(models_path, "train.zip")
    val_zip = os.path.join(models_path, "val.zip")
    train_dir = os.path.join(models_path, "WIDER_train")
    val_dir = os.path.join(models_path, "WIDER_val")

    # Step 1: download each archive from Google Drive unless it is already on disk.
    print("files download start")

    if not os.path.exists(train_zip):
        print("downloading.. train.zip -- 1.47GB")
        download_file_from_google_drive("0B6eKvaijfFUDQUUwd21EckhUbWs", train_zip)

    if not os.path.exists(val_zip):
        print("downloading.. val.zip -- 362.8MB")
        download_file_from_google_drive("0B6eKvaijfFUDd3dIRmpvSk8tLUk", val_zip)

    print("files download end")

    # Step 2: unpack each archive unless its target folder already exists.
    print("files unzip start")

    if not os.path.exists(train_dir):
        with zipfile.ZipFile(train_zip, "r") as zip_ref:
            zip_ref.extractall(models_path)

    if not os.path.exists(val_dir):
        with zipfile.ZipFile(val_zip, "r") as zip_ref:
            zip_ref.extractall(models_path)

    print("files unzip end")
    
    print("annotation download start")

    url = 'http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/bbx_annotation/wider_face_split.zip'
    annotation_zip = os.path.join(models_path, "wider_face_split.zip")

    # Download only when the archive is missing, like the other downloads in
    # this script, instead of re-fetching it on every run.
    if not os.path.exists(annotation_zip):
        r = requests.get(url)
        # Fail loudly on an HTTP error rather than saving the error page as a zip.
        r.raise_for_status()
        with open(annotation_zip, "wb") as code:
            code.write(r.content)

    # Unpack only when the extracted folder is not there yet.
    if not os.path.exists(os.path.join(models_path, "wider_face_split")):
        with zipfile.ZipFile(annotation_zip, "r") as zip_ref:
            zip_ref.extractall(models_path)
    print("annotation download end")
    
    # Pre-trained model, taken from the TensorFlow detection model zoo:
    # https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md
    url = 'http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz'
    filePath = os.path.join(models_path, "faster_rcnn_inception_v2_coco_2018_01_28.tar.gz")

    # Download the archive once; later runs reuse the file on disk.
    if not os.path.exists(filePath):
        response = requests.get(url, stream=True)
        try:
            # Do not write an HTTP error body to disk as if it were the archive.
            response.raise_for_status()
            with open(filePath, 'wb') as out_file:
                shutil.copyfileobj(response.raw, out_file)
        finally:
            # Release the streamed connection even if the copy fails.
            response.close()
        del response

    # extractall() below unpacks relative to the current directory, so switch
    # into the data folder first (this changes the process cwd, as before).
    os.chdir(models_path)

    # filePath always ends in ".tar.gz", so only the gzip mode is needed; the
    # context manager closes the archive even when extraction raises.
    with tarfile.open(filePath, "r:gz") as tar:
        tar.extractall()

    print("done")

    数据预处理

    将WIDERFace转换为Pascal XML

    首先,我们需要将人脸检测数据集转换为Pascal XML。Tensorflow和labelImg使用不同的格式。这些人脸检测图像将下载到WIDER_train文件夹中。我们将使用002_data-to-pascal-xml.py转换WIDERFace数据并且将数据复制到一个不同的子文件夹中。我的电脑需要5分钟处理9263张图片。

    002_data-to-pascal-xml.py

    #!/usr/bin/env python3
    
    
    """
    This script crawls over 9263 training images and 1873 items
    On my Macbook pro this takes: 4 minutes
    
    """
    import cv2
    import os
    import numpy as np
    from glob import iglob # python 3.5 or newer
    from shutil import copyfile
    
    
    # The script
    curr_path = os.getcwd()

    # xml.etree.cElementTree was deprecated in Python 3.3 and removed in 3.9.
    # Fall back to xml.etree.ElementTree, which provides the same API, so the
    # script keeps working on current interpreters.
    try:
        import xml.etree.cElementTree as ET
    except ImportError:
        import xml.etree.ElementTree as ET

    # settings
    cnt = 0
    # HOG descriptor applied to the 80x80 face crops in readAndWrite(). Per the
    # OpenCV constructor the positional arguments are window size, block size,
    # block stride, cell size and number of orientation bins.
    hog = cv2.HOGDescriptor((80, 80), (16, 16), (8,8), (8,8), 9)
    
    
    def newXMLPASCALfile(imageheight, imagewidth, path, basename):
        """Build an annotation tree for one image, without any objects yet.

        The root ``<annotation verified="yes">`` element receives, in order,
        ``folder`` ("images"), ``filename``, ``path``, ``source/database``
        ("test"), ``size`` (width, height, depth "3") and ``segmented`` ("0").

        Args:
            imageheight: Image height; stored as ``str(imageheight)``.
            imagewidth: Image width; stored as ``str(imagewidth)``.
            path: Text for the ``<path>`` element.
            basename: Text for the ``<filename>`` element.

        Returns:
            An ``ET.ElementTree`` wrapping the new root. Nothing is written to
            disk here.
        """
        root = ET.Element("annotation", verified="yes")

        # Leaf elements directly below the root, in document order.
        for tag, text in (("folder", "images"),
                          ("filename", basename),
                          ("path", path)):
            ET.SubElement(root, tag).text = text

        source_node = ET.SubElement(root, "source")
        ET.SubElement(source_node, "database").text = "test"

        # Image geometry; the depth is always recorded as "3".
        size_node = ET.SubElement(root, "size")
        for tag, text in (("width", str(imagewidth)),
                          ("height", str(imageheight)),
                          ("depth", "3")):
            ET.SubElement(size_node, tag).text = text

        ET.SubElement(root, "segmented").text = "0"

        return ET.ElementTree(root)
    
    def appendXMLPASCAL(curr_et_object,x1, y1, w, h, filename):
        """Append one "face" object to an annotation tree and write it to disk.

        Args:
            curr_et_object: ``ET.ElementTree`` whose root receives the new
                ``<object>`` element.
            x1: Left edge of the box (written as ``xmin``).
            y1: Top edge of the box (written as ``ymin``).
            w: Box width; ``xmax`` is written as ``x1 + w``.
            h: Box height; ``ymax`` is written as ``y1 + h``.
            filename: Path of the image the annotation belongs to. Surrounding
                whitespace is stripped and the final extension is replaced by
                ``.xml`` to obtain the output path.

        Returns:
            The same ``curr_et_object``, after it has been written to the XML
            path (the file is rewritten on every call).
        """
        et_object = ET.SubElement(curr_et_object.getroot(), "object")
        ET.SubElement(et_object, "name").text = "face"
        ET.SubElement(et_object, "pose").text = "Unspecified"
        ET.SubElement(et_object, "truncated").text = "0"
        ET.SubElement(et_object, "difficult").text = "0"
        bndbox = ET.SubElement(et_object, "bndbox")
        ET.SubElement(bndbox, "xmin").text = str(x1)
        ET.SubElement(bndbox, "ymin").text = str(y1)
        ET.SubElement(bndbox, "xmax").text = str(x1+w)
        ET.SubElement(bndbox, "ymax").text = str(y1+h)

        # Swap only the final extension. A plain str.replace(".jpg", ".xml")
        # would also rewrite ".jpg" inside directory names and, worse, leave
        # names such as "x.JPG" untouched so the XML overwrote the image itself.
        xml_path = os.path.splitext(filename.strip())[0] + ".xml"
        curr_et_object.write(xml_path)
        return curr_et_object
    
    
    
    
    def readAndWrite(bbx_gttxtPath):
        """Convert a WIDER FACE ground-truth text file into per-image XML files.

        The file is processed line by line:

        * A line with a single token that is all digits is ignored.
        * Any other single-token line is an image path relative to the
          module-level ``Train_path``. The image is loaded and a fresh
          annotation tree is started for it.
        * Every other line is parsed as ten integers: ``x1 y1 w h blur
          expression illumination invalid occlusion pose``. Boxes with
          ``invalid == 1``, ``blur > 0`` or a longer side below 50 are
          skipped. Each remaining box is appended to the current tree via
          ``appendXMLPASCAL``, which writes the XML next to the image.

        Images that cannot be read are reported and skipped together with
        their boxes instead of aborting the whole run.

        Args:
            bbx_gttxtPath: Path of the ground-truth text file.
        """
        img = None  # pixels of the current image; None while no image is usable
        curr_filename = ""
        curr_path = ""
        curr_et_object = ET.ElementTree()

        with open(bbx_gttxtPath, 'r') as f:
            for line in f:
                # Whitespace splitting copes with the trailing space, "\n" or
                # "\r\n" endings and a final line that has no newline at all.
                inp = line.split()
                if not inp:
                    continue

                if len(inp) == 1:
                    curr_img = inp[0]
                    if curr_img.isdigit():
                        continue

                    # The flag value 2 is passed to imread unchanged.
                    img = cv2.imread(os.path.join(Train_path, curr_img), 2)
                    if img is None:
                        # imread signals failure by returning None; without
                        # this guard img.shape below raises AttributeError.
                        print("skipping unreadable image: " + curr_img)
                        continue

                    curr_filename = os.path.basename(curr_img)
                    curr_path = os.path.join(Train_path, os.path.dirname(curr_img))
                    curr_et_object = newXMLPASCALfile(img.shape[0], img.shape[1], curr_path, curr_filename)
                    continue

                if img is None:
                    # Box belongs to an image that could not be loaded.
                    continue

                x1, y1, w, h, blur, expression, illumination, invalid, occlusion, pose = [int(i) for i in inp]
                n = max(w, h)
                if invalid == 1 or blur > 0 or n < 50:
                    continue

                # The HOG vector of the square crop is computed only so that
                # its length can be reported in the progress line below.
                img2 = img[y1:y1+n, x1:x1+n]
                img3 = cv2.resize(img2, (80, 80))
                vec = hog.compute(img3)

                fileNow = os.path.join(curr_path, curr_filename)
                print("{}: {} {} {} {}".format(len(vec),x1, y1, w, h) + " " + fileNow)

                curr_et_object = appendXMLPASCAL(curr_et_object, x1, y1, w, h, fileNow)
    
    
    # ################################ TRAINING DATA 9263 ITEMS ##################################
    # Write one XML file next to every annotated training image.
    # readAndWrite() reads the module-level Train_path, so it is set first.
    Train_path = os.path.join(curr_path, "data", "WIDER_train", "images" )
    bbx_gttxtPath = os.path.join(curr_path, "data", "wider_face_split", "wider_face_train_bbx_gt.txt" )
    readAndWrite(bbx_gttxtPath)


    # To folders:
    to_xml_folder = os.path.join(curr_path, "data", "tf_wider_train", "annotations", "xmls" )
    to_image_folder = os.path.join(curr_path, "data", "tf_wider_train", "images" )

    # Create each target folder independently. With both calls in one
    # try/except, an existing xml folder made the first call raise and the
    # image folder was then never created.
    os.makedirs(to_xml_folder, exist_ok=True)
    os.makedirs(to_image_folder, exist_ok=True)

    rootdir_glob = Train_path + '/**/*' # recursive pattern; yields paths below Train_path
    file_list = [f for f in iglob(rootdir_glob, recursive=True) if os.path.isfile(f)]

    train_annotations_index = os.path.join(curr_path, "data", "tf_wider_train", "annotations", "train.txt" )

    # Copy every XML file and its image into the flat target folders and append
    # the extension-less base name to the index file, one name per line.
    with open(train_annotations_index, "a") as indexFile:
        for f in file_list:
            # Match on the extension only, not on ".xml" anywhere in the path.
            if f.endswith(".xml"):
                print(f)
                copyfile(f, os.path.join(to_xml_folder, os.path.basename(f) ))
                img = os.path.splitext(f)[0] + ".jpg"
                copyfile(img, os.path.join(to_image_folder, os.path.basename(img) ))
                indexFile.write(os.path.splitext(os.path.basename(f))[0] + "\n")
    
    
    ################################ VALIDATION DATA 1873 ITEMS ##################################

    # Run the conversion for the validation data. readAndWrite() reads the
    # module-level Train_path, so that name is reused for the validation images.
    Train_path = os.path.join(curr_path, "data", "WIDER_val", "images" )
    bbx_gttxtPath = os.path.join(curr_path, "data", "wider_face_split", "wider_face_val_bbx_gt.txt" )
    readAndWrite(bbx_gttxtPath)


    # To folders:
    to_xml_folder = os.path.join(curr_path, "data", "tf_wider_val", "annotations", "xmls" )
    to_image_folder = os.path.join(curr_path, "data", "tf_wider_val", "images" )

    # Create each target folder independently. With both calls in one
    # try/except, an existing xml folder made the first call raise and the
    # image folder was then never created.
    os.makedirs(to_xml_folder, exist_ok=True)
    os.makedirs(to_image_folder, exist_ok=True)


    rootdir_glob = Train_path + '/**/*' # recursive pattern; yields paths below Train_path
    file_list = [f for f in iglob(rootdir_glob, recursive=True) if os.path.isfile(f)]

    train_annotations_index = os.path.join(curr_path, "data", "tf_wider_val", "annotations", "val.txt" )

    # Copy every XML file and its image into the flat target folders and append
    # the extension-less base name to the index file, one name per line.
    with open(train_annotations_index, "a") as indexFile:
        for f in file_list:
            # Match on the extension only, not on ".xml" anywhere in the path.
            if f.endswith(".xml"):
                print(f)
                copyfile(f, os.path.join(to_xml_folder, os.path.basename(f) ))
                img = os.path.splitext(f)[0] + ".jpg"
                copyfile(img, os.path.join(to_image_folder, os.path.basename(img) ))
                indexFile.write(os.path.splitext(os.path.basename(f))[0] + "\n")

    运行

    python 002_data-to-pascal-xml.py

    效果

    创建Pascal XML到Tensorflow CSV的索引

    当数据转换为Pascal XML时,索引已经被创建。通过训练和验证数据集,我们将这些文件作为输入来制作TFRecords。也可以用labelImg这样的工具来手动标记图像,并使用这个步骤在这里创建一个索引。

    003_xml-to-csv.py

    import os
    import glob
    import pandas as pd
    import xml.etree.ElementTree as ET
    
    # source and credits:
    # https://raw.githubusercontent.com/datitran/raccoon_dataset/master/xml_to_csv.py
    
    def xml_to_csv(path):
        """Collect the boxes of all ``*.xml`` files in a folder into a DataFrame.

        Each ``<object>`` element becomes one row. Elements are looked up by
        tag name rather than by child position, so files whose children are
        ordered differently, or that carry extra elements, are read correctly.
        Files are processed in sorted order so the row order is reproducible.

        Args:
            path: Directory that directly contains the XML annotation files.

        Returns:
            A ``pandas.DataFrame`` with the columns ``filename``, ``width``,
            ``height``, ``class``, ``xmin``, ``ymin``, ``xmax``, ``ymax``. It
            is empty (columns only) when no objects are found.
        """
        xml_list = []
        for xml_file in sorted(glob.glob(path + '/*.xml')):
            root = ET.parse(xml_file).getroot()
            members = root.findall('object')
            if not members:
                # Nothing to emit; do not touch filename/size of such files.
                continue

            filename = root.find('filename').text
            size = root.find('size')
            width = int(size.find('width').text)
            height = int(size.find('height').text)

            for member in members:
                bndbox = member.find('bndbox')
                xml_list.append((
                    filename,
                    width,
                    height,
                    member.find('name').text,
                    int(bndbox.find('xmin').text),
                    int(bndbox.find('ymin').text),
                    int(bndbox.find('xmax').text),
                    int(bndbox.find('ymax').text),
                ))

        column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
        xml_df = pd.DataFrame(xml_list, columns=column_name)
        return xml_df
    
    
    def train():
        """Write data/tf_wider_train/train.csv from the training XML folder.

        Both paths are resolved relative to the current working directory.
        """
        split_root = os.path.join(os.getcwd(), 'data', 'tf_wider_train')
        xml_dir = os.path.join(split_root, 'annotations', 'xmls')
        csv_file = os.path.join(split_root, 'train.csv')

        # index=None keeps the DataFrame index out of the CSV.
        xml_to_csv(xml_dir).to_csv(csv_file, index=None)
        print('> tf_wider_train - Successfully converted xml to csv.')
    
    def val():
        """Write data/tf_wider_val/val.csv from the validation XML folder.

        Both paths are resolved relative to the current working directory.
        """
        split_root = os.path.join(os.getcwd(), 'data', 'tf_wider_val')
        xml_dir = os.path.join(split_root, 'annotations', 'xmls')
        csv_file = os.path.join(split_root, 'val.csv')

        # index=None keeps the DataFrame index out of the CSV.
        xml_to_csv(xml_dir).to_csv(csv_file, index=None)
        print('> tf_wider_val -  Successfully converted xml to csv.')
    
    # Convert both splits. There is no __main__ guard, so these calls also run
    # whenever the module is imported.
    train()
    val()

    运行

    python 003_xml-to-csv.py

    效果

    创建TFRecord文件

    TFRecords文件是一个大型的二进制文件,该文件被读取以训练机器学习模型。在下一步中,该文件将被Tensorflow按顺序读取。训练和验证数据将被转换成二进制文件。

    004_generate_tfrecord.py

    """
    Usage:
      # From tensorflow/models/
      # Create train data:
      python3 004_generate_tfrecord.py --images_path=data/tf_wider_train/images --csv_input=data/tf_wider_train/train.csv  --output_path=data/train.record
      # creates 847.6MB train.record
    
      # Create test/validation data:
      python3 004_generate_tfrecord.py --images_path=data/tf_wider_val/images --csv_input=data/tf_wider_val/val.csv  --output_path=data/val.record
      # creates 213.1MB  val.record
    
      source without adjustments: https://raw.githubusercontent.com/datitran/raccoon_dataset/master/generate_tfrecord.py
    """
    
    from __future__ import division
    from __future__ import print_function
    from __future__ import absolute_import
    
    import os
    import io
    import pandas as pd
    import tensorflow as tf
    
    from PIL import Image
    from object_detection.utils import dataset_util # from path
    from collections import namedtuple, OrderedDict # tf slim
    
    # Command-line flags of the script. All three are strings that default to
    # the empty string; main() reads them through FLAGS.
    flags = tf.app.flags
    flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
    flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
    flags.DEFINE_string('images_path', '', 'Path to images_folder')

    # Object through which the flag values are accessed (FLAGS.csv_input, ...).
    FLAGS = flags.FLAGS
    
    
    # TO-DO replace this with label map
    def class_text_to_int(row_label):
        """Map a class name to its integer id.

        Only ``'face'`` is known and maps to 1; every other value yields
        ``None``.
        """
        return 1 if row_label == 'face' else None
    
    
    def split(df, group):
        """Group the rows of ``df`` by ``group`` and return one record per key.

        Args:
            df: DataFrame holding one row per bounding box.
            group: Column name passed to ``DataFrame.groupby``.

        Returns:
            A list of ``data(filename, object)`` namedtuples, one per group
            key, where ``object`` is the sub-frame returned by ``get_group``.
        """
        record = namedtuple('data', ['filename', 'object'])
        grouped = df.groupby(group)

        records = []
        for key in grouped.groups:
            records.append(record(key, grouped.get_group(key)))
        return records
    
    
    def create_tf_example(group, path):
        """Build a ``tf.train.Example`` for one image and all of its boxes.

        Args:
            group: Record with a ``filename`` attribute and an ``object``
                DataFrame whose rows provide ``xmin``, ``xmax``, ``ymin``,
                ``ymax`` and ``class`` (as produced by ``split``).
            path: Directory in which ``group.filename`` is looked up.

        Returns:
            The example holding the raw image bytes, the image size read from
            those bytes, and the box coordinates divided by that size.
        """
        image_file = os.path.join(path, '{}'.format(group.filename))
        with tf.gfile.GFile(image_file, 'rb') as fid:
            encoded_jpg = fid.read()

        # The size comes from the image itself, not from the CSV columns.
        width, height = Image.open(io.BytesIO(encoded_jpg)).size

        filename = group.filename.encode('utf8')
        image_format = b'jpg'

        xmins, xmaxs, ymins, ymaxs = [], [], [], []
        classes_text, classes = [], []
        for _, box in group.object.iterrows():
            # Pixel coordinates are divided by the image width/height.
            xmins.append(box['xmin'] / width)
            xmaxs.append(box['xmax'] / width)
            ymins.append(box['ymin'] / height)
            ymaxs.append(box['ymax'] / height)
            classes_text.append(box['class'].encode('utf8'))
            classes.append(class_text_to_int(box['class']))

        feature = {
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(filename),
            'image/source_id': dataset_util.bytes_feature(filename),
            'image/encoded': dataset_util.bytes_feature(encoded_jpg),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label': dataset_util.int64_list_feature(classes),
        }
        return tf.train.Example(features=tf.train.Features(feature=feature))
    
    
    def main(_):
        """Convert the CSV named by ``--csv_input`` into a TFRecord file.

        Rows are grouped by ``filename``; each group becomes one serialized
        example written to ``--output_path``. Images are looked up in
        ``--images_path`` relative to the current working directory.

        Args:
            _: Unused positional argument supplied by ``tf.app.run``.
        """
        path = os.path.join(os.getcwd(), FLAGS.images_path)

        # Read and group the CSV before opening the writer, so a bad CSV does
        # not leave an empty record file behind.
        examples = pd.read_csv(FLAGS.csv_input)
        grouped = split(examples, 'filename')

        # The context manager closes the writer even if an example fails.
        with tf.python_io.TFRecordWriter(FLAGS.output_path) as writer:
            for group in grouped:
                tf_example = create_tf_example(group, path)
                writer.write(tf_example.SerializeToString())

        output_path = os.path.join(os.getcwd(), FLAGS.output_path)
        print('Successfully created the TFRecords: {}'.format(output_path))
    
    
    # Entry point when run as a script. tf.app.run() is called without
    # arguments; presumably it parses the flags defined above and then calls
    # main(_) - confirm against the TensorFlow 1.x documentation.
    if __name__ == '__main__':
        tf.app.run()
    

    TFRecord的训练数据(847.6 MB)

    python 004_generate_tfrecord.py --images_path=data/tf_wider_train/images --csv_input=data/tf_wider_train/train.csv  --output_path=data/train.record

    TFRecord 的验证数据(213.1MB)

    python 004_generate_tfrecord.py --images_path=data/tf_wider_val/images --csv_input=data/tf_wider_val/val.csv  --output_path=data/val.record

    至此数据预处理已全部完成,效果如下:

  • 相关阅读:
    linux中的find命令——查找文件名
    int main(int argc,char* argv[])详解
    VIM进阶学习之几种模式和按键映射
    Fortran编译多个文件(转载)
    Vimdiff---VIM的比较和合并工具
    两篇很牛的vim使用技巧
    程序员软件的罪恶:从不清楚地汇报事故原因
    Free symbol is Harmful
    return语言结构 VS. scala默认返回值
    踩过的“坑”: 命令行指定Java class path
  • 原文地址:https://www.cnblogs.com/gmhappy/p/9472362.html
Copyright © 2011-2022 走看看