zoukankan      html  css  js  c++  java
  • 第十一节,利用yolov3训练自己的数据集

    1、环境配置

    tensorflow1.12.0

    Opencv3.4.2

    keras

    pycharm

    2、配置yolov3

    • 下载yolov3代码:https://github.com/qqwweee/keras-yolo3
    • 下载权重:https://pjreddie.com/media/files/yolov3.weights,并将权重文件放在keras-yolo3-master文件下
    • 执行如下命令将darknet下的yolov3配置文件转换成keras适用的h5文件。

                   python convert.py yolov3.cfg yolov3.weights model_data/yolo.h5

    更改了一下代码:重新编写了一个测试代码object_detection_yolo.py

    # This code is written at BigVision LLC. It is based on the OpenCV project. It is subject to the license terms in the LICENSE file found in this distribution and at http://opencv.org/license.html
    
    # Usage example:  python3 object_detection_yolo.py --video=run.mp4
    #                 python3 object_detection_yolo.py --image=bird.jpg
    
    import cv2 as cv
    import argparse
    import sys
    import numpy as np
    import os.path
    
    # Detection parameters shared by the rest of the script.
    confThreshold = 0.5  # Minimum class score for a detection to be kept (also passed to NMSBoxes)
    nmsThreshold = 0.4   # Overlap threshold passed to cv.dnn.NMSBoxes
    inpWidth = 416       # Width the frame is resized to when building the network input blob
    inpHeight = 416      # Height the frame is resized to when building the network input blob

    # Command line: --image or --video selects the input; with neither, the
    # script falls back to the default webcam (device 0) further below.
    parser = argparse.ArgumentParser(description='Object Detection using YOLO in OPENCV')
    parser.add_argument('--image', help='Path to image file.')
    parser.add_argument('--video', help='Path to video file.')
    args = parser.parse_args()
            
    # Load names of classes: one name per line. drawPred() indexes this list
    # with the class id predicted by the network.
    classesFile = "model_data/coco_classes.txt"
    # Alternative loader kept for reference (its '\n' escapes had been lost,
    # which turned the comment into a syntax error):
    # with open(classesFile, 'rt') as f:
    #     classes = f.read().rstrip('\n').split('\n')
    classes_path = os.path.expanduser(classesFile)
    with open(classes_path) as f:
        class_names = f.readlines()
    # Strip the trailing newline (and any surrounding whitespace) of each entry.
    classes = [c.strip() for c in class_names]
    
    # Darknet network definition and trained weights, both expected in the
    # current working directory.
    modelConfiguration = "yolov3.cfg"
    modelWeights = "yolov3.weights"

    # Build the network with OpenCV's dnn module and run it on the CPU using
    # the plain OpenCV backend.
    net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
    net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
    net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
    
    # Get the names of the output layers
    def getOutputsNames(net):
        """Return the names of the network's output layers.

        The output layers are those reported by ``net.getUnconnectedOutLayers()``.

        Args:
            net: object exposing ``getLayerNames()`` and
                ``getUnconnectedOutLayers()`` (here the loaded ``cv.dnn`` network).

        Returns:
            list of layer names, in the order the ids are reported.
        """
        # Get the names of all the layers in the network
        layersNames = net.getLayerNames()
        # The reported ids are 1-based. Depending on the OpenCV version they
        # come back either as an Nx1 array or as a flat 1-D array; flattening
        # first makes both shapes work.
        outLayerIds = np.asarray(net.getUnconnectedOutLayers()).flatten()
        return [layersNames[int(layerId) - 1] for layerId in outLayerIds]
    
    # Draw the predicted bounding box
    def drawPred(classId, conf, left, top, right, bottom):
        """Draw one detection (box plus text label) onto the global ``frame``.

        Args:
            classId: index into the global ``classes`` list.
            conf: confidence value, printed with two decimals.
            left, top, right, bottom: box corners in frame pixel coordinates.
        """
        font = cv.FONT_HERSHEY_SIMPLEX

        # Bounding box outline.
        cv.rectangle(frame, (left, top), (right, bottom), (255, 178, 50), 3)

        # Label text: "<class>:<confidence>" when class names are loaded,
        # otherwise just the confidence.
        label = '%.2f' % conf
        if classes:
            assert(classId < len(classes))
            label = '%s:%s' % (classes[classId], label)

        # The text is measured at scale 0.5 but rendered at 0.75, so the white
        # background is stretched by 1.5 in both directions to match.
        (textWidth, textHeight), baseLine = cv.getTextSize(label, font, 0.5, 1)
        # Keep the label inside the image when the box touches the top edge.
        top = max(top, textHeight)
        backgroundTopLeft = (left, top - round(1.5 * textHeight))
        backgroundBottomRight = (left + round(1.5 * textWidth), top + baseLine)
        cv.rectangle(frame, backgroundTopLeft, backgroundBottomRight, (255, 255, 255), cv.FILLED)
        cv.putText(frame, label, (left, top), font, 0.75, (0, 0, 0), 1)
    
    # Remove the bounding boxes with low confidence using non-maxima suppression
    def postprocess(frame, outs):
        """Filter the raw network outputs and draw the surviving detections.

        Each detection row is read as: elements 0-3 are the box centre x,
        centre y, width and height relative to the frame size, and elements
        5 onwards are per-class scores. Element 4 is not used here
        (presumably the objectness score - confirm against the model docs).

        Args:
            frame: image array; only ``frame.shape[0]`` (height) and
                ``frame.shape[1]`` (width) are read here. Drawing is done by
                ``drawPred``, which writes to the global ``frame``.
            outs: iterable of output arrays, one per output layer.
        """
        frameHeight = frame.shape[0]
        frameWidth = frame.shape[1]

        # Scan through all the bounding boxes output from the network and keep only the
        # ones with high confidence scores. Assign the box's class label as the class with the highest score.
        classIds = []
        confidences = []
        boxes = []
        for out in outs:
            for detection in out:
                scores = detection[5:]
                classId = int(np.argmax(scores))
                confidence = scores[classId]
                if confidence > confThreshold:
                    # Convert the relative centre/size box to an absolute
                    # [left, top, width, height] pixel box.
                    center_x = int(detection[0] * frameWidth)
                    center_y = int(detection[1] * frameHeight)
                    width = int(detection[2] * frameWidth)
                    height = int(detection[3] * frameHeight)
                    left = int(center_x - width / 2)
                    top = int(center_y - height / 2)
                    classIds.append(classId)
                    confidences.append(float(confidence))
                    boxes.append([left, top, width, height])

        # Perform non maximum suppression to eliminate redundant overlapping boxes with
        # lower confidences.
        indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
        # Depending on the OpenCV version the kept indices come back as an Nx1
        # array, a flat array, or an empty tuple; flattening handles all three.
        for i in np.asarray(indices).flatten():
            i = int(i)
            left, top, width, height = boxes[i][:4]
            drawPred(classIds[i], confidences[i], left, top, left + width, top + height)
    
    # Process inputs
    winName = 'Deep learning object detection in OpenCV'
    #cv.namedWindow(winName, cv.WINDOW_NORMAL)

    # Pick the input source (image, video file, or webcam) and derive the
    # output file name from it. splitext() is used instead of slicing off the
    # last four characters so extensions such as ".jpeg" are handled too.
    outputFile = "yolo_out_py.avi"
    if args.image:
        # Open the image file
        if not os.path.isfile(args.image):
            print("Input image file ", args.image, " doesn't exist")
            sys.exit(1)
        cap = cv.VideoCapture(args.image)
        outputFile = os.path.splitext(args.image)[0] + '_yolo_out_py.jpg'
    elif args.video:
        # Open the video file
        if not os.path.isfile(args.video):
            print("Input video file ", args.video, " doesn't exist")
            sys.exit(1)
        cap = cv.VideoCapture(args.video)
        outputFile = os.path.splitext(args.video)[0] + '_yolo_out_py.avi'
    else:
        # Webcam input
        cap = cv.VideoCapture(0)

    # Get the video writer initialized to save the output video
    # (not needed for a single image, which is written with imwrite).
    vid_writer = None
    if not args.image:
        frameSize = (round(cap.get(cv.CAP_PROP_FRAME_WIDTH)),
                     round(cap.get(cv.CAP_PROP_FRAME_HEIGHT)))
        vid_writer = cv.VideoWriter(outputFile, cv.VideoWriter_fourcc('M', 'J', 'P', 'G'), 30, frameSize)

    # The output layer names do not change between frames, so look them up once.
    outputLayerNames = getOutputsNames(net)

    try:
        # Loop until a key is pressed or the input runs out of frames.
        while cv.waitKey(1) < 0:

            # get frame from the video
            hasFrame, frame = cap.read()

            # Stop the program if reached end of video
            if not hasFrame:
                print("Done processing !!!")
                print("Output file is stored as ", outputFile)
                cv.waitKey(3000)
                break

            # Create a 4D blob from a frame: pixel values scaled by 1/255,
            # resized to the network input size, no mean subtraction, no crop.
            blob = cv.dnn.blobFromImage(frame, 1/255, (inpWidth, inpHeight), [0, 0, 0], 1, crop=False)

            # Sets the input to the network
            net.setInput(blob)

            # Runs the forward pass to get output of the output layers
            outs = net.forward(outputLayerNames)

            # Remove the bounding boxes with low confidence
            postprocess(frame, outs)

            # Put efficiency information. The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes)
            t, _ = net.getPerfProfile()
            label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
            cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))

            # Write the frame with the detection boxes
            if args.image:
                cv.imwrite(outputFile, frame.astype(np.uint8))
            else:
                vid_writer.write(frame.astype(np.uint8))

            #cv.imshow(winName, frame)
    finally:
        # Release the capture and flush/close the output video even if the
        # loop exits through an exception.
        cap.release()
        if vid_writer is not None:
            vid_writer.release()
    View Code

     

    3、用自己的数据集训练

    • 在工程下新建一个文件夹VOCdevkit,结构与VOC数据集格式保持一致,目录结构如下所示:

    将自己的数据图片放入JPEGImages文件夹中,

    • 生成ImageSets/Main/下的4个文件,在VOC2007下新建一个test.py文件:
    import os
    
    import random
    
    # Split the annotated samples into the four VOC image-set lists.
    #
    # NOTE: the variable names are misleading but the resulting split is kept
    # as-is: a random `trainval_percent` share of the samples is written to
    # trainval.txt; of that share, `train_percent` goes to test.txt and the
    # rest to val.txt. Every sample outside that share goes to train.txt.
    trainval_percent = 0.2
    train_percent = 0.8

    xmlfilepath = 'Annotations'
    txtsavepath = 'ImageSets/Main'

    total_xml = os.listdir(xmlfilepath)
    num = len(total_xml)
    indices = range(num)  # renamed from `list`, which shadowed the builtin

    tv = int(num * trainval_percent)
    tr = int(tv * train_percent)
    trainval = random.sample(indices, tv)
    train = random.sample(trainval, tr)

    # Sets give O(1) membership tests inside the loop below.
    trainval_ids = set(trainval)
    train_ids = set(train)

    # Make sure the output directory exists before opening the list files.
    os.makedirs(txtsavepath, exist_ok=True)

    with open(os.path.join(txtsavepath, 'trainval.txt'), 'w') as ftrainval, \
            open(os.path.join(txtsavepath, 'test.txt'), 'w') as ftest, \
            open(os.path.join(txtsavepath, 'train.txt'), 'w') as ftrain, \
            open(os.path.join(txtsavepath, 'val.txt'), 'w') as fval:
        for i in indices:
            # One sample id per line: the annotation file name without its extension.
            name = os.path.splitext(total_xml[i])[0] + '\n'
            if i in trainval_ids:
                ftrainval.write(name)
                if i in train_ids:
                    ftest.write(name)
                else:
                    fval.write(name)
            else:
                ftrain.write(name)
    View Code

     运行代码之后,生成如下文件,VOC2007数据集制作完成。

    • 生成yolo3所需的train.txt,val.txt,test.txt

      生成的数据集不能供yolov3直接使用。需要运行voc_annotation.py(迁移项目时必须重新运行,涉及路径问题) ,classes以检测两个类为例(redlight和greenlight),在voc_annotation.py需改你的数据集为:

      运行之后,生成如下三个文件:

     文件内容如图所示:

     

    • 修改参数文件yolov3.cfg

                       打开yolov3.cfg文件。搜索yolo(共出现三次),每次按下图都要修改:

    filters:3*(5+len(classes)),即每个[yolo]层前面最近的[convolutional]层的filters(本例训练两类,filters=3*(5+2)=21)

                      classes:你要训练的类别数(我这里是训练两类) 

                      random:原来是1,显存小改为0

    • 修改model_data下的voc_classes.txt为自己训练的类别

    •  修改train.py代码(用下面代码直接替换原来的代码)
    """
    
    Retrain the YOLO model for your own dataset.
    
    """
    
    import numpy as np
    
    import keras.backend as K
    
    from keras.layers import Input, Lambda
    
    from keras.models import Model
    
    from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping
    
    
    
    from yolo3.model import preprocess_true_boxes, yolo_body, tiny_yolo_body, yolo_loss
    
    from yolo3.utils import get_random_data
    
    
    
    
    
    def _main():
        """Entry point: build the YOLOv3 training model and train it.

        Class names and anchors are read from ``model_data/``, the annotation
        list from ``2007_train.txt``; training output goes to ``logs/000/``.
        """
        input_shape = (416, 416)  # multiple of 32, hw

        class_names = get_classes('model_data/voc_classes.txt')
        anchors = get_anchors('model_data/yolo_anchors.txt')
        num_classes = len(class_names)

        model = create_model(input_shape, anchors, num_classes)
        train(model, '2007_train.txt', input_shape, anchors, num_classes, log_dir='logs/000/')
    
    
    
    def train(model, annotation_path, input_shape, anchors, num_classes, log_dir='logs/',
              batch_size=10, val_split=0.1, epochs=500):
        """Compile and fit the model on the annotation file, then save the weights.

        Args:
            model: Keras model whose output named ``yolo_loss`` already is the loss value.
            annotation_path: text file with one annotation line per sample.
            input_shape: (height, width) forwarded to the data generator.
            anchors: anchor array forwarded to the data generator.
            num_classes: number of classes forwarded to the data generator.
            log_dir: directory for TensorBoard logs, checkpoints and the final
                weights; created if missing.
            batch_size: samples per batch (default 10, as before).
            val_split: fraction of the shuffled lines held out for validation
                (default 0.1, as before).
            epochs: number of training epochs (default 500, as before).
        """
        import os  # local import so this function does not depend on a file-level import

        # Checkpoints and the final weights are written into log_dir; create it
        # up front so saving does not fail on a missing directory.
        os.makedirs(log_dir, exist_ok=True)

        # The model output is the loss itself, so the Keras "loss function"
        # simply passes the prediction through.
        model.compile(optimizer='adam', loss={
            'yolo_loss': lambda y_true, y_pred: y_pred})

        with open(annotation_path) as f:
            lines = f.readlines()
        np.random.shuffle(lines)
        num_val = int(len(lines) * val_split)
        num_train = len(lines) - num_val
        has_val = num_val > 0

        logging = TensorBoard(log_dir=log_dir)
        # Without validation samples there is no `val_loss` in the epoch logs,
        # so the checkpoint name and monitored quantity fall back to `loss`.
        if has_val:
            checkpoint_name = "ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5"
            monitor = 'val_loss'
        else:
            checkpoint_name = "ep{epoch:03d}-loss{loss:.3f}.h5"
            monitor = 'loss'
        checkpoint = ModelCheckpoint(os.path.join(log_dir, checkpoint_name),
            monitor=monitor, save_weights_only=True, save_best_only=True, period=1)

        print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))

        # The callbacks were previously created but never handed to Keras, so
        # neither TensorBoard logs nor checkpoints were produced.
        model.fit_generator(data_generator_wrap(lines[:num_train], batch_size, input_shape, anchors, num_classes),
                steps_per_epoch=max(1, num_train//batch_size),
                validation_data=data_generator_wrap(lines[num_train:], batch_size, input_shape, anchors, num_classes),
                validation_steps=max(1, num_val//batch_size) if has_val else None,
                epochs=epochs,
                initial_epoch=0,
                callbacks=[logging, checkpoint])

        model.save_weights(os.path.join(log_dir, 'trained_weights.h5'))
    
    
    
    def get_classes(classes_path):
        """Read the class list file and return its lines, whitespace-stripped.

        Args:
            classes_path: path of a text file with one class name per line.

        Returns:
            list of str, one entry per line of the file, in file order.
        """
        with open(classes_path) as handle:
            raw_lines = handle.readlines()
        return [raw.strip() for raw in raw_lines]
    
    
    
    def get_anchors(anchors_path):
        """Load anchors from the first line of a comma-separated text file.

        Args:
            anchors_path: path of the anchors file; only its first line is read.

        Returns:
            numpy array of floats reshaped to two columns (one row per pair of values).
        """
        with open(anchors_path) as handle:
            first_line = handle.readline()
        values = np.array([float(token) for token in first_line.split(',')])
        return values.reshape(-1, 2)
    
    
    
    def create_model(input_shape, anchors, num_classes, load_pretrained=False, freeze_body=False,
                weights_path='model_data/yolo_weights.h5'):
        """Build the training model: YOLOv3 body plus a Lambda layer computing the loss.

        Args:
            input_shape: (height, width) used to size the ground-truth inputs.
            anchors: anchor array; a third of them is assigned to each of the three scales.
            num_classes: number of object classes.
            load_pretrained: when true, load ``weights_path`` into the body by
                layer name, skipping mismatching layers.
            freeze_body: when true (and weights were loaded), mark all but the
                last 7 body layers as non-trainable.
            weights_path: weights file used when ``load_pretrained`` is set.

        Returns:
            Keras ``Model`` taking ``[image, *y_true]`` and outputting the
            ``yolo_loss`` layer.
        """
        K.clear_session()  # get a new session
        image_input = Input(shape=(None, None, 3))
        h, w = input_shape
        num_anchors = len(anchors)
        anchors_per_scale = num_anchors // 3

        # One ground-truth input per output scale; the grids are the input
        # size divided by 32, 16 and 8 respectively.
        y_true = []
        for stride in (32, 16, 8):
            grid_shape = (h // stride, w // stride, anchors_per_scale, num_classes + 5)
            y_true.append(Input(shape=grid_shape))

        model_body = yolo_body(image_input, anchors_per_scale, num_classes)
        print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))

        if load_pretrained:
            model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
            print('Load weights {}.'.format(weights_path))
            if freeze_body:
                # Do not freeze 3 output layers.
                num = len(model_body.layers) - 7
                for layer in model_body.layers[:max(num, 0)]:
                    layer.trainable = False
                print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))

        # The loss is computed inside the graph from the body outputs and the
        # ground-truth inputs, and exposed as the model's only output.
        loss_arguments = {'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5}
        loss_layer = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss', arguments=loss_arguments)
        model_loss = loss_layer([*model_body.output, *y_true])
        return Model([model_body.input, *y_true], model_loss)
    
    def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes):
        """Endlessly yield training batches built from the annotation lines.

        The lines are shuffled in place once, then consumed cyclically. Each
        yielded item is ``([image_data, *y_true], zeros(batch_size))``; the
        zeros act as placeholder targets.

        Args:
            annotation_lines: list of annotation lines (shuffled in place).
            batch_size: number of samples per yielded batch.
            input_shape: forwarded to ``get_random_data`` and ``preprocess_true_boxes``.
            anchors: forwarded to ``preprocess_true_boxes``.
            num_classes: forwarded to ``preprocess_true_boxes``.
        """
        total = len(annotation_lines)
        np.random.shuffle(annotation_lines)
        cursor = 0
        while True:
            images, boxes = [], []
            for _ in range(batch_size):
                # Wrap around to the first line after the last one.
                cursor = cursor % total
                sample_image, sample_boxes = get_random_data(annotation_lines[cursor], input_shape, random=True)
                images.append(sample_image)
                boxes.append(sample_boxes)
                cursor += 1
            image_data = np.array(images)
            box_data = np.array(boxes)
            y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
            yield [image_data, *y_true], np.zeros(batch_size)
    
    
    
    def data_generator_wrap(annotation_lines, batch_size, input_shape, anchors, num_classes):
        """Return a ``data_generator`` for the given lines, or ``None`` if unusable.

        ``None`` is returned when there are no annotation lines or when
        ``batch_size`` is not positive; otherwise all arguments are passed
        through to ``data_generator`` unchanged.
        """
        if len(annotation_lines) == 0:
            return None
        if batch_size <= 0:
            return None
        return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes)
    
    
    
    if __name__ == '__main__':
    
        _main()
    View Code

     替换完成后,千万千万值得注意的是,因为程序中有logs/000/目录,你需要创建这样一个目录,这个目录的作用就是存放自己的数据集训练得到的模型。不然程序运行到最后会因为找不到该路径而发生错误。

  • 相关阅读:
    1_Flask开启debug
    29_使用celery发送短信
    00_celery介绍(处理耗时任务)
    28_django限制请求方法装饰器
    27_扩展User模型
    05-3_单链表的实现
    05-2_单向链表
    05-1_链表的定义
    04-2_Python中的线性表
    04-1_线性表的操作
  • 原文地址:https://www.cnblogs.com/wyx501/p/10644248.html
Copyright © 2011-2022 走看看