  • YOLO Source Code Analysis for Object Detection

    III. The Configuration File in Detail (config.py)

    import os
    
    
    # Paths for the dataset and model checkpoints
    #
    # path and dataset parameter
    #
    
    DATA_PATH = 'data'        # root directory for all data
    
    PASCAL_PATH = os.path.join(DATA_PATH, 'pascal_voc')  # directory of the VOC2012 dataset
    
    CACHE_PATH = os.path.join(PASCAL_PATH, 'cache')      # folder for the generated label cache files
    
    OUTPUT_DIR = os.path.join(PASCAL_PATH, 'output')     # folder for saved models and log files
    
    WEIGHTS_DIR = os.path.join(PASCAL_PATH, 'weights')   # directory for checkpoint files
    
    WEIGHTS_FILE = None
    # WEIGHTS_FILE = os.path.join(DATA_PATH, 'weights', 'YOLO_small.ckpt')
    
    # VOC2012 class names
    CLASSES = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
               'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
               'motorbike', 'person', 'pottedplant', 'sheep', 'sofa',
               'train', 'tvmonitor']
    
    # Use horizontal flipping to double the dataset
    FLIPPED = True
    
    
    # Network model parameters
    #
    # model parameter
    #
    
    # Input image size
    IMAGE_SIZE = 448
    
    # Cell grid size: the image is divided into 7x7 cells
    CELL_SIZE = 7
    
    # Number of bounding boxes per cell, B = 2
    BOXES_PER_CELL = 2
    
    # Slope coefficient of the leaky ReLU activation
    ALPHA = 0.1
    
    # Whether to print information to the console
    DISP_CONSOLE = False
    
    # Loss function weights
    OBJECT_SCALE = 1.0    # confidence weight when an object is present
    NOOBJECT_SCALE = 1.0  # confidence weight when no object is present
    CLASS_SCALE = 2.0     # class weight
    COORD_SCALE = 5.0     # bounding-box coordinate weight
    
    # Training parameters
    #
    # solver parameter
    #
    
    GPU = ''
    # Learning rate
    LEARNING_RATE = 0.0001
    # Decay steps of the exponentially decaying learning rate
    DECAY_STEPS = 30000
    # Decay rate
    DECAY_RATE = 0.1
    
    STAIRCASE = True
    
    # Batch size
    BATCH_SIZE = 45
    # Maximum number of iterations
    MAX_ITER = 15000
    
    # Interval (in steps) between summary/log writes
    SUMMARY_ITER = 10
    # Interval (in steps) between model checkpoints
    SAVE_ITER = 1000
    
    # Test-time parameters
    #
    # test parameter
    #
    # Confidence threshold for a cell to contain an object
    THRESHOLD = 0.2
    # IOU threshold for non-maximum suppression
    IOU_THRESHOLD = 0.5
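
    The model-related constants above fix the length of the network's flattened output vector. As a quick sanity check, here is a standalone sketch using the same values (not part of config.py):

    # Sketch: derive the flattened output length from the config values above.
    S = 7        # CELL_SIZE
    B = 2        # BOXES_PER_CELL
    C = 20       # len(CLASSES)
    output_size = S * S * (C + B * 5)   # class scores + box confidences + 4 coords per box
    print(output_size)                  # 1470 = 7*7*20 + 7*7*2 + 7*7*2*4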

    IV. The yolo Folder in Detail

    The YOLO network is built by the code in yolo_net.py inside the yolo folder. This file defines the YOLONet class, which contains methods for network initialization (__init__()), building the network (build_network()), and the loss function (loss_layer()), among others.

    import numpy as np
    import tensorflow as tf
    import yolo.config as cfg
    
    slim = tf.contrib.slim
    
    
    class YOLONet(object):
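
    Before walking through the class method by method, here is a minimal usage sketch. The variable names and the dummy batch are illustrative only; the project's actual training script is not covered in this excerpt:

    # Illustrative sketch: build the graph and run one forward pass.
    yolo = YOLONet(is_training=True)   # constructs the network and the loss

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # a dummy batch standing in for preprocessed 448x448 RGB images
        batch = np.zeros((cfg.BATCH_SIZE, cfg.IMAGE_SIZE, cfg.IMAGE_SIZE, 3), np.float32)
        logits = sess.run(yolo.logits, feed_dict={yolo.images: batch})
        print(logits.shape)            # (45, 1470)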

    1. Network Parameter Initialization

    All of the network's initialization parameters are set in the __init__() method.

        # Network parameter initialization
        def __init__(self, is_training=True):
            '''
            The constructor initializes the network parameters from the cfg file and defines
            the input and output sizes of the network. offset is a fixed grid of cell offsets,
            while boundary1 and boundary2 mark where each kind of information (class scores,
            confidences, boxes) ends in the flattened output.
            boundary1 is the number of class predictions over all cells, i.e.
            self.cell_size * self.cell_size * self.num_class;
            boundary2 adds the total number of bounding-box confidences over all cells, i.e.
            boundary1 + self.cell_size * self.cell_size * self.boxes_per_cell
            '''
            # VOC2012 class names
            self.classes = cfg.CLASSES
            # Number of classes C = 20
            self.num_class = len(self.classes)
            
            # Network input image size: 448 (448 x 448)
            self.image_size = cfg.IMAGE_SIZE
            # Cell grid size S = 7: the image is divided into S x S cells
            self.cell_size = cfg.CELL_SIZE
            # Number of bounding boxes per cell, B = 2
            self.boxes_per_cell = cfg.BOXES_PER_CELL
            
            # Size of the network output: S*S*(B*5 + C) = 1470
            self.output_size = (self.cell_size * self.cell_size) * \
                (self.num_class + self.boxes_per_cell * 5)
                
            # Size of one cell in pixels: 448 / 7 = 64
            self.scale = 1.0 * self.image_size / self.cell_size
            
            # Boundaries that split the output into class scores, confidences and boxes:
            # 7*7*20 + 7*7*2 + 7*7*2*4 = 1470
            # 7*7*20
            self.boundary1 = self.cell_size * self.cell_size * self.num_class
            # 7*7*20 + 7*7*2
            self.boundary2 = self.boundary1 + \
                self.cell_size * self.cell_size * self.boxes_per_cell
            
            # Loss function weights
            self.object_scale = cfg.OBJECT_SCALE      # 1.0
            self.noobject_scale = cfg.NOOBJECT_SCALE  # 1.0
            self.class_scale = cfg.CLASS_SCALE        # 2.0
            self.coord_scale = cfg.COORD_SCALE        # 5.0
    
            # Learning rate 0.0001
            self.learning_rate = cfg.LEARNING_RATE
            # Batch size 45
            self.batch_size = cfg.BATCH_SIZE
            # Leaky ReLU slope coefficient 0.1
            self.alpha = cfg.ALPHA
    
            # Cell offset grid, shape [7, 7, 2]
            self.offset = np.transpose(np.reshape(np.array(
                [np.arange(self.cell_size)] * self.cell_size * self.boxes_per_cell),
                (self.boxes_per_cell, self.cell_size, self.cell_size)), (1, 2, 0))
    
            # Placeholder for the input images, [None, image_size, image_size, 3]
            self.images = tf.placeholder(
                tf.float32, [None, self.image_size, self.image_size, 3],
                name='images')
            # Build the network and get its raw output (before any activation), shape [None, 1470]
            self.logits = self.build_network(
                self.images, num_outputs=self.output_size, alpha=self.alpha,
                is_training=is_training)
    
            if is_training:
                # Placeholder for the labels, [None, S, S, 5 + C], i.e. [None, 7, 7, 25]
                self.labels = tf.placeholder(
                    tf.float32,
                    [None, self.cell_size, self.cell_size, 5 + self.num_class])
                # Define the loss function
                self.loss_layer(self.logits, self.labels)
                # Total loss, including the weight regularization terms
                self.total_loss = tf.losses.get_total_loss()
                # Record the loss as a scalar summary named total_loss
                tf.summary.scalar('total_loss', self.total_loss)
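
    The offset tensor built in __init__ is easy to visualize with a standalone numpy sketch (S = 7, B = 2 as above). Each [:, :, b] slice simply stores the column index of every cell; in the loss function (not shown in this excerpt) this grid and its transpose are added to the cell-relative x, y predictions and divided by S to obtain box centers relative to the whole image:

    # Standalone reconstruction of self.offset, shape [7, 7, 2].
    import numpy as np

    S, B = 7, 2
    offset = np.transpose(np.reshape(
        np.array([np.arange(S)] * S * B), (B, S, S)), (1, 2, 0))
    print(offset.shape)     # (7, 7, 2)
    print(offset[:, :, 0])  # every row is [0 1 2 3 4 5 6], i.e. offset[i, j, b] == j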

    2. Building the Network

    The network is built by the build_network() function. It consists of convolutional, pooling, and fully connected layers; the input has shape [None, 448, 448, 3] and the output has shape [None, 1470].

        def build_network(self,
                          images,
                          num_outputs,
                          alpha,
                          keep_prob=0.5,
                          is_training=True,
                          scope='yolo'):
            '''
            Build the YOLO network.
            args:
                images: placeholder for the input images [None, image_size, image_size, 3], here [None, 448, 448, 3]
                num_outputs: scalar, number of output units, 1470
                alpha: slope coefficient of the leaky ReLU activation, 0.1
                keep_prob: dropout keep probability
                is_training: whether the network is being trained
                scope: variable scope name
            return:
                the last layer of the network, before any activation, shape [None, 1470]
            '''
            # Define the variable scope
            with tf.variable_scope(scope):
                # Shared layer arguments with L2 weight regularization
                # (leaky_relu(alpha) is a small helper defined elsewhere in yolo_net.py, not shown in this excerpt)
                with slim.arg_scope(
                    [slim.conv2d, slim.fully_connected],
                    activation_fn=leaky_relu(alpha),
                    weights_regularizer=slim.l2_regularizer(0.0005),
                    weights_initializer=tf.truncated_normal_initializer(0.0, 0.01)
                ):
                    # pad_1: pad to 454x454x3
                    net = tf.pad(
                        images, np.array([[0, 0], [3, 3], [3, 3], [0, 0]]),
                        name='pad_1')
                    # conv_2: 7x7x64, s=2, VALID, ceil((n-f+1)/s) -> 224x224x64
                    net = slim.conv2d(
                        net, 64, 7, 2, padding='VALID', scope='conv_2')
                    # pool_3: 2x2 max pool, s=2 -> 112x112x64
                    net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_3')
                    
                    # conv_4: 3x3x192, s=1, SAME, ceil(n/s) -> 112x112x192
                    net = slim.conv2d(net, 192, 3, scope='conv_4')
                    # pool_5: 2x2 max pool, s=2 -> 56x56x192
                    net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_5')
                    
                    # conv_6: 1x1x128, s=1 -> 56x56x128
                    net = slim.conv2d(net, 128, 1, scope='conv_6')
                    # conv_7: 3x3x256, s=1 -> 56x56x256
                    net = slim.conv2d(net, 256, 3, scope='conv_7')
                    # conv_8: 1x1x256, s=1 -> 56x56x256
                    net = slim.conv2d(net, 256, 1, scope='conv_8')
                    # conv_9: 3x3x512, s=1 -> 56x56x512
                    net = slim.conv2d(net, 512, 3, scope='conv_9')
                    # pool_10: 2x2 max pool, s=2 -> 28x28x512
                    net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_10')
                    
                    # conv_11: 1x1x256, s=1 -> 28x28x256
                    net = slim.conv2d(net, 256, 1, scope='conv_11')
                    # conv_12: 3x3x512, s=1 -> 28x28x512
                    net = slim.conv2d(net, 512, 3, scope='conv_12')
                    
                    # conv_13: 1x1x256, s=1 -> 28x28x256
                    net = slim.conv2d(net, 256, 1, scope='conv_13')
                    # conv_14: 3x3x512, s=1 -> 28x28x512
                    net = slim.conv2d(net, 512, 3, scope='conv_14')
                    
                    # conv_15 to conv_18 repeat the same 1x1/3x3 pattern at 28x28
                    net = slim.conv2d(net, 256, 1, scope='conv_15')
                    net = slim.conv2d(net, 512, 3, scope='conv_16')
                    
                    net = slim.conv2d(net, 256, 1, scope='conv_17')
                    net = slim.conv2d(net, 512, 3, scope='conv_18')
                    
                    # conv_19: 1x1x512, s=1 -> 28x28x512
                    net = slim.conv2d(net, 512, 1, scope='conv_19')
                    # conv_20: 3x3x1024, s=1 -> 28x28x1024
                    net = slim.conv2d(net, 1024, 3, scope='conv_20')
                    # pool_21: 2x2 max pool, s=2 -> 14x14x1024
                    net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_21')
                    
                    # conv_22: 1x1x512, s=1 -> 14x14x512
                    net = slim.conv2d(net, 512, 1, scope='conv_22')
                    # conv_23: 3x3x1024, s=1 -> 14x14x1024
                    net = slim.conv2d(net, 1024, 3, scope='conv_23')
                    
                    # conv_24 to conv_26 repeat the 1x1/3x3 pattern at 14x14, ending at 14x14x1024
                    net = slim.conv2d(net, 512, 1, scope='conv_24')
                    net = slim.conv2d(net, 1024, 3, scope='conv_25')
                    net = slim.conv2d(net, 1024, 3, scope='conv_26')
                    
                    # pad_27: pad to 16x16x1024
                    net = tf.pad(
                        net, np.array([[0, 0], [1, 1], [1, 1], [0, 0]]),
                        name='pad_27')
                        
                    # conv_28: 3x3x1024, s=2, VALID, ceil((n-f+1)/s) -> 7x7x1024
                    net = slim.conv2d(
                        net, 1024, 3, 2, padding='VALID', scope='conv_28')
                    
                    # conv_29: 3x3x1024, s=1 -> 7x7x1024
                    net = slim.conv2d(net, 1024, 3, scope='conv_29')
                    # conv_30: 3x3x1024, s=1 -> 7x7x1024
                    net = slim.conv2d(net, 1024, 3, scope='conv_30')
                    
                    # trans_31: transpose to [None, 1024, 7, 7]
                    net = tf.transpose(net, [0, 3, 1, 2], name='trans_31')
                    # flat_32: flatten to 50176 = 1024*7*7
                    net = slim.flatten(net, scope='flat_32')
                    # fc_33: fully connected, 512 units
                    net = slim.fully_connected(net, 512, scope='fc_33')
                    # fc_34: fully connected, 4096 units
                    net = slim.fully_connected(net, 4096, scope='fc_34')
                    # dropout_35: dropout on the 4096 units
                    net = slim.dropout(
                        net, keep_prob=keep_prob, is_training=is_training,
                        scope='dropout_35')
                    
                    # fc_36: fully connected, num_outputs = 1470, no activation
                    net = slim.fully_connected(
                        net, num_outputs, activation_fn=None, scope='fc_36')
            return net
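
    The spatial sizes quoted in the comments follow the usual convolution arithmetic: ceil((n - f + 1) / s) for 'VALID' padding and ceil(n / s) for 'SAME'. A small standalone sketch traces the feature-map side length through the layers that change it (the SAME, stride-1 convolutions leave it unchanged):

    import math

    def valid_out(n, f, s):
        # side length after a VALID conv: ceil((n - f + 1) / s)
        return math.ceil((n - f + 1) / s)

    def same_out(n, s):
        # side length after a SAME conv/pool: ceil(n / s)
        return math.ceil(n / s)

    n = 448 + 3 + 3           # pad_1   -> 454
    n = valid_out(n, 7, 2)    # conv_2  -> 224
    n = same_out(n, 2)        # pool_3  -> 112
    n = same_out(n, 2)        # pool_5  -> 56
    n = same_out(n, 2)        # pool_10 -> 28
    n = same_out(n, 2)        # pool_21 -> 14
    n = n + 1 + 1             # pad_27  -> 16
    n = valid_out(n, 3, 2)    # conv_28 -> 7
    print(n)                  # 7; flattened size is 7*7*1024 = 50176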

    3. The Loss Function
