本文是基于TensorRT 5.0.2基础上,关于其内部的yolov3_onnx例子的分析和介绍。
本例子展示一个完整的ONNX的pipline,在tensorrt 5.0的ONNX-TensorRT基础上,基于Yolov3-608网络进行inference,包含预处理和后处理。
- 首先,从作者网站下载yolov3,然后将其转换成onnx形式,接着基于onnx的graph生成一个tensorrt engine;
- 然后,在样本图片上进行预处理,并将结果作为engine的输入;
- 在inference之后,开始关于包含bounding-box聚类的后处理,然后最终得到一个新的图像文件,并将其存放在磁盘上,以便后续肉眼观察。
1 引言
假设当前路径为:
TensorRT-5.0.2.6/samples
其对应当前例子文件目录树为:
# tree python
python
├── common.py
└── yolov3_onnx
├── coco_labels.txt
├── data_processing.py
├── onnx_to_tensorrt.py
├── README.md
├── requirements.txt
├── yolov3.cfg
├── yolov3.weights
├── dog.jpg
└── yolov3_to_onnx.py
其中:
- yolov3_to_onnx.py:将原始yolov3模型转换成onnx结构。该脚本会自动下载所需要依赖文件;
- onnx_to_tensorrt.py:将onnx的yolov3转换成engine然后进行inference。
2 darknet转onnx
首先运行:
python yolov3_to_onnx.py
就会自动从作者网站下载yolo3的所需依赖
from __future__ import print_function
from collections import OrderedDict
import hashlib
import os.path
import wget
import onnx # github网址为https://github.com/onnx/onnx
from onnx import helper
from onnx import TensorProto
import numpy as np
import sys
'''main第二步:解析yolov3.cfg '''
class DarkNetParser(object):
"""定义一个基于DarkNet YOLOv3-608的解析器."""
def __init__(self, supported_layers):
"""初始化DarkNetParser对象.
Keyword argument:
supported_layers -- 一个list,其中每个元素为字符串,表示支持的层,以DarkNet的命名习惯,
"""
self.layer_configs = OrderedDict()
self.supported_layers = supported_layers
self.layer_counter = 0
def parse_cfg_file(self, cfg_file_path):
"""逐层解析yolov3.cfg文件,以字典形式追加每层的参数到layer_configs
Keyword argument:
cfg_file_path -- yolov3.cfg文件的路径
"""
with open(cfg_file_path, 'rb') as cfg_file:
remainder = cfg_file.read()
remainder = remainder.decode('utf-8') # 这一行for py3
while remainder:
# 一次次的去处理字符串,如果返回的layer_dict有值,则表示当前已经处理完一个字典
layer_dict, layer_name, remainder = self._next_layer(remainder)
if layer_dict:
self.layer_configs[layer_name] = layer_dict
return self.layer_configs
def _next_layer(self, remainder):
"""将其视为一个字符串,然后以DarkNet的分隔符来逐段处理.
在最近的分隔符之后,返回层参数和剩下的字符串
如文件中第一个Conv层 ...
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
... 会变成如下形式字典:
{'activation': 'leaky', 'stride': 1, 'pad': 1, 'filters': 32,
'batch_normalize': 1, 'type': 'convolutional', 'size': 3}.
'001_convolutional' 是层名layer_name, 后续所有字符以remainder表示的字符串返回
Keyword argument:
remainder -- 仍需要处理的字符串
"""
# head,tail方式
# 读取'[',然后获取tail
remainder = remainder.split('[', 1)
if len(remainder) == 2:
remainder = remainder[1]
else:
return None, None, None
# 读取‘]’,然后获取tail
remainder = remainder.split(']', 1)
if len(remainder) == 2:
layer_type, remainder = remainder
else:
return None, None, None
# 过滤注释行
if remainder.replace(' ', '')[0] == '#':
remainder = remainder.split('
', 1)[1]
# 1空行视为分块的分隔符,这里读取head表示的分块
layer_param_block, remainder = remainder.split('
', 1)
# 处理得到的分块,并以'
'将该块划分成行为元素的列表,等待处理
layer_param_lines = layer_param_block.split('
')[1:]
layer_name = str(self.layer_counter).zfill(3) + '_' + layer_type # 当前块命名
layer_dict = dict(type=layer_type)
# 如果当前层是支持的,则进行处理,如yolo就不支持
if layer_type in self.supported_layers:
for param_line in layer_param_lines:
if param_line[0] == '#':
continue
# 解析每一行
param_type, param_value = self._parse_params(param_line)
layer_dict[param_type] = param_value
self.layer_counter += 1
return layer_dict, layer_name, remainder
def _parse_params(self, param_line):
"""解析每一行参数,当遇到layers时,返回list,其余返回字符串,整数,浮点数类型.
Keyword argument:
param_line -- 块中的一行需要解析的参数行
"""
param_line = param_line.replace(' ', '') # 紧凑一下
param_type, param_value_raw = param_line.split('=') # 以‘=’划分
param_value = None
# 如果当前参数是layers,则以列表形式返回
if param_type == 'layers':
layer_indexes = list()
for index in param_value_raw.split(','):
layer_indexes.append(int(index))
param_value = layer_indexes
# 否则先检测是否是整数,还是浮点数,不然就返回字符串类型
elif isinstance(param_value_raw, str) and not param_value_raw.isalpha():
condition_param_value_positive = param_value_raw.isdigit()
condition_param_value_negative = param_value_raw[0] == '-' and
param_value_raw[1:].isdigit()
if condition_param_value_positive or condition_param_value_negative:
param_value = int(param_value_raw)
else:
param_value = float(param_value_raw)
else:
param_value = str(param_value_raw)
return param_type, param_value
'''main第四步:被第三步类的_make_onnx_node方法调用 '''
class MajorNodeSpecs(object):
"""Helper class用于存储ONNX输出节点的信息,对应DarkNet 层的输出和该层输出通道,
一些DarkNet层并未被创建,因此没有对应的ONNX 节点,
不过仍然需要对其进行追踪以建立skip 连接
"""
def __init__(self, name, channels):
""" 初始化一个MajorNodeSpecs对象
Keyword arguments:
name -- ONNX节点的名称
channels -- 该节点的输出通道的数量
"""
self.name = name
self.channels = channels
# 对于yolov3.cfg中三层yolo层,这里表示该节点并非onnx节点,默认复制false
# 其他如卷积,上采样等都是被赋予true
self.created_onnx_node = False
if name is not None and isinstance(channels, int) and channels > 0:
self.created_onnx_node = True
'''main第四步:被第三步类的_make_conv_node方法调用 '''
class ConvParams(object):
"""Helper class用于存储卷积层的超参数,包括在ONNX graph中的前置name和
为了卷积,偏置,BN等权重期望的维度
另外该类还扮演着为所有权重生成安全名称的封装,并检查合适的组合搭配
"""
def __init__(self, node_name, batch_normalize, conv_weight_dims):
"""基于base 节点名称 (e.g. 101_convolutional),BN设置,卷积权重shape的构造器
Keyword arguments:
node_name -- YOLO卷积层的base名称
batch_normalize -- bool值,表示是否使用BN
conv_weight_dims -- 该层的卷积权重的维度
"""
self.node_name = node_name
self.batch_normalize = batch_normalize
assert len(conv_weight_dims) == 4
self.conv_weight_dims = conv_weight_dims
def generate_param_name(self, param_category, suffix):
"""基于两个字符串输入生成一个名称,并检查组合搭配是否合理"""
assert suffix
assert param_category in ['bn', 'conv']
assert(suffix in ['scale', 'mean', 'var', 'weights', 'bias'])
if param_category == 'bn':
assert self.batch_normalize
assert suffix in ['scale', 'bias', 'mean', 'var']
elif param_category == 'conv':
assert suffix in ['weights', 'bias']
if suffix == 'bias':
assert not self.batch_normalize
param_name = self.node_name + '_' + param_category + '_' + suffix
return param_name
'''man第四步:被第三步类的build_onnx_graph方法调用 '''
class WeightLoader(object):
"""Helper class用于载入序列化的权重,
"""
def __init__(self, weights_file_path):
"""读取YOLOv3权重文件
Keyword argument:
weights_file_path --权重文件的路径.
"""
self.weights_file = self._open_weights_file(weights_file_path)
def load_conv_weights(self, conv_params):
"""返回权重文件的初始化器和卷积层的输入tensor
Keyword argument:
conv_params -- a ConvParams object
"""
initializer = list()
inputs = list()
if conv_params.batch_normalize:
# 创建BN需要的bias,scale,mean,var等参数
bias_init, bias_input = self._create_param_tensors(
conv_params, 'bn', 'bias')
bn_scale_init, bn_scale_input = self._create_param_tensors(
conv_params, 'bn', 'scale')
bn_mean_init, bn_mean_input = self._create_param_tensors(
conv_params, 'bn', 'mean')
bn_var_init, bn_var_input = self._create_param_tensors(
conv_params, 'bn', 'var')
# 初始化器扩展; 当前层输入的扩展
initializer.extend(
[bn_scale_init, bias_init, bn_mean_init, bn_var_init])
inputs.extend([bn_scale_input, bias_input,
bn_mean_input, bn_var_input])
else:
# 处理卷积层; 初始化器扩展; 当前层输入的扩展
bias_init, bias_input = self._create_param_tensors(
conv_params, 'conv', 'bias')
initializer.append(bias_init)
inputs.append(bias_input)
# 创建卷积层权重; 初始化器扩展; 当前层输入的扩展
conv_init, conv_input = self._create_param_tensors(
conv_params, 'conv', 'weights')
initializer.append(conv_init)
inputs.append(conv_input)
return initializer, inputs
def _open_weights_file(self, weights_file_path):
"""打开Yolov3 DarkNet文件流,并跳过开头.
Keyword argument:
weights_file_path -- 权重文件路径
"""
weights_file = open(weights_file_path, 'rb')
length_header = 5
np.ndarray(
shape=(length_header, ), dtype='int32', buffer=weights_file.read(
length_header * 4))
return weights_file
def _create_param_tensors(self, conv_params, param_category, suffix):
"""用权重文件中,与输入tensors一起的权重去初始化一个初始化器.
Keyword arguments:
conv_params -- a ConvParams object
param_category -- the category of parameters to be created ('bn' or 'conv')
suffix -- a string determining the sub-type of above param_category (e.g.,
'weights' or 'bias')
"""
param_name, param_data, param_data_shape = self._load_one_param_type(
conv_params, param_category, suffix)
# 调用onnx.helper.make_tensor
initializer_tensor = helper.make_tensor(
param_name, TensorProto.FLOAT, param_data_shape, param_data)
# 调用onnx.helper.make_tensor_value_info
input_tensor = helper.make_tensor_value_info(
param_name, TensorProto.FLOAT, param_data_shape)
return initializer_tensor, input_tensor
def _load_one_param_type(self, conv_params, param_category, suffix):
"""基于DarkNet顺序进行文件流的反序列化.
Keyword arguments:
conv_params -- a ConvParams object
param_category -- the category of parameters to be created ('bn' or 'conv')
suffix -- a string determining the sub-type of above param_category (e.g.,
'weights' or 'bias')
"""
# 生成合理的名称
param_name = conv_params.generate_param_name(param_category, suffix)
channels_out, channels_in, filter_h, filter_w = conv_params.conv_weight_dims
if param_category == 'bn':
param_shape = [channels_out]
elif param_category == 'conv':
if suffix == 'weights':
param_shape = [channels_out, channels_in, filter_h, filter_w]
elif suffix == 'bias':
param_shape = [channels_out]
param_size = np.product(np.array(param_shape)) # 计算参数的size
# 用weights_file.read去逐字节的读取数据并转换
param_data = np.ndarray(
shape=param_shape,
dtype='float32',
buffer=self.weights_file.read(param_size * 4))
param_data = param_data.flatten().astype(float)
return param_name, param_data, param_shape
'''main第三步 '''
class GraphBuilderONNX(object):
"""用于创建ONNX graph的类,基于之前从yolov3.cfg读取的网络结构。该类函数方法有:
build_onnx_graph : 构建
_make_onnx_node
_make_input_tensor
_get_previous_node_specs
_make_conv_node
_make_shortcut_node
_make_route_node
_make_upsample_node
"""
def __init__(self, output_tensors):
"""用所有DarkNet默认参数来初始化;
然后基于output_tensors指定输出维度;
以他们的name为key
Keyword argument:
output_tensors -- 一个 OrderedDict类型
"""
self.output_tensors = output_tensors
self._nodes = list()
self.graph_def = None
self.input_tensor = None
self.epsilon_bn = 1e-5
self.momentum_bn = 0.99
self.alpha_lrelu = 0.1
self.param_dict = OrderedDict()
self.major_node_specs = list()
self.batch_size = 1
def build_onnx_graph(
self,
layer_configs,
weights_file_path,
verbose=True):
"""基于所有的层配置进行迭代,创建一个ONNX graph,
然后用下载的yolov3 权重文件进行填充,最后返回该graph定义.
Keyword arguments:
layer_configs -- OrderedDict对象,包含所有解析的层的配置
weights_file_path -- 权重文件的位置
verbose -- 是否在创建之后显示该graph(default: True)
"""
for layer_name in layer_configs.keys():
layer_dict = layer_configs[layer_name]
# 读取yolov3.cfg中每一层,并将其作为onnx的节点
major_node_specs = self._make_onnx_node(layer_name, layer_dict)
# 如果当前为主要节点,则追加起来
if major_node_specs.name:
self.major_node_specs.append(major_node_specs)
outputs = list()
for tensor_name in self.output_tensors.keys():
# 将输出节点进行维度扩充
output_dims = [self.batch_size, ] +
self.output_tensors[tensor_name]
# 调用onnx的helper.make_tensor_value_info构建onnx张量,此时并未填充权重
output_tensor = helper.make_tensor_value_info(
tensor_name, TensorProto.FLOAT, output_dims)
outputs.append(output_tensor)
inputs = [self.input_tensor]
weight_loader = WeightLoader(weights_file_path)
initializer = list()
# self.param_dict在_make_onnx_node中已处理
for layer_name in self.param_dict.keys():
_, layer_type = layer_name.split('_', 1) # 如001_convolutional
conv_params = self.param_dict[layer_name]
assert layer_type == 'convolutional'
initializer_layer, inputs_layer = weight_loader.load_conv_weights(
conv_params)
initializer.extend(initializer_layer)
inputs.extend(inputs_layer)
del weight_loader
# 调用onnx的helper.make_graph进行onnx graph的构建
self.graph_def = helper.make_graph(
nodes=self._nodes,
name='YOLOv3-608',
inputs=inputs,
outputs=outputs,
initializer=initializer
)
if verbose:
print(helper.printable_graph(self.graph_def))
# 调用onnx的helper.make_model进行模型的构建
model_def = helper.make_model(self.graph_def,
producer_name='NVIDIA TensorRT sample')
return model_def
def _make_onnx_node(self, layer_name, layer_dict):
"""输入一个layer参数字典,选择对应的函数来创建ONNX节点,然后将为图创建的重要的信息存储为
MajorNodeSpec对象
Keyword arguments:
layer_name -- layer的名称 (即layer_configs中的key)
layer_dict -- 一个layer参数字典 (layer_configs的value)
"""
layer_type = layer_dict['type']
# 先检查self.input_tensor是否为空,为空且第一个块不是net,则报错,否则处理该net
# 可以看出 这里只在最开始执行一次,因为后续self.input_tensor都不为空。
if self.input_tensor is None:
if layer_type == 'net':
major_node_output_name, major_node_output_channels = self._make_input_tensor(
layer_name, layer_dict)
major_node_specs = MajorNodeSpecs(major_node_output_name,
major_node_output_channels)
else:
raise ValueError('The first node has to be of type "net".')
else:
node_creators = dict()
node_creators['convolutional'] = self._make_conv_node
node_creators['shortcut'] = self._make_shortcut_node
node_creators['route'] = self._make_route_node
node_creators['upsample'] = self._make_upsample_node
# 依次处理不同的层,并调用对应node_creators[layer_type]()函数进行处理
if layer_type in node_creators.keys():
major_node_output_name, major_node_output_channels =
node_creators[layer_type](layer_name, layer_dict)
major_node_specs = MajorNodeSpecs(major_node_output_name,
major_node_output_channels)
else:
# 跳过三个yolo层
print(
'Layer of type %s not supported, skipping ONNX node generation.' %
layer_type)
major_node_specs = MajorNodeSpecs(layer_name,
None)
return major_node_specs
def _make_input_tensor(self, layer_name, layer_dict):
"""为net layer创建输入tensor,并存储对应batch size.可以看出,该函数只被调用一次
Keyword arguments:
layer_name -- 层的名字 (如 layer_configs中key)
layer_dict -- 一个layer参数字典( layer_configs中的value)
"""
batch_size = layer_dict['batch']
channels = layer_dict['channels']
height = layer_dict['height']
width = layer_dict['width']
self.batch_size = batch_size
# 用onnx.helper.make_tensor_value_info构建onnx张量节点
input_tensor = helper.make_tensor_value_info(
str(layer_name), TensorProto.FLOAT, [
batch_size, channels, height, width])
self.input_tensor = input_tensor
return layer_name, channels
def _get_previous_node_specs(self, target_index=-1):
"""获取之前创建好的onnx节点(跳过那些没生成的节点,比如yolo节点).
target_index可以能够直接跳到对应节点.
Keyword arguments:
target_index -- 可选的参数,帮助跳到具体索引(default: -1 表示跳到前一个元素)
"""
# 通过反向遍历,找到最后一个(这里是第一个)created_onnx_node为真的节点
previous_node = None
for node in self.major_node_specs[target_index::-1]:
if node.created_onnx_node:
previous_node = node
break
assert previous_node is not None
return previous_node
def _make_conv_node(self, layer_name, layer_dict):
"""用可选的bn和激活函数nonde去创建一个onnx的卷积node
Keyword arguments:
layer_name -- 层的名字 (如 layer_configs中key)
layer_dict -- 一个layer参数字典( layer_configs中的value)
"""
# 先找最近的一个节点
previous_node_specs = self._get_previous_node_specs()
''' i) 处理卷积层'''
# 构建该层的inputs,通道等等信息
inputs = [previous_node_specs.name]
previous_channels = previous_node_specs.channels
kernel_size = layer_dict['size']
stride = layer_dict['stride']
filters = layer_dict['filters']
# 检测该层是否有bn
batch_normalize = False
if 'batch_normalize' in layer_dict.keys(
) and layer_dict['batch_normalize'] == 1:
batch_normalize = True
kernel_shape = [kernel_size, kernel_size]
weights_shape = [filters, previous_channels] + kernel_shape
# 构建卷积层的参数层的实例
conv_params = ConvParams(layer_name, batch_normalize, weights_shape)
strides = [stride, stride]
dilations = [1, 1]
# 调用ConvParams.generate_param_name生成合适的参数名称
weights_name = conv_params.generate_param_name('conv', 'weights')
inputs.append(weights_name)
if not batch_normalize:
bias_name = conv_params.generate_param_name('conv', 'bias')
inputs.append(bias_name)
# 用onnx.helper.make_node构建onnx的卷积节点
conv_node = helper.make_node(
'Conv',
inputs=inputs,
outputs=[layer_name],
kernel_shape=kernel_shape,
strides=strides,
auto_pad='SAME_LOWER',
dilations=dilations,
name=layer_name
)
self._nodes.append(conv_node)
inputs = [layer_name]
layer_name_output = layer_name
''' ii) 处理BN层'''
if batch_normalize:
layer_name_bn = layer_name + '_bn'
bn_param_suffixes = ['scale', 'bias', 'mean', 'var']
for suffix in bn_param_suffixes:
bn_param_name = conv_params.generate_param_name('bn', suffix)
inputs.append(bn_param_name)
batchnorm_node = helper.make_node(
'BatchNormalization',
inputs=inputs,
outputs=[layer_name_bn],
epsilon=self.epsilon_bn,
momentum=self.momentum_bn,
name=layer_name_bn
)
self._nodes.append(batchnorm_node)
inputs = [layer_name_bn]
layer_name_output = layer_name_bn
''' iii) 处理激活函数'''
if layer_dict['activation'] == 'leaky':
layer_name_lrelu = layer_name + '_lrelu'
lrelu_node = helper.make_node(
'LeakyRelu',
inputs=inputs,
outputs=[layer_name_lrelu],
name=layer_name_lrelu,
alpha=self.alpha_lrelu
)
self._nodes.append(lrelu_node)
inputs = [layer_name_lrelu]
layer_name_output = layer_name_lrelu
elif layer_dict['activation'] == 'linear':
pass
else:
print('Activation not supported.')
self.param_dict[layer_name] = conv_params
return layer_name_output, filters
def _make_shortcut_node(self, layer_name, layer_dict):
"""从DarkNet graph中读取信息,基于onnx 的add 节点创建shortcut 节点.
Keyword arguments:
layer_name -- 层的名字 (如 layer_configs中key)
layer_dict -- 一个layer参数字典( layer_configs中的value)
"""
shortcut_index = layer_dict['from'] # 当前层与前面哪层shorcut
activation = layer_dict['activation']
assert activation == 'linear'
first_node_specs = self._get_previous_node_specs() # 最近一层
second_node_specs = self._get_previous_node_specs(
target_index=shortcut_index) # 前面具体需要shorcut的层
assert first_node_specs.channels == second_node_specs.channels
channels = first_node_specs.channels
inputs = [first_node_specs.name, second_node_specs.name]
# 用onnx.helper.make_node创建节点
shortcut_node = helper.make_node(
'Add',
inputs=inputs,
outputs=[layer_name],
name=layer_name,
)
self._nodes.append(shortcut_node)
return layer_name, channels
def _make_route_node(self, layer_name, layer_dict):
"""如果来自DarkNet配置的layer参数只有一个所以,那么接着在指定(负)索引上创建节点
否则,创建一个onnx concat 节点来实现路由特性.
Keyword arguments:
layer_name -- 层的名字 (如 layer_configs中key)
layer_dict -- 一个layer参数字典( layer_configs中的value)
"""
# 处理yolov3.cfg中[route]
route_node_indexes = layer_dict['layers']
if len(route_node_indexes) == 1:
split_index = route_node_indexes[0]
assert split_index < 0
# Increment by one because we skipped the YOLO layer:
split_index += 1
self.major_node_specs = self.major_node_specs[:split_index]
layer_name = None
channels = None
else:
inputs = list()
channels = 0
for index in route_node_indexes:
if index > 0:
# Increment by one because we count the input as a node (DarkNet
# does not)
index += 1
route_node_specs = self._get_previous_node_specs(
target_index=index)
inputs.append(route_node_specs.name)
channels += route_node_specs.channels
assert inputs
assert channels > 0
route_node = helper.make_node(
'Concat',
axis=1,
inputs=inputs,
outputs=[layer_name],
name=layer_name,
)
self._nodes.append(route_node)
return layer_name, channels
def _make_upsample_node(self, layer_name, layer_dict):
"""创建一个onnx的Upsample节点.
Keyword arguments:
layer_name -- 层的名字 (如 layer_configs中key)
layer_dict -- 一个layer参数字典( layer_configs中的value)
"""
upsample_factor = float(layer_dict['stride'])
previous_node_specs = self._get_previous_node_specs()
inputs = [previous_node_specs.name]
channels = previous_node_specs.channels
assert channels > 0
upsample_node = helper.make_node(
'Upsample',
mode='nearest',
# For ONNX versions <0.7.0, Upsample nodes accept different parameters than 'scales':
scales=[1.0, 1.0, upsample_factor, upsample_factor],
inputs=inputs,
outputs=[layer_name],
name=layer_name,
)
self._nodes.append(upsample_node)
return layer_name, channels
def generate_md5_checksum(local_path):
"""计算本地文件的md5
Keyword argument:
local_path -- 本地文件路径
"""
with open(local_path) as local_file:
data = local_file.read()
return hashlib.md5(data).hexdigest()
def download_file(local_path, link, checksum_reference=None):
"""下载指定url到本地,并进行摘要校对.
Keyword arguments:
local_path -- 本地文件存储路径
link -- 需要下载的url
checksum_reference -- expected MD5 checksum of the file
"""
if not os.path.exists(local_path):
print('Downloading from %s, this may take a while...' % link)
wget.download(link, local_path)
print()
if checksum_reference is not None:
checksum = generate_md5_checksum(local_path)
if checksum != checksum_reference:
raise ValueError(
'The MD5 checksum of local file %s differs from %s, please manually remove
the file and try again.' %
(local_path, checksum_reference))
return local_path
def main():
"""Run the DarkNet-to-ONNX conversion for YOLOv3-608."""
# 注释掉下面的部分,
# if sys.version_info[0] > 2:
# raise Exception("This is script is only compatible with python2, please re-run this script
# with python2. The rest of this sample can be run with either version of python")
''' 1 - 下载yolov3的配置文件,并进行摘要验证'''
cfg_file_path = download_file(
'yolov3.cfg',
'https://raw.githubusercontent.com/pjreddie/darknet/f86901f6177dfc6116360a13cc06ab680e0c86b0/cfg/yolov3.cfg',
'b969a43a848bbf26901643b833cfb96c')
# DarkNetParser将会只提取这些层的参数,类型为'yolo'的这三层不能很好的解析,
# 因为他们包含在后续的后处理中;
supported_layers = ['net', 'convolutional', 'shortcut',
'route', 'upsample']
''' 2 - 创建一个DarkNetParser对象,并生成一个OrderedDict,包含cfg文件读取的所有层配置'''
parser = DarkNetParser(supported_layers)
layer_configs = parser.parse_cfg_file(cfg_file_path)
# 在解析完之后,不再需要该对象
del parser
''' 3 - 实例化一个GraphBuilderONNX类对象,用已知输出tensor维度进行初始化'''
# 在上面的layer_config,有三个输出是需要知道的,CHW格式
output_tensor_dims = OrderedDict()
output_tensor_dims['082_convolutional'] = [255, 19, 19]
output_tensor_dims['094_convolutional'] = [255, 38, 38]
output_tensor_dims['106_convolutional'] = [255, 76, 76]
# 内置yolov3的一些默认参数来进行实例化
builder = GraphBuilderONNX(output_tensor_dims)
''' 4 - 调用GraphBuilderONNX的build_onnx_graph方法
用之前解析好的层配置信息和权重文件,生成ONNX graph'''
''' 从作者官网下载yolov3的权重文件,以此填充tensorrt的network '''
weights_file_path = download_file(
'yolov3.weights',
'https://pjreddie.com/media/files/yolov3.weights',
'c84e5b99d0e52cd466ae710cadf6d84c')
yolov3_model_def = builder.build_onnx_graph(
layer_configs=layer_configs,
weights_file_path=weights_file_path,
verbose=True)
# 模型定义结束,删除builder对象
del builder
''' 5 - 在ONNX模型定义上进行健全检查'''
onnx.checker.check_model(yolov3_model_def)
''' 6 - 序列化生成的ONNX graph到文件'''
output_file_path = 'yolov3.onnx'
onnx.save(yolov3_model_def, output_file_path)
if __name__ == '__main__':
main()
结果如下:
[root@30d4bceec4c4 yolov3_onnx]# python yolov3_to_onnx.py
Layer of type yolo not supported, skipping ONNX node generation.
Layer of type yolo not supported, skipping ONNX node generation.
Layer of type yolo not supported, skipping ONNX node generation.
graph YOLOv3-608 (
%000_net[FLOAT, 64x3x608x608]
) initializers (
%001_convolutional_bn_scale[FLOAT, 32]
%001_convolutional_bn_bias[FLOAT, 32]
%001_convolutional_bn_mean[FLOAT, 32]
%001_convolutional_bn_var[FLOAT, 32]
%001_convolutional_conv_weights[FLOAT, 32x3x3x3]
%002_convolutional_bn_scale[FLOAT, 64]
......
%105_convolutional_conv_weights[FLOAT, 256x128x3x3]
%106_convolutional_conv_bias[FLOAT, 255]
%106_convolutional_conv_weights[FLOAT, 255x256x1x1]
) {
%001_convolutional = Conv[auto_pad = 'SAME_LOWER', dilations = [1, 1], kernel_shape = [3, 3], strides = [1, 1]](%000_net, %001_convolutional_conv_weights)
%001_convolutional_bn = BatchNormalization[epsilon = 9.99999974737875e-06, momentum = 0.990000009536743](%001_convolutional,
......
%105_convolutional_bn = BatchNormalization[epsilon = 9.99999974737875e-06, momentum = 0.990000009536743](%105_convolutional, %105_convolutional_bn_scale, %105_convolutional_bn_bias, %105_convolutional_bn_mean, %105_convolutional_bn_var)
%105_convolutional_lrelu = LeakyRelu[alpha = 0.100000001490116](%105_convolutional_bn)
%106_convolutional = Conv[auto_pad = 'SAME_LOWER', dilations = [1, 1], kernel_shape = [1, 1], strides = [1, 1]](%105_convolutional_lrelu, %106_convolutional_conv_weights, %106_convolutional_conv_bias)
return %082_convolutional, %094_convolutional, %106_convolutional
}
ps:在该例子中onnx不要安装1.4.1版本,可以安装如1.2.1版本,否则会出现
此时会生成文件yolov3.onnx。
3 onnx转trt并进行inference
接下来看onnx_to_tensorrt.py
from __future__ import print_function
import numpy as np
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
from PIL import ImageDraw
from yolov3_to_onnx import download_file
from data_processing import PreprocessYOLO, PostprocessYOLO, ALL_CATEGORIES
import sys, os
#sys.path.insert(1, os.path.join(sys.path[0], ".."))
#import common
'''main第3.2步 '''
def allocate_buffers(engine):
inputs = []
outputs = []
bindings = []
stream = cuda.Stream()
for binding in engine:
size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
dtype = trt.nptype(engine.get_binding_dtype(binding))
# 分配host和device端的buffer
host_mem = cuda.pagelocked_empty(size, dtype)
device_mem = cuda.mem_alloc(host_mem.nbytes)
# 将device端的buffer追加到device的bindings.
bindings.append(int(device_mem))
# Append to the appropriate list.
if engine.binding_is_input(binding):
inputs.append(HostDeviceMem(host_mem, device_mem))
else:
outputs.append(HostDeviceMem(host_mem, device_mem))
return inputs, outputs, bindings, stream
'''main中第3.3步 '''
# 该函数可以适应多个输入/输出;输入和输出格式为HostDeviceMem对象组成的列表
def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
# 将数据移动到GPU
[cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
# 执行inference.
context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)
# 将结果从 GPU写回到host端
[cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
# 同步stream
stream.synchronize()
# 返回host端的输出结果
return [out.host for out in outputs]
#------------
TRT_LOGGER = trt.Logger()
def draw_bboxes(image_raw, bboxes, confidences, categories, all_categories, bbox_color='blue'):
"""在原始输入图片上标记bounding box没然后返回结果.
Keyword arguments:
image_raw -- a raw PIL Image
bboxes -- NumPy array containing the bounding box coordinates of N objects, with shape (N,4).
categories -- NumPy array containing the corresponding category for each object,
with shape (N,)
confidences -- NumPy array containing the corresponding confidence for each object,
with shape (N,)
all_categories -- a list of all categories in the correct ordered (required for looking up
the category name)
bbox_color -- an optional string specifying the color of the bounding boxes (default: 'blue')
"""
draw = ImageDraw.Draw(image_raw)
print(bboxes, confidences, categories)
for box, score, category in zip(bboxes, confidences, categories):
x_coord, y_coord, width, height = box
left = max(0, np.floor(x_coord + 0.5).astype(int))
top = max(0, np.floor(y_coord + 0.5).astype(int))
right = min(image_raw.width, np.floor(x_coord + width + 0.5).astype(int))
bottom = min(image_raw.height, np.floor(y_coord + height + 0.5).astype(int))
draw.rectangle(((left, top), (right, bottom)), outline=bbox_color)
draw.text((left, top - 12), '{0} {1:.2f}'.format(all_categories[category], score), fill=bbox_color)
return image_raw
def get_engine(onnx_file_path, engine_file_path=""):
"""如果已经有序列化engine,则直接用,否则构建新的tensorrt engine然后保存."""
# 闭包
def build_engine():
"""Takes an ONNX file and creates a TensorRT engine to run inference with"""
with trt.Builder(TRT_LOGGER) as builder,
builder.create_network() as network,
trt.OnnxParser(network, TRT_LOGGER) as parser:
builder.max_workspace_size = 1 << 30 # 1GB
builder.max_batch_size = 1
# 解析模型文件
if not os.path.exists(onnx_file_path):
print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
exit(0)
print('Loading ONNX file from path {}...'.format(onnx_file_path))
with open(onnx_file_path, 'rb') as model:
print('Beginning ONNX file parsing')
parser.parse(model.read())
print('Completed parsing of ONNX file')
print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
engine = builder.build_cuda_engine(network)
print("Completed creating Engine")
with open(engine_file_path, "wb") as f:
f.write(engine.serialize())
return engine
if os.path.exists(engine_file_path):
# 如果序列化engine已经存在,那么就直接跳过构建部分.
print("Reading engine from file {}".format(engine_file_path))
with open(engine_file_path, "rb") as f,
trt.Runtime(TRT_LOGGER) as runtime:
return runtime.deserialize_cuda_engine(f.read())
else:
return build_engine()
def main():
"""Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""
''' 1 - 装载之前转换好的onnx,准备测试图片'''
onnx_file_path = 'yolov3.onnx'
engine_file_path = "yolov3.trt"
# 下载测试图片
input_image_path = download_file('dog.jpg',
'https://github.com/pjreddie/darknet/raw/f86901f6177dfc6116360a13cc06ab680e0c86b0/data/dog.jpg', checksum_reference=None)
''' 2 - 对图片进行预处理'''
# yolov3网络的输入size,HW顺序
input_resolution_yolov3_HW = (608, 608)
# 创建一个预处理来处理任意图片,以符合yolov3的输入
preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
# 载入图像,并进行预处理
image_raw, image = preprocessor.process(input_image_path)
# 将该预处理好的图像以WH格式存储,以备后续使用
shape_orig_WH = image_raw.size
''' 3 - 基于tensorrt进行yolov3模型的运行'''
# yolov3输出的三个map的shapeOutput shapes expected by the post-processor
output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
# 用 TensorRT进行inference
trt_outputs = []
''' 3.1 - 基于get_engine生成engine'''
with get_engine(onnx_file_path, engine_file_path) as engine,
engine.create_execution_context() as context:
''' 3.2 - 分配host,device端的buffer'''
inputs, outputs, bindings, stream = allocate_buffers(engine)
print('Running inference on image {}...'.format(input_image_path))
''' 3.3 - 进行inference'''
inputs[0].host = image
trt_outputs = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
''' 4 - 对tensorrt在onnx结构的yolov3上得到的结果进行后处理'''
trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]
postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
"yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
(59, 119), (116, 90), (156, 198), (373, 326)],
"obj_threshold": 0.6, # 对象覆盖的阈值,[0,1]之间
"nms_threshold": 0.5, # nms的阈值,[0,1]之间
"yolo_input_resolution": input_resolution_yolov3_HW}
# 创建后处理类的实例
postprocessor = PostprocessYOLO(**postprocessor_args)
# 运行后处理算法,并得到检测到对象的bounding box
boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))
''' 5 - 在原始输入图像上将检测框标记,并保存png文件 '''
obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
output_image_path = 'dog_bboxes.png'
obj_detected_img.save(output_image_path, 'PNG')
print('Saved image with bounding boxes of detected objects to {}.'.format(output_image_path))
if __name__ == '__main__':
main()
运行程序及结果:
python onnx_to_tensorrt.py
此时文件目录为:
.
├── coco_labels.txt
├── data_processing.py
├── dog_bboxes.png
├── dog.jpg
├── onnx_to_tensorrt.py
├── __pycache__
│ ├── data_processing.cpython-35.pyc
│ └── yolov3_to_onnx.cpython-35.pyc
├── README.md
├── requirements.txt
├── yolov3.cfg
├── yolov3.onnx
├── yolov3_to_onnx.py
├── yolov3.trt
└── yolov3.weights
最后我们来看下data_processing.py
import math
from PIL import Image
import numpy as np
# YOLOv3-608 has been trained with these 80 categories from COCO:
# Lin, Tsung-Yi, et al. "Microsoft COCO: Common Objects in Context."
# European Conference on Computer Vision. Springer, Cham, 2014.
def load_label_categories(label_file_path):
categories = [line.rstrip('
') for line in open(label_file_path)]
return categories
LABEL_FILE_PATH = 'coco_labels.txt'
ALL_CATEGORIES = load_label_categories(LABEL_FILE_PATH)
# 确定有80个类别:
CATEGORY_NUM = len(ALL_CATEGORIES)
assert CATEGORY_NUM == 80
class PreprocessYOLO(object):
"""装载图像,然后reshape成yolov3-608需要的分辨率.
"""
def __init__(self, yolo_input_resolution):
"""指定yolov3的输入分辨率.
Keyword arguments:
yolo_input_resolution -- two-dimensional tuple with the target network's (spatial)
input resolution in HW order
"""
self.yolo_input_resolution = yolo_input_resolution
def process(self, input_image_path):
"""载入图像,然后进行预处理,如resize,归一化等等
Keyword arguments:
input_image_path -- string path of the image to be loaded
"""
image_raw, image_resized = self._load_and_resize(input_image_path)
image_preprocessed = self._shuffle_and_normalize(image_resized)
return image_raw, image_preprocessed
def _load_and_resize(self, input_image_path):
"""对图像进行resize,然后返回numpy对象
Keyword arguments:
input_image_path -- string path of the image to be loaded
"""
image_raw = Image.open(input_image_path)
new_resolution = (
self.yolo_input_resolution[1],
self.yolo_input_resolution[0])
image_resized = image_raw.resize(
new_resolution, resample=Image.BICUBIC)
image_resized = np.array(image_resized, dtype=np.float32, order='C')
return image_raw, image_resized
def _shuffle_and_normalize(self, image):
"""将图像归一化到[0,1]之间,然后将HWC结构转换成NCHW结构
Keyword arguments:
image -- image as three-dimensional NumPy float array, in HWC format
"""
image /= 255.0
# HWC -> CHW :
image = np.transpose(image, [2, 0, 1])
# CHW -> NCHW
image = np.expand_dims(image, axis=0)
# j将图像转换成row-major order,如 "C order":
image = np.array(image, dtype=np.float32, order='C')
return image
class PostprocessYOLO(object):
"""后处理yolov3-608的三个输出tensor."""
def __init__(self,
yolo_masks,
yolo_anchors,
obj_threshold,
nms_threshold,
yolo_input_resolution):
"""Initialize with all values that will be kept when processing several frames.
Assuming 3 outputs of the network in the case of (large) YOLOv3.
Keyword arguments:
yolo_masks -- a list of 3 three-dimensional tuples for the YOLO masks
yolo_anchors -- a list of 9 two-dimensional tuples for the YOLO anchors
object_threshold -- threshold for object coverage, float value between 0 and 1
nms_threshold -- threshold for non-max suppression algorithm,
float value between 0 and 1
input_resolution_yolo -- two-dimensional tuple with the target network's (spatial)
input resolution in HW order
"""
self.masks = yolo_masks
self.anchors = yolo_anchors
self.object_threshold = obj_threshold
self.nms_threshold = nms_threshold
self.input_resolution_yolo = yolo_input_resolution
def process(self, outputs, resolution_raw):
"""Take the YOLOv3 outputs generated from a TensorRT forward pass, post-process them
and return a list of bounding boxes for detected object together with their category
and their confidences in separate lists.
Keyword arguments:
outputs -- outputs from a TensorRT engine in NCHW format
resolution_raw -- the original spatial resolution from the input PIL image in WH order
"""
outputs_reshaped = list()
for output in outputs:
outputs_reshaped.append(self._reshape_output(output))
boxes, categories, confidences = self._process_yolo_output(
outputs_reshaped, resolution_raw)
return boxes, categories, confidences
def _reshape_output(self, output):
"""Reshape a TensorRT output from NCHW to NHWC format (with expected C=255),
and then return it in (height,width,3,85) dimensionality after further reshaping.
Keyword argument:
output -- an output from a TensorRT engine after inference
"""
output = np.transpose(output, [0, 2, 3, 1])
_, height, width, _ = output.shape
dim1, dim2 = height, width
dim3 = 3
# There are CATEGORY_NUM=80 object categories:
dim4 = (4 + 1 + CATEGORY_NUM)
return np.reshape(output, (dim1, dim2, dim3, dim4))
def _process_yolo_output(self, outputs_reshaped, resolution_raw):
"""Take in a list of three reshaped YOLO outputs in (height,width,3,85) shape and return
return a list of bounding boxes for detected object together with their category and their
confidences in separate lists.
Keyword arguments:
outputs_reshaped -- list of three reshaped YOLO outputs as NumPy arrays
with shape (height,width,3,85)
resolution_raw -- the original spatial resolution from the input PIL image in WH order
"""
# E.g. in YOLOv3-608, there are three output tensors, which we associate with their
# respective masks. Then we iterate through all output-mask pairs and generate candidates
# for bounding boxes, their corresponding category predictions and their confidences:
boxes, categories, confidences = list(), list(), list()
for output, mask in zip(outputs_reshaped, self.masks):
box, category, confidence = self._process_feats(output, mask)
box, category, confidence = self._filter_boxes(box, category, confidence)
boxes.append(box)
categories.append(category)
confidences.append(confidence)
boxes = np.concatenate(boxes)
categories = np.concatenate(categories)
confidences = np.concatenate(confidences)
# Scale boxes back to original image shape:
width, height = resolution_raw
image_dims = [width, height, width, height]
boxes = boxes * image_dims
# Using the candidates from the previous (loop) step, we apply the non-max suppression
# algorithm that clusters adjacent bounding boxes to a single bounding box:
nms_boxes, nms_categories, nscores = list(), list(), list()
for category in set(categories):
idxs = np.where(categories == category)
box = boxes[idxs]
category = categories[idxs]
confidence = confidences[idxs]
keep = self._nms_boxes(box, confidence)
nms_boxes.append(box[keep])
nms_categories.append(category[keep])
nscores.append(confidence[keep])
if not nms_categories and not nscores:
return None, None, None
boxes = np.concatenate(nms_boxes)
categories = np.concatenate(nms_categories)
confidences = np.concatenate(nscores)
return boxes, categories, confidences
def _process_feats(self, output_reshaped, mask):
"""Take in a reshaped YOLO output in height,width,3,85 format together with its
corresponding YOLO mask and return the detected bounding boxes, the confidence,
and the class probability in each cell/pixel.
Keyword arguments:
output_reshaped -- reshaped YOLO output as NumPy arrays with shape (height,width,3,85)
mask -- 2-dimensional tuple with mask specification for this output
"""
# Two in-line functions required for calculating the bounding box
# descriptors:
def sigmoid(value):
"""Return the sigmoid of the input."""
return 1.0 / (1.0 + math.exp(-value))
def exponential(value):
"""Return the exponential of the input."""
return math.exp(value)
# Vectorized calculation of above two functions:
sigmoid_v = np.vectorize(sigmoid)
exponential_v = np.vectorize(exponential)
grid_h, grid_w, _, _ = output_reshaped.shape
anchors = [self.anchors[i] for i in mask]
# Reshape to N, height, width, num_anchors, box_params:
anchors_tensor = np.reshape(anchors, [1, 1, len(anchors), 2])
box_xy = sigmoid_v(output_reshaped[..., :2])
box_wh = exponential_v(output_reshaped[..., 2:4]) * anchors_tensor
box_confidence = sigmoid_v(output_reshaped[..., 4])
box_confidence = np.expand_dims(box_confidence, axis=-1)
box_class_probs = sigmoid_v(output_reshaped[..., 5:])
col = np.tile(np.arange(0, grid_w), grid_w).reshape(-1, grid_w)
row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_h)
col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
grid = np.concatenate((col, row), axis=-1)
box_xy += grid
box_xy /= (grid_w, grid_h)
box_wh /= self.input_resolution_yolo
box_xy -= (box_wh / 2.)
boxes = np.concatenate((box_xy, box_wh), axis=-1)
# boxes: centroids, box_confidence: confidence level, box_class_probs:
# class confidence
return boxes, box_confidence, box_class_probs
def _filter_boxes(self, boxes, box_confidences, box_class_probs):
"""Take in the unfiltered bounding box descriptors and discard each cell
whose score is lower than the object threshold set during class initialization.
Keyword arguments:
boxes -- bounding box coordinates with shape (height,width,3,4); 4 for
x,y,height,width coordinates of the boxes
box_confidences -- bounding box confidences with shape (height,width,3,1); 1 for as
confidence scalar per element
box_class_probs -- class probabilities with shape (height,width,3,CATEGORY_NUM)
"""
box_scores = box_confidences * box_class_probs
box_classes = np.argmax(box_scores, axis=-1)
box_class_scores = np.max(box_scores, axis=-1)
pos = np.where(box_class_scores >= self.object_threshold)
boxes = boxes[pos]
classes = box_classes[pos]
scores = box_class_scores[pos]
return boxes, classes, scores
def _nms_boxes(self, boxes, box_confidences):
"""Apply the Non-Maximum Suppression (NMS) algorithm on the bounding boxes with their
confidence scores and return an array with the indexes of the bounding boxes we want to
keep (and display later).
Keyword arguments:
boxes -- a NumPy array containing N bounding-box coordinates that survived filtering,
with shape (N,4); 4 for x,y,height,width coordinates of the boxes
box_confidences -- a Numpy array containing the corresponding confidences with shape N
"""
x_coord = boxes[:, 0]
y_coord = boxes[:, 1]
width = boxes[:, 2]
height = boxes[:, 3]
areas = width * height
ordered = box_confidences.argsort()[::-1]
keep = list()
while ordered.size > 0:
# Index of the current element:
i = ordered[0]
keep.append(i)
xx1 = np.maximum(x_coord[i], x_coord[ordered[1:]])
yy1 = np.maximum(y_coord[i], y_coord[ordered[1:]])
xx2 = np.minimum(x_coord[i] + width[i], x_coord[ordered[1:]] + width[ordered[1:]])
yy2 = np.minimum(y_coord[i] + height[i], y_coord[ordered[1:]] + height[ordered[1:]])
width1 = np.maximum(0.0, xx2 - xx1 + 1)
height1 = np.maximum(0.0, yy2 - yy1 + 1)
intersection = width1 * height1
union = (areas[i] + areas[ordered[1:]] - intersection)
# Compute the Intersection over Union (IoU) score:
iou = intersection / union
# The goal of the NMS algorithm is to reduce the number of adjacent bounding-box
# candidates to a minimum. In this step, we keep only those elements whose overlap
# with the current bounding box is lower than the threshold:
indexes = np.where(iou <= self.nms_threshold)[0]
ordered = ordered[indexes + 1]
keep = np.array(keep)
return keep