Annotating datasets has been a real headache lately, so I figured it would save a lot of work to pre-train a model first, let it generate annotations automatically, and then only fine-tune the dataset by hand.
The results won't be perfect; how useful auto-annotation is depends mainly on the model's detection accuracy.
Below are my notes: the predicted classes and boxes are written straight into XML label files (Pascal VOC style), which removes the repetitive labeling step and leaves only manual fine-tuning.
import cv2
import numpy as np
import time, os
import xml.dom.minidom
import xml.etree.ElementTree as ET
from xml.dom.minidom import Document
FONT = cv2.FONT_HERSHEY_PLAIN
# Confidence threshold for keeping a detection.
conf_thresh = 0.5
time_start = time.time()
class xmlvalue():
    def __init__(self, fname, folder, path, img):
        """
        :param fname: image file name
        :param folder: parent folder of the image
        :param path: full path to the image
        :param img: image array
        # :param label: detections, one [class name, xmin, ymin, xmax, ymax] entry per box (set later via setlabel)
        """
        self.filname = fname
        self.folder = folder
        self.path = path
        self.Img = img

    def setlabel(self, label):
        self.Label = label
# ------------------------- one <object> node per detection -----------------------
def labelobject(doc, DOCUMENT, labelname, coor):
    object = doc.createElement('object')
    # object.appendChild(doc.createTextNode("JPEGImages"))
    DOCUMENT.appendChild(object)
    name = doc.createElement('name')
    name.appendChild(doc.createTextNode(str(labelname)))
    object.appendChild(name)
    pose = doc.createElement('pose')
    pose.appendChild(doc.createTextNode("Unspecified"))
    object.appendChild(pose)
    truncated = doc.createElement('truncated')
    truncated.appendChild(doc.createTextNode("0"))
    object.appendChild(truncated)
    difficult = doc.createElement('difficult')
    difficult.appendChild(doc.createTextNode("0"))
    object.appendChild(difficult)
    # """------------------ bounding-box coordinates ---------------------------"""
    bndbox = doc.createElement('bndbox')
    object.appendChild(bndbox)
    xmin = doc.createElement('xmin')
    xmin.appendChild(doc.createTextNode(str(coor[1])))
    bndbox.appendChild(xmin)
    ymin = doc.createElement('ymin')
    ymin.appendChild(doc.createTextNode(str(coor[2])))
    bndbox.appendChild(ymin)
    xmax = doc.createElement('xmax')
    xmax.appendChild(doc.createTextNode(str(coor[3])))
    bndbox.appendChild(xmax)
    ymax = doc.createElement('ymax')
    ymax.appendChild(doc.createTextNode(str(coor[4])))
    bndbox.appendChild(ymax)
    return object
def creatxml(Cxml):
    doc = Document()  # create the DOM document object
    DOCUMENT = doc.createElement('annotation')  # create the root element
    # DOCUMENT.setAttribute('content_method', "full")  # set a namespace
    # DOCUMENT.setAttribute('xsi:noNamespaceSchemaLocation', 'DOCUMENT.xsd')  # reference a local XML Schema
    doc.appendChild(DOCUMENT)
    # ############ build the annotation with xml.dom.minidom ############
    # create a node
    folder = doc.createElement('folder')
    # set its text value
    folder.appendChild(doc.createTextNode(Cxml.folder))
    # attach it to the parent node
    DOCUMENT.appendChild(folder)
    filename = doc.createElement('filename')
    filename.appendChild(doc.createTextNode(Cxml.filname))
    DOCUMENT.appendChild(filename)
    path = doc.createElement('path')
    path.appendChild(doc.createTextNode(Cxml.path))
    DOCUMENT.appendChild(path)
    """
    <source>
        <database>Unknown</database>
    </source>
    """
    """----------------***source***-----------------------"""
    source = doc.createElement('source')
    DOCUMENT.appendChild(source)
    database = doc.createElement('database')
    database.appendChild(doc.createTextNode("Unknown"))
    source.appendChild(database)
    """----------------***size***-----------------------"""
    size = doc.createElement('size')
    DOCUMENT.appendChild(size)
    imgshape = Cxml.Img.shape
    width = doc.createElement('width')
    width.appendChild(doc.createTextNode(str(imgshape[1])))
    size.appendChild(width)
    height = doc.createElement('height')
    height.appendChild(doc.createTextNode(str(imgshape[0])))
    size.appendChild(height)
    depth = doc.createElement('depth')
    depth.appendChild(doc.createTextNode(str(imgshape[2])))
    size.appendChild(depth)
    """
    <segmented>0</segmented>
    """
    segmented = doc.createElement('segmented')
    segmented.appendChild(doc.createTextNode("0"))
    DOCUMENT.appendChild(segmented)
    # one <object> node per detection
    for value in Cxml.Label:
        DOCUMENT.appendChild(labelobject(doc, DOCUMENT, value[0], value))
    # write the DOM document next to the image, same base name with an .xml extension
    xmlname = Cxml.path[:-3] + "xml"
    print("save to:", xmlname)
    f = open(xmlname, 'w')
    # f.write(doc.toprettyxml(indent=' ', newl='\n', encoding='utf-8'))
    doc.writexml(f, indent=' ', newl='\n', addindent=' ', encoding='utf-8')
    f.close()
def readimg(frame, Cxml):
    frame_id = 0
    frame_id += 1
    # frame = cv2.resize(frame, (640, 480))
    blob = cv2.dnn.blobFromImage(frame, 1 / 255, (416, 416), swapRB=True)
    net.setInput(blob)
    outs = net.forward(output_layers)
    confidences = []
    class_ids = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > conf_thresh:
                height, width, channels = frame.shape
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                confidences.append(float(confidence))
                class_ids.append(class_id)
                boxes.append([x, y, w, h])
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, conf_thresh, 0.4)
    mdict = []  # one [label, xmin, ymin, xmax, ymax] entry per box kept after NMS
    for i in range(len(boxes)):
        # If the box remained after NMS.
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            mlist = [label, x, y, x + w, y + h]
            mdict.append(mlist)
            confidence = confidences[i]
            color = colors[class_ids[i]]
            # Draw the box.
            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
            cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
    Cxml.setlabel(mdict)
    creatxml(Cxml)
    elapsed_time = time.time() - time_start
    fps = frame_id / elapsed_time
    cv2.putText(frame, "FPS: " + str(round(fps, 2)), (8, 30), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 0), 2)
    # cv2.imshow("Camera", frame)
    # cv2.waitKey(2)
if __name__ == '__main__':
    JPEGpath = 'image'  # folder with the test images to annotate
    path = os.getcwd()
    path = os.path.join(path, JPEGpath)
    net = cv2.dnn.readNet("./model/yolov4-tiny.weights", "./model/yolov4-tiny.cfg")
    layer_names = net.getLayerNames()
    # note: on newer OpenCV builds getUnconnectedOutLayers() returns a flat array, so the [0] index may need to be dropped
    output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
    with open("./model/coco.names", "r") as f:
        classes = [line.strip() for line in f.readlines()]
    colors = np.random.uniform(0, 255, size=(len(classes), 3))
    for i in os.listdir(path):
        if i.endswith('jpg') or i.endswith('png'):
            zpath = os.path.join(path, i)
            img = cv2.imread(zpath)
            # newpath = zpath.replace('test', 'voc2007de')
            Cxml = xmlvalue(i, JPEGpath, zpath, img)
            readimg(img, Cxml)

    # rtsp = "rtsp://admin:cqits12345@192.168.1.108/cam/realmonitor?channel=1&subtype=0"
    # # Initialise a video capture object with the first camera.
    # cap = cv2.VideoCapture("hat.mp4")
    # fps = int(cap.get(cv2.CAP_PROP_FPS))
    # width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    # height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # # fourcc = cv2.VideoWriter_fourcc(*'XVID')
    # # videoWriter = cv2.VideoWriter('11.avi', fourcc, int(fps), (int(width), int(height)))
    # sucess, frame = cap.read()
    # while sucess:
    #     sucess, frame = cap.read()
    #     readimg(frame)
    #     cv2.waitKey(2)
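
Once the script has run, every jpg/png under image/ should have an .xml file with the same base name next to it, ready to be opened in a labeling tool for manual fine-tuning. As a quick sanity check, a minimal sketch like the one below (my own addition, assuming the same folder layout as above) walks the generated files with xml.etree.ElementTree and prints each class name and box:

import os
import xml.etree.ElementTree as ET

# Quick look at the auto-generated annotations (hypothetical helper, not part of the script above).
xml_dir = os.path.join(os.getcwd(), 'image')
for fname in os.listdir(xml_dir):
    if not fname.endswith('.xml'):
        continue
    root = ET.parse(os.path.join(xml_dir, fname)).getroot()
    size = root.find('size')
    print(fname, size.find('width').text + 'x' + size.find('height').text)
    for obj in root.findall('object'):
        box = obj.find('bndbox')
        coords = [box.find(t).text for t in ('xmin', 'ymin', 'xmax', 'ymax')]
        print('  ', obj.find('name').text, coords)

If the printed boxes look reasonable, the remaining work is just nudging them in the labeling tool instead of drawing every box from scratch.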