# It is based on the OpenCV project.
"""Detect objects in an image, a video file or a webcam stream with YOLOv3.

The script loads a Darknet model through OpenCV's dnn module, draws the
detections that survive confidence filtering and non-maximum suppression on
every frame, overlays the inference time plus per-class counts, and writes
the annotated result to an output file while also showing it in a window.
"""
import argparse
import os.path
import sys

import cv2 as cv
import numpy as np

# Initialize the parameters
confThreshold = 0.5  # Confidence threshold
nmsThreshold = 0.4  # Non-maximum suppression threshold
inpWidth = 416  # Width of network's input image
inpHeight = 416  # Height of network's input image

# Class names counted by postprocess2, in the order of the returned list.
COUNTED_CLASSES = ("person", "car", "motorbike", "bus", "bicycle")

parser = argparse.ArgumentParser(description='Object Detection using YOLO in OPENCV')
parser.add_argument('--image', help='Path to image file.')
parser.add_argument('--video', default='test3_video.mp4', help='Path to video file.')
args = parser.parse_args()

# Load names of classes: one class name per line. Splitting on lines (not on
# spaces) keeps multi-word names intact so list positions match class ids.
classesFile = "coco.names"
classes = None
with open(classesFile, 'rt') as f:
    classes = f.read().rstrip().splitlines()

# Give the configuration and weight files for the model and load the network using them.
modelConfiguration = "yolov3.cfg"
modelWeights = "yolov3.weights"

net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)


def getOutputsNames(net):
    """Return the names of the network's output layers.

    Output layers are the ones reported by ``net.getUnconnectedOutLayers()``;
    the ids it returns are 1-based indices into ``net.getLayerNames()``.
    """
    layersNames = net.getLayerNames()
    # Depending on the OpenCV version the ids come back either as an Nx1
    # array or as a flat array; flattening accepts both layouts.
    outLayerIds = np.array(net.getUnconnectedOutLayers(), dtype=int).flatten()
    return [layersNames[i - 1] for i in outLayerIds]


def drawPred(classId, conf, left, top, right, bottom, image=None):
    """Draw one labelled bounding box.

    Args:
        classId: Index of the detected class; used for the box colour and,
            when class names are loaded, for the label text.
        conf: Confidence of the detection, printed with two decimals.
        left, top, right, bottom: Box corners in pixel coordinates.
        image: Image to draw on. When omitted, the module-level ``frame``
            is used, which is what the original six-argument call did.
    """
    if image is None:
        image = frame

    # Derive a per-class colour from the class id (same values as the
    # original expression 33*c + 10*c - 10 + 33 for the middle channel).
    c = int(classId)
    color = (c * 66 + 10, c * 43 + 23, c * 50 + 85)
    cv.rectangle(image, (left, top), (right, bottom), color, 3)

    label = '%.2f' % conf

    # Get the label for the class name and its confidence
    if classes:
        assert (classId < len(classes))
        label = '%s:%s' % (classes[classId], label)

    # Display the label at the top of the bounding box. The text size is
    # measured at scale 0.5 but drawn at 0.75, hence the 1.5 factors below.
    labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    top = max(top, labelSize[1])
    cv.rectangle(image, (left, top - round(1.5 * labelSize[1])),
                 (left + round(1.5 * labelSize[0]), top + baseLine),
                 (255, 255, 255), cv.FILLED)
    cv.putText(image, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 0), 1)


def _selectDetections(frame, outs):
    """Return the detections kept after thresholding and NMS.

    Each row of every array in ``outs`` is read as: elements 0-3 are the box
    centre x, centre y, width and height relative to the frame size, and
    elements 5 onwards are per-class scores. The best-scoring class is kept
    when its score exceeds ``confThreshold``.

    Returns:
        A list of ``(classId, confidence, [left, top, width, height])``
        tuples in pixel coordinates; empty when nothing passes.
    """
    frameHeight, frameWidth = frame.shape[:2]

    classIds = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            classId = int(np.argmax(scores))
            confidence = float(scores[classId])
            if confidence > confThreshold:
                center_x = int(detection[0] * frameWidth)
                center_y = int(detection[1] * frameHeight)
                width = int(detection[2] * frameWidth)
                height = int(detection[3] * frameHeight)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                classIds.append(classId)
                confidences.append(confidence)
                boxes.append([left, top, width, height])

    if not boxes:
        return []

    # Perform nms to eliminate redundant overlapping boxes with lower
    # confidences. The index container is Nx1 or flat depending on the
    # OpenCV version, so it is flattened before use.
    indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
    kept = np.array(indices, dtype=int).flatten()
    return [(classIds[i], confidences[i], boxes[i]) for i in kept]


def postprocess(frame, outs):
    """Draw every detection that survives thresholding and NMS on ``frame``."""
    for classId, confidence, box in _selectDetections(frame, outs):
        left, top, width, height = box
        drawPred(classId, confidence, left, top, left + width, top + height, frame)


def postprocess2(frame, outs):
    """Count the surviving detections of the tracked classes.

    Prints the class name of every surviving detection and returns the counts
    as ``[person, car, motorbike, bus, bicycle]`` (always five elements).
    """
    counts = dict.fromkeys(COUNTED_CLASSES, 0)
    for classId, _confidence, _box in _selectDetections(frame, outs):
        name = str(classes[classId])
        print(name)
        if name in counts:
            counts[name] += 1
    return [counts[name] for name in COUNTED_CLASSES]


# Process inputs
winName = 'Deep learning object detection in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)

outputFile = "yolo_out_py.avi"
if args.image:
    # Open the image file
    if not os.path.isfile(args.image):
        print("Input image file ", args.image, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.image)
    # splitext instead of [:-4] so extensions of any length are handled.
    outputFile = os.path.splitext(args.image)[0] + '_yolo_out_py.jpg'
elif args.video:
    # Open the video file
    if not os.path.isfile(args.video):
        print("Input video file ", args.video, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.video)
    outputFile = os.path.splitext(args.video)[0] + 'test_video_out.avi'
else:
    # Webcam input
    cap = cv.VideoCapture(0)

# Get the video writer initialized to save the output video
vid_writer = None
if not args.image:
    vid_writer = cv.VideoWriter(
        outputFile, cv.VideoWriter_fourcc('M', 'J', 'P', 'G'), 30,
        (round(cap.get(cv.CAP_PROP_FRAME_WIDTH)),
         round(cap.get(cv.CAP_PROP_FRAME_HEIGHT))))

# Text template and colour of each count line, in postprocess2's order.
countOverlay = (
    ('Person num:%s', (255, 255, 0)),
    ('Car num:%s', (240, 255, 255)),
    ('Motorbike num:%s', (245, 245, 245)),
    ('Bus num:%s', (255, 97, 0)),
    ('Bicycle num:%s', (255, 230, 201)),
)

# The output layer names do not change between frames; look them up once.
outputLayerNames = getOutputsNames(net)

try:
    while cv.waitKey(1) < 0:
        # get frame from the video
        hasFrame, frame = cap.read()

        # Stop the program if reached end of video
        if not hasFrame:
            print("Done processing !!!")
            print("Output file is stored as ", outputFile)
            cv.waitKey(3000)
            break

        # Create a 4D blob from a frame.
        blob = cv.dnn.blobFromImage(frame, 1 / 255, (inpWidth, inpHeight), [0, 0, 0], 1, crop=False)

        # Sets the input to the network
        net.setInput(blob)

        # Runs the forward pass to get output of the output layers
        outs = net.forward(outputLayerNames)

        # Remove the bounding boxes with low confidence
        postprocess(frame, outs)

        # Put efficiency information.
        # The function getPerfProfile returns the overall time for inference(t)
        # and the timings for each of the layers(in layersTimes)
        t, _ = net.getPerfProfile()
        label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
        cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))

        # Count once per frame and print one line per tracked class,
        # 15 px apart starting at y=30.
        counts = postprocess2(frame, outs)
        for row, ((template, color), count) in enumerate(zip(countOverlay, counts), start=2):
            cv.putText(frame, template % count, (0, 15 * row),
                       cv.FONT_HERSHEY_SIMPLEX, 0.5, color)

        # Write the frame with the detection boxes
        if args.image:
            cv.imwrite(outputFile, frame.astype(np.uint8))
        else:
            vid_writer.write(frame.astype(np.uint8))

        cv.imshow(winName, frame)
finally:
    # Release the capture device and flush the output video on every exit
    # path (end of stream, key press, or error).
    cap.release()
    if vid_writer is not None:
        vid_writer.release()
    cv.destroyAllWindows()
# Screenshot: