YOLOv5 test
import time
import math
import socket

import cv2
import numpy as np
import torch
from numpy import random
from PIL import Image

from models.experimental import attempt_load
from utils.datasets import letterbox
from utils.general import non_max_suppression, scale_coords
from utils.plots import plot_one_box
from utils.torch_utils import select_device, time_synchronized

import pyrealsense2 as rs


class AppState:
    def __init__(self, *args, **kwargs):
        self.WIN_NAME = 'RealSense'
        self.pitch, self.yaw = math.radians(-10), math.radians(-15)
        self.translation = np.array([0, 0, -1], dtype=np.float32)
        self.distance = 2
        self.prev_mouse = 0, 0
        self.mouse_btns = [False, False, False]
        self.paused = False
        self.decimate = 1
        self.scale = True
        self.color = True

    def reset(self):
        self.pitch, self.yaw, self.distance = 0, 0, 2
        self.translation[:] = 0, 0, -1

    def rotation(self):
        Rx, _ = cv2.Rodrigues((self.pitch, 0, 0))
        Ry, _ = cv2.Rodrigues((0, self.yaw, 0))
        return np.dot(Ry, Rx).astype(np.float32)

    def pivot(self):
        return self.translation + np.array((0, 0, self.distance), dtype=np.float32)


# Helper code
def load_image_into_numpy_array(image_param):
    im_height, im_width = image_param.shape[:2]
    image_rgb = Image.fromarray(image_param.astype('uint8')).convert('RGB')
    return np.array(image_rgb.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)


def bytesToHexString(bs):
    return ''.join('%02X' % b for b in bs)


class listDlg():
    def __init__(self, name="Dlg"):  # constructor
        super().__init__()
        self.cnt_tmp = 0
        self.CAM_ID = 1
        self.timer_camera = time.time()
        self.camera = cv2.VideoCapture()

        self.device = select_device('')
        # half precision only supported on CUDA
        self.half = self.device.type != 'cpu'
        self.model = attempt_load('yolov5s.pt', map_location=self.device)  # load FP32 model
        if self.half:
            self.model.half()  # keep model and input dtypes consistent on CUDA
        self.conf_thres = 0.25
        self.iou_thres = 0.45
        self.classes = None
        self.agnostic = False
        self.augment = False
        # Get names and colors
        self.names = self.model.module.names if hasattr(self.model, 'module') else self.model.names
        self.colors = [[random.randint(0, 255) for _ in range(3)] for _ in self.names]
        self.img_size = 640
        self.checkObj = False

        # ---------------- RealSense ----------------
        # Configure depth and color streams
        self.pipeline = rs.pipeline()
        self.config = rs.config()
        self.config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
        self.config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
        self.state = AppState()

        self.socket_client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.rec_code = self.socket_client.connect_ex(("192.168.2.200", 9000))

        # Processing blocks
        self.pc = rs.pointcloud()
        self.decimate = rs.decimation_filter()
        #self.decimate.set_option(rs.option.filter_magnitude, 2 ** self.state.decimate)
        self.colorizer = rs.colorizer()

        # Start streaming
        self.pipeline.start(self.config)
        self.show_camera_action()

    def show_camera_action(self):
        while True:
            frames = self.pipeline.wait_for_frames()
            depth_frame = frames.get_depth_frame()
            color_frame = frames.get_color_frame()

            depth_frame = self.decimate.process(depth_frame)

            # Grab new intrinsics (may be changed by decimation)
            depth_intrinsics = rs.video_stream_profile(depth_frame.profile).get_intrinsics()
            self.w, self.h = depth_intrinsics.width, depth_intrinsics.height

            depth_image = np.asanyarray(depth_frame.get_data())
            color_image = np.asanyarray(color_frame.get_data())
            depth_colormap = np.asanyarray(self.colorizer.colorize(depth_frame).get_data())

            if self.state.color:
                mapped_frame, color_source = color_frame, color_image
            else:
                mapped_frame, color_source = depth_frame, depth_colormap

            points = self.pc.calculate(depth_frame)
            self.pc.map_to(mapped_frame)

            # Pointcloud data to arrays
            v, t = points.get_vertices(), points.get_texture_coordinates()
            verts = np.asanyarray(v).view(np.float32).reshape(-1, 3)      # xyz
            texcoords = np.asanyarray(t).view(np.float32).reshape(-1, 2)  # uv
            #print(verts[26800][0], verts[26800][1], verts[26800][2])

            # Read one status frame from the remote device over TCP (blocking)
            rec_data = self.socket_client.recv(1024)
            str1 = bytesToHexString(rec_data)
            if str1.startswith('01') and len(str1) == 40:  # 20-byte frame
                print(int(str1[0:2], 16), int(str1[12:14], 16),
                      int(str1[18:20] + str1[16:18], 16),  # little-endian uint16 fields
                      int(str1[26:28] + str1[24:26], 16),
                      int(str1[34:36] + str1[32:34], 16))

            self.img_test2 = cv2.resize(depth_colormap, (320, 240))

            self.im0 = color_image.copy()
            self.cameraImg = color_image.copy()
            self.detectionObjectFunction()
            cv2.waitKey(5)

    def detectionObjectFunction(self):
        img = letterbox(self.cameraImg, new_shape=self.img_size)[0]
        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
        img = np.ascontiguousarray(img)

        img = torch.from_numpy(img).to(self.device)
        img = img.half() if self.half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = self.model(img, augment=self.augment)[0]
        #print('thres:%f' % self.conf_thres)

        # Apply NMS
        pred = non_max_suppression(pred, self.conf_thres, self.iou_thres)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], self.im0.shape).round()

                # Write results (only the 'person' class is of interest here)
                for *rect1, conf, cls in reversed(det):
                    if self.names[int(cls)] != 'person':
                        continue
                    print(self.names[int(cls)])
                    c1, c2 = (int(rect1[0]), int(rect1[1])), (int(rect1[2]), int(rect1[3]))
                    label = f'{self.names[int(cls)]} {conf:.2f}'
                    print(c1, ' ', c2)
                    plot_one_box(rect1, self.im0, label=label,
                                 color=self.colors[int(cls)], line_thickness=2)

        # Print time (inference + NMS)
        print(f'detection time. ({t2 - t1:.3f}s)')

        cv2.imshow('win2', self.im0)


dlg = listDlg('')
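The hex-string slicing in show_camera_action reads a 20-byte frame whose first byte is 0x01: byte 6 is taken as-is, and three uint16 values are reassembled little-endian from bytes 8-9, 12-13 and 16-17. As a standalone sketch, the same decoding can work on the raw bytes directly; the layout here is inferred from the slicing above, not from any protocol documentation:

def parse_frame(buf):
    """Decode one 20-byte status frame (layout inferred from the hex slicing above)."""
    if len(buf) != 20 or buf[0] != 0x01:
        return None
    header = buf[0]                            # frame type, always 0x01 here
    flag = buf[6]                              # single status byte
    v1 = int.from_bytes(buf[8:10], 'little')   # three little-endian uint16 fields
    v2 = int.from_bytes(buf[12:14], 'little')
    v3 = int.from_bytes(buf[16:18], 'little')
    return header, flag, v1, v2, v3

Running the script prints the decoded frame as five integers, each detected 'person' with its box corners (c1, c2), and the inference + NMS time per frame: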
detection time. (0.348s)
1 20 1973 690 1017
person
(51, 6) (64, 49)
person
(308, 343) (527, 480)
detection time. (0.330s)
1 20 2250 563 485
person
(311, 339) (525, 480)
detection time. (0.322s)
1 20 2280 574 396
person
(315, 337) (526, 480)
detection time. (0.324s)
person
(316, 337) (532, 480)
detection time. (0.309s)
person
(50, 6) (64, 50)
person
(317, 337) (529, 480)
detection time. (0.329s)
1 20 2296 576 298
person
(51, 5) (64, 49)
person
(318, 337) (533, 480)
detection time. (0.329s)
1 20 2281 591 277
person
(317, 336) (529, 480)
detection time. (0.332s)
1 20 2283 586 258
person
(318, 336) (532, 480)
detection time. (0.363s)
1 20 2290 567 250
person
(51, 7) (63, 48)
person
(314, 337) (521, 480)
detection time. (0.315s)
1 20 2426 475 354
person
(52, 6) (65, 48)
person
(316, 337) (525, 480)
detection time. (0.360s)
1 20 2361 505 299
person
(51, 7) (64, 49)
person
(316, 336) (529, 480)
detection time. (0.335s)
1 20 2338 521 269
person
(51, 6) (64, 48)
person
(320, 338) (539, 480)
detection time. (0.392s)
1 20 2338 520 248
person
(51, 6) (64, 49)
person
(323, 340) (545, 480)
detection time. (0.358s)
1 20 2310 513 232
person
(50, 5) (64, 49)
person
(316, 344) (547, 480)
detection time. (0.362s)
1 20 2281 478 206
person
(351, 358) (531, 480)
detection time. (0.405s)
1 20 2259 531 205
person
(50, 6) (64, 48)
person
(377, 370) (538, 480)
detection time. (0.324s)
1 20 2420 391 301
person
(359, 371) (522, 480)
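The loop computes the point-cloud vertices (verts) on every frame but never combines them with the YOLOv5 boxes. One plausible next step, sketched here as an assumption rather than part of the original script: look up the 3D point under a box center by scaling the 640x480 color pixel into the decimated depth grid (self.w x self.h) and indexing verts row-major. Without rs.align the color and depth viewpoints differ slightly, so the result is only approximate.

def box_center_xyz(c1, c2, verts, w, h, color_w=640, color_h=480):
    """Approximate 3D point (metres, camera frame) under a detection's center."""
    u = min((c1[0] + c2[0]) // 2 * w // color_w, w - 1)  # color pixel -> depth column
    v = min((c1[1] + c2[1]) // 2 * h // color_h, h - 1)  # color pixel -> depth row
    return verts[v * w + u]                              # row-major index into the cloud

Called as box_center_xyz(c1, c2, verts, self.w, self.h) inside the detection loop, this would attach a camera-frame coordinate to each 'person' box printed above.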