from data.dataloader_detection import ListDataset
from data.collate_function import collate_function
from config.config import load_config, cfg
from net.resnet import ResNet, ResnetBasic, ResnetBasicSlim
from utils.nms_np_simple import nms
from data.dataloader_test_detection import ListDataset as testListDataset
from data.resize_uniform import resizeUniform
from dataY.yolov1_dataY import DataY
from net.yolov1 import YOLOv1
from loss.yololoss import yoloLoss
import numpy as np
import cv2
import time
import torch


def plotGride(img, grideHW=(7, 7), stride=64):
    """Draw the prediction grid (grideHW cells of size stride) on the image."""
    h, w, _ = img.shape
    for li in range(grideHW[1]):  # vertical lines, one per grid column
        cv2.line(img, (int(li * stride), 0), (int(li * stride), int(h)), (0, 255, 0), 1)
    for li in range(grideHW[0]):  # horizontal lines, one per grid row
        cv2.line(img, (0, int(li * stride)), (int(w), int(li * stride)), (0, 255, 0), 1)
    return img


def plotBox(img, x1, y1, x2, y2, txts):
    """Draw a box, its center point, and one text line per entry in txts."""
    cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 1)
    for i in range(len(txts)):
        cv2.putText(img, txts[i], (int(x1), int(y1 + i * 15)), 1, 1, (0, 0, 255))
    cv2.circle(img, (int((x1 + x2) / 2), int((y1 + y2) / 2)), color=(0, 0, 255), radius=2, thickness=-1)
    return img


def post(pred, dic):
    """Decode a batch of YOLOv1 predictions (BHWC) into per-image detections
    [x1, y1, x2, y2, score, cls] in network-input coordinates, then apply NMS."""
    b, h, w, c = pred.size()
    alldets = []  # one list of detections per image
    for i in range(b):
        apred = pred[i]

        # cells where at least one predicted box exceeds the score threshold
        coobjMask1 = apred[:, :, 4] > dic["scoreThresh"]
        for j in range(1, dic["bboxPredNum"]):
            coobjMask1 = torch.logical_or(coobjMask1, apred[:, :, 4 + j * 5] > dic["scoreThresh"])

        # within each cell keep only the box with the highest confidence
        confPred = apred[:, :, 4:5 * dic["bboxPredNum"]:5]
        _, maxIndex = torch.max(confPred, dim=-1, keepdim=True)
        h_, w_, c_ = apred.size()
        oneHot = torch.zeros(h_, w_, dic["bboxPredNum"]).to(apred.device).scatter_(-1, maxIndex, 1).type(
            torch.bool)

        # final selection: confident AND the best box of its cell
        coobjMask = torch.logical_and(oneHot, torch.unsqueeze(coobjMask1, -1))
        choiceIndex = torch.nonzero(coobjMask, as_tuple=False)
        choiceIndex = choiceIndex.to('cpu').numpy()

        dets = []
        for k in range(choiceIndex.shape[0]):
            hid, wid, boxid = choiceIndex[k][0], choiceIndex[k][1], choiceIndex[k][2]
            fullVector = apred[hid][wid].to("cpu").numpy()
            clsVec = fullVector[-dic["clsNum"]:]
            boxVector = fullVector[boxid * 5: (boxid + 1) * 5]
            deltax, deltay, w, h, score = boxVector[0], boxVector[1], boxVector[2], boxVector[3], boxVector[4]
            # offsets are relative to the cell, widths/heights are predicted as square roots
            cy = (hid + deltay) * dic["stride"]
            cx = (wid + deltax) * dic["stride"]
            w = w * w * dic["netInputHw"][1]
            h = h * h * dic["netInputHw"][0]
            c = clsVec.argmax()
            dets.append([cx, cy, w, h, score, c])
        dets = np.array(dets)
        dets = dets.reshape(-1, 6)
        dets[:, :2] -= dets[:, 2:4] / 2  # (cx, cy, w, h) -> (x1, y1, w, h)
        dets[:, 2:4] += dets[:, :2]      # (x1, y1, w, h) -> (x1, y1, x2, y2)

        # clip boxes to the network input image
        dets[:, :2] = np.where(dets[:, :2] < 0, 0, dets[:, :2])
        dets[:, 2] = np.where(dets[:, 2] > dic["netInputHw"][1], dic["netInputHw"][1], dets[:, 2])
        dets[:, 3] = np.where(dets[:, 3] > dic["netInputHw"][0], dic["netInputHw"][0], dets[:, 3])
        dets = nms(dets, dic["iouThresh"])
        alldets.append(dets)
    return alldets
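
# Hedged sketch (not called anywhere in this script, values are illustrative): a worked
# example of the box decoding that post() performs for a single cell, assuming the default
# stride of 64 and a 448x448 network input used by postDict below.
def _decodeCellExample():
    stride, netInputHw = 64, (448, 448)           # assumed values matching postDict
    hid, wid = 3, 2                               # grid cell row / column
    deltax, deltay, w, h = 0.5, 0.5, 0.25, 0.25   # hypothetical raw per-box network outputs
    cx = (wid + deltax) * stride                  # (2 + 0.5) * 64 = 160
    cy = (hid + deltay) * stride                  # (3 + 0.5) * 64 = 224
    boxW = w * w * netInputHw[1]                  # widths are predicted as square roots: 0.25**2 * 448 = 28
    boxH = h * h * netInputHw[0]                  # 0.25**2 * 448 = 28
    return cx, cy, boxW, boxH
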
if __name__ == '__main__':
    """config"""
    load_config(cfg, "./config/config.yaml")
    print(cfg)
    device = torch.device('cuda:0')
    batchsize = 2
    showFlag = 1
    saveFlag = 0
    saveDir = ""

    mode = 1
    # testMode: no label information, prediction only
    # valMode: labels are available and shown alongside the predictions
    # camMode: read frames from the camera
    modeDict = {0: "testMode", 1: "valMode", 2: "camMode"}

    postDict = {"scoreThresh": 0.3,
                "iouThresh": 0.3,
                "netInputHw": (448, 448),
                "bboxPredNum": cfg.model.bboxPredNum,
                "clsNum": cfg.model.clsNum,
                "stride": cfg.model.stride,
                }

    """dataset"""
    if modeDict[mode] == "testMode":
        dataset = testListDataset(imgPath=cfg.dir.testImgDir,  # images root
                                  netInputSizehw=cfg.model.netInput,
                                  imgChannelNumber=cfg.model.imgChannelNumber,
                                  clsname=cfg.clsname
                                  )
    if modeDict[mode] == "valMode":
        dataset = ListDataset(trainAnnoPath=cfg.dir.valAnnoDir,
                              trainImgPath=cfg.dir.valImgDir,
                              netInputSizehw=cfg.model.netInput,
                              augFlag=False,
                              clsname=cfg.clsname,
                              imgChannelNumber=cfg.model.imgChannelNumber)
    if modeDict[mode] == "valMode" or modeDict[mode] == "testMode":
        dataLoader = torch.utils.data.DataLoader(
            dataset,
            collate_fn=collate_function,
            batch_size=batchsize,  # cfg.train.batchSize,
            shuffle=False,
            num_workers=cfg.train.workers,
            pin_memory=True,  # can be enabled when the machine has enough resources
        )

    datay = DataY(inputHW=cfg.model.netInput,  # input size is fixed because the images are already resized
                  gride=cfg.model.featSize,    # number of grid cells the network input is divided into
                  stride=cfg.model.stride,
                  boxNum=cfg.model.bboxPredNum,
                  clsNum=cfg.model.clsNum)

    """network"""
    # network = ResNet(ResnetBasic, [2, 2, 2, 2], channel_out=15)
    network = ResNet(ResnetBasicSlim, [2, 2, 2, 2],  # [3, 4, 6, 3],
                     channel_in=cfg.data.imgChannelNumber,
                     channel_out=(cfg.model.bboxPredNum * 5 + cfg.model.clsNum))
    # network = YOLOv1(params={"dropout": 0.5, "num_class": cfg.model.clsNum})
    network.to(device)

    weights = torch.load(cfg.dir.modelSaveDir + cfg.dir.modelName)  # load the trained weights
    network.load_state_dict(weights)  # copy them into the model

    with torch.no_grad():
        if modeDict[mode] == "camMode":
            cap = cv2.VideoCapture(0)
            while True:
                assert cfg.model.imgChannelNumber == 3, "camera mode currently only supports 3-channel input"
                ret, img = cap.read()
                if not ret:
                    print("can not cap a picture")
                    time.sleep(1)
                    continue
                frame = np.copy(img)

                img, infos = resizeUniform(img, cfg.model.netInput)
                imgs = np.array([img]).transpose(0, 3, 1, 2)  # BHWC -> BCHW
                imgs = torch.from_numpy(imgs.astype(np.float32))
                imgs = imgs.to(device).float()
                mean = torch.tensor(cfg.data.normalize[0]).cuda().reshape(3, 1, 1)
                std = torch.tensor(cfg.data.normalize[1]).cuda().reshape(3, 1, 1)
                imgs = (imgs - mean) / std

                pred = network(imgs)
                pred = pred.permute(0, 2, 3, 1)  # BCHW -> BHWC

                """post"""
                bcDets = post(pred, postDict)
                dets = bcDets[0]
                cv2.imshow("capture", img)

                """plot pred"""
                imgp = plotGride(frame, grideHW=cfg.model.featSize, stride=cfg.model.stride)
                for i in range(dets.shape[0]):
                    x1, y1, x2, y2, score, cls = dets[i][0], dets[i][1], dets[i][2], dets[i][3], dets[i][4], dets[i][5]
                    imgp = plotBox(imgp, x1, y1, x2, y2,
                                   ["s: " + str(round(score, 3)), "c: " + cfg.clsname[int(cls)]])
                cv2.imshow("pred", imgp)
                if cv2.waitKey(1) & 0xFF == 32:  # press space to quit
                    break

        if modeDict[mode] == "valMode" or modeDict[mode] == "testMode":
            for id, infos in enumerate(dataLoader):  # one batch at a time
                """forward and pred"""
                imgs = infos['images']
                imgs = imgs.to(device).float()
                mean = torch.tensor(cfg.data.normalize[0]).cuda().reshape(3, 1, 1)
                std = torch.tensor(cfg.data.normalize[1]).cuda().reshape(3, 1, 1)
                imgs = (imgs - mean) / std

                pred = network(imgs)
                pred = pred.permute(0, 2, 3, 1)  # BCHW -> BHWC

                """post"""
                bcDets = post(pred, postDict)

                for bcid in range(batchsize):
                    dets = bcDets[bcid]
                    if showFlag:
                        image = infos['images'][bcid]
                        image = image.to('cpu').numpy()
                        image = image.transpose(1, 2, 0).astype(np.uint8)
                        image = cv2.UMat(image).get()
                        imgt = np.copy(image)
                        imgp = np.copy(image)

                        """plot pred"""
                        imgp = plotGride(imgp, grideHW=cfg.model.featSize, stride=cfg.model.stride)
                        for i in range(dets.shape[0]):
                            x1, y1, x2, y2, score, cls = dets[i][0], dets[i][1], dets[i][2], dets[i][3], dets[i][4], dets[i][5]
                            imgp = plotBox(imgp, x1, y1, x2, y2,
                                           [str(i) + " s: " + str(round(score, 3)), "c: " + cfg.clsname[int(cls)]])
                            print("pred[%d x1:%.2f y1:%.2f w:%.2f h:%.2f score: %.2f " % (i, x1, y1, x2 - x1, y2 - y1, score)
                                  + cfg.clsname[int(cls)] + "]")
cfg.clsname[cls] +"]") cv2.imshow("pred", imgp) cv2.waitKey() if showFlag and not modeDict[mode] == "testModeFlag":# test mode 没有target """read target""" bboxesGt = infos['bboxesGt'][bcid] classesGt = infos['classes'][bcid] annoName = infos["annoName"][bcid] """plot target""" imgt = plotGride(imgt, grideHW=(cfg.model.featSize), stride=cfg.model.stride) for i in range(bboxesGt.shape[0]): x1, y1, w, h = bboxesGt[i] cls = classesGt[i] plotBox(imgt, x1,y1,x1+w,y1+h,[cfg.clsname[cls]]) print("target[x1:%.2f y1:%.2f w:%.2f h:%.2f " % (x1, y1, w, h)+ cfg.clsname[cls]+"]") print(annoName) cv2.imshow("target", imgt) cv2.waitKey() print("-"*50) # """post""" # # target contain object mask # coobjMask_ = pred[:, :, :, 4] > scoreThresh # for i in range(1, cfg.model.bboxPredNum): # coobjMask_ = torch.logical_or(coobjMask_, pred[:, :, :, 4 + i * 5] > scoreThresh) # # #选择每个anchor 点 预测置信度最大的, bbox num = 1 代码也可以 # confPred = pred[:, :, :, 4:5 * cfg.model.bboxPredNum:5] # _, maxIndex = torch.max(confPred, dim=-1, keepdim=True) # b_, h_, w_, c_ = pred.size() # oneHot = torch.zeros(b_, h_, w_, cfg.model.bboxPredNum).to("cuda:0").scatter_(-1, maxIndex, 1).type( # torch.bool) # # coobjMask = torch.logical_and(oneHot, torch.unsqueeze(coobjMask_, -1)) # chioceIndex = torch.nonzero(coobjMask, as_tuple=False) # # # """ plot pred """ # chioceIndex = chioceIndex.to('cpu').numpy() # dets = [] # imgp = np.copy(image) # for i in range(chioceIndex.shape[0]): # imId, hid, wid, boxid = chioceIndex[i][0], chioceIndex[i][1], chioceIndex[i][2], chioceIndex[i][3] # # fullVector = pred[imId][hid][wid].to("cpu").numpy() # clsVec = fullVector[-cfg.model.clsNum:] # boxVector = fullVector[boxid * 5: (boxid + 1) * 5] # deltax, deltay, w, h, score = boxVector[0], boxVector[1], boxVector[2], boxVector[3], boxVector[4] # # cy = (hid + deltay) * cfg.model.stride # cx = (wid + deltax) * cfg.model.stride # w = w*w* cfg.model.netInput[1] # h = h*h * cfg.model.netInput[0] # c = clsVec.argmax() # dets.append([cx, cy, w, h, score, c]) # dets = np.array(dets) # dets = dets.reshape(-1, 6) # dets[:, :2] -= dets[:, 2:4] / 2 # dets[:, 2:4] += dets[:, :2] # # dets = nms(dets, iouThresh) if saveFlag and modeDict[mode] == "valModeFlag": np.savetxt(saveDir + infos["annoName"][bcid],dets) if saveFlag and modeDict[mode] == "testModeFlag": np.savetxt(saveDir + infos["imgName"][bcid].split["."][0]+".txt",dets)