#include <fstream> #include <sstream> #include <iostream> #include <string.h> #include <opencv2/dnn.hpp> #include <opencv2/imgproc.hpp> #include <opencv2/highgui.hpp> using namespace cv; using namespace dnn; using namespace std; // Initialize the parameters float confThreshold = 0.5; // Confidence threshold float maskThreshold = 0.3; // Mask threshold vector<string> classes; vector<Scalar> colors; // Draw the predicted bounding box void drawBox(Mat& frame, int classId, float conf, Rect box, Mat& objectMask); // Postprocess the neural network's output for each frame void postprocess(Mat& frame, const vector<Mat>& outs); int main() { // Load names of classes string classesFile = "./mask_rcnn_inception_v2_coco_2018_01_28/mscoco_labels.names"; ifstream ifs(classesFile.c_str()); string line; while (getline(ifs, line)) classes.push_back(line); // Load the colors string colorsFile = "./mask_rcnn_inception_v2_coco_2018_01_28/colors.txt"; ifstream colorFptr(colorsFile.c_str()); while (getline(colorFptr, line)) { char* pEnd; double r, g, b; r = strtod(line.c_str(), &pEnd); g = strtod(pEnd, NULL); b = strtod(pEnd, NULL); Scalar color = Scalar(r, g, b, 255.0); colors.push_back(Scalar(r, g, b, 255.0)); } // Give the configuration and weight files for the model String textGraph = "./mask_rcnn_inception_v2_coco_2018_01_28/mask_rcnn_inception_v2_coco_2018_01_28.pbtxt"; String modelWeights = "./mask_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb"; // Load the network Net net = readNetFromTensorflow(modelWeights, textGraph); net.setPreferableBackend(DNN_BACKEND_OPENCV); net.setPreferableTarget(DNN_TARGET_CPU); // Open a video file or an image file or a camera stream. string str, outputFile; VideoCapture cap(0);//根据摄像头端口id不同,修改下即可 //VideoWriter video; Mat frame, blob; // Create a window static const string kWinName = "Deep learning object detection in OpenCV"; namedWindow(kWinName, WINDOW_NORMAL); // Process frames. while (waitKey(1) < 0) { // get frame from the video cap >> frame; // Stop the program if reached end of video if (frame.empty()) { cout << "Done processing !!!" << endl; cout << "Output file is stored as " << outputFile << endl; waitKey(3000); break; } // Create a 4D blob from a frame. blobFromImage(frame, blob, 1.0, Size(frame.cols, frame.rows), Scalar(), true, false); //blobFromImage(frame, blob); //Sets the input to the network net.setInput(blob); // Runs the forward pass to get output from the output layers std::vector<String> outNames(2); outNames[0] = "detection_out_final"; outNames[1] = "detection_masks"; vector<Mat> outs; net.forward(outs, outNames); // Extract the bounding box and mask for each of the detected objects postprocess(frame, outs); // Put efficiency information. The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes) vector<double> layersTimes; double freq = getTickFrequency() / 1000; double t = net.getPerfProfile(layersTimes) / freq; string label = format("Mask-RCNN on 2.5 GHz Intel Core i7 CPU, Inference time for a frame : %0.0f ms", t); putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0)); // Write the frame with the detection boxes Mat detectedFrame; frame.convertTo(detectedFrame, CV_8U); imshow(kWinName, frame); } cap.release(); return 0; } // For each frame, extract the bounding box and mask for each detected object void postprocess(Mat& frame, const vector<Mat>& outs) { Mat outDetections = outs[0]; Mat outMasks = outs[1]; // Output size of masks is NxCxHxW where // N - number of detected boxes // C - number of classes (excluding background) // HxW - segmentation shape const int numDetections = outDetections.size[2]; const int numClasses = outMasks.size[1]; outDetections = outDetections.reshape(1, outDetections.total() / 7); for (int i = 0; i < numDetections; ++i) { float score = outDetections.at<float>(i, 2); if (score > confThreshold) { // Extract the bounding box int classId = static_cast<int>(outDetections.at<float>(i, 1)); int left = static_cast<int>(frame.cols * outDetections.at<float>(i, 3)); int top = static_cast<int>(frame.rows * outDetections.at<float>(i, 4)); int right = static_cast<int>(frame.cols * outDetections.at<float>(i, 5)); int bottom = static_cast<int>(frame.rows * outDetections.at<float>(i, 6)); left = max(0, min(left, frame.cols - 1)); top = max(0, min(top, frame.rows - 1)); right = max(0, min(right, frame.cols - 1)); bottom = max(0, min(bottom, frame.rows - 1)); Rect box = Rect(left, top, right - left + 1, bottom - top + 1); // Extract the mask for the object Mat objectMask(outMasks.size[2], outMasks.size[3], CV_32F, outMasks.ptr<float>(i, classId)); // Draw bounding box, colorize and show the mask on the image drawBox(frame, classId, score, box, objectMask); } } } // Draw the predicted bounding box, colorize and show the mask on the image void drawBox(Mat& frame, int classId, float conf, Rect box, Mat& objectMask) { //Draw a rectangle displaying the bounding box rectangle(frame, Point(box.x, box.y), Point(box.x + box.width, box.y + box.height), Scalar(255, 178, 50), 3); //Get the label for the class name and its confidence string label = format("%.2f", conf); if (!classes.empty()) { CV_Assert(classId < (int)classes.size()); label = classes[classId] + ":" + label; } //Display the label at the top of the bounding box int baseLine; Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); box.y = max(box.y, labelSize.height); rectangle(frame, Point(box.x, box.y - round(1.5*labelSize.height)), Point(box.x + round(1.5*labelSize.width), box.y + baseLine), Scalar(255, 255, 255), FILLED); putText(frame, label, Point(box.x, box.y), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 0, 0), 1); Scalar color = colors[classId%colors.size()]; // Resize the mask, threshold, color and apply it on the image resize(objectMask, objectMask, Size(box.width, box.height)); Mat mask = (objectMask > maskThreshold); Mat coloredRoi = (0.3 * color + 0.7 * frame(box)); coloredRoi.convertTo(coloredRoi, CV_8UC3); // Draw the contours on the image vector<Mat> contours; Mat hierarchy; mask.convertTo(mask, CV_8U); findContours(mask, contours, hierarchy, RETR_CCOMP, CHAIN_APPROX_SIMPLE); drawContours(coloredRoi, contours, -1, color, 5, LINE_8, hierarchy, 100); coloredRoi.copyTo(frame(box), mask); }
https://github.com/spmallick/learnopencv/tree/master/Mask-RCNN
https://www.learnopencv.com/deep-learning-based-object-detection-and-instance-segmentation-using-mask-r-cnn-in-opencv-python-c/
line: (303,392) (313,104) (0.0532837,-0.178375,0.288),(0,0,0)
0.0930815
----------------------------------------------------
line: (304,393) (465,390) (0.0540282,-0.178375,0.288),(0,0,0)
0.0931887
----------------------------------------------------
line: (302,391) (314,34) (0.0525392,-0.178375,0.288),(-1.27422,-1.06591,1.721)
0.798125
----------------------------------------------------
line: (307,189) (312,34) (-0.096823,-0.176517,0.285),(-1.27422,-1.06591,1.721)
0.737782
----------------------------------------------------
line: (304,392) (512,385) (0.0532837,-0.178375,0.288),(0,0,0)
0.0930815
----------------------------------------------------
line: (307,395) (410,393) (0,0,0),(0.0540282,-0.178375,0.288)
0.0931887
----------------------------------------------------
line: (305,278) (313,34) (0,0,0),(-1.27422,-1.06591,1.721)
0.830633
----------------------------------------------------
line: (271,377) (279,130) (0.0421168,-0.178375,0.288),(0,0,0)
0.0916397
----------------------------------------------------
line: (272,378) (278,192) (0.0428612,-0.178375,0.288),(0,0,0)
0.0917259
----------------------------------------------------
line: (63,361) (275,354) (0.0301005,-0.177755,0.287),(0.0249073,-0.177755,0.287)
0.00259657
----------------------------------------------------
line: (308,38) (477,62) (-1.263,-1.07149,1.73),(0,0,0)
0.828137
----------------------------------------------------
line: (305,391) (311,198) (0.0525392,-0.178375,0.288),(-0.0901926,-0.176517,0.285)
0.071372
----------------------------------------------------
line: (307,394) (445,391) (0,0,0),(0.0525392,-0.178375,0.288)
0.0929756
----------------------------------------------------
line: (273,379) (276,292) (0.0436057,-0.178375,0.288),(-0.0210156,-0.177136,0.286)
0.0323166
----------------------------------------------------
line: (401,385) (512,389) (0.0480725,-0.178375,0.288),(0,0,0)
0.0923694
----------------------------------------------------
line: (315,35) (402,49) (-1.27346,-1.06901,1.726),(0,0,0)
0.831336
----------------------------------------------------
line: (418,384) (514,382) (0.047328,-0.178375,0.288),(0,0,0)
0.0922733
----------------------------------------------------
line: (176,359) (271,356) (0.0286167,-0.177755,0.287),(0,0,0)
0.090022
----------------------------------------------------
line: (385,386) (511,390) (0.0488169,-0.178375,0.288),(0,0,0)
0.092467
----------------------------------------------------
line: (67,359) (181,355) (0.0286167,-0.177755,0.287),(0,0,0)
0.090022
----------------------------------------------------
line: (311,37) (425,53) (-1.26747,-1.07149,1.73),(0,0,0)
0.829843
----------------------------------------------------
line: (275,131) (278,69) (-0.138573,-0.175278,0.283),(0,0,0)
0.111719
----------------------------------------------------
line: (66,360) (241,354) (0.0293586,-0.177755,0.287),(0.0249073,-0.177755,0.287)
0.00222563
----------------------------------------------------
line: (118,360) (275,355) (0.0293586,-0.177755,0.287),(0,0,0)
0.0900817
----------------------------------------------------
line: (441,383) (510,382) (0.0465835,-0.178375,0.288),(0,0,0)
0.0921785
----------------------------------------------------
line: (309,92) (310,37) (0,0,0),(-1.26747,-1.07149,1.73)
0.829843
----------------------------------------------------
line: (306,391) (308,321) (0.0525392,-0.178375,0.288),(0.000425389,-0.177755,0.287)
0.0260588
----------------------------------------------------
line: (274,248) (278,154) (0,0,0),(-0.121747,-0.175278,0.283)
0.106706
----------------------------------------------------
line: (454,384) (511,386) (0.047328,-0.178375,0.288),(0,0,0)
0.0922733
----------------------------------------------------
line: (362,387) (440,389) (0.0495614,-0.178375,0.288),(0,0,0)
0.092566
----------------------------------------------------
line: (221,359) (271,357) (0.0286167,-0.177755,0.287),(0,0,0)
0.090022
----------------------------------------------------
line: (344,390) (396,393) (0,0,0),(0.0540282,-0.178375,0.288)
0.0931887
----------------------------------------------------
line: (314,393) (376,395) (0.0540282,-0.178375,0.288),(0,0,0)
0.0931887
----------------------------------------------------
line: (115,355) (180,354) (0,0,0),(0.0249073,-0.177755,0.287)
0.0897459
----------------------------------------------------
line: (296,388) (358,391) (0,0,0),(0.0525392,-0.178375,0.288)
0.0929756
----------------------------------------------------
line: (449,329) (514,378) (0.00638257,-0.178375,0.288),(0,0,0)
0.0892444
常用模型下载地址:https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md