zoukankan      html  css  js  c++  java
  • openmp 和 thread 性能实测

    #include <stdio.h>
    #include <iostream>
    #include <thread>
    #include<omp.h>
    //#include <opencv2/core.hpp>
    //#include <cv/cv_tools.h>
    #include <opencv2/highgui/highgui.hpp>
    #include <opencv2/imgproc/imgproc.hpp>
    #include <opencv2/core/version.hpp>
    using namespace std;
    using namespace cv;
    
    #define TEST_BY_OPENMP
    
    /**
     * Posterizes `pic` into `dst`: every byte keeps only its top three bits
     * (the low five bits are cleared), i.e. each value is rounded down to a
     * multiple of 32.
     *
     * @param pic  Source image. Must have 8-bit depth; any channel count.
     * @param dst  Destination image. Must already be allocated with the same
     *             size and type as `pic`; its contents are overwritten.
     *
     * An empty `pic` is a no-op. Mismatched or non-8-bit inputs fail a
     * CV_Assert instead of silently reading/writing out of bounds.
     */
    void childFunc(Mat& pic, Mat &dst)
    {
        if (pic.empty()) {
            return;
        }
        CV_Assert(pic.depth() == CV_8U);
        CV_Assert(dst.type() == pic.type() && dst.size() == pic.size());

        int rows = pic.rows;
        size_t rowBytes = static_cast<size_t>(pic.cols) * pic.channels();

        // When neither matrix has padding between rows, the whole image can be
        // treated as one long row; otherwise each row must be addressed through
        // its own pointer so the row stride is respected (e.g. for ROI views).
        if (pic.isContinuous() && dst.isContinuous()) {
            rowBytes *= static_cast<size_t>(rows);
            rows = 1;
        }

        for (int r = 0; r != rows; ++r) {
            const uchar* in = pic.ptr<uchar>(r);
            uchar* out = dst.ptr<uchar>(r);
            for (size_t i = 0; i != rowBytes; ++i) {
                // Shift right then left by 5 to zero the low five bits.
                out[i] = static_cast<uchar>((in[i] >> 5) << 5);
            }
        }
    }
    
    
    /**
     * Cuts `_src` into `n` horizontal strips without copying pixel data.
     *
     * Strip `i` covers rows [i*rows/n, (i+1)*rows/n), so the strips are
     * contiguous, never overlap, and together cover every row even when the
     * row count is not a multiple of `n` (strip heights then differ by at most
     * one row). Each strip is a header onto `_src`'s buffer: writing to a
     * strip writes to `_src`.
     *
     * @param _src  Image to slice; any type.
     * @param n     Number of strips requested.
     * @return      `n` strip headers in top-to-bottom order, or an empty vector
     *              when `n` is not positive. Strips may be empty if `n` exceeds
     *              the number of rows.
     */
    vector<Mat> split(Mat& _src, int n)
    {
        vector<Mat> pics;
        if (n <= 0) {
            return pics;
        }
        pics.reserve(static_cast<size_t>(n));

        // 64-bit arithmetic so rows * (i + 1) cannot overflow for large inputs.
        const long long rows = _src.rows;
        for (int i = 0; i != n; ++i) {
            const int first = static_cast<int>(rows * i / n);
            const int last  = static_cast<int>(rows * (i + 1) / n);
            // rowRange slices on row boundaries and honours the source's
            // stride and element type, unlike a hand-computed byte offset.
            pics.push_back(_src.rowRange(first, last));
        }
        return pics;
    }
    int main()
    {
        const int LOOPS = 1e10;
        Mat src = imread("bg.jpg");
        resize(src,src,Size(3840,2160));
        Mat dst(src.size(),src.type());
        vector<Mat> child_pics   = split(src, 4);
        vector<Mat> child_pics_d = split(dst, 4);
    
    
    
        long long t0 = getTickCount();
    
        for(int k=0;k!=LOOPS;++k){
    #ifdef TEST_BY_OPENMP
        #pragma omp parallel num_threads(4)
        {
            int index = omp_get_thread_num();
            childFunc(child_pics[index], child_pics_d[index]);
        }//7.3 ms   cpu=60%
    
        for (int i = 0; i != 4; ++i) {
            childFunc(child_pics[i], child_pics_d[i]);
        }//6.97ms  cpu=45%
    #else
            thread t[4];
            for (int i = 0; i != 4; ++i) {
                t[i] = thread(childFunc, child_pics[i], child_pics_d[i]);
            }
            for (int i = 0; i != 4; ++i) {
                t[i].join();
            }//21.32ms  cpu=55%
    #endif
    
        };
    
        long long t1 = getTickCount();
        double time_waste = double(t1 - t0) / getTickFrequency();
        time_waste /= LOOPS;
        printf("time waste=%.2f ms
    ", time_waste * 1000);
        imshow("test", dst);
        waitKey(0);
    }

    实测发现,用 std::thread 对图像数据做分片处理时耗时明显更长。原因尚不清楚,最初猜测是数据竞争;但各分片互不重叠,更可能是每次循环都要新建并 join 4 个线程所带来的开销(待验证)。

    OpenMP 的速度更快一些。

  • 相关阅读:
    jsonp 跨域解决 spring
    goEasy消息推送,pushlet 向特写用户实时推送
    支持中文加密解密
    webservice 小小例子
    DES加密
    线程、异步
    mysql数据库导出模型到powerdesigner,PDM图形窗口中显示数据列的中文注释
    Log4j按级别输出到不同文件
    niginx 负载均衡
    node-webkit安装及简单实现
  • 原文地址:https://www.cnblogs.com/luoyinjie/p/11390517.html
Copyright © 2011-2022 走看看