zoukankan      html  css  js  c++  java
  • cuda实践4

    这里对 上一篇 ---cuda实践3 中的代码进行修改,在block中对share memory 进行迭代。

    template <int BLOCK_SIZE> __global__ void caculateShelter_cuda(
        float *uv_triangulation_0,
        float *uv_triangulation,
        float *w_triangulation,
        float *w_triangulation_center,
        float *position_panorama_vect,
        int triangule_num_d, int panorama_num_d, int imgHeight_d, int imgWidth_d, float dis_threshold, int*inside_ptr_d,  int* result)
    {
        int distance_threshold = dis_threshold;
        int times = triangule_num_d;
        int num = imgHeight_d * imgWidth_d;
    
        int x = threadIdx.x;
        int y = threadIdx.y;
        int Row = blockIdx.y * BLOCK_SIZE + threadIdx.y;
        int Col = blockIdx.x * BLOCK_SIZE + threadIdx.x;
        //int blockId = blockIdx.x + blockIdx.y * gridDim.x;
        //int threadId = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x;
        float searchPoint[2];
        searchPoint[0] = Col * 1.0 / imgWidth_d;
        searchPoint[1] = (imgHeight_d - Row) * 1.0 / imgHeight_d;
    
        int threadId = Row * imgWidth_d + Col;
    
        int inside = -1;
        int index_temp = -1;
        int step = BLOCK_SIZE * BLOCK_SIZE;
        int grid_size = (times / step) + 1;
    
        int threadid_0 = BLOCK_SIZE * y + x;
        int tempx = threadid_0 % 32;
        int tempy = threadid_0 / 32;
        for (int t = 0; t < grid_size; t++)
        {        
            ////////////////////////////////////////share memory
            __shared__ float tile_uv[BLOCK_SIZE*BLOCK_SIZE * 6];
            if ((t*step + BLOCK_SIZE * y + x) < times)
            {
    
                tile_uv[32 * tempy * 6 + tempx] = uv_triangulation_0[t*step + BLOCK_SIZE * y + x];    //注意越域
                tile_uv[32 * tempy * 6 + 1 * 32 + tempx] = uv_triangulation_0[1 * times + t * step + BLOCK_SIZE * y + x];
                tile_uv[32 * tempy * 6 + 2 * 32 + tempx] = uv_triangulation_0[2 * times + t * step + BLOCK_SIZE * y + x];
                tile_uv[32 * tempy * 6 + 3 * 32 + tempx] = uv_triangulation_0[3 * times + t * step + BLOCK_SIZE * y + x];
                tile_uv[32 * tempy * 6 + 4 * 32 + tempx] = uv_triangulation_0[4 * times + t * step + BLOCK_SIZE * y + x];
                tile_uv[32 * tempy * 6 + 5 * 32 + tempx] = uv_triangulation_0[5 * times + t * step + BLOCK_SIZE * y + x];
            }
            else
            {
                tile_uv[32 * tempy * 6 + tempx] = 0.0;    //注意越域
                tile_uv[32 * tempy * 6 + 1 * 32 + tempx] = 0.0;
                tile_uv[32 * tempy * 6 + 2 * 32 + tempx] = 0.0;
                tile_uv[32 * tempy * 6 + 3 * 32 + tempx] = 0.0;
                tile_uv[32 * tempy * 6 + 4 * 32 + tempx] = 0.0;
                tile_uv[32 * tempy * 6 + 5 * 32 + tempx] = 0.0;
            }
            __syncthreads();
    
            int v = 0;
            while (v < BLOCK_SIZE*BLOCK_SIZE)
            {
                float A[2], B[2], C[2];
    
                int threadid_1 = (threadid_0 + v) % (BLOCK_SIZE*BLOCK_SIZE);
                int tempx2 = threadid_1 % 32;
                int tempy2 = threadid_1 / 32;
    
                A[0] = tile_uv[32 * tempy2 * 6 + tempx2];
                A[1] = tile_uv[32 * tempy2 * 6 + 1 * 32 + tempx2];
                B[0] = tile_uv[32 * tempy2 * 6 + 2 * 32 + tempx2];
                B[1] = tile_uv[32 * tempy2 * 6 + 3 * 32 + tempx2];
                C[0] = tile_uv[32 * tempy2 * 6 + 4 * 32 + tempx2];
                C[1] = tile_uv[32 * tempy2 * 6 + 5 * 32 + tempx2];
    
                if ((t*step + tempy2*32 + tempx2) >= times)
                {
                    break;
                }
    
                bool inornot = pointInTriangle_cuda(A, B, C, searchPoint);
                if (inornot && inside == -1 && inside == -1)   //inside /on 
                {
                    index_temp = t * step + tempy2 * 32 + tempx2;
                    inside = 0;
                    break;
                }
    
                v++;
            }
            __syncthreads();
    
            //////////////////////////////////////////////global memory
            //float A[2], B[2], C[2];
            //A[0] = uv_triangulation_0[t];
            //A[1] = uv_triangulation_0[1 * times + t];
            //B[0] = uv_triangulation_0[2 * times + t];
            //B[1] = uv_triangulation_0[3 * times + t];
            //C[0] = uv_triangulation_0[4 * times + t];
            //C[1] = uv_triangulation_0[5 * times + t];
            //if (pointInTriangle_cuda(A, B, C, searchPoint))   //inside /on 
            //{
            //    index_temp = t;
            //    inside = 0;
            //    break;
            //}
        }
    
        inside_ptr_d[2 * threadId] = inside;
        inside_ptr_d[2 * threadId + 1] = index_temp;
    
        if (inside == 0 && threadId < num)
        {
            int tr_index = index_temp;
            float pt3d[3];
            float uv_triangulation_temp[6];
    
            uv_triangulation_temp[0] = uv_triangulation[6 * tr_index];
            uv_triangulation_temp[1] = uv_triangulation[6 * tr_index + 1];
            uv_triangulation_temp[2] = uv_triangulation[6 * tr_index + 2];
            uv_triangulation_temp[3] = uv_triangulation[6 * tr_index + 3];
            uv_triangulation_temp[4] = uv_triangulation[6 * tr_index + 4];
            uv_triangulation_temp[5] = uv_triangulation[6 * tr_index + 5];
    
            float w_triangulation_temp[9];
            w_triangulation_temp[0] = w_triangulation[9 * tr_index];
            w_triangulation_temp[1] = w_triangulation[9 * tr_index + 1];
            w_triangulation_temp[2] = w_triangulation[9 * tr_index + 2];
            w_triangulation_temp[3] = w_triangulation[9 * tr_index + 3];
            w_triangulation_temp[4] = w_triangulation[9 * tr_index + 4];
            w_triangulation_temp[5] = w_triangulation[9 * tr_index + 5];
            w_triangulation_temp[6] = w_triangulation[9 * tr_index + 6];
            w_triangulation_temp[7] = w_triangulation[9 * tr_index + 7];
            w_triangulation_temp[8] = w_triangulation[9 * tr_index + 8];
    
            caculateMappingTriangle2dTo3d(uv_triangulation_temp, searchPoint, w_triangulation_temp, pt3d); // rewrite in cuda
    
            int not_in_shelter_num = 0;
            //Shelter check
            for (int m = 0; m < panorama_num_d; m++)
            {
                if (not_in_shelter_num >= PanoramaNUM)
                {
                    break;
                }
                // add threshold ditance (pt3d to optical_center)
                //...
                // add threshold  angle of triangle face normal and vector(pt3d to optical_center)
                //...
                float optical_center[3];
                optical_center[0] = position_panorama_vect[m * 3];
                optical_center[1] = position_panorama_vect[m * 3 +1];
                optical_center[2] = position_panorama_vect[m * 3 +2];
    
                //caculate shelter
                //caculate near triangles of the ray
                int ret0 = 0;
                float redius_ = distance3d(optical_center, pt3d);
    
                float rayoptical[3];
                rayoptical[0] = optical_center[0] - pt3d[0];
                rayoptical[1] = optical_center[1] - pt3d[1];
                rayoptical[2] = optical_center[2] - pt3d[2];
                float angle = angleOfNormalOfTriangleAndSunline(w_triangulation_temp, rayoptical);
                float temp0 = fabs(angle - 3.1415926 / 2.0);
    
                if (redius_ > distance_threshold || temp0 < 3.1415926 / 10.0)  //
                {
                    continue;
                }
    
                float mid_pt[3];
                mid_pt[0] = (optical_center[0] + pt3d[0])*0.5;
                mid_pt[1] = (optical_center[1] + pt3d[1])*0.5;
                mid_pt[2] = (optical_center[2] + pt3d[2])*0.5;
                for (int i = 0; i < times; i++)
                {
                    float triangle[9];
                    triangle[0] = w_triangulation[9 * i];
                    triangle[1] = w_triangulation[9 * i + 1];
                    triangle[2] = w_triangulation[9 * i + 2];
                    triangle[3] = w_triangulation[9 * i + 3];
                    triangle[4] = w_triangulation[9 * i + 4];
                    triangle[5] = w_triangulation[9 * i + 5];
                    triangle[6] = w_triangulation[9 * i + 6];
                    triangle[7] = w_triangulation[9 * i + 7];
                    triangle[8] = w_triangulation[9 * i + 8];
                    
                    float pt_temp[3];
                    pt_temp[0] = w_triangulation_center[3 * i];
                    pt_temp[1] = w_triangulation_center[3 * i + 1];
                    pt_temp[2] = w_triangulation_center[3 * i + 2];
    
                    float temp1 = distance3d(pt_temp, mid_pt);
                    if (temp1 > 0.36*redius_ || i == tr_index)
                    {
                        continue;
                    }
    
                    
                    //int ret1 = rayTracingShelterCaculate_cuda2(pt3d, optical_center, triangle);
                    //if (ret1 == 0)  // in shelter
                    //{
                    //    ret0 = -1;
                    //    break;
                    //}
                    float ret_dis = pt3dToLine3d(pt_temp, optical_center, pt3d);
                    if (ret_dis < 0.10)  // in shelter
                    {
                        ret0 = -1;
                        break;
                    }
                }
         
                if (ret0 == 0)
                {
                    result[threadId*PanoramaNUM + not_in_shelter_num] = m+1;    //not in shelter
                    not_in_shelter_num++;
                }
            }
        }
    
    }
  • 相关阅读:
    env文件的作用
    Ubuntu 卸载wine
    Linux配置yaf3.x.x环境
    yaf中使用Cli模式,也就是定时器
    Yaf 在Bootstrap中注册变量,然后在其他地方进行使用!
    yaf 查看配置
    yaf配置通用函数
    一个严谨的接口调用
    后台基础表
    tensorflow环境搭建
  • 原文地址:https://www.cnblogs.com/lovebay/p/13441212.html
Copyright © 2011-2022 走看看