zoukankan      html  css  js  c++  java
  • 0_Simple__cppIntegration

    ▶ 分离编译【待补充:在 Linux 上用命令行编译并验证】

    ▶ 源代码:

     1 // cppIntegration_gold.cpp
     2 #include <vector_types.h>
     3 
     4 extern "C" void computeGold(char *reference, char *idata, const unsigned int len);
     5 extern "C" void computeGold2(int2 *reference, int2 *idata, const unsigned int len);
     6 
     // CPU reference for the char path: for each of the first len bytes,
     // stores idata[i] - 10 into reference[i].
     void computeGold(char *reference, char *idata, const unsigned int len)
     {
         unsigned int pos = 0;
         while (pos < len)
         {
             reference[pos] = idata[pos] - 10;
             ++pos;
         }
     }
    12 
    13 void computeGold2(int2 *reference, int2 *idata, const unsigned int len)
    14 {
    15     for (unsigned int i = 0; i < len; ++i)
    16     {
    17         reference[i].x = idata[i].x - idata[i].y;
    18         reference[i].y = idata[i].y;
    19     }
    20 }
     1 // cppIntegration.cu
     2 #include <stdlib.h>
     3 #include <stdio.h>
     4 #include <string.h>
     5 #include <math.h>
     6 #include <assert.h>
     7 #include <cuda_runtime.h>
     8 #include <helper_cuda.h>
     9 #include <helper_functions.h>
    10 
    11 #ifndef MAX
    12 #define MAX(a,b) (a > b ? a : b)
    13 #endif
    14 
    15 extern "C" void computeGold(char *reference, char *idata, const unsigned int len);
    16 extern "C" void computeGold2(int2 *reference, int2 *idata, const unsigned int len);
    17 
    // GPU computation: subtracts 10 from each of the four bytes packed in one int.
    //
    // Every thread processes the word at index threadIdx.x (blockIdx is not used),
    // so the caller must launch exactly one thread per int of g_data.
    // g_data is updated in place.
    __global__ void kernel(int *g_data)
    {
        const unsigned int tid = threadIdx.x;

        // Work on an unsigned copy: a signed right shift would sign-extend the byte,
        // and a negative (byte - 10) would then spill 1-bits into the neighbouring
        // bytes when the pieces are OR-ed back together.
        const unsigned int packed = static_cast<unsigned int>(g_data[tid]);

        unsigned int result = 0;
        for (unsigned int shift = 0; shift < 32; shift += 8)
        {
            const unsigned int byte = (packed >> shift) & 0xFFu;
            // Wrap to 8 bits so each byte behaves like the CPU's char arithmetic.
            result |= ((byte - 10u) & 0xFFu) << shift;
        }

        g_data[tid] = static_cast<int>(result);
    }
    26 
    // int2 variant: each thread takes the element at index threadIdx.x and
    // replaces its x with x - y; y is left as it was.
    __global__ void kernel2(int2 *g_data)
    {
        int2 *slot = g_data + threadIdx.x;
        const int2 pair = *slot;
        slot->x = pair.x - pair.y;
    }
    33 
    // Runs both kernels on the GPU, prints the CPU reference and the GPU output,
    // and verifies that they agree.
    //
    // data      : len bytes, processed by `kernel` as len / 4 packed ints;
    //             overwritten with the GPU result.
    // data_int2 : len int2 elements, processed by `kernel2`;
    //             overwritten with the GPU result.
    // len       : element count; must be a multiple of 4. Each kernel is launched
    //             as a single block (len / 4 and len threads respectively), so len
    //             has to fit within the device's per-block thread limit.
    // Returns true when every GPU element matches computeGold / computeGold2.
    // CUDA failures terminate the process through checkCudaErrors / getLastCudaError.
    extern "C" bool runTest(char *data, int2 *data_int2, unsigned int len)
    {
        assert((len % 4) == 0);
        if (len == 0)
            return true;    // nothing to process; a zero-thread launch would be rejected

        const unsigned int num_threads = len / 4;
        const size_t mem_size = sizeof(char) * len;
        const size_t mem_size_int2 = sizeof(int2) * len;

        // Host buffers for the CPU reference results.
        char *reference = (char *)malloc(mem_size);
        int2 *reference2 = (int2 *)malloc(mem_size_int2);
        if (reference == NULL || reference2 == NULL)
        {
            fprintf(stderr, "runTest: host allocation failed\n");
            free(reference);
            free(reference2);
            return false;
        }

        // Device buffers, initialised from the host inputs.
        char *d_data = NULL;
        checkCudaErrors(cudaMalloc((void **)&d_data, mem_size));
        checkCudaErrors(cudaMemcpy(d_data, data, mem_size, cudaMemcpyHostToDevice));
        int2 *d_data_int2 = NULL;
        checkCudaErrors(cudaMalloc((void **)&d_data_int2, mem_size_int2));
        checkCudaErrors(cudaMemcpy(d_data_int2, data_int2, mem_size_int2, cudaMemcpyHostToDevice));

        kernel<<<dim3(1, 1, 1), dim3(num_threads, 1, 1)>>>((int *)d_data);
        kernel2<<<dim3(1, 1, 1), dim3(len, 1, 1)>>>(d_data_int2);

        // Reports launch errors (e.g. an invalid configuration) with this message.
        getLastCudaError("Kernel execution failed");

        // CPU reference; must run before the device results overwrite the inputs.
        computeGold(reference, data, len);
        // Bounded print: the buffer is not guaranteed to contain a terminating NUL.
        printf("ref char*:%.*s\n", (int)len, reference);
        computeGold2(reference2, data_int2, len);
        printf("ref int2 :");
        for (unsigned int i = 0; i < len; i++)
            printf("%c", reference2[i].x);
        printf("\n");

        // Surface any asynchronous execution error before reading the results back.
        checkCudaErrors(cudaDeviceSynchronize());
        checkCudaErrors(cudaMemcpy(data, d_data, mem_size, cudaMemcpyDeviceToHost));
        checkCudaErrors(cudaMemcpy(data_int2, d_data_int2, mem_size_int2, cudaMemcpyDeviceToHost));
        printf("gpu char*:%.*s\n", (int)len, (char *)data);
        printf("gpu int2 :");
        for (unsigned int i = 0; i < len; i++)
            printf("%c", data_int2[i].x);
        printf("\n");

        // Compare GPU output against the CPU reference, element by element.
        bool success = true;
        for (unsigned int i = 0; i < len; i++)
        {
            if (reference[i] != data[i] ||
                reference2[i].x != data_int2[i].x ||
                reference2[i].y != data_int2[i].y)
            {
                success = false;
                break;
            }
        }

        checkCudaErrors(cudaFree(d_data));
        checkCudaErrors(cudaFree(d_data_int2));
        free(reference);
        free(reference2);
        return success;
    }
     1 // main.cpp
     2 #include <iostream>
     3 #include <cstdlib>
     4 #include <cuda_runtime.h>
     5 #include <vector_types.h>
     6 #include <helper_cuda.h>
     7 
     8 extern "C" bool runTest(char *data, int2 *data_int2, unsigned int len);
     9 
    10 int main()
    11 {
    12     const int len = 16;
    13     int2 i2[16];                    // cuda 内置的 int2 类型
    14     char str[len] = { 82, 111, 118, 118,121, 42, 97, 121, 124, 118, 110, 56, 10, 10, 10,  10};    
    15     for (int i = 0; i < len; i++)
    16     {
    17         i2[i].x = str[i];
    18         i2[i].y = 10;
    19     }    
    20     runTest(str, i2, len);
    21 
    22     getchar();
    23     return 0;
    24 }

    ● 输出结果:

    ref char*: Hello World.
    ref int2 :Hello World.
    gpu char*: Hello World.
    gpu int2 :Hello World.

    ▶ 涨姿势:

    ● cuda 内置的 int2 类型,整数有序对。涉及的定义如下:

    1 #define __cuda_builtin_vector_align8(tag, members)  
    2     struct __device_builtin__ __align__(8) tag      
    3     {                                               
    4         members                                     
    5     }
    6 
    7 __cuda_builtin_vector_align8(int2, int x; int y;);
    8 
    9 typedef __device_builtin__ struct int2 int2;

    ● 断言宏和错误检查函数

     // assert: evaluates the expression; when it is false, calls _wassert with the
     // stringified expression, the file name and the line number. The trailing
     // backslashes keep the multi-line definition inside one macro.
     #define assert(expression) (void)                                                                    \
     (                                                                                                    \
     (!!(expression)) || (_wassert(_CRT_WIDE(#expression), _CRT_WIDE(__FILE__), (unsigned)(__LINE__)), 0) \
     )

     // Forwards the message together with the call site's file and line to __getLastCudaError.
     #define getLastCudaError(msg) __getLastCudaError (msg, __FILE__, __LINE__)
     7 
     8 inline void __getLastCudaError(const char *errorMessage, const char *file, const int line)
     9 {
    10     cudaError_t err = cudaGetLastError();
    11     if (cudaSuccess != err)
    12     {
    13         fprintf(stderr, "%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.
    ",
    14             file, line, errorMessage, (int)err, cudaGetErrorString(err));
    15         DEVICE_RESET
    16         exit(EXIT_FAILURE);
    17     }
    18 }
  • 相关阅读:
    https://archive.ics.uci.edu/ml/datasets.php
    实战教程 :使用Python和keras进行文本分类(上)(重要)
    洛谷 P1073 最优贸易
    P2278 [HNOI2003]操作系统
    洛谷P2024 食物链
    模板
    我的博客
    mysql一条sql把表中的男改为女,女改为男
    树的度和结点数的关系
    Spider实例详解
  • 原文地址:https://www.cnblogs.com/cuancuancuanhao/p/7742549.html
Copyright © 2011-2022 走看看