使用CUDA的 Driver API 来计算矩阵乘法。
▶ 源代码:
1 #include <stdio.h> 2 3 #include <cuda.h> 4 #include <builtin_types.h> 5 #include <helper_cuda_drvapi.h> 6 #include <helper_timer.h> 7 #include "matrixMul.h" 8 9 #define PTX_FILE "matrixMul_kernel64.ptx" 10 #define CUBIN_FILE "matrixMul_kernel64.cubin" 11 12 const bool use_64bit_memory_address = true; 13 using namespace std; 14 15 CUdevice cuDevice; 16 CUcontext cuContext; 17 CUmodule cuModule; 18 size_t totalGlobalMem; 19 20 void constantInit(float *data, int size, float val) 21 { 22 for (int i = 0; i < size; ++i) 23 data[i] = val; 24 } 25 26 bool inline findModulePath(const char *module_file, string &module_path, char **argv, string &ptx_source) 27 { 28 char *actual_path = sdkFindFilePath(module_file, argv[0]);// 依命令行的参数 29 30 if (actual_path) 31 module_path = actual_path; 32 else 33 { 34 printf("> findModulePath file not found: <%s> ", module_file); 35 return false; 36 } 37 38 if (module_path.empty()) 39 { 40 printf("> findModulePath file not found: <%s> ", module_file); 41 return false; 42 } 43 printf("> findModulePath <%s> ", module_path.c_str()); 44 45 if (module_path.rfind(".ptx") != string::npos) 46 { 47 FILE *fp = fopen(module_path.c_str(), "rb"); 48 fseek(fp, 0, SEEK_END); 49 int file_size = ftell(fp); 50 char *buf = new char[file_size + 1]; 51 fseek(fp, 0, SEEK_SET); 52 fread(buf, sizeof(char), file_size, fp); 53 fclose(fp); 54 buf[file_size] = '