zoukankan      html  css  js  c++  java
  • 稀疏矩阵 part 5

    ▶ 目前为止能跑的所有代码及其结果(2019年2月24日),之后添加:DIA 乘法 GPU 版;其他维度的乘法(矩阵乘矩阵);其他稀疏矩阵格式之间的相互转化

      1 #include <stdio.h>
      2 #include <stdlib.h>
      3 #include <memory.h>
      4 #include <malloc.h>
      5 #include <math.h>
      6 #include <time.h>
      7 #include "cuda_runtime.h"
      8 #include "device_launch_parameters.h"
      9 
     10 #define ROW         (8192)
     11 #define COL         (8192)
     12 #define RATIO       0.1                         // 系数矩阵非零元素占比
     13 #define EPSILON     0.0001                      // 浮点数比较
     14 #define THREAD_SIZE 256
     15 #define SEED        1                           // (unsigned int)time(NULL)
     16 #define MAX(x,y)    (((x)>(y))?(x):(y))
     17 #define CEIL(x,y)   (int)(( x - 1 ) /  y + 1)
     18 #define INT                                     // 数字格式用 int 还是 float
     19 
     20 #ifdef INT
     21 typedef int format;
     22 #else
     23 typedef float format;
     24 #endif
     25 
     26 // 矩阵存储格式
     27 typedef struct          // 顺序格式
     28 {
     29     int     row;        // 行数
     30     int     col;        // 列数
     31     int     count;      // 非零元个数(用于转换,不用于计算)
     32     format  *data;      // 元素的值
     33 }
     34 MAT;
     35 
     36 typedef struct          // Compressed Sparse Row 格式
     37 {
     38     int     row;        // 行数
     39     int     col;        // 列数
     40     format  *data;      // 元素的值
     41     int     *index;     // 元素的列号
     42     int     *ptr;       // 每行首元在 index 中的下标,最后一个元素的值等于矩阵非零元个数 
     43 }
     44 CSR;
     45 
     46 typedef struct          // Compressed Sparse Row 格式
     47 {
     48     int     row;        // 行数
     49     int     col;        // 列数
     50     format  *data;      // 元素的值
     51     int     *index;     // 元素的行号
     52     int     *ptr;       // 每列首元在 index 中的下标,最后一个元素的值等于矩阵非零元个数 
     53 }
     54 CSC;
     55 
     56 
     57 typedef struct          // ELLPACK 格式
     58 {
     59     int     row;        // 行数
     60     int     col;        // 列数,相当于MAT格式的行数
     61     int     colOrigin;  // 原列数,相当于MAT格式的列数
     62     format  *data;      // 元素的值
     63     int     *index;     // 元素在MAT格式中的列号
     64 }
     65 ELL;
     66 
     67 typedef struct          // Coordinate 格式
     68 {
     69     int     row;        // 行数
     70     int     col;        // 列数
     71     int     count;      // 非零元个数
     72     int     *rowIndex;  // 行向量
     73     int     *colIndex;  // 列向量
     74     format  *data;      // 元素的值
     75 }
     76 COO;
     77 
     78 typedef struct              // Diagonal 格式
     79 {
     80     int     row;            // 行数
     81     int     col;            // 列数    
     82     int     colOrigin;      // 原列数
     83     format  *data;          // 元素的值
     84     int     *index;         // 原矩阵各对角线是否非零
     85 }
     86 DIA;
     87 
     88 // 全局指针
     89 __managed__ MAT *aMAT, *xMAT, *yMATRef, *yMATCal;
     90 __managed__ CSR *aCSR;
     91 __managed__ ELL *aELL;
     92 __managed__ COO *aCOO;
     93 
     94 // 通用函数
     95 __host__ __device__ inline void checkNULL(const void *input)            // 有点问题,设备函数无法使用 exit(1) 来推出
     96 {
     97     if (input == NULL)
     98         printf("
    	find a NULL!");
     99     return;
    100 }
    101 
    102 __host__ inline void checkCudaError(cudaError input)
    103 {
    104     if (input != cudaSuccess)
    105     {
    106         printf("
    	find a cudaError!");
    107         exit(1);
    108     }
    109     return;
    110 }
    111 
    112 int checkEqual(const format * in1, const format * in2, const int length)// 注意两向量相等时返 0,否则返回“值不等的元素下标加一”
    113 {
    114     int i;
    115     for (i = 0; i < length; i++)
    116     {
    117 #ifdef INT
    118         if (in1[i] != in2[i])
    119 #else
    120         if (fabs(in1[i] - in2[i]) > EPSILON)
    121 #endif
    122         {
    123             printf("
    	Error at i = %d
    	in1[i] = %10f, in2[i] = %10f
    ", i, (float)in1[i], (float)in2[i]);
    124             return i + 1;
    125         }
    126     }
    127     printf("
    	Check output, passed.
    ");
    128     return 0;
    129 }
    130 
    131 // 打印矩阵
    132 void print(const char* info, const MAT *in)// 打印MAT格式的矩阵
    133 {
    134     printf("%s
    	MAT,
    	row = %d, col = %d, count = %d", info, in->row, in->col, in->count);
    135     printf("
    	data =
    	");
    136     for (int i = 0; i < in->row * in->col; i++)
    137     {
    138 #ifdef INT
    139         printf("%d ", in->data[i]);
    140 #else
    141         printf("%.2f ", in->data[i]);
    142 #endif
    143         if ((i + 1) % in->col == 0)
    144             printf("
    	");
    145     }
    146     printf("
    ");
    147     return;
    148 }
    149 
    150 void print(const char* info, const CSR *in)// 打印CSR格式的矩阵
    151 {
    152     printf("%s
    	CSR,
    	row = %d, col = %d", info, in->row, in->col);
    153     printf("
    	data =
    	");
    154     for (int i = 0; i < in->ptr[in->row]; i++)
    155 #ifdef INT
    156         printf("%d ", in->data[i]);
    157 #else
    158         printf("%.2f ", in->data[i]);
    159 #endif
    160     printf("
    	index =
    	");
    161     for (int i = 0; i < in->ptr[in->row]; i++)
    162         printf("%d ", in->index[i]);
    163     printf("
    	ptr =
    	");
    164     for (int i = 0; i < in->row + 1; i++)
    165         printf("%d ", in->ptr[i]);
    166     printf("
    
    ");
    167     return;
    168 }
    169 
    170 void print(const char* info, const ELL *in)// 打印ELL格式的矩阵
    171 {
    172     int i;
    173     printf("%s
    	ELL,
    	row = %d, col = %d, colOrigin = %d", info, in->row, in->col, in->colOrigin);
    174     printf("
    	data =
    	");
    175     for (i = 0; i < in->row * in->col; i++)
    176     {
    177 #ifdef INT
    178         printf("%d ", in->data[i]);
    179 #else
    180         printf("%.2f ", in->data[i]);
    181 #endif
    182         if ((i + 1) % in->col == 0)
    183             printf("
    	");
    184     }
    185     printf("index =
    	");
    186     for (i = 0; i < in->row * in->col; i++)
    187     {
    188         printf("%d ", in->index[i]);
    189         if ((i + 1) % in->col == 0)
    190             printf("
    	");
    191     }
    192     printf("
    
    ");
    193     return;
    194 }
    195 
    196 void print(const char* info, const COO *in)// 打印COO格式的矩阵
    197 {
    198     int i;
    199     printf("%s
    	COO,
    	row = %d, col = %d, count = %d", info, in->row, in->col, in->count);
    200     printf("
    	(rowIndex, colIndex, data) =
    	");
    201     for (i = 0; i < in->count; i++)
    202     {
    203 #ifdef INT
    204         printf("(%d,%d,%d)
    	", in->rowIndex[i], in->colIndex[i], in->data[i]);
    205 #else
    206         printf("(%d,%d,%.2f)
    	", in->rowIndex[i], in->colIndex[i], in->data[i]);
    207 #endif
    208     }
    209     printf("
    
    ");
    210     return;
    211 }
    212 
    213 void print(const char* info, const DIA *in)// 打印DIA格式的矩阵
    214 {
    215     int i;
    216     printf("%s
    	DIA,
    	row = %d, col = %d, colOrigin = %d", info, in->row, in->col, in->colOrigin,in->colOrigin);
    217     printf("
    	data =
    	");
    218     int * inverseIndex = (int *)malloc(sizeof(int) * in->col);
    219     for (int i = 0, j = 0; i < in->row + in->col - 1; i++)
    220     {
    221         if (in->index[i] == 1)
    222         {
    223             inverseIndex[j] = i;
    224             j++;
    225         }
    226     }  
    227     for (i = 0; i < in->row * in->col; i++)
    228     {                                                                            
    229         if (i / in->col < in->row - 1 - inverseIndex[i % in->col] || i / in->col > inverseIndex[in->col - 1] - inverseIndex[i % in->col])
    230             printf("* ");
    231         else
    232 #ifdef INT
    233             printf("%d ", in->data[i]);
    234 #else
    235             printf("%.2f ", in->data[i]);
    236 #endif
    237         if ((i + 1) % in->col == 0)
    238             printf("
    	");
    239     }
    240     printf("index =
    	");
    241     for (i = 0; i < in->row + in->col - 1; i++)    
    242         printf("%d ", in->index[i]);       
    243     printf("
    
    ");
    244     free(inverseIndex);
    245     return;
    246 }
    247 
    248 // 矩阵初始化与清理
    249 MAT *initializeMAT(const int row, const int col, const int count = 0)// 初始化MAT,注意非零元默认为 0
    250 {
    251     MAT *temp;
    252     checkCudaError(cudaMallocManaged((void **)&temp, sizeof(MAT)));
    253     temp->row = row;
    254     temp->col = col;
    255     temp->count = count;
    256     checkCudaError(cudaMallocManaged((void **)&temp->data, sizeof(format) * row * col));
    257     return temp;
    258 }
    259 
    260 // 统计MAT形式的矩阵中的非零元素个数
    261 #define COUNT_MAT(in)                               
    262 {                                                   
    263     checkNULL(in);                                  
    264     int i, zero;                                    
    265     for (i = zero = 0; i < in->row * in->col; i++)  
    266         zero += !in->data[i];                       
    267     in->count = in->row * in->col - zero;           
    268 }
    269 
    270 int freeMatrix(MAT *in)// 释放MAT
    271 {
    272     checkNULL(in);
    273     cudaFree(in->data);
    274     cudaFree(in);
    275     return 0;
    276 }
    277 
    278 CSR * initializeCSR(const int row, const int col, const int count)// 初始化CSR,要求给出 count
    279 {
    280     CSR *temp;
    281     checkCudaError(cudaMallocManaged((void **)&temp, sizeof(CSR)));
    282     temp->row = row;
    283     temp->col = col;
    284     checkCudaError(cudaMallocManaged((void **)&temp->data, sizeof(format) * count));
    285     checkCudaError(cudaMallocManaged((void **)&temp->index, sizeof(int) * count));
    286     checkCudaError(cudaMallocManaged((void **)&temp->ptr, sizeof(int) * (row + 1)));
    287     return temp;
    288 }
    289 
    290 int freeMatrix(CSR *in)// 释放CSR
    291 {
    292     checkNULL(in);
    293     cudaFree(in->data);
    294     cudaFree(in->index);
    295     cudaFree(in->ptr);
    296     cudaFree(in);
    297     return 0;
    298 }
    299 
    300 ELL * initializeELL(const int row, const int col, const int colOrigin)// 初始化ELL,要求给出原列数
    301 {
    302     ELL *temp;
    303     checkCudaError(cudaMallocManaged((void **)&temp, sizeof(ELL)));
    304     temp->row = row;
    305     temp->col = col;
    306     temp->colOrigin = colOrigin;
    307     checkCudaError(cudaMallocManaged((void **)&temp->data, sizeof(format) * row * col));
    308     checkCudaError(cudaMallocManaged((void **)&temp->index, sizeof(int) * row * col));
    309     return temp;
    310 }
    311 
    312 int freeMatrix(ELL *in)// 释放ELL
    313 {
    314     cudaFree(in->data);
    315     cudaFree(in->index);
    316     cudaFree(in);
    317     return 0;
    318 }
    319 
    320 COO * initializeCOO(const int row, const int col, const int count)// 初始化COO,要求给出 count
    321 {
    322     COO * temp;
    323     checkCudaError(cudaMallocManaged((void **)&temp, sizeof(COO)));
    324     temp->row = row;
    325     temp->col = col;
    326     temp->count = count;
    327     checkCudaError(cudaMallocManaged((void **)&temp->data, sizeof(format) * count));
    328     checkCudaError(cudaMallocManaged((void **)&temp->rowIndex, sizeof(int) * count));
    329     checkCudaError(cudaMallocManaged((void **)&temp->colIndex, sizeof(int) * count));
    330     return temp;
    331 }
    332 
    333 int freeMatrix(COO *in)// 释放COO
    334 {
    335     checkNULL(in);
    336     cudaFree(in->data);
    337     cudaFree(in->rowIndex);
    338     cudaFree(in->colIndex);
    339     cudaFree(in);
    340     return 0;
    341 }
    342 
    343 DIA * initializeDIA(const int row, const int col, const int colOrigin)// 初始化DIA,要求给出原行数、原列数
    344 {
    345     DIA * temp;
    346     checkCudaError(cudaMallocManaged((void **)&temp, sizeof(DIA)));
    347     temp->row = row;
    348     temp->col = col;
    349     temp->colOrigin = colOrigin;
    350     checkCudaError(cudaMallocManaged((void **)&temp->data, sizeof(format) * row * col));
    351     checkCudaError(cudaMallocManaged((void **)&temp->index, sizeof(format) * (row + col - 1)));
    352     return temp;
    353 }
    354 
    355 int freeMatrix(DIA *in)// 释放DIA
    356 {
    357     checkNULL(in);
    358     cudaFree(in->data);
    359     cudaFree(in->index);
    360     cudaFree(in);
    361     return 0;
    362 }
    363 
    364 // 矩阵格式的相互转化
    365 CSR * MATToCSR(const MAT *in)                                       // MAT 转 CSR
    366 {
    367     checkNULL(in);
    368     CSR * out = initializeCSR(in->row, in->col, in->count);
    369     checkNULL(out);
    370     
    371     out->ptr[0] = 0;
    372     for (int i = 0, j = 0, k = 1; i < in->row * in->col; i++)       // i 遍历 in->data
    373     {
    374         if (in->data[i] != 0)                                       // 找到非零元
    375         {
    376             if (j == in->count)                                     // 在 out->data 已经填满了的基础上又发现了非零元,错误
    377                 return NULL;
    378             out->data[j] = in->data[i];                             // 填充非零元素
    379             out->index[j] = i % in->col;                            // 填充列号
    380             j++;
    381         }
    382         if ((i + 1) % in->col == 0)                                 // 到了最后一列,写入行指针号
    383             out->ptr[k++] = j;
    384     }
    385     return out;
    386 }
    387 
    388 MAT * CSRToMAT(const CSR *in)                                       // CSR转MAT
    389 {
    390     checkNULL(in);
    391     MAT *out = initializeMAT(in->row, in->col, in->ptr[in->row]);
    392     checkNULL(out);
    393 
    394     memset(out->data, 0, sizeof(format) * in->row * in->col);
    395     for (int i = 0; i < in->row; i++)                               // i 遍历行
    396     {                                                
    397         for (int j = in->ptr[i]; j < in->ptr[i + 1]; j++)           // j 遍历列 
    398             out->data[i * in->col + in->index[j]] = in->data[j];
    399     }
    400     return out;
    401 }
    402 
    403 ELL * MATToELL(const MAT *in)// MAT转ELL
    404 {
    405     checkNULL(in);
    406 
    407     int i, j, maxElement; 
    408     for (i = j = maxElement = 0; i < in->row * in->col; i++)                    // i 遍历 in->data,j 记录该行非零元素数,maxElement 记录一行非零元素最大值
    409     {     
    410         if (in->data[i] != 0)                                                   // 找到非零元       
    411             j++;                                                       
    412         if ((i + 1) % in->col == 0)                                             // 行末,更新 maxElement                        
    413         {
    414             maxElement = MAX(j, maxElement);                    
    415             j = 0;                                                              // 开始下一行之前清空 j
    416         }
    417     }
    418     format* temp_data=(format *)malloc(sizeof(format) * in->row * maxElement);  // 临时数组,将列数压缩到 maxElement
    419     checkNULL(temp_data);
    420     int* temp_index = (int *)malloc(sizeof(int) * in->row * maxElement);
    421     checkNULL(temp_index);
    422     memset(temp_data, 0, sizeof(format) * in->row * maxElement);
    423     memset(temp_index, 0, sizeof(int) * in->row * maxElement);
    424     for (i = j = 0; i < in->row * in->col; i++)                                 // i 遍历 in->data,j 记录该行非零元素数,把 in 中每行的元素往左边推
    425     {        
    426         if (in->data[i] != 0)                                                   // 找到非零元
    427         {
    428             temp_data[i / in->col * maxElement + j] = in->data[i];              // 存放元素
    429             temp_index[i / in->col * maxElement + j] = i % in->col;             // 记录所在的列号
    430             j++;                                                    
    431         }
    432         if ((i + 1) % in->col == 0)                                             // 行末,将剩余位置的下标记作 -1,即无效元素
    433         {            
    434             for (j += i / in->col * in->col; j < maxElement * (i / in->col + 1); j++)   // 使得 j 指向本行最后一个非零元素之后的元素,再开始填充
    435                 temp_index[j] = -1;                                 
    436             j = 0;                                                              // 开始下一行之前清空 j
    437         }
    438     }    
    439     ELL *out = initializeELL(maxElement, in->row, in->col);                     // 最终输出,如果不转置的话不要这部分
    440     checkNULL(out);
    441     for (i = 0; i < out->row * out->col; i++)                                   // 将 temp_data 和 temp_index 转置以提高缓存利用
    442     {
    443         out->data[i] = temp_data[i % out->col * out->row + i / out->col];
    444         out->index[i] = temp_index[i % out->col * out->row + i / out->col];
    445     }
    446     free(temp_data);
    447     free(temp_index);
    448     return out;
    449 }
    450 
    451 MAT * ELLToMAT(const ELL *in)                                                   // ELL转MAT
    452 {
    453     checkNULL(in);
    454     MAT *out = initializeMAT(in->col, in->colOrigin);
    455     checkNULL(out);
    456 
    457     for (int i = 0; i < in->row * in->col; i++)                                 // i 遍历 out->data 
    458     {
    459         if (in->index[i] < 0)                                                   // 注意跳过无效元素
    460             continue;
    461         out->data[i % in->col * in->colOrigin + in->index[i]] = in->data[i];
    462     }
    463     COUNT_MAT(out);
    464     return out;
    465 }
    466 
    467 COO * MATToCOO(const MAT *in)                               // MAT转COO
    468 {
    469     checkNULL(in);
    470     COO *out = initializeCOO(in->row, in->col, in->count);
    471 
    472     for (int i=0, j = 0; i < in->row * in->col; i++)
    473     {
    474 #ifdef INT
    475         if (in->data[i] != 0)
    476 #else
    477         if (fabs(in->data[i]) > EPSILON)
    478 #endif
    479         {
    480             out->data[j] = in->data[i];
    481             out->rowIndex[j] = i / in->col;
    482             out->colIndex[j] = i % in->col;
    483             j++;
    484         }
    485     }
    486     return out;
    487 }
    488 
    489 MAT * COOToMAT(const COO *in)                               // COO转MAT
    490 {
    491     checkNULL(in);
    492     MAT *out = initializeMAT(in->row, in->col, in->count);
    493     checkNULL(out);
    494 
    495     for (int i = 0; i < in->row * in->col; i++)
    496         out->data[i] = 0;
    497     for (int i = 0; i < in->count; i++)
    498         out->data[in->rowIndex[i] * in->col + in->colIndex[i]] = in->data[i];
    499     return out;
    500 }
    501 
    502 DIA * MATToDIA(const MAT *in)                                       // MAT转DIA
    503 {
    504     checkNULL(in);
    505 
    506     int *index = (int *)malloc(sizeof(int)*(in->row + in->col - 1));
    507     for (int diff = in->row - 1; diff > 0; diff--)                  // 左侧零对角线情况
    508     {        
    509         int flagNonZero = 0;
    510         for (int i = 0; i < in->col && i + diff < in->row; i++)     // i 沿着对角线方向遍历 in->data,flagNonZero 记录该对角线是否全部为零元
    511         {            
    512 #ifdef INT
    513             if (in->data[(i + diff) * in->col + i] != 0)
    514 #else
    515             if (fabs(in->data[(i + diff) * in->col + i]) > EPSILON)
    516 #endif            
    517                 flagNonZero = 1;
    518         }
    519         index[in->row - 1 - diff] = flagNonZero;                    // 标记该对角线上有非零元
    520     }
    521     for (int diff = in->col - 1; diff >= 0; diff--)                 // 右侧零对角线情况
    522     {                                                                                                                 
    523         int flagNonZero = 0;
    524         for (int j = 0; j < in->row && j + diff < in->col; j++)
    525         {
    526 #ifdef INT
    527             if (in->data[j * in->col + j + diff] != 0)
    528 #else
    529             if (fabs(in->data[j * in->col + j + diff]) > EPSILON)
    530 #endif            
    531                 flagNonZero = 1;
    532         }
    533         index[in->row - 1 + diff] = flagNonZero;                    // 标记该对角线上有非零元
    534     }       
    535     int *prefixSumIndex = (int *)malloc(sizeof(int)*(in->row + in->col - 1));
    536     prefixSumIndex[0] = index[0];
    537     for (int i = 1; i < in->row + in->col - 1; i++)                 // 闭前缀和,prefixSumIndex[i] 表示原矩阵第 0 ~ i 条对角线中共有多少条非零对角线(含)
    538         prefixSumIndex[i] = prefixSumIndex[i-1] + index[i];         // index[in->row + in->col -2] 表示原矩阵非零对角线条数,等于 DIA 矩阵列数
    539     DIA *out = initializeDIA(in->row, prefixSumIndex[in->row + in->col - 2], in->col);
    540     checkNULL(out);
    541 
    542     memset(out->data, 0, sizeof(int)*out->row * out->col);
    543     for (int i = 0; i < in->row + in->col - 1; i++)             
    544         out->index[i] = index[i];                                   // index 搬进 out
    545     for (int i = 0; i < in->row; i++)                               // i,j 遍历原矩阵,将元素搬进 out
    546     {
    547         for (int j = 0; j < in->col; j++)
    548         {
    549             int temp = j - i + in->row - 1;
    550             if (index[temp] == 0)
    551                 continue;            
    552             out->data[i * out->col + (temp > 0 ? prefixSumIndex[temp - 1] : 0)] = in->data[i * in->col + j];    // 第 row - 1 行第 0 列元素 temp == 0,单独处理
    553         }
    554     }
    555     free(index);
    556     free(prefixSumIndex);
    557     return out;
    558 }
    559 
    560 MAT * DIAToMAT(const DIA *in)                                       // DIA转MAT
    561 {
    562     checkNULL(in);
    563     MAT *out = initializeMAT(in->row, in->colOrigin);
    564     checkNULL(out);
    565 
    566     int * inverseIndex = (int *)malloc(sizeof(int) * in->col);
    567     for (int i = 0, j = 0; i < in->row + in->col - 1; i++)          // 求一个 index 的逆,即 DIA 中第 i 列对应原矩阵第 inverseIndex[i] 对角线
    568     {                                                               // 原矩阵对角线编号 (row-1, 0) 为第 0 条,(0, 0) 为第 row - 1 条,(col-1, 0) 为第 row + col - 2 条
    569         if (in->index[i] == 1)
    570         {
    571             inverseIndex[j] = i;
    572             j++;
    573         }
    574     }
    575     for (int i = 0; i < in->row; i++)                               // i 遍历 in->data 行,j 遍历 in->data 列
    576     {
    577         for (int j = 0; j < in->col; j++)
    578         {
    579             if (i < in->row - 1 - inverseIndex[j] || i > inverseIndex[in->col - 1] - inverseIndex[j])   // 跳过两边呈三角形的无效元素
    580                 continue;
    581             out->data[i * in->col + inverseIndex[j] - in->row + 1] = in->data[i * in->col + j];         // 利用 inverseIndex 来找钙元素在原距震中的位置
    582         }
    583     }
    584     free(inverseIndex);
    585     return out;
    586 }
    587 
    588 // 各种形式的矩阵和一个向量的乘法
    589 int dotCPU(const MAT *a, const MAT *x, MAT *y)      // CPU MAT 乘法
    590 {
    591     checkNULL(a); checkNULL(x); checkNULL(y);
    592     if (a->col != x->row)
    593     {
    594         printf("dotMATCPU dimension mismatch!
    ");
    595         return 1;
    596     }
    597     
    598     y->row = a->row;
    599     y->col = x->col;
    600     for (int i = 0; i < a->row; i++)
    601     {
    602         format sum = 0;
    603         for (int j = 0; j < a->col; j++)        
    604             sum += a->data[i * a->col + j] * x->data[j];                
    605         y->data[i] = sum;        
    606     }
    607     COUNT_MAT(y);
    608     return 0;
    609 }
    610 
    611 int dotCPU(const CSR *a, const MAT *x, MAT *y)      // CPU CSR 乘法
    612 {
    613     checkNULL(a); checkNULL(x); checkNULL(y);
    614     if (a->col != x->row)
    615     {
    616         printf("dotCSRCPU dimension mismatch!
    ");
    617         return 1;
    618     }
    619     
    620     y->row = a->row;
    621     y->col = x->col;
    622     for (int i = 0; i < a->row; i++)                            // i 遍历 ptr,j 遍历行内数据,A 中为 0 的元素不参加乘法
    623     {
    624         format sum = 0;
    625         for (int j = a->ptr[i]; j < a->ptr[i + 1]; j++)
    626             sum += a->data[j] * x->data[a->index[j]];
    627         y->data[i] = sum;
    628     }
    629     COUNT_MAT(y);
    630     return 0;
    631 }
    632 
    633 int dotCPU(const ELL *a, const MAT *x, MAT *y)      // CPU ELL乘法
    634 {
    635     checkNULL(a); checkNULL(x); checkNULL(y);
    636     if (a->colOrigin != x->row)
    637     {
    638         printf("dotELLCPU dimension mismatch!
    ");
    639         return 1;
    640     }
    641 
    642     y->row = a->col;
    643     y->col = x->col;
    644     for (int i = 0; i<a->col; i++)
    645     {
    646         format sum = 0;
    647         for (int j = 0; j < a->row; j++)
    648         {
    649             int temp = a->index[j * a->col + i];
    650             if (temp < 0)                                   // 跳过无效元素
    651                 continue;
    652             sum += a->data[j * a->col + i] * x->data[temp];
    653         }
    654         y->data[i] = sum;
    655     }
    656     COUNT_MAT(y);
    657     return 0;
    658 }
    659 
    660 int dotCPU(const COO *a, const MAT *x, MAT *y)// CPU COO乘法
    661 {
    662     checkNULL(a); checkNULL(x); checkNULL(y);
    663     if (a->col != x->row)
    664     {
    665         printf("dotCOOCPU null!
    ");
    666         return 1;
    667     }
    668 
    669     y->row = a->row;
    670     y->col = x->col;
    671     for (int i = 0; i<a->count; i++)
    672         y->data[a->rowIndex[i]] += a->data[i] * x->data[a->colIndex[i]];
    673     COUNT_MAT(y);
    674     return 0;
    675 }
    676 
    677 int dotCPU(const DIA *a, const MAT *x, MAT *y)// CPU DIA乘法
    678 {
    679     checkNULL(a); checkNULL(x); checkNULL(y);
    680     if (a->colOrigin != x->row)
    681     {
    682         printf("dotDIACPU null!
    ");
    683         return 1;
    684     }    
    685     y->row = a->row;
    686     y->col = x->col;
    687     int * inverseIndex = (int *)malloc(sizeof(int) * a->col);
    688     for (int i = 0, j = 0; i < a->row + a->col - 1; i++)
    689     {
    690         if (a->index[i] == 1)
    691         {
    692             inverseIndex[j] = i;
    693             j++;
    694         }
    695     }
    696     for (int i = 0; i < a->row; i++)
    697     {
    698         format sum = 0;
    699         for (int j = 0; j < a->col; j++)
    700         {
    701             if (i < a->row - 1 - inverseIndex[j] || i > inverseIndex[a->col - 1] - inverseIndex[j])
    702                 continue;
    703             sum += a->data[i * a->col + j] * x->data[i + inverseIndex[j] - a->row + 1];
    704         }
    705         y->data[i] = sum;
    706     }
    707     COUNT_MAT(y);
    708     free(inverseIndex);
    709     return 0;
    710 }
    711 
    712 __global__ void dotGPU(const MAT *a, const MAT *x, MAT *y)// GPU MAT乘法
    713 {
    714     int id = blockIdx.x * blockDim.x + threadIdx.x;
    715     if (id < a->row)
    716     {
    717         format sum = 0;
    718         for (int i = 0; i < a->col; i++)
    719             sum += a->data[id * a->col + i] * x->data[i];
    720         y->data[id] = sum;
    721     }
    722     if (id == 0)
    723     {
    724         y->row = a->row;
    725         y->col = x->col;
    726         COUNT_MAT(y);
    727     }
    728     return;
    729 }
    730 
    731 __global__ void dotGPU(const CSR *a, const MAT *x, MAT *y)// GPU CSR乘法
    732 {
    733     int id = blockIdx.x * blockDim.x + threadIdx.x;
    734     if (id < a->row)
    735     {
    736         format sum = 0;
    737         for (int j = a->ptr[id]; j < a->ptr[id + 1]; j++)
    738             sum += a->data[j] * x->data[a->index[j]];
    739         y->data[id] = sum;
    740     }
    741     if (id == 0)
    742     {
    743         y->row = a->row;
    744         y->col = x->col;
    745         COUNT_MAT(y);
    746     }
    747     return;
    748 }
    749 
    750 __global__ void dotGPU(const ELL *a, const MAT *x, MAT *y)// GPU ELL乘法
    751 {
    752     int id = blockIdx.x * blockDim.x + threadIdx.x;
    753     if (id < a->col)
    754     {
    755         format sum = 0;
    756         for (int j = 0; j < a->row; j++)            
    757             sum += a->data[id + j * a->col] * (a->index[id + j * a->col] < 0 ? 0 : x->data[a->index[id + j * a->col]]);
    758         y->data[id] = sum;
    759     }
    760     if (id == 0)
    761     {
    762         y->row = a->col;
    763         y->col = x->col;
    764         COUNT_MAT(y);
    765     }
    766     return;
    767 }
    768 
    769 __global__ void dotGPU(const COO *a, const MAT *x, MAT *y)// GPU COO乘法
    770 {
    771     int id = blockIdx.x * blockDim.x + threadIdx.x;
    772     if (id < a->count)
    773         atomicAdd(&(y->data[a->rowIndex[id]]), a->data[id] * x->data[a->colIndex[id]]);
    774     if (id == 0)
    775     {
    776         y->row = a->row;
    777         y->col = x->col;
    778         COUNT_MAT(y);
    779     }
    780     return;
    781 }
    782 
    783 int test()// 测试矩阵打印和CPU计算的函数
    784 {
    785     const int row = 4, col = 5;
    786     
    787     MAT* a = initializeMAT(row, col);
    788     a->data[0] = 3, a->data[2] = 1, a->data[4] = 5, a->data[11] = 2;
    789     a->data[12] = 4, a->data[13] = 1, a->data[15] = 1, a->data[18] = 1;
    790     COUNT_MAT(a);
    791 
    792     MAT* x = initializeMAT(col, 1);
    793     for (int i = 0; i < x->row; i++)
    794         x->data[i] = i + 1;
    795     COUNT_MAT(x);
    796 
    797     MAT* y = initializeMAT(row, 1);
    798     COUNT_MAT(y);
    799 
    800     print("MAT a =", a);    
    801     print("MAT x =", x);
    802     dotCPU(a, x, y);    
    803     print("MAT y = a * x = ", y);
    804    
    805     CSR* b = MATToCSR(a);        
    806     print("CSR b = a = ", b);
    807     memset(y->data, 0, sizeof(format) * y->row * y->col);
    808     dotCPU(b, x, y);    
    809     print("MAT y = b * x = ", y);
    810     MAT* c = CSRToMAT(b);
    811     print("MAT c = b =", c);
    812     freeMatrix(b);
    813     
    814     ELL* d = MATToELL(a);
    815     print("ELL d = a =", d);
    816     memset(y->data, 0, sizeof(format) * y->row * y->col);
    817     dotCPU(d, x, y);
    818     print("MAT y = d * x =", y);    
    819     c = ELLToMAT(d);
    820     print("MAT c = d =", c);
    821     freeMatrix(d);
    822 
    823     COO* e = MATToCOO(a);
    824     print("ELL e = a = ", e);
    825     memset(y->data, 0, sizeof(format) * y->row * y->col);
    826     dotCPU(e, x, y);
    827     print("MAT y = e * x = ", y);    
    828     c = COOToMAT(e);
    829     print("MAT c = e =", c);
    830     freeMatrix(e);
    831    
    832     DIA* f = MATToDIA(a);
    833     print("DIA f = a = ", f);
    834     memset(y->data, 0, sizeof(format) * y->row * y->col);
    835     dotCPU(f, x, y);
    836     print("MAT y = f * x = ", y);
    837     c = DIAToMAT(f);
    838     print("MAT c = f =", c);
    839     freeMatrix(f);
    840 
    841     freeMatrix(a);
    842     freeMatrix(x);
    843     freeMatrix(y);   
    844     freeMatrix(c);    
    845     return 0;
    846 }
    847 
    848 int main()
    849 {
    850     test();
    851    
    852     clock_t timeCPU;
    853     cudaEvent_t startGPU, stopGPU;
    854     float timeGPU;
    855     cudaEventCreate(&startGPU);
    856     cudaEventCreate(&stopGPU);
    857 
    858     printf("
    	Start. Matrix dimension:	%d × %d", ROW, COL);
    859 
    860     // 初始化
    861     timeCPU = clock();
    862     aMAT = initializeMAT(ROW, COL);
    863     xMAT = initializeMAT(COL, 1);
    864     yMATRef = initializeMAT(ROW, 1);
    865     yMATCal = initializeMAT(ROW, 1);
    866 
    867     srand(SEED);
    868 #ifdef INT
    869     for (int i = 0; i < COL * ROW; i++)
    870         aMAT->data[i] = ((float)rand() < RAND_MAX * RATIO) ? (rand() - RAND_MAX / 2) % 32 : 0;
    871     COUNT_MAT(aMAT);
    872     int count = aMAT->count;
    873     for (int i = 0; i < COL; i++)
    874         xMAT->data[i] = i % 32;
    875     COUNT_MAT(xMAT);
    876 #else
    877     for (int i = 0; i < COL * ROW; i++)
    878         aMAT->data[i] = ((float)rand() < RAND_MAX * RATIO) ? ((float)rand() / RAND_MAX) : 0;
    879     aMAT->count = countMAT(aMAT);
    880     for (int i = 0; i < COL; i++)
    881         xMAT->data[i] = ((float)rand() / RAND_MAX);
    882     xMAT->count = countMAT(xMAT);
    883 #endif
    884     printf("
    	Initialized. Time:		%d ms
    ", clock() - timeCPU);
    885 
    886     //CPU计算部分
    887     //MAT 格式
    888     timeCPU = clock();
    889     dotCPU(aMAT, xMAT, yMATRef);
    890     timeCPU = clock() - timeCPU;
    891     printf("
    	dotMATCPU time:	%8.3f ms
    ", (float)timeCPU);
    892 
    893     // CSR格式                    
    894     aCSR = MATToCSR(aMAT);
    895     timeCPU = clock();
    896     dotCPU(aCSR, xMAT, yMATCal);
    897     timeCPU = clock() - timeCPU;
    898     printf("
    	dotCSRCPU time:	%8.3f ms
    ", (float)timeCPU);
    899     checkEqual(yMATRef->data, yMATCal->data, ROW);    
    900 
    901     // ELL格式
    902     aELL = MATToELL(aMAT);
    903     timeCPU = clock();
    904     dotCPU(aELL, xMAT, yMATCal);
    905     timeCPU = clock() - timeCPU;
    906     printf("
    	dotELLCPU time:	%8.3f ms
    ", (float)timeCPU);
    907     checkEqual(yMATRef->data, yMATCal->data, ROW);    
    908 
    909     // COO格式
    910     aCOO = MATToCOO(aMAT);
    911     timeCPU = clock();
    912     dotCPU(aCOO, xMAT, yMATCal);
    913     timeCPU = clock() - timeCPU;
    914     printf("
    	dotCOOCPU time:	%8.3f ms
    ", (float)timeCPU);
    915     checkEqual(yMATRef->data, yMATCal->data, ROW);
    916 
    917     // GPU计算部分
    918     // MAT格式
    919     cudaMemset(yMATCal->data, 0, sizeof(format) * yMATCal->row * yMATCal->col);
    920     yMATCal->count = 0;
    921     cudaEventRecord(startGPU, 0);
    922     dotGPU << <CEIL(ROW, THREAD_SIZE), THREAD_SIZE >> > (aMAT, xMAT, yMATCal);
    923     cudaDeviceSynchronize();                                                  
    924     cudaEventRecord(stopGPU, 0);                                              
    925     cudaEventSynchronize(stopGPU);
    926     cudaEventElapsedTime(&timeGPU, startGPU, stopGPU);
    927     printf("
    	dotMATGPU time:	%8.3f ms
    ", timeGPU);
    928     checkEqual(yMATRef->data, yMATCal->data, ROW);
    929     freeMatrix(aMAT);
    930 
    931     // CSR格式
    932     cudaMemset(yMATCal->data, 0, sizeof(format) * yMATCal->row * yMATCal->col);
    933     yMATCal->count = 0;
    934     cudaEventRecord(startGPU, 0);
    935     dotGPU << <CEIL(ROW, THREAD_SIZE), THREAD_SIZE >> > (aCSR, xMAT, yMATCal);
    936     cudaDeviceSynchronize();
    937     cudaEventRecord(stopGPU, 0);
    938     cudaEventSynchronize(stopGPU);
    939     cudaEventElapsedTime(&timeGPU, startGPU, stopGPU);
    940     printf("
    	dotCSRGPU time:	%8.3f ms
    ", timeGPU);
    941     checkEqual(yMATRef->data, yMATCal->data, ROW);
    942     freeMatrix(aCSR);
    943 
    944     // ELL格式
    945     cudaMemset(yMATCal->data, 0, sizeof(format) * yMATCal->row * yMATCal->col);
    946     yMATCal->count = 0;
    947     cudaEventRecord(startGPU, 0);
    948     dotGPU << <CEIL(ROW, THREAD_SIZE), THREAD_SIZE >> > (aELL, xMAT, yMATCal);
    949     cudaDeviceSynchronize();
    950     cudaEventRecord(stopGPU, 0);
    951     cudaEventSynchronize(stopGPU);
    952     cudaEventElapsedTime(&timeGPU, startGPU, stopGPU);
    953     printf("
    	dotELLGPU time:	%8.3f ms
    ", timeGPU);
    954     checkEqual(yMATRef->data, yMATCal->data, ROW);
    955     freeMatrix(aELL);
    956 
    957     // COO格式
    958     cudaMemset(yMATCal->data, 0, sizeof(format) * yMATCal->row * yMATCal->col);
    959     yMATCal->count = 0;
    960     cudaEventRecord(startGPU, 0);
    961     dotGPU << <CEIL(count, THREAD_SIZE), THREAD_SIZE >> > (aCOO, xMAT, yMATCal);
    962     cudaDeviceSynchronize();
    963     cudaEventRecord(stopGPU, 0);
    964     cudaEventSynchronize(stopGPU);
    965     cudaEventElapsedTime(&timeGPU, startGPU, stopGPU);
    966     printf("
    	dotCOOGPU time:	%8.3f ms
    ", timeGPU);
    967     checkEqual(yMATRef->data, yMATCal->data, ROW);
    968     freeMatrix(aCOO);
    969 
    970     // 清理内存    
    971     freeMatrix(xMAT);
    972     freeMatrix(yMATRef);
    973     freeMatrix(yMATCal); 
    974 
    975     getchar();
    976     return 0;
    977 }

    ● 输出结果

    MAT a =
            MAT,
            row = 4, col = 5, count = 8
            3 0 1 0 5
            0 0 0 0 0
            0 2 4 1 0
            1 0 0 1 0
    
    MAT x =
            MAT,
            row = 5, col = 1, count = 5
            1
            2
            3
            4
            5
    
    MAT y = a * x =
            MAT,
            row = 4, col = 1, count = 3
            31
            0
            20
            5
    
    CSR b = a =
            CSR,
            row = 4, col = 5
            3 1 5 2 4 1 1 1
            0 2 4 1 2 3 0 3
            0 3 3 6 8
    
    MAT y = b * x =
            MAT,
            row = 4, col = 1, count = 3
            31
            0
            20
            5
    
    MAT c = b =
            MAT,
            row = 4, col = 5, count = 8
            3 0 1 0 5
            0 0 0 0 0
            0 2 4 1 0
            1 0 0 1 0
    
    ELL d = a =
            ELL,
            row = 3, col = 4, colOrigin = 5
            3 0 2 1
            1 0 4 1
            5 0 1 0
    
            0 0 1 0
            2 0 2 3
            4 -1 3 0
    
    
    MAT y = d * x =
            MAT,
            row = 4, col = 1, count = 3
            31
            0
            20
            5
    
    MAT c = d =
            MAT,
            row = 4, col = 5, count = 7
            3 0 1 0 5
            0 0 0 0 0
            0 2 4 1 0
            0 0 0 1 0
    
    ELL e = a =
            COO,
            row = 4, col = 5, count = 8
            (0,0,3)
            (0,2,1)
            (0,4,5)
            (2,1,2)
            (2,2,4)
            (2,3,1)
            (3,0,1)
            (3,3,1)
    
    MAT y = e * x
            MAT,
            row = 4, col = 1, count = 3
            31
            0
            20
            5
    
    MAT c = e =
            MAT,
            row = 4, col = 5, count = 8
            3 0 1 0 5
            0 0 0 0 0
            0 2 4 1 0
            1 0 0 1 0
    
    
            Start. Matrix dimension:        8192 × 8192
            Initialized. Time:                 0.000 ms
    
            dotMATCPU time:  304.000 ms
    
            dotCSRCPU time:    3.000 ms
    
            Check output, passed.
    
            dotCELLPU time:  103.000 ms
    
            Check output, passed.
    
            dotCOOCPU time:   11.000 ms
    
            Check output, passed.
    
            dotMATGPU time:    5.133 ms
    
            Check output, passed.
    
            dotCSRGPU time:    2.263 ms
    
            Check output, passed.
    
            dotELLGPU time:    1.253 ms
    
            Check output, passed.
    
            dotCOOGPU time:    4.754 ms
    
            Check output, passed.
  • 相关阅读:
    SDWebImage 原理及使用问题
    iOS沙盒目录
    java基础知识积累总结
    切片原型[start:stop:step]
    select嵌套问题
    Numpy:字符串转为数值型、日期型;日期型转为数值型
    数据标准化处理(简单整理)
    Numpy:自定义复合数据类型
    Numpy:数组创建 numpy.arrray() , numpy.arange()、np.linspace ()、数组基本属性
    Numpy:使用索引、切片提取一维数组、多维数组内数据
  • 原文地址:https://www.cnblogs.com/cuancuancuanhao/p/10428527.html
Copyright © 2011-2022 走看看