▶ 各种稀疏矩阵数据结构下 y(n,1) = A(n,m) * x(m,1) 的实现,GPU版本
● MAT 乘法
// Dense matrix-vector product y = a * x, one thread per row of a.
//
// a : dense matrix read as row-major (element (r, c) is a->data[r * a->col + c]).
// x : input; only x->data[0 .. a->col) is read.
// y : output; y->data[r] receives the dot product of row r of a with x.
//
// Launch: 1-D grid/block (only the .x components are used). There is no
// striding loop, so the launch must supply at least a->row threads for every
// row to be written. All three pointers are dereferenced in device code.
__global__ void dotGPU(const MAT *a, const MAT *x, MAT *y)
{
    const int id = blockIdx.x * blockDim.x + threadIdx.x;
    if (id < a->row)
    {
        // Compute the row offset in 64-bit: id * a->col can exceed the int
        // range once the matrix holds more than 2^31 - 1 elements.
        const long long rowBase = (long long)id * a->col;
        format sum = 0;
        for (int i = 0; i < a->col; i++)
            sum += a->data[rowBase + i] * x->data[i];
        y->data[id] = sum;
    }
    if (id == 0)
    {
        // A single thread records the result dimensions.
        y->row = a->row;
        y->col = x->col;
        // COUNT_MAT is defined elsewhere; presumably it refreshes a derived
        // size field of y -- TODO confirm against its definition.
        COUNT_MAT(y);
    }
    return;
}
● CSR 乘法
// CSR matrix-vector product y = a * x, one thread per row of a.
//
// For row r, the entries a->data[k] with k in [a->ptr[r], a->ptr[r + 1]) are
// multiplied by x->data[a->index[k]] and accumulated into y->data[r].
//
// Launch: 1-D grid/block (only the .x components are used). There is no
// striding loop, so the launch must supply at least a->row threads for every
// row to be written. All three pointers are dereferenced in device code.
__global__ void dotGPU(const CSR *a, const MAT *x, MAT *y)
{
    const int rowId = blockIdx.x * blockDim.x + threadIdx.x;

    if (rowId < a->row)
    {
        format acc = 0;
        int k = a->ptr[rowId];
        while (k < a->ptr[rowId + 1])
        {
            // a->index[k] selects which entry of x pairs with a->data[k].
            acc += a->data[k] * x->data[a->index[k]];
            ++k;
        }
        y->data[rowId] = acc;
    }

    if (rowId == 0)
    {
        // A single thread records the result dimensions.
        y->row = a->row;
        y->col = x->col;
        // COUNT_MAT is defined elsewhere; presumably it refreshes a derived
        // size field of y -- TODO confirm against its definition.
        COUNT_MAT(y);
    }
    return;
}
● ELL 乘法
// ELL matrix-vector product y = a * x, one thread per output element.
//
// Thread id (0 <= id < a->col) walks j over [0, a->row) and reads slot
// id + j * a->col of a->data / a->index, so for a fixed j consecutive thread
// ids touch consecutive elements. A negative a->index entry contributes
// a->data[slot] * 0 instead of reading x. The result has a->col rows.
// NOTE(review): this layout implies a->col is the logical row count and
// a->row the number of slots per row -- confirm against the ELL definition.
//
// Launch: 1-D grid/block (only the .x components are used). There is no
// striding loop, so the launch must supply at least a->col threads for every
// output element to be written. All three pointers are dereferenced in
// device code.
__global__ void dotGPU(const ELL *a, const MAT *x, MAT *y)
{
    const int id = blockIdx.x * blockDim.x + threadIdx.x;
    if (id < a->col)
    {
        format sum = 0;
        for (int j = 0; j < a->row; j++)
        {
            // Compute the slot offset once, in 64-bit: j * a->col can exceed
            // the int range when the padded storage has more than 2^31 - 1
            // entries.
            const long long slot = id + (long long)j * a->col;
            sum += a->data[slot] * (a->index[slot] < 0 ? 0 : x->data[a->index[slot]]);
        }
        y->data[id] = sum;
    }
    if (id == 0)
    {
        // A single thread records the result dimensions.
        y->row = a->col;
        y->col = x->col;
        // COUNT_MAT is defined elsewhere; presumably it refreshes a derived
        // size field of y -- TODO confirm against its definition.
        COUNT_MAT(y);
    }
    return;
}
● COO 乘法(注意:下面给出的代码与上面的 ELL 乘法内核相同,本节尚未给出 COO 版本的内核)
// GPU ELL multiplication: y = a * x, one thread per output element.
//
// NOTE(review): this block sits under the "COO" heading, but its signature
// takes an ELL matrix and its body is the ELL kernel; a COO kernel appears to
// be missing here -- confirm with the author.
//
// Thread id (0 <= id < a->col) walks j over [0, a->row) and reads slot
// id + j * a->col of a->data / a->index. A negative a->index entry
// contributes a->data[slot] * 0 instead of reading x. The result has a->col
// rows.
//
// Launch: 1-D grid/block (only the .x components are used). There is no
// striding loop, so the launch must supply at least a->col threads for every
// output element to be written. All three pointers are dereferenced in
// device code.
__global__ void dotGPU(const ELL *a, const MAT *x, MAT *y)
{
    const int id = blockIdx.x * blockDim.x + threadIdx.x;
    if (id < a->col)
    {
        format sum = 0;
        for (int j = 0; j < a->row; j++)
        {
            // Compute the slot offset once, in 64-bit: j * a->col can exceed
            // the int range when the padded storage has more than 2^31 - 1
            // entries.
            const long long slot = id + (long long)j * a->col;
            sum += a->data[slot] * (a->index[slot] < 0 ? 0 : x->data[a->index[slot]]);
        }
        y->data[id] = sum;
    }
    if (id == 0)
    {
        // A single thread records the result dimensions.
        y->row = a->col;
        y->col = x->col;
        // COUNT_MAT is defined elsewhere; presumably it refreshes a derived
        // size field of y -- TODO confirm against its definition.
        COUNT_MAT(y);
    }
    return;
}
● DIA 乘法,留坑