zoukankan      html  css  js  c++  java
  • cuBLAS sample-code

    cublasSscal 
    //Example 1. Application Using C and CUBLAS: 1-based indexing
    
    #include <stdlib.h>
    #include <math.h>
    #include <cuda_runtime.h>
    #include "cublas_v2.h"
    #include <stdio.h>
    // Extent of the first index (i) of the example matrix; also used as its leading dimension.
    #define M 6
    // Extent of the second index (j) of the example matrix.
    #define N 5
    // Maps 1-based indices (i,j) to a 0-based linear offset into an array in which
    // consecutive i values are adjacent and consecutive j values are ld elements apart.
    #define IDX2F(i,j,ld) ((((j)-1)*(ld))+((i)-1))
    /*
     * Scales two slices of a 1-based, IDX2F-indexed matrix in place using cublasSscal.
     *
     *   handle - cuBLAS handle used for both calls.
     *   m      - matrix storage handed to cublasSscal (main passes a cudaMalloc'd buffer).
     *   ldm    - leading dimension of m; also treated here as the number of rows.
     *   n      - number of columns of m.
     *   p, q   - 1-based row and column of the element where both slices start.
     *   alpha  - factor applied to row p, columns q..n.
     *   beta   - factor applied to column q, rows p..ldm.
     *
     * Element (p,q) belongs to both slices, so it ends up scaled by alpha*beta.
     * The cublasSscal status codes are not inspected; the function returns void.
     */
    static __inline__ void modify (cublasHandle_t handle, float*m, int ldm, int
        n, int p, int q, float alpha, float beta){
            /* Row p from column q through column n holds n-q+1 elements spaced ldm
               apart. The count must be derived from q (the column), not p:
               using n-p+1 walks past the last column whenever p < q. */
            cublasSscal (handle, n-q+1, &alpha, &m[IDX2F(p,q,ldm)], ldm);
            /* Column q from row p through row ldm: ldm-p+1 contiguous elements. */
            cublasSscal (handle, ldm-p+1, &beta, &m[IDX2F(p,q,ldm)], 1);
    }
    int main (void){
        cudaError_t cudaStat; 
        cublasStatus_t stat;
        cublasHandle_t handle;
        int i, j;
        float* devPtrA;
        float* a = 0;
        a = (float*)malloc (M * N * sizeof(*a));
        if(!a) {
            printf("host memory allocation failed");
            return EXIT_FAILURE;
        }
        for(j = 1; j <= N; j++) {
            for(i = 1; i <= M; i++) {
                a[IDX2F(i,j,M)] = (float)((i-1) * M + j);
                printf("%7.0f",a[IDX2F(i,j,M)]);
            }printf("
    ");
        }printf("
    ");
        cudaStat = cudaMalloc ((void**)&devPtrA, M*N*sizeof(*a));
        if(cudaStat != cudaSuccess) {
            printf ("device memory allocation failed");
            return EXIT_FAILURE;
        }
        stat = cublasCreate(&handle);
        if(stat != CUBLAS_STATUS_SUCCESS) {
            printf ("CUBLAS initialization failed
    ");
            return EXIT_FAILURE;
        }
        stat = cublasSetMatrix (M, N, sizeof(*a), a, M, devPtrA, M);
        if(stat != CUBLAS_STATUS_SUCCESS) {
            printf ("data download failed");
            cudaFree (devPtrA);
            cublasDestroy(handle);
            return EXIT_FAILURE;
        }
        modify (handle, devPtrA, M, N, 2, 3, 16.0f, 12.0f);
        stat = cublasGetMatrix (M, N, sizeof(*a), devPtrA, M, a, M);
        if(stat != CUBLAS_STATUS_SUCCESS) {
            printf("data upload failed");
            cudaFree (devPtrA);
            cublasDestroy(handle); 
            return EXIT_FAILURE;
        } 
        cudaFree (devPtrA);
        cublasDestroy(handle);
        for(j = 1; j <= N; j++) {
            for(i = 1; i <= M; i++) {
                printf ("%7.0f", a[IDX2F(i,j,M)]);
            }
            printf ("
    ");
        }
        free(a);
        return EXIT_SUCCESS;
    }

  • 相关阅读:
    Sqlite基础(一)
    个人作业冲刺(四)
    安卓用户名密码操作及虚拟机问题
    安卓之界面跳转
    个人作业冲刺(三)
    个人作业冲刺(二)
    个人作业冲刺(一)
    Android studio RatingBar(星级评分条)
    阅读笔记——《构建之法》4
    Android studio GridLayout(网格布局)
  • 原文地址:https://www.cnblogs.com/zhxfl/p/3905818.html
Copyright © 2011-2022 走看看