zoukankan      html  css  js  c++  java
  • cubla sample-code

    cublasSscal 
    //Example 1. Application Using C and CUBLAS: 1-based indexing
    
    #include <stdlib.h>
    #include <math.h>
    #include <cuda_runtime.h>
    #include "cublas_v2.h"
    #include <stdio.h>
    #define M 6
    #define N 5
    #define IDX2F(i,j,ld) ((((j)-1)*(ld))+((i)-1))
    static __inline__ void modify (cublasHandle_t handle, float*m, int ldm, int
        n, int p, int q, float alpha, float beta){
            cublasSscal (handle, n-p+1, &alpha, &m[IDX2F(p,q,ldm)], ldm);
            cublasSscal (handle, ldm-p+1, &beta, &m[IDX2F(p,q,ldm)], 1);
    }
    int main (void){
        cudaError_t cudaStat; 
        cublasStatus_t stat;
        cublasHandle_t handle;
        int i, j;
        float* devPtrA;
        float* a = 0;
        a = (float*)malloc (M * N * sizeof(*a));
        if(!a) {
            printf("host memory allocation failed");
            return EXIT_FAILURE;
        }
        for(j = 1; j <= N; j++) {
            for(i = 1; i <= M; i++) {
                a[IDX2F(i,j,M)] = (float)((i-1) * M + j);
                printf("%7.0f",a[IDX2F(i,j,M)]);
            }printf("
    ");
        }printf("
    ");
        cudaStat = cudaMalloc ((void**)&devPtrA, M*N*sizeof(*a));
        if(cudaStat != cudaSuccess) {
            printf ("device memory allocation failed");
            return EXIT_FAILURE;
        }
        stat = cublasCreate(&handle);
        if(stat != CUBLAS_STATUS_SUCCESS) {
            printf ("CUBLAS initialization failed
    ");
            return EXIT_FAILURE;
        }
        stat = cublasSetMatrix (M, N, sizeof(*a), a, M, devPtrA, M);
        if(stat != CUBLAS_STATUS_SUCCESS) {
            printf ("data download failed");
            cudaFree (devPtrA);
            cublasDestroy(handle);
            return EXIT_FAILURE;
        }
        modify (handle, devPtrA, M, N, 2, 3, 16.0f, 12.0f);
        stat = cublasGetMatrix (M, N, sizeof(*a), devPtrA, M, a, M);
        if(stat != CUBLAS_STATUS_SUCCESS) {
            printf("data upload failed");
            cudaFree (devPtrA);
            cublasDestroy(handle); 
            return EXIT_FAILURE;
        } 
        cudaFree (devPtrA);
        cublasDestroy(handle);
        for(j = 1; j <= N; j++) {
            for(i = 1; i <= M; i++) {
                printf ("%7.0f", a[IDX2F(i,j,M)]);
            }
            printf ("
    ");
        }
        free(a);
        return EXIT_SUCCESS;
    }

  • 相关阅读:
    JDK动态代理
    回顾反射机制Method
    静态代理和动态代理
    使用jQuery实现ajax请求
    ajax函数
    事件 on
    函数2
    pytest-mock 调试实例
    Linux自启动tomcat
    第一次做性能测试
  • 原文地址:https://www.cnblogs.com/zhxfl/p/3905818.html
Copyright © 2011-2022 走看看