zoukankan      html  css  js  c++  java
  • 【神经网络与深度学习】【C/C++】使用blas做矩阵乘法

     
    复制代码
    #define min(x,y) (((x) < (y)) ? (x) : (y))
    
    #include <stdio.h>
    #include <stdlib.h>
    #include <cublas_v2.h>
    #include <iostream>
    #include <vector>
    //extern "C"
    //{
       #include <cblas.h>
    //}
    
    using namespace std;
    /**
     * Demonstrates cblas_sgemm on small row-major matrices.
     *
     * Computes C = alpha * A * B + beta * C with A (M x K) = 4x3, B (K x N) = 3x2
     * and C (M x N) = 4x2, then prints every element of C on its own line,
     * followed by an extra blank line after each row.
     *
     * @return EXIT_SUCCESS.
     */
    int main()
    {
        const enum CBLAS_ORDER Order=CblasRowMajor;
        const enum CBLAS_TRANSPOSE TransA=CblasNoTrans;
        const enum CBLAS_TRANSPOSE TransB=CblasNoTrans;
        const int M=4;// rows of A, rows of C
        const int N=2;// columns of B, columns of C
        const int K=3;// columns of A, rows of B
        const float alpha=1;
        const float beta=0;
        // Row-major storage: each leading dimension is the number of columns.
        const int lda=K;// columns of A
        const int ldb=N;// columns of B
        const int ldc=N;// columns of C
        const float A[M*K]={1,2,3,4,5,6,7,8,9,8,7,6};
        const float B[K*N]={5,4,3,2,1,0};
        // Zero-initialised so the buffer is never handed to the library with
        // indeterminate contents, regardless of how beta == 0 is treated.
        float C[M*N]={0};

        cblas_sgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);

        for(int i=0;i<M;i++)
        {
           for(int j=0;j<N;j++)
           {
               // The separator must be written as an escape sequence: a raw
               // line break inside a string literal does not compile.
               std::cout<<C[i*ldc+j]<<"\n";
           }
           std::cout<<std::endl;
        }

        return EXIT_SUCCESS;
    }
    复制代码

    g++ testblas.c++ -lopenblas  -o testout

    g++ testblas.c++ -lopenblas_piledriverp-r0.2.9 -o testout   本地编译openblas版本

    注意library放在引用library的函数的后面

    复制代码
    cblas_sgemm
    
    Multiplies two matrices (single-precision).
    
    void cblas_sgemm (
    const enum CBLAS_ORDER Order,  // Specifies row-major (C) or column-major (Fortran) data ordering.
    //typedef enum CBLAS_ORDER     {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
    
    const enum CBLAS_TRANSPOSE TransA,//Specifies whether to transpose matrix A.
    const enum CBLAS_TRANSPOSE TransB,
    const int M,   //Number of rows in matrices A and C.
    const int N,//Number of columns in matrices B and C.
    const int K,  //Number of columns in matrix A; number of rows in matrix B
    const float alpha, //Scaling factor for the product of matrices A and B
    const float *A, 
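    const int lda, //The leading dimension (stride) of matrix A: the number of elements between the starts of consecutive rows (row-major) or consecutive columns (column-major). For a row-major A[m][n] without padding the value is n; for column-major storage it is m.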
    
    lda, ldb and ldc (the strides) are not relevant to my problem after all, but here's an explanation of them : 
    
    The elements of a matrix (i.e a 2D array) are stored contiguously in memory. However, they may be stored in either column-major or row-major fashion. The stride represents the distance in memory between elements in adjacent rows (if row-major) or in adjacent columns (if column-major). This means that the stride is usually equal to the number of rows/columns in the matrix.
    
    Matrix A =
    [1 2 3]
    [4 5 6]
    Row-major stores values as {1,2,3,4,5,6}
    Stride here is 3
    
    Col-major stores values as {1, 4, 2, 5, 3, 6}
    Stride here is 2
    
    
    Matrix B =
    [1 2 3]
    [4 5 6]
    [7 8 9]
    
    Col-major storage is {1, 4, 7, 2, 5, 8, 3, 6, 9}
    Stride here is 3
    
    
    Read more: http://www.physicsforums.com 
    
    const float *B,  
    const int ldb,  //The leading dimension (stride) of matrix B: for a row-major B[m][n] without padding the value is n; for column-major storage it is m.
    const float beta,  //Scaling factor for matrix C.
    float *C,
    const int ldc    //The leading dimension (stride) of matrix C: for a row-major C[m][n] without padding the value is n; for column-major storage it is m.
    );
    
    Thus, it calculates either
    C←αAB + βC
    or
    C←αBA + βC
    with optional use of transposed forms of A, B, or both.
    
    
    
    
    复制代码

     

    // Storage order passed as the `Order` argument: row-major (C) or column-major (Fortran) data ordering.
    typedef enum CBLAS_ORDER     {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
    // Passed as `TransA` / `TransB` to say whether a matrix operand is transposed.
    // NOTE(review): the Conj* values presumably select conjugated forms for complex routines — confirm against the CBLAS header.
    typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;

    $C = AB$

    $C^T = (AB)^T = B^T A^T$  把A和B的顺序颠倒,可以直接得到转置矩阵乘法的结果,不用作其他变换(结果C也是转置)。

     

     Y←αAX + βY

    cblas_sgemv
    Multiplies a matrix by a vector (single precision).
    复制代码
    // Multiplies a matrix by a vector (single precision): Y <- alpha*A*X + beta*Y.
    void cblas_sgemv (
    const enum CBLAS_ORDER Order,       // Row-major (C) or column-major (Fortran) data ordering.
    const enum CBLAS_TRANSPOSE TransA,  // Whether to transpose matrix A.
    const int M,                        // presumably the number of rows of A — TODO confirm against the CBLAS reference
    const int N,                        // presumably the number of columns of A — TODO confirm against the CBLAS reference
    const float alpha,                  // Scaling factor for the product A*X.
    const float *A,                     // Matrix A.
    const int lda,                      // Leading dimension (stride) of A.
    const float *X,                     // Vector X.
    const int incX,                     // presumably the element stride within X — TODO confirm
    const float beta,                   // Scaling factor for Y.
    float *Y,                           // Vector Y; overwritten with the result.
    const int incY                      // presumably the element stride within Y — TODO confirm
    );
    复制代码

     

     

     

     

     

     

     

     

     

     

     

    STL版本

    cblas_daxpy
    Computes a constant times a vector plus a vector (double-precision).  

    On return, the contents of vector Y are replaced with the result. The value computed is (alpha * X[i]) +
    Y[i].

    复制代码
    #include <OpenBlas/cblas.h>
    #include <OpenBlas/common.h>
    #include <iostream>
    #include <vector>
    
    int main()
    {
        blasint n = 10;
        blasint in_x =1;
        blasint in_y =1;
    
        std::vector<double> x(n);
        std::vector<double> y(n);
    
        double alpha = 10;
    
        std::fill(x.begin(),x.end(),1.0);
        std::fill(y.begin(),y.end(),2.0);
    
        cblas_daxpy( n, alpha, &x[0], in_x, &y[0], in_y);
    
        //Print y 
        for(int j=0;j<n;j++)
            std::cout << y[j] << "	";
    
        std::cout << std::endl;
    }
    复制代码

     

    复制代码
    
    cublas
    
    

    cublasStatus_t
    cublasCreate(cublasHandle_t *handle)

    
    

    Return Value                      Meaning
    CUBLAS_STATUS_SUCCESS             the initialization succeeded
    CUBLAS_STATUS_NOT_INITIALIZED     the CUDA(TM) Runtime initialization failed
    CUBLAS_STATUS_ALLOC_FAILED        the resources could not be allocated

    cublasStatus_t cublasDestroy(cublasHandle_t handle)

    Return Value                      Meaning
    CUBLAS_STATUS_SUCCESS             the shut down succeeded
    CUBLAS_STATUS_NOT_INITIALIZED     the library was not initialized

     

    
    
    // cuBLAS counterpart of cblas_sgemm. Compared with the cblas_sgemm
    // prototype it takes a library handle as its first argument, has no
    // Order argument, passes alpha and beta by pointer, and returns a
    // cublasStatus_t.
    cublasStatus_t cublasSgemm(cublasHandle_t handle,  // The only difference: handle to the cuBLAS library context.
    cublasOperation_t transa,
    cublasOperation_t transb,
    int m,
    int n,
    int k,
    const float *alpha,
    const float *A,
    int lda,
    const float *B,
    int ldb,
    const float *beta,
    float *C,
    int ldc
    );
    复制代码
    // Multiplies two matrices (single precision): C <- alpha*A*B + beta*C,
    // with optional use of transposed forms of A, B, or both.
    void cblas_sgemm (
    const enum CBLAS_ORDER Order,  // Specifies row-major (C) or column-major (Fortran) data ordering.
    //typedef enum CBLAS_ORDER     {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
    
    const enum CBLAS_TRANSPOSE TransA,//Specifies whether to transpose matrix A.
    const enum CBLAS_TRANSPOSE TransB,//Specifies whether to transpose matrix B.
    const int M,   //Number of rows in matrices A and C.
    const int N,//Number of columns in matrices B and C.
    const int K,  //Number of columns in matrix A; number of rows in matrix B
    const float alpha, //Scaling factor for the product of matrices A and B
    const float *A, 
    const int lda, //Leading dimension (stride) of matrix A; for a row-major A[m][n] without padding the value is n, for column-major storage it is m.
    const float *B,  
    const int ldb,  //Leading dimension (stride) of matrix B; for a row-major B[m][n] without padding the value is n, for column-major storage it is m.
    const float beta,  //Scaling factor for matrix C.
    float *C,
    const int ldc    //Leading dimension (stride) of matrix C; for a row-major C[m][n] without padding the value is n, for column-major storage it is m.
    );
    复制代码
  • 相关阅读:
    libmv
    visualSFM
    opencv学习笔记——时间计算函数getTickCount()和getTickFrequency()
    opencv学习笔记——cv::mean()函数详解
    linux使用ip能ping通,但使用域名却不能访问的解决方法
    yum 安装出错--"Couldn't resolve host 'mirrors.aliyun.com'"
    vmware复制虚拟机出现Error:No suitable device found:no device found for connection 'System eth0'
    VMWare虚拟机 网络连接模式
    js监听input输入框值的实时变化实例
    本地连接linux虚拟机的方法
  • 原文地址:https://www.cnblogs.com/huty/p/8517894.html
Copyright © 2011-2022 走看看