CUDA类转换为CPU类
示例1
// Example 1: build a CPU Matrix directly from a CuMatrix through the
// explicit converting constructor.
CuMatrix<BaseFloat> A(RandInt(1, 50), RandInt(1, 50));
A.SetRandn();
// The constructor sizes A2 from A and then copies A's contents into it,
// so the caller does not have to allocate A2 beforehand.
Matrix<BaseFloat> A2(A); |
示例2
// Example 2: copy a CuMatrix into an existing CPU Matrix with CopyToMat.
CuMatrix<BaseFloat> A(RandInt(1, 50), RandInt(1, 50));
A.SetRandn();
// The destination must be a real object with A's dimensions:
//  - `Matrix<BaseFloat> A2();` would declare a function, not a matrix;
//  - the direct-copy path of CopyToMat asserts that the sizes already match.
Matrix<BaseFloat> A2(A.NumRows(), A.NumCols());
// CopyToMat takes the destination by pointer.
A.CopyToMat(&A2); |
代码实现
src/matrix/kaldi-matrix.h |
// In-class declaration of the converting constructor from a CUDA matrix.
// It is `explicit`, so a CuMatrixBase is never converted to a Matrix
// implicitly; `trans` defaults to kNoTrans (no transpose).
template<typename OtherReal> explicit Matrix(const CuMatrixBase<OtherReal> &cu, MatrixTransposeType trans = kNoTrans); |
src/cudamatrix/cu-matrix.h |
// Converting constructor: creates a CPU Matrix holding the contents of the
// CUDA matrix M, transposed when `trans` is not kNoTrans.
template<typename Real>
template<typename OtherReal>
Matrix<Real>::Matrix(const CuMatrixBase<OtherReal> &M,
                     MatrixTransposeType trans) {
  // Pick the destination shape first: rows and columns are swapped unless
  // the copy is a plain (non-transposed) one.
  const bool same_shape = (trans == kNoTrans);
  const MatrixIndexT rows = same_shape ? M.NumRows() : M.NumCols();
  const MatrixIndexT cols = same_shape ? M.NumCols() : M.NumRows();
  Init(rows, cols, kDefaultStride);
  // The storage now has the final size, so the data can be copied into it.
  M.CopyToMat(this, trans);
}

// Declaration of the CuMatrixBase member called above: copies the CUDA
// matrix into *dst; `trans` defaults to kNoTrans.
template<typename OtherReal>
void CopyToMat(MatrixBase<OtherReal> *dst,
               MatrixTransposeType trans = kNoTrans) const;
|
src/cudamatrix/cu-matrix.cc |
// Copies this CUDA matrix into the host matrix *dst, transposing when
// `trans` is kTrans.
//
// When CUDA is compiled in and a device is enabled there are two paths:
//   * a transpose is requested or sizeof(OtherReal) != sizeof(Real): the data
//     is first put into a temporary CuMatrix<OtherReal>, which is then copied
//     out with a plain (kNoTrans) call;
//   * otherwise: dst is asserted to have this matrix's dimensions and the
//     data is moved with one pitched 2-D device-to-host copy.
// In every other case the work is delegated to dst->CopyFromMat(Mat(), trans).
template<typename Real>
template<typename OtherReal>
void CuMatrixBase<Real>::CopyToMat(MatrixBase<OtherReal> *dst,
                                   MatrixTransposeType trans) const {
#if HAVE_CUDA == 1
  if (CuDevice::Instantiate().Enabled()) {
    const bool needs_temporary =
        (trans == kTrans) || (sizeof(OtherReal) != sizeof(Real));
    if (needs_temporary) {
      // Transpose and/or change the element type in a temporary CuMatrix,
      // then take the direct-copy path below on that temporary.
      CuMatrix<OtherReal> converted(*this, trans);
      converted.CopyToMat(dst, kNoTrans);
      return;
    }

    KALDI_ASSERT(dst->NumRows() == NumRows() && dst->NumCols() == NumCols());
    if (num_rows_ == 0) return;  // Nothing to copy.
    CuTimer tim;

    // All sizes are in bytes. sizeof(Real) is also valid for dst here because
    // this path is only reached when both element types have the same size.
    MatrixIndexT device_pitch = stride_ * sizeof(Real);
    MatrixIndexT host_pitch = dst->Stride() * sizeof(Real);
    MatrixIndexT row_bytes = NumCols() * sizeof(Real);
    CU_SAFE_CALL(cudaMemcpy2DAsync(dst->Data(), host_pitch,
                                   this->data_, device_pitch,
                                   row_bytes, this->num_rows_,
                                   cudaMemcpyDeviceToHost,
                                   cudaStreamPerThread));
    // Wait for the asynchronous copy before returning to the caller.
    CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread));
    CuDevice::Instantiate().AccuProfile("CuMatrix::CopyToMatD2H", tim);
    return;
  }
#endif
  // No CUDA support compiled in, or no device enabled.
  dst->CopyFromMat(Mat(), trans);
}
|
CPU类转换为CUDA类
示例1
// Example 1: build a CuMatrix directly from a CPU Matrix through the
// explicit converting constructor.
Matrix<BaseFloat> A(RandInt(1, 50), RandInt(1, 50));
A.SetRandn();
// The constructor resizes A2 to A's dimensions and then copies A into it,
// so the caller does not have to allocate A2 beforehand.
CuMatrix<BaseFloat> A2(A); |
示例2
// Example 2: copy a CPU Matrix into an existing CuMatrix with CopyFromMat.
Matrix<BaseFloat> A(RandInt(1, 50), RandInt(1, 50));
A.SetRandn();
// The destination must be a real object with A's dimensions:
//  - `CuMatrix<BaseFloat> A2();` would declare a function, not a matrix;
//  - the non-transposed CUDA path of CopyFromMat asserts that the sizes
//    already match.
CuMatrix<BaseFloat> A2(A.NumRows(), A.NumCols());
A2.CopyFromMat(A); |
代码实现
src/cudamatrix/cu-matrix.h |
// In-class declaration of the converting constructor from a CPU matrix.
// It is `explicit`, so a MatrixBase is never converted to a CuMatrix
// implicitly; `trans` defaults to kNoTrans (no transpose).
template<typename OtherReal> explicit CuMatrix(const MatrixBase<OtherReal> &other, MatrixTransposeType trans = kNoTrans); |
src/cudamatrix/cu-matrix.cc |
// Converting constructor: creates a CuMatrix holding the contents of the CPU
// matrix `other`, transposed when `trans` is not kNoTrans.
template<typename Real>
template<typename OtherReal>
CuMatrix<Real>::CuMatrix(const MatrixBase<OtherReal> &other,
                         MatrixTransposeType trans) {
  // Pick the destination shape first: rows and columns are swapped unless
  // the copy is a plain (non-transposed) one.
  const bool same_shape = (trans == kNoTrans);
  const MatrixIndexT rows = same_shape ? other.NumRows() : other.NumCols();
  const MatrixIndexT cols = same_shape ? other.NumCols() : other.NumRows();
  // kUndefined: the contents are overwritten by the copy right below.
  this->Resize(rows, cols, kUndefined);
  this->CopyFromMat(other, trans);
}
template<typename Real> void CuMatrixBase<Real>::CopyFromMat(const MatrixBase<Real> &src, MatrixTransposeType trans) { #if HAVE_CUDA == 1 if (CuDevice::Instantiate().Enabled()) { if (trans == kNoTrans) { KALDI_ASSERT(src.NumRows() == num_rows_ && src.NumCols() == num_cols_); CuTimer tim;
MatrixIndexT dst_pitch = stride_*sizeof(Real); MatrixIndexT src_pitch = src.Stride()*sizeof(Real); MatrixIndexT width = src.NumCols()*sizeof(Real); CU_SAFE_CALL(cudaMemcpy2DAsync(data_, dst_pitch, src.Data(), src_pitch, width, src.NumRows(), cudaMemcpyHostToDevice, cudaStreamPerThread)); CU_SAFE_CALL(cudaStreamSynchronize(cudaStreamPerThread));
CuDevice::Instantiate().AccuProfile("CuMatrixBase::CopyFromMat(from CPU)", tim); } else { CuMatrix<Real> trans_mat(src); // Do the transpose on the GPU board. this->CopyFromMat(trans_mat, kTrans); } } else #endif { Mat().CopyFromMat(src, trans); } } |