From 50840b9587df316708cf963b27e033e1a6e219c6 Mon Sep 17 00:00:00 2001 From: Penporn Koanantakool Date: Tue, 5 May 2020 12:58:01 -0700 Subject: [PATCH] Add macros to avoid frequent ROCm ifdefs. Clean up the ifdefs in core/kernels/sparse. PiperOrigin-RevId: 310004168 Change-Id: I1e48a64d6f7895c7f031397f1e231e231c4568cd --- tensorflow/core/kernels/cuda_sparse.h | 6 +++ tensorflow/core/kernels/sparse/mat_mul_op.cc | 39 ++++--------------- .../core/kernels/sparse/sparse_mat_mul_op.cc | 18 +++------ .../core/kernels/sparse/transpose_op.cc | 6 +-- 4 files changed, 20 insertions(+), 49 deletions(-) diff --git a/tensorflow/core/kernels/cuda_sparse.h b/tensorflow/core/kernels/cuda_sparse.h index 5dd62037ff0..35bd5ccf0d7 100644 --- a/tensorflow/core/kernels/cuda_sparse.h +++ b/tensorflow/core/kernels/cuda_sparse.h @@ -35,6 +35,9 @@ using gpusparseAction_t = cusparseAction_t; using gpusparseHandle_t = cusparseHandle_t; using gpuStream_t = cudaStream_t; +#define GPUSPARSE(postfix) CUSPARSE_##postfix +#define gpusparse(postfix) cusparse##postfix + #elif TENSORFLOW_USE_ROCM #include "rocm/include/hipsparse/hipsparse.h" @@ -46,6 +49,9 @@ using gpusparseAction_t = hipsparseAction_t; using gpusparseHandle_t = hipsparseHandle_t; using gpuStream_t = hipStream_t; +#define GPUSPARSE(postfix) HIPSPARSE_##postfix +#define gpusparse(postfix) hipsparse##postfix + #endif #include "tensorflow/core/framework/op_kernel.h" diff --git a/tensorflow/core/kernels/sparse/mat_mul_op.cc b/tensorflow/core/kernels/sparse/mat_mul_op.cc index 1a9186b7e4b..36b1ec18ded 100644 --- a/tensorflow/core/kernels/sparse/mat_mul_op.cc +++ b/tensorflow/core/kernels/sparse/mat_mul_op.cc @@ -748,34 +748,19 @@ class CSRSparseMatrixMatMul { // transA must be non-transpose if transB is transpose (cusparse // limitation). -#if GOOGLE_CUDA - const gpusparseOperation_t transA = CUSPARSE_OPERATION_NON_TRANSPOSE; -#elif TENSORFLOW_USE_ROCM - const gpusparseOperation_t transA = HIPSPARSE_OPERATION_NON_TRANSPOSE; -#endif + const gpusparseOperation_t transA = GPUSPARSE(OPERATION_NON_TRANSPOSE); // transB: b is row-major, and cusparse requires col-major b (or // equivalently transB == transpose). this version is actually more // efficient. -#if GOOGLE_CUDA - const gpusparseOperation_t transB = CUSPARSE_OPERATION_TRANSPOSE; + const gpusparseOperation_t transB = GPUSPARSE(OPERATION_TRANSPOSE); gpusparseMatDescr_t descrA; - TF_RETURN_IF_GPUSPARSE_ERROR(cusparseCreateMatDescr(&descrA)); + TF_RETURN_IF_GPUSPARSE_ERROR(gpusparse(CreateMatDescr)(&descrA)); TF_RETURN_IF_GPUSPARSE_ERROR( - cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL)); + gpusparse(SetMatType)(descrA, GPUSPARSE(MATRIX_TYPE_GENERAL))); TF_RETURN_IF_GPUSPARSE_ERROR( - cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO)); -#elif TENSORFLOW_USE_ROCM - const gpusparseOperation_t transB = HIPSPARSE_OPERATION_TRANSPOSE; - - gpusparseMatDescr_t descrA; - TF_RETURN_IF_GPUSPARSE_ERROR(hipsparseCreateMatDescr(&descrA)); - TF_RETURN_IF_GPUSPARSE_ERROR( - hipsparseSetMatType(descrA, HIPSPARSE_MATRIX_TYPE_GENERAL)); - TF_RETURN_IF_GPUSPARSE_ERROR( - hipsparseSetMatIndexBase(descrA, HIPSPARSE_INDEX_BASE_ZERO)); -#endif + gpusparse(SetMatIndexBase)(descrA, GPUSPARSE(INDEX_BASE_ZERO))); // A is (m, k), Bt is (ldb, k) and Ct is (ldc, n) const int k = b.dimension(0); @@ -838,19 +823,11 @@ class CSRSparseMatrixMatVec { const T beta = 0; gpusparseMatDescr_t descrA; -#if GOOGLE_CUDA - TF_RETURN_IF_GPUSPARSE_ERROR(cusparseCreateMatDescr(&descrA)); + TF_RETURN_IF_GPUSPARSE_ERROR(gpusparse(CreateMatDescr)(&descrA)); TF_RETURN_IF_GPUSPARSE_ERROR( - cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL)); + gpusparse(SetMatType)(descrA, GPUSPARSE(MATRIX_TYPE_GENERAL))); TF_RETURN_IF_GPUSPARSE_ERROR( - cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO)); -#elif TENSORFLOW_USE_ROCM - TF_RETURN_IF_GPUSPARSE_ERROR(hipsparseCreateMatDescr(&descrA)); - TF_RETURN_IF_GPUSPARSE_ERROR( - hipsparseSetMatType(descrA, HIPSPARSE_MATRIX_TYPE_GENERAL)); - TF_RETURN_IF_GPUSPARSE_ERROR( - hipsparseSetMatIndexBase(descrA, HIPSPARSE_INDEX_BASE_ZERO)); -#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM + gpusparse(SetMatIndexBase)(descrA, GPUSPARSE(INDEX_BASE_ZERO))); const int m = a.dense_shape_host(0); const int n = a.dense_shape_host(1); diff --git a/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc b/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc index e06dbcb0242..7a66c8af163 100644 --- a/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc +++ b/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc @@ -529,20 +529,12 @@ struct CSRSparseSparseMatrixMatMul adjoint_a_(adjoint_a), transpose_b_(transpose_b) { // TODO(ebrevdo): Figure out why transposed implementations crash cuSparse. -#if GOOGLE_CUDA - transA_ = transpose_a ? (adjoint_a ? CUSPARSE_OPERATION_TRANSPOSE - : CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE) - : CUSPARSE_OPERATION_NON_TRANSPOSE; - transB_ = transpose_b ? CUSPARSE_OPERATION_TRANSPOSE - : CUSPARSE_OPERATION_NON_TRANSPOSE; -#elif TENSORFLOW_USE_ROCM transA_ = transpose_a - ? (adjoint_a ? HIPSPARSE_OPERATION_TRANSPOSE - : HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE) - : HIPSPARSE_OPERATION_NON_TRANSPOSE; - transB_ = transpose_b ? HIPSPARSE_OPERATION_TRANSPOSE - : HIPSPARSE_OPERATION_NON_TRANSPOSE; -#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM + ? (adjoint_a ? GPUSPARSE(OPERATION_TRANSPOSE) + : GPUSPARSE(OPERATION_CONJUGATE_TRANSPOSE)) + : GPUSPARSE(OPERATION_NON_TRANSPOSE); + transB_ = transpose_b ? GPUSPARSE(OPERATION_TRANSPOSE) + : GPUSPARSE(OPERATION_NON_TRANSPOSE); } Status Initialize() { diff --git a/tensorflow/core/kernels/sparse/transpose_op.cc b/tensorflow/core/kernels/sparse/transpose_op.cc index f9ddb1d8d97..3158eb5016d 100644 --- a/tensorflow/core/kernels/sparse/transpose_op.cc +++ b/tensorflow/core/kernels/sparse/transpose_op.cc @@ -262,11 +262,7 @@ struct CSRSparseMatrixTransposeComponent { TF_RETURN_IF_ERROR(ValidateTransposeInputs(x, *y)); GpuSparse cuda_sparse(ctx); TF_RETURN_IF_ERROR(cuda_sparse.Initialize()); -#if GOOGLE_CUDA - const gpusparseAction_t copyValues = CUSPARSE_ACTION_NUMERIC; -#elif TENSORFLOW_USE_ROCM - const gpusparseAction_t copyValues = HIPSPARSE_ACTION_NUMERIC; -#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM + const gpusparseAction_t copyValues = GPUSPARSE(ACTION_NUMERIC); const int rank = x.dense_shape_host.size(); const int m = x.row_ptr.size() - 1; const int n = x.dense_shape_host(rank - 1);