From dfff1ff4371e6490a66bab2163b0e4fc764e429a Mon Sep 17 00:00:00 2001 From: sunway513 Date: Wed, 4 Mar 2020 11:39:15 -0600 Subject: [PATCH] Update Tensorflow ROCm code base to build with hipclang compiler --- tensorflow/core/kernels/conv_2d_gpu.h | 2 +- tensorflow/core/lib/bfloat16/bfloat16.h | 2 +- .../crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl | 7 +++++++ third_party/gpus/rocm_configure.bzl | 4 ++++ 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/conv_2d_gpu.h b/tensorflow/core/kernels/conv_2d_gpu.h index 22d7f939686..31abe9dfead 100644 --- a/tensorflow/core/kernels/conv_2d_gpu.h +++ b/tensorflow/core/kernels/conv_2d_gpu.h @@ -236,7 +236,7 @@ __global__ void SwapDimension1And2InTensor3UsingTiles( // One extra line in the inner dimension to avoid share memory bank conflict. // This is to mimic the following, but no constructor of T can be invoked. // __shared__ T shared_memory_tile[TileSizeI][TileSizeJ + 1]; -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_COMPILER_IS_HIP_CLANG __shared__ __align__( alignof(T)) char shared_mem_raw[TileSizeI * (TileSizeJ + 1) * sizeof(T)]; typedef T(*SharedMemoryTile)[TileSizeJ + 1]; diff --git a/tensorflow/core/lib/bfloat16/bfloat16.h b/tensorflow/core/lib/bfloat16/bfloat16.h index a25f4d947ed..d4e33143f29 100644 --- a/tensorflow/core/lib/bfloat16/bfloat16.h +++ b/tensorflow/core/lib/bfloat16/bfloat16.h @@ -22,7 +22,7 @@ limitations under the License. 
#include "tensorflow/core/platform/byte_order.h" -#ifdef __CUDACC__ +#if defined(__CUDACC__) || (defined(__HIPCC__) && defined(__HIP__)) // All functions callable from CUDA code must be qualified with __device__ #define B16_DEVICE_FUNC __host__ __device__ diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl index 8a94afbfde1..f5ac7b39dfd 100755 --- a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl +++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl @@ -173,6 +173,13 @@ def InvokeHipcc(argv, log=False): out = ' -o ' + out_file[0] hipccopts = ' ' + # In hip-clang environment, we need to make sure that hip header is included + # before some standard math header like <complex> is included in any source. + # Otherwise, we get build error. + # Also we need to retain warning about uninitialised shared variable as + # warning only, even when -Werror option is specified. 
+ if HIPCC_IS_HIPCLANG: + hipccopts += ' --include=hip/hip_runtime.h -Wno-error=cuda-shared-init ' hipccopts += ' ' + hipcc_compiler_options # Use -fno-gpu-rdc by default for early GPU kernel finalization # This flag would trigger GPU kernels be generated at compile time, instead diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl index 20ff2a4aafa..3c345e6724b 100644 --- a/third_party/gpus/rocm_configure.bzl +++ b/third_party/gpus/rocm_configure.bzl @@ -250,6 +250,10 @@ def _rocm_include_path(repository_ctx, rocm_config): inc_dirs.append(rocm_config.rocm_toolkit_path + "/hcc/compiler/lib/clang/10.0.0/include/") inc_dirs.append(rocm_config.rocm_toolkit_path + "/hcc/lib/clang/10.0.0/include") + # Support hcc based off clang 11.0.0, included in ROCm3.1 + inc_dirs.append(rocm_config.rocm_toolkit_path + "/hcc/compiler/lib/clang/11.0.0/include/") + inc_dirs.append(rocm_config.rocm_toolkit_path + "/hcc/lib/clang/11.0.0/include") + return inc_dirs def _enable_rocm(repository_ctx):