Merge pull request from ROCmSoftwarePlatform:google-upstream-hipclang-032020

PiperOrigin-RevId: 303810406
Change-Id: I7bdfe0369d96872c49c01e2ee65efaf577f6409d
This commit is contained in:
TensorFlower Gardener 2020-03-30 13:29:58 -07:00
commit 2c8517a223
4 changed files with 13 additions and 2 deletions
tensorflow/core
kernels
lib/bfloat16
third_party/gpus

View File

@ -236,7 +236,7 @@ __global__ void SwapDimension1And2InTensor3UsingTiles(
// One extra line in the inner dimension to avoid shared memory bank conflicts.
// This is to mimic the following, but no constructor of T can be invoked.
// __shared__ T shared_memory_tile[TileSizeI][TileSizeJ + 1];
#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_COMPILER_IS_HIP_CLANG
__shared__ __align__(
alignof(T)) char shared_mem_raw[TileSizeI * (TileSizeJ + 1) * sizeof(T)];
typedef T(*SharedMemoryTile)[TileSizeJ + 1];

View File

@ -23,7 +23,7 @@ limitations under the License.
#include "tensorflow/core/platform/byte_order.h"
#ifdef __CUDACC__
#if defined(__CUDACC__) || (defined(__HIPCC__) && defined(__HIP__))
// All functions callable from CUDA code must be qualified with __device__
#define B16_DEVICE_FUNC __host__ __device__

View File

@ -173,6 +173,13 @@ def InvokeHipcc(argv, log=False):
out = ' -o ' + out_file[0]
hipccopts = ' '
# In a hip-clang environment, the HIP runtime header must be included before
# any standard math header such as <complex> is included in a source file;
# otherwise the build fails.
# The warning about uninitialised shared variables must also remain a
# warning only, even when the -Werror option is specified.
if HIPCC_IS_HIPCLANG:
hipccopts += ' --include=hip/hip_runtime.h -Wno-error=cuda-shared-init '
hipccopts += ' ' + hipcc_compiler_options
# Use -fno-gpu-rdc by default for early GPU kernel finalization
# This flag causes GPU kernels to be generated at compile time, instead

View File

@ -250,6 +250,10 @@ def _rocm_include_path(repository_ctx, rocm_config):
inc_dirs.append(rocm_config.rocm_toolkit_path + "/hcc/compiler/lib/clang/10.0.0/include/")
inc_dirs.append(rocm_config.rocm_toolkit_path + "/hcc/lib/clang/10.0.0/include")
# Support hcc based on clang 11.0.0, included in ROCm 3.1
inc_dirs.append(rocm_config.rocm_toolkit_path + "/hcc/compiler/lib/clang/11.0.0/include/")
inc_dirs.append(rocm_config.rocm_toolkit_path + "/hcc/lib/clang/11.0.0/include")
return inc_dirs
def _enable_rocm(repository_ctx):