Merge pull request #37452 from ROCmSoftwarePlatform:google-upstream-hipclang-032020
PiperOrigin-RevId: 303810406 Change-Id: I7bdfe0369d96872c49c01e2ee65efaf577f6409d
This commit is contained in:
commit
2c8517a223
tensorflow/core
third_party/gpus
@ -236,7 +236,7 @@ __global__ void SwapDimension1And2InTensor3UsingTiles(
|
||||
// One extra line in the inner dimension to avoid shared memory bank conflicts.
|
||||
// This is to mimic the following, but no constructor of T can be invoked.
|
||||
// __shared__ T shared_memory_tile[TileSizeI][TileSizeJ + 1];
|
||||
#if GOOGLE_CUDA
|
||||
#if GOOGLE_CUDA || TENSORFLOW_COMPILER_IS_HIP_CLANG
|
||||
__shared__ __align__(
|
||||
alignof(T)) char shared_mem_raw[TileSizeI * (TileSizeJ + 1) * sizeof(T)];
|
||||
typedef T(*SharedMemoryTile)[TileSizeJ + 1];
|
||||
|
@ -23,7 +23,7 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/core/platform/byte_order.h"
|
||||
|
||||
#ifdef __CUDACC__
|
||||
#if defined(__CUDACC__) || (defined(__HIPCC__) && defined(__HIP__))
|
||||
// All functions callable from CUDA code must be qualified with __device__
|
||||
#define B16_DEVICE_FUNC __host__ __device__
|
||||
|
||||
|
@ -173,6 +173,13 @@ def InvokeHipcc(argv, log=False):
|
||||
out = ' -o ' + out_file[0]
|
||||
|
||||
hipccopts = ' '
|
||||
# In hip-clang environment, we need to make sure that hip header is included
|
||||
# before some standard math header like <complex> is included in any source.
|
||||
# Otherwise, we get a build error.
|
||||
# Also we need to retain warning about uninitialised shared variable as
|
||||
# warning only, even when -Werror option is specified.
|
||||
if HIPCC_IS_HIPCLANG:
|
||||
hipccopts += ' --include=hip/hip_runtime.h -Wno-error=cuda-shared-init '
|
||||
hipccopts += ' ' + hipcc_compiler_options
|
||||
# Use -fno-gpu-rdc by default for early GPU kernel finalization
|
||||
# This flag causes GPU kernels to be generated at compile time, instead
|
||||
|
4
third_party/gpus/rocm_configure.bzl
vendored
4
third_party/gpus/rocm_configure.bzl
vendored
@ -250,6 +250,10 @@ def _rocm_include_path(repository_ctx, rocm_config):
|
||||
inc_dirs.append(rocm_config.rocm_toolkit_path + "/hcc/compiler/lib/clang/10.0.0/include/")
|
||||
inc_dirs.append(rocm_config.rocm_toolkit_path + "/hcc/lib/clang/10.0.0/include")
|
||||
|
||||
# Support hcc based off clang 11.0.0, included in ROCm3.1
|
||||
inc_dirs.append(rocm_config.rocm_toolkit_path + "/hcc/compiler/lib/clang/11.0.0/include/")
|
||||
inc_dirs.append(rocm_config.rocm_toolkit_path + "/hcc/lib/clang/11.0.0/include")
|
||||
|
||||
return inc_dirs
|
||||
|
||||
def _enable_rocm(repository_ctx):
|
||||
|
Loading…
Reference in New Issue
Block a user