diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc index 060a0375271..497dcda4361 100644 --- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc +++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc @@ -689,7 +689,7 @@ std::unique_ptr<llvm::TargetMachine> AMDGPUGetTargetMachine( llvm::Triple target_triple, int amdgpu_version, const HloModuleConfig& hlo_module_config) { return GetTargetMachine(target_triple, absl::StrCat("gfx", amdgpu_version), - hlo_module_config, "-code-object-v3"); + hlo_module_config, "+code-object-v3"); } void AMDGPUBackendInit(const HloModuleConfig& hlo_module_config) { diff --git a/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc b/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc index e22a243a70b..216602a7597 100644 --- a/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc +++ b/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc @@ -132,6 +132,10 @@ bool GpuExecutor::UnloadGpuBinary(const void* gpu_binary) { VLOG(3) << "Unloading HSACO module " << module; GpuDriver::UnloadModule(context_, module); gpu_binary_to_module_.erase(module_it); + const char* mem_it = nullptr; + for (auto x : in_memory_modules_) + if (x.second == module) mem_it = x.first; + if (mem_it != nullptr) in_memory_modules_.erase(mem_it); } return true; } diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl index f5ac7b39dfd..89275128a9c 100755 --- a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl +++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl @@ -179,7 +179,7 @@ def InvokeHipcc(argv, log=False): # Also we need to retain warning about uninitialised shared variable as # warning only, even when -Werror option is specified. if HIPCC_IS_HIPCLANG: - hipccopts += ' --include=hip/hip_runtime.h -Wno-error=cuda-shared-init ' + hipccopts += ' --include=hip/hip_runtime.h ' hipccopts += ' ' + hipcc_compiler_options # Use -fno-gpu-rdc by default for early GPU kernel finalization # This flag would trigger GPU kernels be generated at compile time, instead @@ -258,6 +258,8 @@ def main(): gpu_linker_flags.append('-L' + HIP_RUNTIME_PATH) gpu_linker_flags.append('-Wl,-rpath=' + HIP_RUNTIME_PATH) gpu_linker_flags.append('-l' + HIP_RUNTIME_LIBRARY) + if HIPCC_IS_HIPCLANG: + gpu_linker_flags.append("-lrt") if VERBOSE: print(' '.join([CPU_COMPILER] + gpu_linker_flags)) return subprocess.call([CPU_COMPILER] + gpu_linker_flags) diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index 545aeebe97a..ce924fe4cd2 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -808,23 +808,28 @@ def make_copy_files_rule(repository_ctx, name, srcs, outs): cmd = \"""%s \""", )""" % (name, "\n".join(outs), " && \\\n".join(cmds)) -def make_copy_dir_rule(repository_ctx, name, src_dir, out_dir): +def make_copy_dir_rule(repository_ctx, name, src_dir, out_dir, exceptions=None): """Returns a rule to recursively copy a directory.""" src_dir = _norm_path(src_dir) out_dir = _norm_path(out_dir) outs = read_dir(repository_ctx, src_dir) + post_cmd='' + if exceptions!=None: + outs = [x for x in outs if not any([x.startswith(y) for y in exceptions])] outs = [(' "%s",' % out.replace(src_dir, out_dir)) for out in outs] - # '@D' already contains the relative path for a single file, see # http://docs.bazel.build/versions/master/be/make-variables.html#predefined_genrule_variables out_dir = "$(@D)/%s" % out_dir if len(outs) > 1 else "$(@D)" + if exceptions!=None: + for x in exceptions: + post_cmd+=" ; rm -fR " + x.replace(src_dir, out_dir) return """genrule( name = "%s", outs = [ %s ], - cmd = \"""cp -rLf "%s/." "%s/" \""", -)""" % (name, "\n".join(outs), src_dir, out_dir) + cmd = \"""cp -rLf "%s/." "%s/" %s\""", +)""" % (name, "\n".join(outs), src_dir, out_dir, post_cmd) def _flag_enabled(repository_ctx, flag_name): return get_host_environ(repository_ctx, flag_name) == "1" diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl index 3c345e6724b..3f518fb05f1 100644 --- a/third_party/gpus/rocm_configure.bzl +++ b/third_party/gpus/rocm_configure.bzl @@ -615,6 +615,8 @@ def _create_local_rocm_repository(repository_ctx): name = "rocm-include", src_dir = rocm_toolkit_path + "/include", out_dir = "rocm/include", + exceptions = [rocm_toolkit_path + "/include/gtest", + rocm_toolkit_path + "/include/gmock"], ), make_copy_dir_rule( repository_ctx,