diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc index 060a0375271..497dcda4361 100644 --- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc +++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc @@ -689,7 +689,7 @@ std::unique_ptr AMDGPUGetTargetMachine( llvm::Triple target_triple, int amdgpu_version, const HloModuleConfig& hlo_module_config) { return GetTargetMachine(target_triple, absl::StrCat("gfx", amdgpu_version), - hlo_module_config, "-code-object-v3"); + hlo_module_config, "+code-object-v3"); } void AMDGPUBackendInit(const HloModuleConfig& hlo_module_config) { diff --git a/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc b/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc index e22a243a70b..fd3b5f19913 100644 --- a/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc +++ b/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc @@ -132,6 +132,11 @@ bool GpuExecutor::UnloadGpuBinary(const void* gpu_binary) { VLOG(3) << "Unloading HSACO module " << module; GpuDriver::UnloadModule(context_, module); gpu_binary_to_module_.erase(module_it); + const char* mem_it = nullptr; + for (auto x : in_memory_modules_) { + if (x.second == module) mem_it = x.first; + } + if (mem_it != nullptr) in_memory_modules_.erase(mem_it); } return true; } diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl index f5ac7b39dfd..89275128a9c 100755 --- a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl +++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl @@ -179,7 +179,7 @@ def InvokeHipcc(argv, log=False): # Also we need to retain warning about uninitialised shared variable as # warning only, even when -Werror option is specified. 
if HIPCC_IS_HIPCLANG: - hipccopts += ' --include=hip/hip_runtime.h -Wno-error=cuda-shared-init ' + hipccopts += ' --include=hip/hip_runtime.h ' hipccopts += ' ' + hipcc_compiler_options # Use -fno-gpu-rdc by default for early GPU kernel finalization # This flag would trigger GPU kernels be generated at compile time, instead @@ -258,6 +258,8 @@ def main(): gpu_linker_flags.append('-L' + HIP_RUNTIME_PATH) gpu_linker_flags.append('-Wl,-rpath=' + HIP_RUNTIME_PATH) gpu_linker_flags.append('-l' + HIP_RUNTIME_LIBRARY) + if HIPCC_IS_HIPCLANG: + gpu_linker_flags.append("-lrt") if VERBOSE: print(' '.join([CPU_COMPILER] + gpu_linker_flags)) return subprocess.call([CPU_COMPILER] + gpu_linker_flags) diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index aa8a2f0226d..91eb0444b7c 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -809,23 +809,35 @@ def make_copy_files_rule(repository_ctx, name, srcs, outs): cmd = \"""%s \""", )""" % (name, "\n".join(outs), " && \\\n".join(cmds)) -def make_copy_dir_rule(repository_ctx, name, src_dir, out_dir): - """Returns a rule to recursively copy a directory.""" +def make_copy_dir_rule(repository_ctx, name, src_dir, out_dir, exceptions = None): + """Returns a rule to recursively copy a directory. + If exceptions is not None, it must be a list of files or directories in + 'src_dir' (matched as whole path names); these are excluded from copying. 
+ """ src_dir = _norm_path(src_dir) out_dir = _norm_path(out_dir) outs = read_dir(repository_ctx, src_dir) + post_cmd = "" + if exceptions != None: + outs = [x for x in outs if not any([ + (x + "/").startswith(src_dir + "/" + y.rstrip("/") + "/") + for y in exceptions + ])] outs = [(' "%s",' % out.replace(src_dir, out_dir)) for out in outs] # '@D' already contains the relative path for a single file, see # http://docs.bazel.build/versions/master/be/make-variables.html#predefined_genrule_variables out_dir = "$(@D)/%s" % out_dir if len(outs) > 1 else "$(@D)" + if exceptions != None: + for x in exceptions: + post_cmd += " && rm -fR " + out_dir + "/" + x return """genrule( name = "%s", outs = [ %s ], - cmd = \"""cp -rLf "%s/." "%s/" \""", -)""" % (name, "\n".join(outs), src_dir, out_dir) + cmd = \"""cp -rLf "%s/." "%s/" %s\""", +)""" % (name, "\n".join(outs), src_dir, out_dir, post_cmd) def _flag_enabled(repository_ctx, flag_name): return get_host_environ(repository_ctx, flag_name) == "1" diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl index 3c345e6724b..4cfec2459e4 100644 --- a/third_party/gpus/rocm_configure.bzl +++ b/third_party/gpus/rocm_configure.bzl @@ -615,6 +615,7 @@ def _create_local_rocm_repository(repository_ctx): name = "rocm-include", src_dir = rocm_toolkit_path + "/include", out_dir = "rocm/include", + exceptions = ["gtest", "gmock"], ), make_copy_dir_rule( repository_ctx,