Merge pull request #39427 from ROCmSoftwarePlatform:google-upstream-hipclang35
PiperOrigin-RevId: 312140635 Change-Id: Idfb826a225782e62c33eebb4d9bdd52024db7f47
This commit is contained in:
commit
9282621aa9
@ -689,7 +689,7 @@ std::unique_ptr<llvm::TargetMachine> AMDGPUGetTargetMachine(
|
||||
llvm::Triple target_triple, int amdgpu_version,
|
||||
const HloModuleConfig& hlo_module_config) {
|
||||
return GetTargetMachine(target_triple, absl::StrCat("gfx", amdgpu_version),
|
||||
hlo_module_config, "-code-object-v3");
|
||||
hlo_module_config, "+code-object-v3");
|
||||
}
|
||||
|
||||
void AMDGPUBackendInit(const HloModuleConfig& hlo_module_config) {
|
||||
|
@ -132,6 +132,11 @@ bool GpuExecutor::UnloadGpuBinary(const void* gpu_binary) {
|
||||
VLOG(3) << "Unloading HSACO module " << module;
|
||||
GpuDriver::UnloadModule(context_, module);
|
||||
gpu_binary_to_module_.erase(module_it);
|
||||
const char* mem_it = nullptr;
|
||||
for (auto x : in_memory_modules_) {
|
||||
if (x.second == module) mem_it = x.first;
|
||||
}
|
||||
if (mem_it != nullptr) in_memory_modules_.erase(mem_it);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -179,7 +179,7 @@ def InvokeHipcc(argv, log=False):
|
||||
# Also we need to retain warning about uninitialised shared variable as
|
||||
# warning only, even when -Werror option is specified.
|
||||
if HIPCC_IS_HIPCLANG:
|
||||
hipccopts += ' --include=hip/hip_runtime.h -Wno-error=cuda-shared-init '
|
||||
hipccopts += ' --include=hip/hip_runtime.h '
|
||||
hipccopts += ' ' + hipcc_compiler_options
|
||||
# Use -fno-gpu-rdc by default for early GPU kernel finalization
|
||||
# This flag would trigger GPU kernels be generated at compile time, instead
|
||||
@ -258,6 +258,8 @@ def main():
|
||||
gpu_linker_flags.append('-L' + HIP_RUNTIME_PATH)
|
||||
gpu_linker_flags.append('-Wl,-rpath=' + HIP_RUNTIME_PATH)
|
||||
gpu_linker_flags.append('-l' + HIP_RUNTIME_LIBRARY)
|
||||
if HIPCC_IS_HIPCLANG:
|
||||
gpu_linker_flags.append("-lrt")
|
||||
|
||||
if VERBOSE: print(' '.join([CPU_COMPILER] + gpu_linker_flags))
|
||||
return subprocess.call([CPU_COMPILER] + gpu_linker_flags)
|
||||
|
20
third_party/gpus/cuda_configure.bzl
vendored
20
third_party/gpus/cuda_configure.bzl
vendored
@ -809,23 +809,35 @@ def make_copy_files_rule(repository_ctx, name, srcs, outs):
|
||||
cmd = \"""%s \""",
|
||||
)""" % (name, "\n".join(outs), " && \\\n".join(cmds))
|
||||
|
||||
def make_copy_dir_rule(repository_ctx, name, src_dir, out_dir):
|
||||
"""Returns a rule to recursively copy a directory."""
|
||||
def make_copy_dir_rule(repository_ctx, name, src_dir, out_dir, exceptions = None):
|
||||
"""Returns a rule to recursively copy a directory.
|
||||
If exceptions is not None, it must be a list of files or directories in
|
||||
'src_dir'; these will be excluded from copying.
|
||||
"""
|
||||
src_dir = _norm_path(src_dir)
|
||||
out_dir = _norm_path(out_dir)
|
||||
outs = read_dir(repository_ctx, src_dir)
|
||||
post_cmd = ""
|
||||
if exceptions != None:
|
||||
outs = [x for x in outs if not any([
|
||||
x.startswith(src_dir + "/" + y)
|
||||
for y in exceptions
|
||||
])]
|
||||
outs = [(' "%s",' % out.replace(src_dir, out_dir)) for out in outs]
|
||||
|
||||
# '@D' already contains the relative path for a single file, see
|
||||
# http://docs.bazel.build/versions/master/be/make-variables.html#predefined_genrule_variables
|
||||
out_dir = "$(@D)/%s" % out_dir if len(outs) > 1 else "$(@D)"
|
||||
if exceptions != None:
|
||||
for x in exceptions:
|
||||
post_cmd += " ; rm -fR " + out_dir + "/" + x
|
||||
return """genrule(
|
||||
name = "%s",
|
||||
outs = [
|
||||
%s
|
||||
],
|
||||
cmd = \"""cp -rLf "%s/." "%s/" \""",
|
||||
)""" % (name, "\n".join(outs), src_dir, out_dir)
|
||||
cmd = \"""cp -rLf "%s/." "%s/" %s\""",
|
||||
)""" % (name, "\n".join(outs), src_dir, out_dir, post_cmd)
|
||||
|
||||
def _flag_enabled(repository_ctx, flag_name):
|
||||
return get_host_environ(repository_ctx, flag_name) == "1"
|
||||
|
1
third_party/gpus/rocm_configure.bzl
vendored
1
third_party/gpus/rocm_configure.bzl
vendored
@ -615,6 +615,7 @@ def _create_local_rocm_repository(repository_ctx):
|
||||
name = "rocm-include",
|
||||
src_dir = rocm_toolkit_path + "/include",
|
||||
out_dir = "rocm/include",
|
||||
exceptions = ["gtest", "gmock"],
|
||||
),
|
||||
make_copy_dir_rule(
|
||||
repository_ctx,
|
||||
|
Loading…
Reference in New Issue
Block a user