From 65773fd394162eaada4b200d6bc3d4c2058e17e1 Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Tue, 12 May 2020 01:34:43 -0700 Subject: [PATCH] Retrieve CUDA targets to build in nvcc wrapper from clang command line option. Hard-coding it (through a repo rule) in one place is bad enough. The proper solution would be to make CUDA targets a bazel 'feature' and map it to compiler flags in crosstools. The more pressing requirement though is to allow compiling a mix of SASS and PTX binaries, instead of SASS+PTX for every CUDA target. PiperOrigin-RevId: 311081931 Change-Id: If6aea7bfa08e21984471ce3593e0df3ac2c21798 --- .../crosstool_wrapper_driver_is_not_gcc.tpl | 36 ++++++++----------- .../windows/msvc_wrapper_for_nvcc.py.tpl | 32 +++++++---------- third_party/gpus/cuda_configure.bzl | 8 +---- 3 files changed, 29 insertions(+), 47 deletions(-) diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl index f3b2ae6846d..303339e77f7 100755 --- a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl +++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl @@ -53,13 +53,6 @@ NVCC_PATH = '%{nvcc_path}' PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH) NVCC_VERSION = '%{cuda_version}' - -# TODO(amitpatankar): Benchmark enabling all capabilities by default. -# Environment variable for supported TF CUDA Compute Capabilities -# eg. export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 -CUDA_COMPUTE_ENV_VAR = 'TF_CUDA_COMPUTE_CAPABILITIES' -DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,6.0' - def Log(s): print('gpus/crosstool: {0}'.format(s)) @@ -78,7 +71,8 @@ def GetOptionValue(argv, option): """ parser = ArgumentParser() - parser.add_argument('-' + option, nargs='*', action='append') + parser.add_argument(option, nargs='*', action='append') + option = option.lstrip('-').replace('-', '_') args, _ = parser.parse_known_args(argv) if not args or not vars(args)[option]: return [] @@ -180,17 +174,17 @@ def InvokeNvcc(argv, log=False): host_compiler_options = GetHostCompilerOptions(argv) nvcc_compiler_options = GetNvccOptions(argv) - opt_option = GetOptionValue(argv, 'O') - m_options = GetOptionValue(argv, 'm') + opt_option = GetOptionValue(argv, '-O') + m_options = GetOptionValue(argv, '-m') m_options = ''.join([' -m' + m for m in m_options if m in ['32', '64']]) - include_options = GetOptionValue(argv, 'I') - out_file = GetOptionValue(argv, 'o') - depfiles = GetOptionValue(argv, 'MF') - defines = GetOptionValue(argv, 'D') + include_options = GetOptionValue(argv, '-I') + out_file = GetOptionValue(argv, '-o') + depfiles = GetOptionValue(argv, '-MF') + defines = GetOptionValue(argv, '-D') defines = ''.join([' -D' + define for define in defines]) - undefines = GetOptionValue(argv, 'U') + undefines = GetOptionValue(argv, '-U') undefines = ''.join([' -U' + define for define in undefines]) - std_options = GetOptionValue(argv, 'std') + std_options = GetOptionValue(argv, '-std') # Supported -std flags as of CUDA 9.0. Only keep last to mimic gcc/clang. nvcc_allowed_std_options = ["c++03", "c++11", "c++14"] std_options = ''.join([' -std=' + define @@ -198,7 +192,7 @@ def InvokeNvcc(argv, log=False): # The list of source files get passed after the -c option. I don't know of # any other reliable way to just get the list of source files to be compiled. - src_files = GetOptionValue(argv, 'c') + src_files = GetOptionValue(argv, '-c') # Pass -w through from host to nvcc, but don't do anything fancier with # warnings-related flags, since they're not necessarily the same across @@ -224,13 +218,12 @@ def InvokeNvcc(argv, log=False): srcs = ' '.join(src_files) out = ' -o ' + out_file[0] - supported_cuda_compute_capabilities = [ %{cuda_compute_capabilities} ] nvccopts = '-D_FORCE_INLINES ' - for capability in supported_cuda_compute_capabilities: - capability = capability.replace('.', '') + for capability in GetOptionValue(argv, "--cuda-gpu-arch"): + capability = capability[len('sm_'):] nvccopts += r'-gencode=arch=compute_%s,\"code=sm_%s,compute_%s\" ' % ( capability, capability, capability) - nvccopts += ' ' + nvcc_compiler_options + nvccopts += nvcc_compiler_options nvccopts += undefines nvccopts += defines nvccopts += std_options @@ -272,6 +265,7 @@ def main(): if args.x and args.x[0] == 'cuda': if args.cuda_log: Log('-x cuda') leftover = [pipes.quote(s) for s in leftover] + args.cuda_log = True if args.cuda_log: Log('using nvcc') return InvokeNvcc(leftover, log=args.cuda_log) diff --git a/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl b/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl index 46e8aef3606..c10fb826494 100644 --- a/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl +++ b/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl @@ -37,13 +37,6 @@ GCC_HOST_COMPILER_PATH = ('%{gcc_host_compiler_path}') NVCC_PATH = '%{nvcc_path}' NVCC_VERSION = '%{cuda_version}' NVCC_TEMP_DIR = "%{nvcc_tmp_dir}" -DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,6.0' - -# Taken from environment variable for supported TF CUDA Compute Capabilities -# eg. export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 -supported_cuda_compute_capabilities = os.environ.get( - 'TF_CUDA_COMPUTE_CAPABILITIES', - DEFAULT_CUDA_COMPUTE_CAPABILITIES).split(',') def Log(s): print('gpus/crosstool: {0}'.format(s)) @@ -53,7 +46,7 @@ def GetOptionValue(argv, option): """Extract the list of values for option from options. Args: - option: The option whose value to extract, without the leading '/'. + option: The option whose value to extract. Returns: 1. A list of values, either directly following the option, @@ -62,10 +55,11 @@ def GetOptionValue(argv, option): 2. The leftover options. """ - parser = ArgumentParser(prefix_chars='/') - parser.add_argument('/' + option, nargs='*', action='append') + parser = ArgumentParser(prefix_chars='-/') + parser.add_argument(option, nargs='*', action='append') + option = option.lstrip('-/').replace('-', '_') args, leftover = parser.parse_known_args(argv) - if args and vars(args)[option]: + if args and vars(args).get(option): return (sum(vars(args)[option], []), leftover) return ([], leftover) @@ -122,18 +116,18 @@ def InvokeNvcc(argv, log=False): nvcc_compiler_options, argv = GetNvccOptions(argv) - opt_option, argv = GetOptionValue(argv, 'O') + opt_option, argv = GetOptionValue(argv, '/O') opt = ['-g'] if (len(opt_option) > 0 and opt_option[0] != 'd'): opt = ['-O2'] - include_options, argv = GetOptionValue(argv, 'I') + include_options, argv = GetOptionValue(argv, '/I') includes = ["-I " + include for include in include_options] - defines, argv = GetOptionValue(argv, 'D') + defines, argv = GetOptionValue(argv, '/D') defines = ['-D' + define for define in defines] - undefines, argv = GetOptionValue(argv, 'U') + undefines, argv = GetOptionValue(argv, '/U') undefines = ['-U' + define for define in undefines] # The rest of the unrecognized options should be passed to host compiler @@ -142,10 +136,10 @@ def InvokeNvcc(argv, log=False): m_options = ["-m64"] nvccopts = ['-D_FORCE_INLINES'] - for capability in supported_cuda_compute_capabilities: - capability = capability.replace('.', '') - nvccopts += [r'-gencode=arch=compute_%s,"code=sm_%s,compute_%s"' % ( - capability, capability, capability)] + for capability in GetOptionValue(argv, "--cuda-gpu-arch"): + capability = capability[len('sm_'):] + nvccopts += r'-gencode=arch=compute_%s,\"code=sm_%s,compute_%s\" ' % ( + capability, capability, capability) nvccopts += nvcc_compiler_options nvccopts += undefines nvccopts += defines diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index 545aeebe97a..c587f117deb 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -840,10 +840,7 @@ def _compute_cuda_extra_copts(repository_ctx, compute_capabilities): "--cuda-gpu-arch=sm_" + cap.replace(".", "") for cap in compute_capabilities ] - - # Capabilities are handled in the "crosstool_wrapper_driver_is_not_gcc" for nvcc - # TODO(csigg): Make this consistent with cuda clang and pass unconditionally. - return "if_cuda_clang(%s)" % str(capability_flags) + return str(capability_flags) def _tpl_path(repository_ctx, filename): return repository_ctx.path(Label("//third_party/gpus/%s.tpl" % filename)) @@ -1092,9 +1089,6 @@ def _create_local_cuda_repository(repository_ctx): "%{cuda_version}": cuda_config.cuda_version, "%{nvcc_path}": nvcc_path, "%{gcc_host_compiler_path}": str(cc), - "%{cuda_compute_capabilities}": ", ".join( - ["\"%s\"" % c for c in cuda_config.compute_capabilities], - ), "%{nvcc_tmp_dir}": _get_nvcc_tmp_dir_for_windows(repository_ctx), } repository_ctx.template(