Compress CUDA kernel binaries (CUBINs).
Impact:
  -32MB wheel size
  -600MB _pywrap_tensorflow_internal.so
  -70MB memory during startup
  +120ms startup time

PiperOrigin-RevId: 315419813
Change-Id: I2c39a88d95a4aa3a692560c8e3d78b125e8445c9
This commit is contained in:
parent
7b0071d954
commit
0266394d50
|
@ -62,7 +62,7 @@ def GetOptionValue(argv, option):
|
|||
|
||||
Args:
|
||||
argv: A list of strings, possibly the argv passed to main().
|
||||
option: The option whose value to extract, without the leading '-'.
|
||||
option: The option whose value to extract, with the leading '-'.
|
||||
|
||||
Returns:
|
||||
A list of values, either directly following the option,
|
||||
|
@ -189,6 +189,8 @@ def InvokeNvcc(argv, log=False):
|
|||
nvcc_allowed_std_options = ["c++03", "c++11", "c++14"]
|
||||
std_options = ''.join([' -std=' + define
|
||||
for define in std_options if define in nvcc_allowed_std_options][-1:])
|
||||
fatbin_options = ''.join([' --fatbin-options=' + option
|
||||
for option in GetOptionValue(argv, '-Xcuda-fatbinary')])
|
||||
|
||||
# The list of source files get passed after the -c option. I don't know of
|
||||
# any other reliable way to just get the list of source files to be compiled.
|
||||
|
@ -233,6 +235,7 @@ def InvokeNvcc(argv, log=False):
|
|||
nvccopts += std_options
|
||||
nvccopts += m_options
|
||||
nvccopts += warning_options
|
||||
nvccopts += fatbin_options
|
||||
|
||||
if depfiles:
|
||||
# Generate the dependency file
|
||||
|
|
|
@ -130,6 +130,9 @@ def InvokeNvcc(argv, log=False):
|
|||
undefines, argv = GetOptionValue(argv, '/U')
|
||||
undefines = ['-U' + define for define in undefines]
|
||||
|
||||
fatbin_options, argv = GetOptionValue(argv, '-Xcuda-fatbinary')
|
||||
fatbin_options = ['--fatbin-options=' + option for option in fatbin_options]
|
||||
|
||||
# The rest of the unrecognized options should be passed to host compiler
|
||||
host_compiler_options = [option for option in argv if option not in (src_files + out_file)]
|
||||
|
||||
|
@ -154,6 +157,7 @@ def InvokeNvcc(argv, log=False):
|
|||
nvccopts += undefines
|
||||
nvccopts += defines
|
||||
nvccopts += m_options
|
||||
nvccopts += fatbin_options
|
||||
nvccopts += ['--compiler-options="' + " ".join(host_compiler_options) + '"']
|
||||
nvccopts += ['-x', 'cu'] + opt + includes + out + ['-c'] + src_files
|
||||
# Specify a unique temp directory for nvcc to generate intermediate files,
|
||||
|
|
|
@ -40,12 +40,15 @@ def if_cuda_clang_opt(if_true, if_false = []):
|
|||
|
||||
def cuda_default_copts():
|
||||
"""Default options for all CUDA compilations."""
|
||||
return if_cuda(
|
||||
["-x", "cuda", "-DGOOGLE_CUDA=1"]
|
||||
) + if_cuda_clang_opt(
|
||||
return if_cuda([
|
||||
"-x", "cuda",
|
||||
"-DGOOGLE_CUDA=1",
|
||||
"-Xcuda-fatbinary=--compress-all",
|
||||
"--no-cuda-include-ptx=all"
|
||||
] + %{cuda_extra_copts}) + if_cuda_clang_opt(
|
||||
# Some important CUDA optimizations are only enabled at O3.
|
||||
["-O3"]
|
||||
) + %{cuda_extra_copts}
|
||||
)
|
||||
|
||||
def cuda_is_configured():
|
||||
"""Returns true if CUDA was enabled during the configure process."""
|
||||
|
|
|
@ -905,14 +905,14 @@ def _tf_sysroot(repository_ctx):
|
|||
return get_host_environ(repository_ctx, _TF_SYSROOT, "")
|
||||
|
||||
def _compute_cuda_extra_copts(repository_ctx, compute_capabilities):
|
||||
capability_flags = ["--no-cuda-include-ptx=all"]
|
||||
copts = []
|
||||
for capability in compute_capabilities:
|
||||
if capability.startswith("compute_"):
|
||||
capability = capability.replace("compute_", "sm_")
|
||||
capability_flags.append("--cuda-include-ptx=%s" % capability)
|
||||
capability_flags.append("--cuda-gpu-arch=%s" % capability)
|
||||
copts.append("--cuda-include-ptx=%s" % capability)
|
||||
copts.append("--cuda-gpu-arch=%s" % capability)
|
||||
|
||||
return str(capability_flags)
|
||||
return str(copts)
|
||||
|
||||
def _tpl_path(repository_ctx, filename):
|
||||
return repository_ctx.path(Label("//third_party/gpus/%s.tpl" % filename))
|
||||
|
|
|
@ -130,6 +130,9 @@ def InvokeNvcc(argv, log=False):
|
|||
undefines, argv = GetOptionValue(argv, '/U')
|
||||
undefines = ['-U' + define for define in undefines]
|
||||
|
||||
fatbin_options, argv = GetOptionValue(argv, '-Xcuda-fatbinary')
|
||||
fatbin_options = ['--fatbin-options=' + option for option in fatbin_options]
|
||||
|
||||
# The rest of the unrecognized options should be passed to host compiler
|
||||
host_compiler_options = [option for option in argv if option not in (src_files + out_file)]
|
||||
|
||||
|
@ -154,6 +157,7 @@ def InvokeNvcc(argv, log=False):
|
|||
nvccopts += undefines
|
||||
nvccopts += defines
|
||||
nvccopts += m_options
|
||||
nvccopts += fatbin_options
|
||||
nvccopts += ['--compiler-options="' + " ".join(host_compiler_options) + '"']
|
||||
nvccopts += ['-x', 'cu'] + opt + includes + out + ['-c'] + src_files
|
||||
# Specify a unique temp directory for nvcc to generate intermediate files,
|
||||
|
|
|
@ -62,7 +62,7 @@ def GetOptionValue(argv, option):
|
|||
|
||||
Args:
|
||||
argv: A list of strings, possibly the argv passed to main().
|
||||
option: The option whose value to extract, without the leading '-'.
|
||||
option: The option whose value to extract, with the leading '-'.
|
||||
|
||||
Returns:
|
||||
A list of values, either directly following the option,
|
||||
|
@ -189,6 +189,8 @@ def InvokeNvcc(argv, log=False):
|
|||
nvcc_allowed_std_options = ["c++03", "c++11", "c++14"]
|
||||
std_options = ''.join([' -std=' + define
|
||||
for define in std_options if define in nvcc_allowed_std_options][-1:])
|
||||
fatbin_options = ''.join([' --fatbin-options=' + option
|
||||
for option in GetOptionValue(argv, '-Xcuda-fatbinary')])
|
||||
|
||||
# The list of source files get passed after the -c option. I don't know of
|
||||
# any other reliable way to just get the list of source files to be compiled.
|
||||
|
@ -233,6 +235,7 @@ def InvokeNvcc(argv, log=False):
|
|||
nvccopts += std_options
|
||||
nvccopts += m_options
|
||||
nvccopts += warning_options
|
||||
nvccopts += fatbin_options
|
||||
|
||||
if depfiles:
|
||||
# Generate the dependency file
|
||||
|
|
Loading…
Reference in New Issue