Compress CUDA kernel binaries (CUBINs).

Impact:
 -32MB wheel size
-600MB _pywrap_tensorflow_internal.so
 -70MB memory during startup
+120ms startup time
PiperOrigin-RevId: 315419813
Change-Id: I2c39a88d95a4aa3a692560c8e3d78b125e8445c9
This commit is contained in:
Christian Sigg 2020-06-08 22:16:33 -07:00 committed by TensorFlower Gardener
parent 7b0071d954
commit 0266394d50
6 changed files with 27 additions and 10 deletions

View File

@ -62,7 +62,7 @@ def GetOptionValue(argv, option):
Args:
argv: A list of strings, possibly the argv passed to main().
option: The option whose value to extract, without the leading '-'.
option: The option whose value to extract, with the leading '-'.
Returns:
A list of values, either directly following the option,
@ -189,6 +189,8 @@ def InvokeNvcc(argv, log=False):
nvcc_allowed_std_options = ["c++03", "c++11", "c++14"]
std_options = ''.join([' -std=' + define
for define in std_options if define in nvcc_allowed_std_options][-1:])
fatbin_options = ''.join([' --fatbin-options=' + option
for option in GetOptionValue(argv, '-Xcuda-fatbinary')])
# The list of source files gets passed after the -c option. I don't know of
# any other reliable way to just get the list of source files to be compiled.
@ -233,6 +235,7 @@ def InvokeNvcc(argv, log=False):
nvccopts += std_options
nvccopts += m_options
nvccopts += warning_options
nvccopts += fatbin_options
if depfiles:
# Generate the dependency file

View File

@ -130,6 +130,9 @@ def InvokeNvcc(argv, log=False):
undefines, argv = GetOptionValue(argv, '/U')
undefines = ['-U' + define for define in undefines]
fatbin_options, argv = GetOptionValue(argv, '-Xcuda-fatbinary')
fatbin_options = ['--fatbin-options=' + option for option in fatbin_options]
# The rest of the unrecognized options should be passed to the host compiler
host_compiler_options = [option for option in argv if option not in (src_files + out_file)]
@ -154,6 +157,7 @@ def InvokeNvcc(argv, log=False):
nvccopts += undefines
nvccopts += defines
nvccopts += m_options
nvccopts += fatbin_options
nvccopts += ['--compiler-options="' + " ".join(host_compiler_options) + '"']
nvccopts += ['-x', 'cu'] + opt + includes + out + ['-c'] + src_files
# Specify a unique temp directory for nvcc to generate intermediate files,

View File

@ -40,12 +40,15 @@ def if_cuda_clang_opt(if_true, if_false = []):
def cuda_default_copts():
"""Default options for all CUDA compilations."""
return if_cuda(
["-x", "cuda", "-DGOOGLE_CUDA=1"]
) + if_cuda_clang_opt(
return if_cuda([
"-x", "cuda",
"-DGOOGLE_CUDA=1",
"-Xcuda-fatbinary=--compress-all",
"--no-cuda-include-ptx=all"
] + %{cuda_extra_copts}) + if_cuda_clang_opt(
# Some important CUDA optimizations are only enabled at O3.
["-O3"]
) + %{cuda_extra_copts}
)
def cuda_is_configured():
"""Returns true if CUDA was enabled during the configure process."""

View File

@ -905,14 +905,14 @@ def _tf_sysroot(repository_ctx):
return get_host_environ(repository_ctx, _TF_SYSROOT, "")
def _compute_cuda_extra_copts(repository_ctx, compute_capabilities):
capability_flags = ["--no-cuda-include-ptx=all"]
copts = []
for capability in compute_capabilities:
if capability.startswith("compute_"):
capability = capability.replace("compute_", "sm_")
capability_flags.append("--cuda-include-ptx=%s" % capability)
capability_flags.append("--cuda-gpu-arch=%s" % capability)
copts.append("--cuda-include-ptx=%s" % capability)
copts.append("--cuda-gpu-arch=%s" % capability)
return str(capability_flags)
return str(copts)
def _tpl_path(repository_ctx, filename):
return repository_ctx.path(Label("//third_party/gpus/%s.tpl" % filename))

View File

@ -130,6 +130,9 @@ def InvokeNvcc(argv, log=False):
undefines, argv = GetOptionValue(argv, '/U')
undefines = ['-U' + define for define in undefines]
fatbin_options, argv = GetOptionValue(argv, '-Xcuda-fatbinary')
fatbin_options = ['--fatbin-options=' + option for option in fatbin_options]
# The rest of the unrecognized options should be passed to the host compiler
host_compiler_options = [option for option in argv if option not in (src_files + out_file)]
@ -154,6 +157,7 @@ def InvokeNvcc(argv, log=False):
nvccopts += undefines
nvccopts += defines
nvccopts += m_options
nvccopts += fatbin_options
nvccopts += ['--compiler-options="' + " ".join(host_compiler_options) + '"']
nvccopts += ['-x', 'cu'] + opt + includes + out + ['-c'] + src_files
# Specify a unique temp directory for nvcc to generate intermediate files,

View File

@ -62,7 +62,7 @@ def GetOptionValue(argv, option):
Args:
argv: A list of strings, possibly the argv passed to main().
option: The option whose value to extract, without the leading '-'.
option: The option whose value to extract, with the leading '-'.
Returns:
A list of values, either directly following the option,
@ -189,6 +189,8 @@ def InvokeNvcc(argv, log=False):
nvcc_allowed_std_options = ["c++03", "c++11", "c++14"]
std_options = ''.join([' -std=' + define
for define in std_options if define in nvcc_allowed_std_options][-1:])
fatbin_options = ''.join([' --fatbin-options=' + option
for option in GetOptionValue(argv, '-Xcuda-fatbinary')])
# The list of source files gets passed after the -c option. I don't know of
# any other reliable way to just get the list of source files to be compiled.
@ -233,6 +235,7 @@ def InvokeNvcc(argv, log=False):
nvccopts += std_options
nvccopts += m_options
nvccopts += warning_options
nvccopts += fatbin_options
if depfiles:
# Generate the dependency file