Compress CUDA kernel binaries (CUBINs).
Impact:
  -32MB wheel size
  -600MB _pywrap_tensorflow_internal.so
  -70MB memory during startup
  +120ms startup time

PiperOrigin-RevId: 315419813
Change-Id: I2c39a88d95a4aa3a692560c8e3d78b125e8445c9
This commit is contained in:
parent
7b0071d954
commit
0266394d50
|
@ -62,7 +62,7 @@ def GetOptionValue(argv, option):
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
argv: A list of strings, possibly the argv passed to main().
|
argv: A list of strings, possibly the argv passed to main().
|
||||||
option: The option whose value to extract, without the leading '-'.
|
option: The option whose value to extract, with the leading '-'.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A list of values, either directly following the option,
|
A list of values, either directly following the option,
|
||||||
|
@ -189,6 +189,8 @@ def InvokeNvcc(argv, log=False):
|
||||||
nvcc_allowed_std_options = ["c++03", "c++11", "c++14"]
|
nvcc_allowed_std_options = ["c++03", "c++11", "c++14"]
|
||||||
std_options = ''.join([' -std=' + define
|
std_options = ''.join([' -std=' + define
|
||||||
for define in std_options if define in nvcc_allowed_std_options][-1:])
|
for define in std_options if define in nvcc_allowed_std_options][-1:])
|
||||||
|
fatbin_options = ''.join([' --fatbin-options=' + option
|
||||||
|
for option in GetOptionValue(argv, '-Xcuda-fatbinary')])
|
||||||
|
|
||||||
# The list of source files get passed after the -c option. I don't know of
|
# The list of source files get passed after the -c option. I don't know of
|
||||||
# any other reliable way to just get the list of source files to be compiled.
|
# any other reliable way to just get the list of source files to be compiled.
|
||||||
|
@ -233,6 +235,7 @@ def InvokeNvcc(argv, log=False):
|
||||||
nvccopts += std_options
|
nvccopts += std_options
|
||||||
nvccopts += m_options
|
nvccopts += m_options
|
||||||
nvccopts += warning_options
|
nvccopts += warning_options
|
||||||
|
nvccopts += fatbin_options
|
||||||
|
|
||||||
if depfiles:
|
if depfiles:
|
||||||
# Generate the dependency file
|
# Generate the dependency file
|
||||||
|
|
|
@ -130,6 +130,9 @@ def InvokeNvcc(argv, log=False):
|
||||||
undefines, argv = GetOptionValue(argv, '/U')
|
undefines, argv = GetOptionValue(argv, '/U')
|
||||||
undefines = ['-U' + define for define in undefines]
|
undefines = ['-U' + define for define in undefines]
|
||||||
|
|
||||||
|
fatbin_options, argv = GetOptionValue(argv, '-Xcuda-fatbinary')
|
||||||
|
fatbin_options = ['--fatbin-options=' + option for option in fatbin_options]
|
||||||
|
|
||||||
# The rest of the unrecognized options should be passed to host compiler
|
# The rest of the unrecognized options should be passed to host compiler
|
||||||
host_compiler_options = [option for option in argv if option not in (src_files + out_file)]
|
host_compiler_options = [option for option in argv if option not in (src_files + out_file)]
|
||||||
|
|
||||||
|
@ -154,6 +157,7 @@ def InvokeNvcc(argv, log=False):
|
||||||
nvccopts += undefines
|
nvccopts += undefines
|
||||||
nvccopts += defines
|
nvccopts += defines
|
||||||
nvccopts += m_options
|
nvccopts += m_options
|
||||||
|
nvccopts += fatbin_options
|
||||||
nvccopts += ['--compiler-options="' + " ".join(host_compiler_options) + '"']
|
nvccopts += ['--compiler-options="' + " ".join(host_compiler_options) + '"']
|
||||||
nvccopts += ['-x', 'cu'] + opt + includes + out + ['-c'] + src_files
|
nvccopts += ['-x', 'cu'] + opt + includes + out + ['-c'] + src_files
|
||||||
# Specify a unique temp directory for nvcc to generate intermediate files,
|
# Specify a unique temp directory for nvcc to generate intermediate files,
|
||||||
|
|
|
@ -40,12 +40,15 @@ def if_cuda_clang_opt(if_true, if_false = []):
|
||||||
|
|
||||||
def cuda_default_copts():
|
def cuda_default_copts():
|
||||||
"""Default options for all CUDA compilations."""
|
"""Default options for all CUDA compilations."""
|
||||||
return if_cuda(
|
return if_cuda([
|
||||||
["-x", "cuda", "-DGOOGLE_CUDA=1"]
|
"-x", "cuda",
|
||||||
) + if_cuda_clang_opt(
|
"-DGOOGLE_CUDA=1",
|
||||||
|
"-Xcuda-fatbinary=--compress-all",
|
||||||
|
"--no-cuda-include-ptx=all"
|
||||||
|
] + %{cuda_extra_copts}) + if_cuda_clang_opt(
|
||||||
# Some important CUDA optimizations are only enabled at O3.
|
# Some important CUDA optimizations are only enabled at O3.
|
||||||
["-O3"]
|
["-O3"]
|
||||||
) + %{cuda_extra_copts}
|
)
|
||||||
|
|
||||||
def cuda_is_configured():
|
def cuda_is_configured():
|
||||||
"""Returns true if CUDA was enabled during the configure process."""
|
"""Returns true if CUDA was enabled during the configure process."""
|
||||||
|
|
|
@ -905,14 +905,14 @@ def _tf_sysroot(repository_ctx):
|
||||||
return get_host_environ(repository_ctx, _TF_SYSROOT, "")
|
return get_host_environ(repository_ctx, _TF_SYSROOT, "")
|
||||||
|
|
||||||
def _compute_cuda_extra_copts(repository_ctx, compute_capabilities):
|
def _compute_cuda_extra_copts(repository_ctx, compute_capabilities):
|
||||||
capability_flags = ["--no-cuda-include-ptx=all"]
|
copts = []
|
||||||
for capability in compute_capabilities:
|
for capability in compute_capabilities:
|
||||||
if capability.startswith("compute_"):
|
if capability.startswith("compute_"):
|
||||||
capability = capability.replace("compute_", "sm_")
|
capability = capability.replace("compute_", "sm_")
|
||||||
capability_flags.append("--cuda-include-ptx=%s" % capability)
|
copts.append("--cuda-include-ptx=%s" % capability)
|
||||||
capability_flags.append("--cuda-gpu-arch=%s" % capability)
|
copts.append("--cuda-gpu-arch=%s" % capability)
|
||||||
|
|
||||||
return str(capability_flags)
|
return str(copts)
|
||||||
|
|
||||||
def _tpl_path(repository_ctx, filename):
|
def _tpl_path(repository_ctx, filename):
|
||||||
return repository_ctx.path(Label("//third_party/gpus/%s.tpl" % filename))
|
return repository_ctx.path(Label("//third_party/gpus/%s.tpl" % filename))
|
||||||
|
|
|
@ -130,6 +130,9 @@ def InvokeNvcc(argv, log=False):
|
||||||
undefines, argv = GetOptionValue(argv, '/U')
|
undefines, argv = GetOptionValue(argv, '/U')
|
||||||
undefines = ['-U' + define for define in undefines]
|
undefines = ['-U' + define for define in undefines]
|
||||||
|
|
||||||
|
fatbin_options, argv = GetOptionValue(argv, '-Xcuda-fatbinary')
|
||||||
|
fatbin_options = ['--fatbin-options=' + option for option in fatbin_options]
|
||||||
|
|
||||||
# The rest of the unrecognized options should be passed to host compiler
|
# The rest of the unrecognized options should be passed to host compiler
|
||||||
host_compiler_options = [option for option in argv if option not in (src_files + out_file)]
|
host_compiler_options = [option for option in argv if option not in (src_files + out_file)]
|
||||||
|
|
||||||
|
@ -154,6 +157,7 @@ def InvokeNvcc(argv, log=False):
|
||||||
nvccopts += undefines
|
nvccopts += undefines
|
||||||
nvccopts += defines
|
nvccopts += defines
|
||||||
nvccopts += m_options
|
nvccopts += m_options
|
||||||
|
nvccopts += fatbin_options
|
||||||
nvccopts += ['--compiler-options="' + " ".join(host_compiler_options) + '"']
|
nvccopts += ['--compiler-options="' + " ".join(host_compiler_options) + '"']
|
||||||
nvccopts += ['-x', 'cu'] + opt + includes + out + ['-c'] + src_files
|
nvccopts += ['-x', 'cu'] + opt + includes + out + ['-c'] + src_files
|
||||||
# Specify a unique temp directory for nvcc to generate intermediate files,
|
# Specify a unique temp directory for nvcc to generate intermediate files,
|
||||||
|
|
|
@ -62,7 +62,7 @@ def GetOptionValue(argv, option):
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
argv: A list of strings, possibly the argv passed to main().
|
argv: A list of strings, possibly the argv passed to main().
|
||||||
option: The option whose value to extract, without the leading '-'.
|
option: The option whose value to extract, with the leading '-'.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A list of values, either directly following the option,
|
A list of values, either directly following the option,
|
||||||
|
@ -189,6 +189,8 @@ def InvokeNvcc(argv, log=False):
|
||||||
nvcc_allowed_std_options = ["c++03", "c++11", "c++14"]
|
nvcc_allowed_std_options = ["c++03", "c++11", "c++14"]
|
||||||
std_options = ''.join([' -std=' + define
|
std_options = ''.join([' -std=' + define
|
||||||
for define in std_options if define in nvcc_allowed_std_options][-1:])
|
for define in std_options if define in nvcc_allowed_std_options][-1:])
|
||||||
|
fatbin_options = ''.join([' --fatbin-options=' + option
|
||||||
|
for option in GetOptionValue(argv, '-Xcuda-fatbinary')])
|
||||||
|
|
||||||
# The list of source files get passed after the -c option. I don't know of
|
# The list of source files get passed after the -c option. I don't know of
|
||||||
# any other reliable way to just get the list of source files to be compiled.
|
# any other reliable way to just get the list of source files to be compiled.
|
||||||
|
@ -233,6 +235,7 @@ def InvokeNvcc(argv, log=False):
|
||||||
nvccopts += std_options
|
nvccopts += std_options
|
||||||
nvccopts += m_options
|
nvccopts += m_options
|
||||||
nvccopts += warning_options
|
nvccopts += warning_options
|
||||||
|
nvccopts += fatbin_options
|
||||||
|
|
||||||
if depfiles:
|
if depfiles:
|
||||||
# Generate the dependency file
|
# Generate the dependency file
|
||||||
|
|
Loading…
Reference in New Issue