Provide NVIDIA CUDA build data in metadata and API

This change:

First exposes //third_party/gpus:find_cuda_config as a library.

Then, it extends gen_build_info.py with find_cuda_config to provide package
build information within TensorFlow's API. This is accessible as a
dictionary:

    from tensorflow.python.platform import build_info
    print(build_info.cuda_build_info)
    {'cuda_version': '10.2', 'cudnn_version': '7', 'tensorrt_version': None, 'nccl_version': None}

Finally, setup.py pulls that into package metadata. The same wheel's
long description ends with:

    TensorFlow 2.1.0 for NVIDIA GPUs was built with these platform
    and library versions:

      - NVIDIA CUDA 10.2
      - NVIDIA cuDNN 7
      - NVIDIA NCCL not enabled
      - NVIDIA TensorRT not enabled

In lieu of NVIDIA CUDA classifiers [1], the same metadata is exposed in the
normally-unused "platform" tag:

    >>> import pkginfo
    >>> a = pkginfo.Wheel('./tf_nightly_gpu-2.1.0-cp36-cp36m-linux_x86_64.whl')
    >>> a.platforms
    ['cuda_version:10.2', 'cudnn_version:7', 'tensorrt_version:None', 'nccl_version:None']

I'm not 100% confident this is the best way to accomplish this. It
seems odd to import build_info into setup.py like this, even though it
works, even in an environment with TensorFlow installed.

One caveat for RBE: the contents of genrules still run on the local
system, so I had to synchronize my local environment with the RBE
environment I used to build TensorFlow. I'm not sure if this is going to
require intervention on TensorFlow's current CI.

Currently tested only on Linux GPU (Remote Build) for Python 3.6. I'd
like to see more tests before merging.

[1]: https://github.com/pypa/trove-classifiers/issues/25
This commit is contained in:
Austin Anderson 2020-04-27 16:50:08 -07:00
parent 06473adb2d
commit 2c71fe1ff3
4 changed files with 77 additions and 25 deletions
tensorflow/tools
build_info
pip_package
third_party/gpus

View File

@ -15,5 +15,6 @@ py_binary(
tags = ["no-remote-exec"],
deps = [
"@six_archive//:six",
"//third_party/gpus:find_cuda_config",
],
)

View File

@ -1,4 +1,4 @@
# Lint as: python2, python3
# Lint as: python3
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -19,9 +19,14 @@ from __future__ import division
from __future__ import print_function
import argparse
import os
import platform
import sys
import six
from third_party.gpus import find_cuda_config
def write_build_info(filename, is_config_cuda, is_config_rocm, key_value_list):
"""Writes a Python that describes the build.
@ -61,7 +66,31 @@ def write_build_info(filename, is_config_cuda, is_config_rocm, key_value_list):
key_value_pair_stmts.append("%s = %r" % (key, value))
key_value_pair_content = "\n".join(key_value_pair_stmts)
contents = """
# Generate cuda_build_info, a dict describing the CUDA component versions
# used to build TensorFlow.
cuda_build_info = "{}"
if is_config_cuda == "True":
libs = ["_", "cuda", "cudnn"]
if platform.system() == "Linux":
if os.environ.get("TF_NEED_TENSORRT", "0") == "1":
libs.append("tensorrt")
if "TF_NCCL_VERSION" in os.environ:
libs.append("nccl")
# find_cuda_config accepts libraries to inspect as argv from the command
# line. We can work around this restriction by setting argv manually
# before calling find_cuda_config.
backup_argv = sys.argv
sys.argv = libs
cuda = find_cuda_config.find_cuda_config()
cuda_build_info = str({
"cuda_version": cuda["cuda_version"],
"cudnn_version": cuda["cudnn_version"],
"tensorrt_version": cuda.get("tensorrt_version", None),
"nccl_version": cuda.get("nccl_version", None),
})
sys.argv = backup_argv
contents = f"""
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -76,17 +105,16 @@ def write_build_info(filename, is_config_cuda, is_config_rocm, key_value_list):
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
%s
{module_docstring}
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
is_rocm_build = %s
is_cuda_build = %s
is_rocm_build = {build_config_rocm_bool}
is_cuda_build = {build_config_cuda_bool}
cuda_build_info = {cuda_build_info}
%s
""" % (module_docstring, build_config_rocm_bool, build_config_cuda_bool,
key_value_pair_content)
"""
open(filename, "w").write(contents)

View File

@ -1,3 +1,4 @@
# lint as: python3
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -43,6 +44,8 @@ from setuptools import setup
from setuptools.command.install import install as InstallCommandBase
from setuptools.dist import Distribution
from tensorflow.python.platform import build_info
DOCLINES = __doc__.split('\n')
# This version string is semver compatible, but incompatible with pip.
@ -82,6 +85,20 @@ REQUIRED_PACKAGES = [
'scipy == 1.2.2;python_version<"3"',
]
# Human-readable summary of the NVIDIA libraries this GPU package was built
# against; appended to the wheel's long description below.
GPU_DESCRIPTION = ''
if build_info.is_cuda_build:
  gpu_header = (f'\nTensorFlow {_VERSION} for NVIDIA GPUs was built with these '
                'platform and library versions:\n\n - ')
  cbi = build_info.cuda_build_info
  trt_ver = cbi['tensorrt_version']
  nccl_ver = cbi['nccl_version']
  # NOTE: the conditional expressions must be parenthesized — '+' binds
  # tighter than 'if/else', so without parentheses the 'NVIDIA NCCL ' /
  # 'NVIDIA TensorRT ' prefix is dropped whenever the version is set.
  GPU_DESCRIPTION = gpu_header + '\n - '.join([
      'NVIDIA CUDA ' + cbi['cuda_version'],
      'NVIDIA cuDNN ' + cbi['cudnn_version'],
      'NVIDIA NCCL ' + ('not enabled' if not nccl_ver else nccl_ver),
      'NVIDIA TensorRT ' + ('not enabled' if not trt_ver else trt_ver),
  ])
if sys.byteorder == 'little':
# grpcio does not build correctly on big-endian machines due to lack of
# BoringSSL support.
@ -117,7 +134,8 @@ CONSOLE_SCRIPTS = [
# even though the command is not removed, just moved to a different wheel.
'tensorboard = tensorboard.main:run_main',
'tf_upgrade_v2 = tensorflow.tools.compatibility.tf_upgrade_v2_main:main',
'estimator_ckpt_converter = tensorflow_estimator.python.estimator.tools.checkpoint_converter:main',
'estimator_ckpt_converter = '
'tensorflow_estimator.python.estimator.tools.checkpoint_converter:main',
]
# pylint: enable=line-too-long
@ -161,11 +179,10 @@ class InstallHeaders(Command):
"""
description = 'install C/C++ header files'
user_options = [('install-dir=', 'd',
'directory to install header files to'),
('force', 'f',
'force installation (overwrite existing files)'),
]
user_options = [
('install-dir=', 'd', 'directory to install header files to'),
('force', 'f', 'force installation (overwrite existing files)'),
]
boolean_options = ['force']
@ -175,8 +192,7 @@ class InstallHeaders(Command):
self.outfiles = []
def finalize_options(self):
self.set_undefined_options('install',
('install_headers', 'install_dir'),
self.set_undefined_options('install', ('install_headers', 'install_dir'),
('force', 'force'))
def mkdir_and_copy_file(self, header):
@ -236,9 +252,7 @@ so_lib_paths = [
matches = []
for path in so_lib_paths:
matches.extend(
['../' + x for x in find_files('*', path) if '.py' not in x]
)
matches.extend(['../' + x for x in find_files('*', path) if '.py' not in x])
if os.name == 'nt':
EXTENSION_NAME = 'python/_pywrap_tensorflow_internal.pyd'
@ -257,17 +271,16 @@ headers = (
list(find_files('*.h', 'tensorflow/stream_executor')) +
list(find_files('*.h', 'google/com_google_protobuf/src')) +
list(find_files('*.inc', 'google/com_google_protobuf/src')) +
list(find_files('*', 'third_party/eigen3')) + list(
find_files('*.h', 'tensorflow/include/external/com_google_absl')) +
list(
find_files('*.inc', 'tensorflow/include/external/com_google_absl'))
+ list(find_files('*', 'tensorflow/include/external/eigen_archive')))
list(find_files('*', 'third_party/eigen3')) +
list(find_files('*.h', 'tensorflow/include/external/com_google_absl')) +
list(find_files('*.inc', 'tensorflow/include/external/com_google_absl')) +
list(find_files('*', 'tensorflow/include/external/eigen_archive')))
setup(
name=project_name,
version=_VERSION.replace('-', ''),
description=DOCLINES[0],
long_description='\n'.join(DOCLINES[2:]),
long_description='\n'.join(DOCLINES[2:]) + GPU_DESCRIPTION,
url='https://www.tensorflow.org/',
download_url='https://github.com/tensorflow/tensorflow/tags',
author='Google Inc.',
@ -288,6 +301,10 @@ setup(
] + matches,
},
zip_safe=False,
# Accessible with importlib.metadata.metadata('tf-pkg-name').items()
platforms=[
f'{key}:{value}' for key, value in build_info.cuda_build_info.items()
],
distclass=BinaryDistribution,
cmdclass={
'install_headers': InstallHeaders,

View File

@ -0,0 +1,6 @@
# Expose find_cuda_config.py as a library so other tools can reference it.
py_library(
name = "find_cuda_config",
srcs = ["find_cuda_config.py"],
visibility = ["//visibility:public"],
)