diff --git a/configure.py b/configure.py index 2d919d8d5a2..c95b57fecd5 100644 --- a/configure.py +++ b/configure.py @@ -862,154 +862,41 @@ def reformat_version_sequence(version_str, sequence_count): return '.'.join(v[:sequence_count]) +def set_tf_cuda_paths(environ_cp): + """Set TF_CUDA_PATHS.""" + ask_cuda_paths = ( + 'Please specify the comma-separated list of base paths to look for CUDA ' + 'libraries and headers. [Leave empty to use the default]: ' + ) + tf_cuda_paths = get_from_env_or_user_or_default(environ_cp, 'TF_CUDA_PATHS', + ask_cuda_paths, + '') + if tf_cuda_paths: + environ_cp['TF_CUDA_PATHS'] = tf_cuda_paths + + def set_tf_cuda_version(environ_cp): - """Set CUDA_TOOLKIT_PATH and TF_CUDA_VERSION.""" + """Set TF_CUDA_VERSION.""" ask_cuda_version = ( 'Please specify the CUDA SDK version you want to use. ' '[Leave empty to default to CUDA %s]: ') % _DEFAULT_CUDA_VERSION - - for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): - # Configure the Cuda SDK version to use. - tf_cuda_version = get_from_env_or_user_or_default(environ_cp, - 'TF_CUDA_VERSION', - ask_cuda_version, - _DEFAULT_CUDA_VERSION) - tf_cuda_version = reformat_version_sequence(str(tf_cuda_version), 2) - - # Find out where the CUDA toolkit is installed - default_cuda_path = _DEFAULT_CUDA_PATH - if is_windows() or is_cygwin(): - default_cuda_path = cygpath( - environ_cp.get('CUDA_PATH', _DEFAULT_CUDA_PATH_WIN)) - elif is_linux(): - # If the default doesn't exist, try an alternative default. - if (not os.path.exists(default_cuda_path) - ) and os.path.exists(_DEFAULT_CUDA_PATH_LINUX): - default_cuda_path = _DEFAULT_CUDA_PATH_LINUX - ask_cuda_path = ('Please specify the location where CUDA %s toolkit is' - ' installed. Refer to README.md for more details. 
' - '[Default is %s]: ') % (tf_cuda_version, default_cuda_path) - cuda_toolkit_path = get_from_env_or_user_or_default(environ_cp, - 'CUDA_TOOLKIT_PATH', - ask_cuda_path, - default_cuda_path) - if is_windows() or is_cygwin(): - cuda_toolkit_path = cygpath(cuda_toolkit_path) - - if is_windows(): - cuda_rt_lib_paths = ['lib/x64/cudart.lib'] - elif is_linux(): - cuda_rt_lib_paths = [ - '%s/libcudart.so.%s' % (x, tf_cuda_version) for x in [ - 'lib64', - 'lib/powerpc64le-linux-gnu', - 'lib/x86_64-linux-gnu', - ] - ] - elif is_macos(): - cuda_rt_lib_paths = ['lib/libcudart.%s.dylib' % tf_cuda_version] - - cuda_toolkit_paths_full = [ - os.path.join(cuda_toolkit_path, x) for x in cuda_rt_lib_paths - ] - if any(os.path.exists(x) for x in cuda_toolkit_paths_full): - break - - # Reset and retry - print('Invalid path to CUDA %s toolkit. %s cannot be found' % - (tf_cuda_version, cuda_toolkit_paths_full)) - environ_cp['TF_CUDA_VERSION'] = '' - environ_cp['CUDA_TOOLKIT_PATH'] = '' - - else: - raise UserInputError('Invalid TF_CUDA_SETTING setting was provided %d ' - 'times in a row. Assuming to be a scripting mistake.' % - _DEFAULT_PROMPT_ASK_ATTEMPTS) - - # Set CUDA_TOOLKIT_PATH and TF_CUDA_VERSION - environ_cp['CUDA_TOOLKIT_PATH'] = cuda_toolkit_path - write_action_env_to_bazelrc('CUDA_TOOLKIT_PATH', cuda_toolkit_path) + tf_cuda_version = get_from_env_or_user_or_default(environ_cp, + 'TF_CUDA_VERSION', + ask_cuda_version, + _DEFAULT_CUDA_VERSION) environ_cp['TF_CUDA_VERSION'] = tf_cuda_version - write_action_env_to_bazelrc('TF_CUDA_VERSION', tf_cuda_version) def set_tf_cudnn_version(environ_cp): - """Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION.""" + """Set TF_CUDNN_VERSION.""" ask_cudnn_version = ( 'Please specify the cuDNN version you want to use. 
' '[Leave empty to default to cuDNN %s]: ') % _DEFAULT_CUDNN_VERSION - - for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): - tf_cudnn_version = get_from_env_or_user_or_default(environ_cp, - 'TF_CUDNN_VERSION', - ask_cudnn_version, - _DEFAULT_CUDNN_VERSION) - tf_cudnn_version = reformat_version_sequence(str(tf_cudnn_version), 1) - - default_cudnn_path = environ_cp.get('CUDA_TOOLKIT_PATH') - ask_cudnn_path = (r'Please specify the location where cuDNN %s library is ' - 'installed. Refer to README.md for more details. [Default' - ' is %s]: ') % (tf_cudnn_version, default_cudnn_path) - cudnn_install_path = get_from_env_or_user_or_default( - environ_cp, 'CUDNN_INSTALL_PATH', ask_cudnn_path, default_cudnn_path) - - # Result returned from "read" will be used unexpanded. That make "~" - # unusable. Going through one more level of expansion to handle that. - cudnn_install_path = os.path.realpath( - os.path.expanduser(cudnn_install_path)) - if is_windows() or is_cygwin(): - cudnn_install_path = cygpath(cudnn_install_path) - - if is_windows(): - cuda_dnn_lib_path = 'lib/x64/cudnn.lib' - cuda_dnn_lib_alt_path = 'lib/x64/cudnn.lib' - elif is_linux(): - cuda_dnn_lib_path = 'lib64/libcudnn.so.%s' % tf_cudnn_version - cuda_dnn_lib_alt_path = 'libcudnn.so.%s' % tf_cudnn_version - elif is_macos(): - cuda_dnn_lib_path = 'lib/libcudnn.%s.dylib' % tf_cudnn_version - cuda_dnn_lib_alt_path = 'libcudnn.%s.dylib' % tf_cudnn_version - - cuda_dnn_lib_path_full = os.path.join(cudnn_install_path, cuda_dnn_lib_path) - cuda_dnn_lib_alt_path_full = os.path.join(cudnn_install_path, - cuda_dnn_lib_alt_path) - if os.path.exists(cuda_dnn_lib_path_full) or os.path.exists( - cuda_dnn_lib_alt_path_full): - break - - # Try another alternative for Linux - if is_linux(): - ldconfig_bin = which('ldconfig') or '/sbin/ldconfig' - cudnn_path_from_ldconfig = run_shell([ldconfig_bin, '-p']) - cudnn_path_from_ldconfig = re.search('.*libcudnn.so .* => (.*)', - cudnn_path_from_ldconfig) - if cudnn_path_from_ldconfig: - 
cudnn_path_from_ldconfig = cudnn_path_from_ldconfig.group(1) - if os.path.exists('%s.%s' % - (cudnn_path_from_ldconfig, tf_cudnn_version)): - cudnn_install_path = os.path.dirname(cudnn_path_from_ldconfig) - break - - # Reset and Retry - print( - 'Invalid path to cuDNN %s toolkit. None of the following files can be ' - 'found:' % tf_cudnn_version) - print(cuda_dnn_lib_path_full) - print(cuda_dnn_lib_alt_path_full) - if is_linux(): - print('%s.%s' % (cudnn_path_from_ldconfig, tf_cudnn_version)) - - environ_cp['TF_CUDNN_VERSION'] = '' - else: - raise UserInputError('Invalid TF_CUDNN setting was provided %d ' - 'times in a row. Assuming to be a scripting mistake.' % - _DEFAULT_PROMPT_ASK_ATTEMPTS) - - # Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION - environ_cp['CUDNN_INSTALL_PATH'] = cudnn_install_path - write_action_env_to_bazelrc('CUDNN_INSTALL_PATH', cudnn_install_path) + tf_cudnn_version = get_from_env_or_user_or_default(environ_cp, + 'TF_CUDNN_VERSION', + ask_cudnn_version, + _DEFAULT_CUDNN_VERSION) environ_cp['TF_CUDNN_VERSION'] = tf_cudnn_version - write_action_env_to_bazelrc('TF_CUDNN_VERSION', tf_cudnn_version) def is_cuda_compatible(lib, cuda_ver, cudnn_ver): @@ -1041,253 +928,38 @@ def is_cuda_compatible(lib, cuda_ver, cudnn_ver): return cudnn_ok and cuda_ok -def set_tf_tensorrt_install_path(environ_cp): - """Set TENSORRT_INSTALL_PATH and TF_TENSORRT_VERSION. - - Adapted from code contributed by Sami Kama (https://github.com/samikama). - - Args: - environ_cp: copy of the os.environ. - - Raises: - ValueError: if this method was called under non-Linux platform. - UserInputError: if user has provided invalid input multiple times. - """ +def set_tf_tensorrt_version(environ_cp): + """Set TF_TENSORRT_VERSION.""" if not is_linux(): raise ValueError('Currently TensorRT is only supported on Linux platform.') - # Ask user whether to add TensorRT support. + # Backwards compatibility: early-out when TF_NEED_TENSORRT=0. 
if str(int(get_var(environ_cp, 'TF_NEED_TENSORRT', 'TensorRT', False))) != '1': return - for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): - ask_tensorrt_path = (r'Please specify the location where TensorRT is ' - 'installed. [Default is %s]:') % ( - _DEFAULT_TENSORRT_PATH_LINUX) - trt_install_path = get_from_env_or_user_or_default( - environ_cp, 'TENSORRT_INSTALL_PATH', ask_tensorrt_path, - _DEFAULT_TENSORRT_PATH_LINUX) - - # Result returned from "read" will be used unexpanded. That make "~" - # unusable. Going through one more level of expansion to handle that. - trt_install_path = os.path.realpath(os.path.expanduser(trt_install_path)) - - def find_libs(search_path): - """Search for libnvinfer.so in "search_path".""" - fl = set() - if os.path.exists(search_path) and os.path.isdir(search_path): - fl.update([ - os.path.realpath(os.path.join(search_path, x)) - for x in os.listdir(search_path) - if 'libnvinfer.so' in x - ]) - return fl - - possible_files = find_libs(trt_install_path) - possible_files.update(find_libs(os.path.join(trt_install_path, 'lib'))) - possible_files.update(find_libs(os.path.join(trt_install_path, 'lib64'))) - cuda_ver = convert_version_to_int(environ_cp['TF_CUDA_VERSION']) - cudnn_ver = convert_version_to_int(environ_cp['TF_CUDNN_VERSION']) - nvinfer_pattern = re.compile('.*libnvinfer.so.?(.*)$') - highest_ver = [0, None, None] - - for lib_file in possible_files: - if is_cuda_compatible(lib_file, cuda_ver, cudnn_ver): - matches = nvinfer_pattern.search(lib_file) - if not matches.groups(): - continue - ver_str = matches.group(1) - ver = convert_version_to_int(ver_str) if len(ver_str) else 0 - if ver > highest_ver[0]: - highest_ver = [ver, ver_str, lib_file] - if highest_ver[1] is not None: - trt_install_path = os.path.dirname(highest_ver[2]) - tf_tensorrt_version = highest_ver[1] - break - - # Try another alternative from ldconfig. 
- ldconfig_bin = which('ldconfig') or '/sbin/ldconfig' - ldconfig_output = run_shell([ldconfig_bin, '-p']) - search_result = re.search('.*libnvinfer.so\\.?([0-9.]*).* => (.*)', - ldconfig_output) - if search_result: - libnvinfer_path_from_ldconfig = search_result.group(2) - if os.path.exists(libnvinfer_path_from_ldconfig): - if is_cuda_compatible(libnvinfer_path_from_ldconfig, cuda_ver, - cudnn_ver): - trt_install_path = os.path.dirname(libnvinfer_path_from_ldconfig) - tf_tensorrt_version = search_result.group(1) - break - - # Reset and Retry - if possible_files: - print('TensorRT libraries found in one the following directories', - 'are not compatible with selected cuda and cudnn installations') - print(trt_install_path) - print(os.path.join(trt_install_path, 'lib')) - print(os.path.join(trt_install_path, 'lib64')) - if search_result: - print(libnvinfer_path_from_ldconfig) - else: - print( - 'Invalid path to TensorRT. None of the following files can be found:') - print(trt_install_path) - print(os.path.join(trt_install_path, 'lib')) - print(os.path.join(trt_install_path, 'lib64')) - if search_result: - print(libnvinfer_path_from_ldconfig) - - else: - raise UserInputError('Invalid TF_TENSORRT setting was provided %d ' - 'times in a row. Assuming to be a scripting mistake.' % - _DEFAULT_PROMPT_ASK_ATTEMPTS) - - # Set TENSORRT_INSTALL_PATH and TF_TENSORRT_VERSION - environ_cp['TENSORRT_INSTALL_PATH'] = trt_install_path - write_action_env_to_bazelrc('TENSORRT_INSTALL_PATH', trt_install_path) + ask_tensorrt_version = ( + 'Please specify the TensorRT version you want to use. 
' + '[Leave empty to not use a specific version]: ') + tf_tensorrt_version = get_from_env_or_user_or_default(environ_cp, + 'TF_TENSORRT_VERSION', + ask_tensorrt_version, + '') environ_cp['TF_TENSORRT_VERSION'] = tf_tensorrt_version - write_action_env_to_bazelrc('TF_TENSORRT_VERSION', tf_tensorrt_version) -def set_tf_nccl_install_path(environ_cp): - """Set NCCL_INSTALL_PATH, NCCL_HDR_PATH and TF_NCCL_VERSION. - - Args: - environ_cp: copy of the os.environ. - - Raises: - ValueError: if this method was called under non-Linux platform. - UserInputError: if user has provided invalid input multiple times. - """ +def set_tf_nccl_version(environ_cp): + """Set TF_NCCL_VERSION.""" if not is_linux(): - raise ValueError('Currently NCCL is only supported on Linux platforms.') + raise ValueError('Currently NCCL is only supported on Linux platform.') ask_nccl_version = ( 'Please specify the locally installed NCCL version you want to use. ' - '[Default is to use https://github.com/nvidia/nccl]: ') - - for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): - tf_nccl_version = get_from_env_or_user_or_default(environ_cp, - 'TF_NCCL_VERSION', - ask_nccl_version, '') - - if not tf_nccl_version: - break # No need to get install path, building the open source code. - - tf_nccl_version = reformat_version_sequence(str(tf_nccl_version), 1) - - # Look with ldconfig first if we can find the library in paths - # like /usr/lib/x86_64-linux-gnu and the header file in the corresponding - # include directory. This is where the NCCL .deb packages install them. - - # First check to see if NCCL is in the ldconfig. - # If its found, use that location. 
- if is_linux(): - ldconfig_bin = which('ldconfig') or '/sbin/ldconfig' - nccl2_path_from_ldconfig = run_shell([ldconfig_bin, '-p']) - nccl2_path_from_ldconfig = re.search('.*libnccl.so .* => (.*)', - nccl2_path_from_ldconfig) - if nccl2_path_from_ldconfig: - nccl2_path_from_ldconfig = nccl2_path_from_ldconfig.group(1) - if os.path.exists('%s.%s' % (nccl2_path_from_ldconfig, tf_nccl_version)): - nccl_install_path = os.path.dirname(nccl2_path_from_ldconfig) - print('NCCL libraries found in ' + nccl2_path_from_ldconfig) - - # Check if this is the main system lib location - if re.search('.*linux-gnu', nccl_install_path): - trunc_nccl_install_path = '/usr' - print('This looks like a system path.') - else: - trunc_nccl_install_path = nccl_install_path + '/..' - - # Look for header - nccl_hdr_path = trunc_nccl_install_path + '/include' - print('Assuming NCCL header path is ' + nccl_hdr_path) - if os.path.exists(nccl_hdr_path + '/nccl.h'): - # Set NCCL_INSTALL_PATH - environ_cp['NCCL_INSTALL_PATH'] = nccl_install_path - write_action_env_to_bazelrc('NCCL_INSTALL_PATH', nccl_install_path) - - # Set NCCL_HDR_PATH - environ_cp['NCCL_HDR_PATH'] = nccl_hdr_path - write_action_env_to_bazelrc('NCCL_HDR_PATH', nccl_hdr_path) - break - else: - print( - 'The header for NCCL2 cannot be found. Please install the libnccl-dev package.' - ) - else: - print('NCCL2 is listed by ldconfig but the library is not found. ' - 'Your ldconfig is out of date. Please run sudo ldconfig.') - else: - # NCCL is not found in ldconfig. Ask the user for the location. - default_nccl_path = environ_cp.get('CUDA_TOOLKIT_PATH') - ask_nccl_path = ( - r'Please specify the location where NCCL %s library is ' - 'installed. Refer to README.md for more details. [Default ' - 'is %s]:') % (tf_nccl_version, default_nccl_path) - nccl_install_path = get_from_env_or_user_or_default( - environ_cp, 'NCCL_INSTALL_PATH', ask_nccl_path, default_nccl_path) - - # Result returned from "read" will be used unexpanded. 
That make "~" - # unusable. Going through one more level of expansion to handle that. - nccl_install_path = os.path.realpath( - os.path.expanduser(nccl_install_path)) - if is_windows() or is_cygwin(): - nccl_install_path = cygpath(nccl_install_path) - - nccl_lib_path = '' - if is_windows(): - nccl_lib_path = 'lib/x64/nccl.lib' - elif is_linux(): - nccl_lib_filename = 'libnccl.so.%s' % tf_nccl_version - nccl_lpath = '%s/lib/%s' % (nccl_install_path, nccl_lib_filename) - if not os.path.exists(nccl_lpath): - for relative_path in NCCL_LIB_PATHS: - path = '%s/%s%s' % (nccl_install_path, relative_path, - nccl_lib_filename) - if os.path.exists(path): - print('NCCL found at ' + path) - nccl_lib_path = path - break - else: - nccl_lib_path = nccl_lpath - elif is_macos(): - nccl_lib_path = 'lib/libnccl.%s.dylib' % tf_nccl_version - - nccl_lib_path = os.path.join(nccl_install_path, nccl_lib_path) - nccl_hdr_path = os.path.join( - os.path.dirname(nccl_lib_path), '../include/nccl.h') - print('Assuming NCCL header path is ' + nccl_hdr_path) - if os.path.exists(nccl_lib_path) and os.path.exists(nccl_hdr_path): - # Set NCCL_INSTALL_PATH - environ_cp['NCCL_INSTALL_PATH'] = os.path.dirname(nccl_lib_path) - write_action_env_to_bazelrc('NCCL_INSTALL_PATH', - os.path.dirname(nccl_lib_path)) - - # Set NCCL_HDR_PATH - environ_cp['NCCL_HDR_PATH'] = os.path.dirname(nccl_hdr_path) - write_action_env_to_bazelrc('NCCL_HDR_PATH', - os.path.dirname(nccl_hdr_path)) - break - - # Reset and Retry - print( - 'Invalid path to NCCL %s toolkit, %s or %s not found. Please use the ' - 'O/S agnostic package of NCCL 2' % - (tf_nccl_version, nccl_lib_path, nccl_hdr_path)) - - environ_cp['TF_NCCL_VERSION'] = '' - else: - raise UserInputError('Invalid TF_NCCL setting was provided %d ' - 'times in a row. Assuming to be a scripting mistake.' 
% - _DEFAULT_PROMPT_ASK_ATTEMPTS) - - # Set TF_NCCL_VERSION + '[Leave empty to use https://github.com/nvidia/nccl]: ') + tf_nccl_version = get_from_env_or_user_or_default(environ_cp, + 'TF_NCCL_VERSION', + ask_nccl_version, '') environ_cp['TF_NCCL_VERSION'] = tf_nccl_version - write_action_env_to_bazelrc('TF_NCCL_VERSION', tf_nccl_version) - def get_native_cuda_compute_capabilities(environ_cp): """Get native cuda compute capabilities. @@ -1720,11 +1392,69 @@ def main(): set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False) if (environ_cp.get('TF_NEED_CUDA') == '1' and 'TF_CUDA_CONFIG_REPO' not in environ_cp): - set_tf_cuda_version(environ_cp) - set_tf_cudnn_version(environ_cp) - if is_linux(): - set_tf_tensorrt_install_path(environ_cp) - set_tf_nccl_install_path(environ_cp) + for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): + set_tf_cuda_version(environ_cp) + set_tf_cudnn_version(environ_cp) + cuda_libraries = ['cuda', 'cudnn'] + if is_linux(): + set_tf_tensorrt_version(environ_cp) + if 'TF_TENSORRT_VERSION' in environ_cp: # if env variable exists + cuda_libraries.append('tensorrt') + set_tf_nccl_version(environ_cp) + if environ_cp['TF_NCCL_VERSION']: # if env variable not empty + cuda_libraries.append('nccl') + + def maybe_encode_env(env): + """Encodes unicode in env to str on Windows python 2.x.""" + if not is_windows() or sys.version_info[0] != 2: + return env + for k, v in env.items(): + if isinstance(k, unicode): + k = k.encode('ascii') + if isinstance(v, unicode): + v = v.encode('ascii') + env[k] = v + return env + + # Verify CUDA configuration by calling find_cuda_config.py. 
+ proc = subprocess.Popen( + [ + environ_cp['PYTHON_BIN_PATH'], + 'third_party/gpus/find_cuda_config.py' + ] + cuda_libraries, + stdout=subprocess.PIPE, + env=maybe_encode_env(environ_cp)) + + cuda_env_variable_names = [ + 'TF_CUDA_VERSION', 'TF_CUBLAS_VERSION', 'TF_CUDNN_VERSION', + 'TF_TENSORRT_VERSION', 'TF_NCCL_VERSION', 'TF_CUDA_PATHS' + ] + + if proc.wait(): + print('\nCould not find all requested CUDA libraries or headers.\n') + for name in cuda_env_variable_names: + if name in environ_cp: + del environ_cp[name] + set_tf_cuda_paths(environ_cp) + continue + + for line in proc.stdout: + match = re.match('cuda_toolkit_path: (.*)', line.decode('ascii')) + if match: + cuda_toolkit_path = match.group(1) + + for name in cuda_env_variable_names: + if name in environ_cp: + write_action_env_to_bazelrc(name, environ_cp[name]) + + break + else: + raise UserInputError( + 'Invalid CUDA setting were provided %d ' + 'times in a row. Assuming to be a scripting mistake.' % + _DEFAULT_PROMPT_ASK_ATTEMPTS) + + environ_cp['CUDA_TOOLKIT_PATH'] = cuda_toolkit_path set_tf_cuda_compute_capabilities(environ_cp) if 'LD_LIBRARY_PATH' in environ_cp and environ_cp.get( diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 2ad9ae42a16..44431d5010d 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -522,6 +522,7 @@ if (tensorflow_ENABLE_GPU) "#define CUDA_CUDA_CONFIG_H_\n" "#define TF_CUDA_CAPABILITIES ${TF_CUDA_CAP}\n" "#define TF_CUDA_VERSION \"64_${short_CUDA_VER}\"\n" + "#define TF_CUDA_LIB_VERSION \"64_${short_CUDA_VER}\"\n" "#define TF_CUDNN_VERSION \"64_${CUDNN_VERSION}\"\n" "#define TF_CUDA_TOOLKIT_PATH \"${CUDA_TOOLKIT_ROOT_DIR}\"\n" "#endif // CUDA_CUDA_CONFIG_H_\n" @@ -615,4 +616,4 @@ if(tensorflow_BUILD_SHARED_LIB) endif() if(tensorflow_BUILD_CC_TESTS OR tensorflow_BUILD_PYTHON_TESTS) include(tf_tests.cmake) -endif() \ No newline at end of file +endif() diff --git 
a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files index a2ceca57c8f..b6b9d4c171c 100644 --- a/tensorflow/opensource_only.files +++ b/tensorflow/opensource_only.files @@ -102,6 +102,7 @@ tensorflow/third_party/gpus/rocm/BUILD tensorflow/third_party/gpus/rocm/BUILD.tpl tensorflow/third_party/gpus/rocm/build_defs.bzl.tpl tensorflow/third_party/gpus/cuda_configure.bzl +tensorflow/third_party/gpus/find_cuda_config.py tensorflow/third_party/gpus/rocm_configure.bzl tensorflow/third_party/snappy.BUILD tensorflow/third_party/cython.BUILD diff --git a/tensorflow/stream_executor/platform/default/dso_loader.cc b/tensorflow/stream_executor/platform/default/dso_loader.cc index 5a3787340fa..ede91144ade 100644 --- a/tensorflow/stream_executor/platform/default/dso_loader.cc +++ b/tensorflow/stream_executor/platform/default/dso_loader.cc @@ -31,6 +31,7 @@ namespace internal { namespace { string GetCudaVersion() { return TF_CUDA_VERSION; } +string GetCudaLibVersion() { return TF_CUDA_LIB_VERSION; } string GetCudnnVersion() { return TF_CUDNN_VERSION; } port::StatusOr GetDsoHandle(const string& name, const string& version) { @@ -75,15 +76,15 @@ port::StatusOr GetCudaRuntimeDsoHandle() { } port::StatusOr GetCublasDsoHandle() { - return GetDsoHandle("cublas", GetCudaVersion()); + return GetDsoHandle("cublas", GetCudaLibVersion()); } port::StatusOr GetCufftDsoHandle() { - return GetDsoHandle("cufft", GetCudaVersion()); + return GetDsoHandle("cufft", GetCudaLibVersion()); } port::StatusOr GetCurandDsoHandle() { - return GetDsoHandle("curand", GetCudaVersion()); + return GetDsoHandle("curand", GetCudaLibVersion()); } port::StatusOr GetCuptiDsoHandle() { diff --git a/third_party/gpus/cuda/cuda_config.h.tpl b/third_party/gpus/cuda/cuda_config.h.tpl index 811b040e8c1..916315d8c3e 100644 --- a/third_party/gpus/cuda/cuda_config.h.tpl +++ b/third_party/gpus/cuda/cuda_config.h.tpl @@ -19,6 +19,7 @@ limitations under the License. 
#define TF_CUDA_CAPABILITIES %{cuda_compute_capabilities} #define TF_CUDA_VERSION "%{cuda_version}" +#define TF_CUDA_LIB_VERSION "%{cuda_lib_version}" #define TF_CUDNN_VERSION "%{cudnn_version}" #define TF_CUDA_TOOLKIT_PATH "%{cuda_toolkit_path}" diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index 872400b2d30..3e311436f03 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -11,12 +11,14 @@ * `TF_DOWNLOAD_CLANG`: Whether to download a recent release of clang compiler and use it to build tensorflow. When this option is set CLANG_CUDA_COMPILER_PATH is ignored. - * `CUDA_TOOLKIT_PATH`: The path to the CUDA toolkit. Default is + * `TF_CUDA_PATHS`: The base paths to look for CUDA and cuDNN. Default is + `/usr/local/cuda,usr/`. + * `CUDA_TOOLKIT_PATH` (deprecated): The path to the CUDA toolkit. Default is `/usr/local/cuda`. * `TF_CUDA_VERSION`: The version of the CUDA toolkit. If this is blank, then use the system default. * `TF_CUDNN_VERSION`: The version of the cuDNN library. - * `CUDNN_INSTALL_PATH`: The path to the cuDNN library. Default is + * `CUDNN_INSTALL_PATH` (deprecated): The path to the cuDNN library. Default is `/usr/local/cuda`. * `TF_CUDA_COMPUTE_CAPABILITIES`: The CUDA compute capabilities. Default is `3.5,5.2`. @@ -48,84 +50,8 @@ _TF_CUDA_CONFIG_REPO = "TF_CUDA_CONFIG_REPO" _TF_DOWNLOAD_CLANG = "TF_DOWNLOAD_CLANG" _PYTHON_BIN_PATH = "PYTHON_BIN_PATH" -_DEFAULT_CUDA_VERSION = "" -_DEFAULT_CUDNN_VERSION = "" -_DEFAULT_CUDA_TOOLKIT_PATH = "/usr/local/cuda" -_DEFAULT_CUDNN_INSTALL_PATH = "/usr/local/cuda" _DEFAULT_CUDA_COMPUTE_CAPABILITIES = ["3.5", "5.2"] -# Lookup paths for CUDA / cuDNN libraries, relative to the install directories. -# -# Paths will be tried out in the order listed below. The first successful path -# will be used. For example, when looking for the cudart libraries, the first -# attempt will be lib64/cudart inside the CUDA toolkit. 
-CUDA_LIB_PATHS = [ - "lib64/", - "lib64/stubs/", - "lib/powerpc64le-linux-gnu/", - "lib/x86_64-linux-gnu/", - "lib/x64/", - "lib/", - "", -] - -# Lookup paths for cupti.h, relative to the CUDA toolkit directory. -# -# On most systems, the cupti library is not installed in the same directory as -# the other CUDA libraries but rather in a special extras/CUPTI directory. -CUPTI_HEADER_PATHS = [ - "extras/CUPTI/include/", - "include/cuda/CUPTI/", - "include/", -] - -# Lookup paths for the cupti library, relative to the -# -# On most systems, the cupti library is not installed in the same directory as -# the other CUDA libraries but rather in a special extras/CUPTI directory. -CUPTI_LIB_PATHS = [ - "extras/CUPTI/lib64/", - "lib/powerpc64le-linux-gnu/", - "lib/x86_64-linux-gnu/", - "lib64/", - "extras/CUPTI/libx64/", - "extras/CUPTI/lib/", - "lib/", -] - -# Lookup paths for CUDA headers (cuda.h) relative to the CUDA toolkit directory. -CUDA_INCLUDE_PATHS = [ - "include/", - "include/cuda/", -] - -# Lookup paths for cudnn.h relative to the CUDNN install directory. -CUDNN_INCLUDE_PATHS = [ - "", - "include/", - "include/cuda/", -] - -# Lookup paths for NVVM libdevice relative to the CUDA directory toolkit. -# -# libdevice implements mathematical functions for GPU kernels, and is provided -# in NVVM bitcode (a subset of LLVM bitcode). -NVVM_LIBDEVICE_PATHS = [ - "nvvm/libdevice/", - "share/cuda/", - "lib/nvidia-cuda-toolkit/libdevice/", -] - -# Files used to detect the NVVM libdevice path. -NVVM_LIBDEVICE_FILES = [ - # CUDA 9.0 has a single file. - "libdevice.10.bc", - - # CUDA 8.0 has separate files for compute versions 2.0, 3.0, 3.5 and 5.0. - # Probing for one of them is sufficient. 
- "libdevice.compute_20.10.bc", -] - def _get_python_bin(repository_ctx): """Gets the python bin path.""" python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH) @@ -414,31 +340,6 @@ def enable_cuda(repository_ctx): return enable_cuda == "1" return False -def cuda_toolkit_path(repository_ctx): - """Finds the cuda toolkit directory. - - Args: - repository_ctx: The repository context. - - Returns: - A speculative real path of the cuda toolkit install directory. - """ - cuda_toolkit_path = _DEFAULT_CUDA_TOOLKIT_PATH - if _CUDA_TOOLKIT_PATH in repository_ctx.os.environ: - cuda_toolkit_path = repository_ctx.os.environ[_CUDA_TOOLKIT_PATH].strip() - if not repository_ctx.path(cuda_toolkit_path).exists: - auto_configure_fail("Cannot find cuda toolkit path.") - return str(repository_ctx.path(cuda_toolkit_path).realpath) - -def _cudnn_install_basedir(repository_ctx): - """Finds the cudnn install directory.""" - cudnn_install_path = _DEFAULT_CUDNN_INSTALL_PATH - if _CUDNN_INSTALL_PATH in repository_ctx.os.environ: - cudnn_install_path = repository_ctx.os.environ[_CUDNN_INSTALL_PATH].strip() - if not repository_ctx.path(cudnn_install_path).exists: - auto_configure_fail("Cannot find cudnn install path.") - return cudnn_install_path - def matches_version(environ_version, detected_version): """Checks whether the user-specified version matches the detected version. @@ -477,71 +378,7 @@ def matches_version(environ_version, detected_version): _NVCC_VERSION_PREFIX = "Cuda compilation tools, release " -def _cuda_version(repository_ctx, cuda_toolkit_path, cpu_value): - """Detects the version of CUDA installed on the system. - - Args: - repository_ctx: The repository context. - cuda_toolkit_path: The CUDA install directory. - - Returns: - String containing the version of CUDA. - """ - - # Run nvcc --version and find the line containing the CUDA version. 
- nvcc_path = repository_ctx.path("%s/bin/nvcc%s" % ( - cuda_toolkit_path, - ".exe" if cpu_value == "Windows" else "", - )) - if not nvcc_path.exists: - auto_configure_fail("Cannot find nvcc at %s" % str(nvcc_path)) - result = repository_ctx.execute([str(nvcc_path), "--version"]) - if result.stderr: - auto_configure_fail("Error running nvcc --version: %s" % result.stderr) - lines = result.stdout.splitlines() - version_line = lines[len(lines) - 1] - if version_line.find(_NVCC_VERSION_PREFIX) == -1: - auto_configure_fail( - "Could not parse CUDA version from nvcc --version. Got: %s" % - result.stdout, - ) - - # Parse the CUDA version from the line containing the CUDA version. - prefix_removed = version_line.replace(_NVCC_VERSION_PREFIX, "") - parts = prefix_removed.split(",") - if len(parts) != 2 or len(parts[0]) < 2: - auto_configure_fail( - "Could not parse CUDA version from nvcc --version. Got: %s" % - result.stdout, - ) - full_version = parts[1].strip() - if full_version.startswith("V"): - full_version = full_version[1:] - - # Check whether TF_CUDA_VERSION was set by the user and fail if it does not - # match the detected version. - environ_version = "" - if _TF_CUDA_VERSION in repository_ctx.os.environ: - environ_version = repository_ctx.os.environ[_TF_CUDA_VERSION].strip() - if environ_version and not matches_version(environ_version, full_version): - auto_configure_fail( - ("CUDA version detected from nvcc (%s) does not match " + - "TF_CUDA_VERSION (%s)") % (full_version, environ_version), - ) - - # We only use the version consisting of the major and minor version numbers. 
- version_parts = full_version.split(".") - if len(version_parts) < 2: - auto_configure_fail("CUDA version detected from nvcc (%s) is incomplete.") - if cpu_value == "Windows": - version = "64_%s%s" % (version_parts[0], version_parts[1]) - else: - version = "%s.%s" % (version_parts[0], version_parts[1]) - return version - _DEFINE_CUDNN_MAJOR = "#define CUDNN_MAJOR" -_DEFINE_CUDNN_MINOR = "#define CUDNN_MINOR" -_DEFINE_CUDNN_PATCHLEVEL = "#define CUDNN_PATCHLEVEL" def find_cuda_define(repository_ctx, header_dir, header_file, define): """Returns the value of a #define in a header file. @@ -611,60 +448,6 @@ def find_cuda_define(repository_ctx, header_dir, header_file, define): version = version[:version_end].strip() return version -def _cudnn_version(repository_ctx, cudnn_install_basedir, cpu_value): - """Detects the version of cuDNN installed on the system. - - Args: - repository_ctx: The repository context. - cpu_value: The name of the host operating system. - cudnn_install_basedir: The cuDNN install directory. - - Returns: - A string containing the version of cuDNN. - """ - cudnn_header_dir = _find_cudnn_header_dir( - repository_ctx, - cudnn_install_basedir, - ) - major_version = find_cuda_define( - repository_ctx, - cudnn_header_dir, - "cudnn.h", - _DEFINE_CUDNN_MAJOR, - ) - minor_version = find_cuda_define( - repository_ctx, - cudnn_header_dir, - "cudnn.h", - _DEFINE_CUDNN_MINOR, - ) - patch_version = find_cuda_define( - repository_ctx, - cudnn_header_dir, - "cudnn.h", - _DEFINE_CUDNN_PATCHLEVEL, - ) - full_version = "%s.%s.%s" % (major_version, minor_version, patch_version) - - # Check whether TF_CUDNN_VERSION was set by the user and fail if it does not - # match the detected version. 
- environ_version = "" - if _TF_CUDNN_VERSION in repository_ctx.os.environ: - environ_version = repository_ctx.os.environ[_TF_CUDNN_VERSION].strip() - if environ_version and not matches_version(environ_version, full_version): - cudnn_h_path = repository_ctx.path( - "%s/include/cudnn.h" % cudnn_install_basedir, - ) - auto_configure_fail(("cuDNN version detected from %s (%s) does not match " + - "TF_CUDNN_VERSION (%s)") % - (str(cudnn_h_path), full_version, environ_version)) - - # Only use the major version to match the SONAME of the library. - version = major_version - if cpu_value == "Windows": - version = "64_" + version - return version - def compute_capabilities(repository_ctx): """Returns a list of strings representing cuda compute capabilities.""" if _TF_CUDA_COMPUTE_CAPABILITIES not in repository_ctx.os.environ: @@ -699,7 +482,7 @@ def get_cpu_value(repository_ctx): def _is_windows(repository_ctx): """Returns true if the host operating system is windows.""" - return get_cpu_value(repository_ctx) == "Windows" + return repository_ctx.os.name.lower().find("windows") >= 0 def lib_name(base_name, cpu_value, version = None, static = False): """Constructs the platform-specific name of a library. @@ -777,57 +560,11 @@ def _find_cuda_lib( Returns the path to the library. """ file_name = lib_name(lib, cpu_value, version, static) - return find_lib(repository_ctx, [ - "%s/%s%s" % (basedir, path, file_name) - for path in CUDA_LIB_PATHS - ], check_soname = version and not static) - -def _find_cupti_header_dir(repository_ctx, cuda_config): - """Returns the path to the directory containing cupti.h - - On most systems, the cupti library is not installed in the same directory as - the other CUDA libraries but rather in a special extras/CUPTI directory. - - Args: - repository_ctx: The repository context. - cuda_config: The CUDA config as returned by _get_cuda_config - - Returns: - The path of the directory containing the cupti header. 
- """ - cuda_toolkit_path = cuda_config.cuda_toolkit_path - for relative_path in CUPTI_HEADER_PATHS: - if repository_ctx.path( - "%s/%scupti.h" % (cuda_toolkit_path, relative_path), - ).exists: - return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1] - auto_configure_fail("Cannot find cupti.h under %s" % ", ".join( - [cuda_toolkit_path + "/" + s for s in CUPTI_HEADER_PATHS], - )) - -def _find_cupti_lib(repository_ctx, cuda_config): - """Finds the cupti library on the system. - - On most systems, the cupti library is not installed in the same directory as - the other CUDA libraries but rather in a special extras/CUPTI directory. - - Args: - repository_ctx: The repository context. - cuda_config: The cuda configuration as returned by _get_cuda_config. - - Returns: - Returns the path to the library. - """ - file_name = lib_name( - "cupti", - cuda_config.cpu_value, - cuda_config.cuda_version, + return find_lib( + repository_ctx, + ["%s/%s" % (basedir, file_name)], + check_soname = version and not static, ) - basedir = cuda_config.cuda_toolkit_path - return find_lib(repository_ctx, [ - "%s/%s%s" % (basedir, path, file_name) - for path in CUPTI_LIB_PATHS - ]) def _find_libs(repository_ctx, cuda_config): """Returns the CUDA and cuDNN libraries on the system. @@ -840,26 +577,27 @@ def _find_libs(repository_ctx, cuda_config): Map of library names to structs of filename and path. 
""" cpu_value = cuda_config.cpu_value + stub_dir = "" if _is_windows(repository_ctx) else "/stubs" return { "cuda": _find_cuda_lib( "cuda", repository_ctx, cpu_value, - cuda_config.cuda_toolkit_path, + cuda_config.config["cuda_library_dir"] + stub_dir, None, ), "cudart": _find_cuda_lib( "cudart", repository_ctx, cpu_value, - cuda_config.cuda_toolkit_path, + cuda_config.config["cuda_library_dir"], cuda_config.cuda_version, ), "cudart_static": _find_cuda_lib( "cudart_static", repository_ctx, cpu_value, - cuda_config.cuda_toolkit_path, + cuda_config.config["cuda_library_dir"], cuda_config.cuda_version, static = True, ), @@ -867,112 +605,75 @@ def _find_libs(repository_ctx, cuda_config): "cublas", repository_ctx, cpu_value, - cuda_config.cuda_toolkit_path, - cuda_config.cuda_version, + cuda_config.config["cublas_library_dir"], + cuda_config.cuda_lib_version, ), "cusolver": _find_cuda_lib( "cusolver", repository_ctx, cpu_value, - cuda_config.cuda_toolkit_path, - cuda_config.cuda_version, + cuda_config.config["cuda_library_dir"], + cuda_config.cuda_lib_version, ), "curand": _find_cuda_lib( "curand", repository_ctx, cpu_value, - cuda_config.cuda_toolkit_path, - cuda_config.cuda_version, + cuda_config.config["cuda_library_dir"], + cuda_config.cuda_lib_version, ), "cufft": _find_cuda_lib( "cufft", repository_ctx, cpu_value, - cuda_config.cuda_toolkit_path, - cuda_config.cuda_version, + cuda_config.config["cuda_library_dir"], + cuda_config.cuda_lib_version, ), "cudnn": _find_cuda_lib( "cudnn", repository_ctx, cpu_value, - cuda_config.cudnn_install_basedir, + cuda_config.config["cudnn_library_dir"], cuda_config.cudnn_version, ), - "cupti": _find_cupti_lib(repository_ctx, cuda_config), + "cupti": _find_cuda_lib( + "cupti", + repository_ctx, + cpu_value, + cuda_config.config["cupti_library_dir"], + cuda_config.cuda_version, + ), "cusparse": _find_cuda_lib( "cusparse", repository_ctx, cpu_value, - cuda_config.cuda_toolkit_path, - cuda_config.cuda_version, + 
cuda_config.config["cuda_library_dir"], + cuda_config.cuda_lib_version, ), } -def _find_cuda_include_path(repository_ctx, cuda_config): - """Returns the path to the directory containing cuda.h - - Args: - repository_ctx: The repository context. - cuda_config: The CUDA config as returned by _get_cuda_config - - Returns: - The path of the directory containing the CUDA headers. - """ - cuda_toolkit_path = cuda_config.cuda_toolkit_path - for relative_path in CUDA_INCLUDE_PATHS: - if repository_ctx.path( - "%s/%scuda.h" % (cuda_toolkit_path, relative_path), - ).exists: - return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1] - auto_configure_fail("Cannot find cuda.h under %s" % cuda_toolkit_path) - -def _find_cudnn_header_dir(repository_ctx, cudnn_install_basedir): - """Returns the path to the directory containing cudnn.h - - Args: - repository_ctx: The repository context. - cudnn_install_basedir: The cudnn install directory as returned by - _cudnn_install_basedir. - - Returns: - The path of the directory containing the cudnn header. - """ - for relative_path in CUDA_INCLUDE_PATHS: - if repository_ctx.path( - "%s/%scudnn.h" % (cudnn_install_basedir, relative_path), - ).exists: - return ("%s/%s" % (cudnn_install_basedir, relative_path))[:-1] - if repository_ctx.path("/usr/include/cudnn.h").exists: - return "/usr/include" - auto_configure_fail("Cannot find cudnn.h under %s" % cudnn_install_basedir) - -def _find_nvvm_libdevice_dir(repository_ctx, cuda_config): - """Returns the path to the directory containing libdevice in bitcode format. - - Args: - repository_ctx: The repository context. - cuda_config: The CUDA config as returned by _get_cuda_config - - Returns: - The path of the directory containing the CUDA headers. 
- """ - cuda_toolkit_path = cuda_config.cuda_toolkit_path - for libdevice_file in NVVM_LIBDEVICE_FILES: - for relative_path in NVVM_LIBDEVICE_PATHS: - if repository_ctx.path("%s/%s%s" % ( - cuda_toolkit_path, - relative_path, - libdevice_file, - )).exists: - return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1] - auto_configure_fail( - "Cannot find libdevice*.bc files under %s" % cuda_toolkit_path, - ) - def _cudart_static_linkopt(cpu_value): """Returns additional platform-specific linkopts for cudart.""" return "" if cpu_value == "Darwin" else "\"-lrt\"," +# TODO(csigg): Only call once instead of from here, tensorrt_configure.bzl, +# and nccl_configure.bzl. +def find_cuda_config(repository_ctx, cuda_libraries): + """Returns CUDA config dictionary from running find_cuda_config.py""" + exec_result = repository_ctx.execute([ + _get_python_bin(repository_ctx), + repository_ctx.path(Label("@org_tensorflow//third_party/gpus:find_cuda_config.py")), + ] + cuda_libraries) + if exec_result.return_code: + auto_configure_fail("Failed to run find_cuda_config.py: %s" % exec_result.stderr) + config = {} + for line in exec_result.stdout.splitlines(): + elements = line.split(": ") + if len(elements) != 2: + auto_configure_fail("Unexpected output from find_cuda_config.py") + config[elements[0]] = elements[1] + return config + def _get_cuda_config(repository_ctx): """Detects and returns information about the CUDA installation on the system. @@ -988,22 +689,30 @@ def _get_cuda_config(repository_ctx): compute_capabilities: A list of the system's CUDA compute capabilities. cpu_value: The name of the host operating system. 
""" + config = find_cuda_config(repository_ctx, ["cuda", "cudnn"]) cpu_value = get_cpu_value(repository_ctx) - toolkit_path = cuda_toolkit_path(repository_ctx) - cuda_version = _cuda_version(repository_ctx, toolkit_path, cpu_value) - cudnn_install_basedir = _cudnn_install_basedir(repository_ctx) - cudnn_version = _cudnn_version( - repository_ctx, - cudnn_install_basedir, - cpu_value, - ) + toolkit_path = config["cuda_toolkit_path"] + + is_windows = _is_windows(repository_ctx) + cuda_version = config["cuda_version"].split(".") + cuda_version = ("64_%s%s" if is_windows else "%s.%s") % (cuda_version[0], cuda_version[1]) + cudnn_version = ("64_%s" if is_windows else "%s") % config["cudnn_version"] + + # cuda_lib_version is for libraries like cuBLAS, cuFFT, cuSOLVER, etc. + # It changed from 'x.y' to just 'x' in CUDA 10.1. + if (int(cuda_version[0]), int(cuda_version[1])) >= (10, 1): + cuda_lib_version = ("64_%s" if is_windows else "%s") % cuda_version[0] + else: + cuda_lib_version = cuda_version + return struct( cuda_toolkit_path = toolkit_path, - cudnn_install_basedir = cudnn_install_basedir, cuda_version = cuda_version, cudnn_version = cudnn_version, + cuda_lib_version = cuda_lib_version, compute_capabilities = compute_capabilities(repository_ctx), cpu_value = cpu_value, + config = config, ) def _tpl(repository_ctx, tpl, substitutions = {}, out = None): @@ -1108,13 +817,14 @@ def _create_dummy_repository(repository_ctx): repository_ctx, "cuda:cuda_config.h", { - "%{cuda_version}": _DEFAULT_CUDA_VERSION, - "%{cudnn_version}": _DEFAULT_CUDNN_VERSION, + "%{cuda_version}": "", + "%{cuda_lib_version}": "", + "%{cudnn_version}": "", "%{cuda_compute_capabilities}": ",".join([ "CudaVersion(\"%s\")" % c for c in _DEFAULT_CUDA_COMPUTE_CAPABILITIES ]), - "%{cuda_toolkit_path}": _DEFAULT_CUDA_TOOLKIT_PATH, + "%{cuda_toolkit_path}": "", }, "cuda/cuda/cuda_config.h", ) @@ -1249,13 +959,11 @@ def _create_local_cuda_repository(repository_ctx): """Creates the repository containing 
files set up to build with CUDA.""" cuda_config = _get_cuda_config(repository_ctx) - cuda_include_path = _find_cuda_include_path(repository_ctx, cuda_config) - cudnn_header_dir = _find_cudnn_header_dir( - repository_ctx, - cuda_config.cudnn_install_basedir, - ) - cupti_header_dir = _find_cupti_header_dir(repository_ctx, cuda_config) - nvvm_libdevice_dir = _find_nvvm_libdevice_dir(repository_ctx, cuda_config) + cuda_include_path = cuda_config.config["cuda_include_dir"] + cublas_include_path = cuda_config.config["cublas_include_dir"] + cudnn_header_dir = cuda_config.config["cudnn_include_dir"] + cupti_header_dir = cuda_config.config["cupti_include_dir"] + nvvm_libdevice_dir = cuda_config.config["nvvm_library_dir"] # Create genrule to copy files from the installed CUDA toolkit into execroot. copy_rules = [ @@ -1279,6 +987,24 @@ def _create_local_cuda_repository(repository_ctx): ), ] + if cublas_include_path != cuda_include_path: + copy_rules.append(make_copy_files_rule( + repository_ctx, + name = "cublas-include", + srcs = [ + cublas_include_path + "/cublas.h", + cublas_include_path + "/cublas_v2.h", + cublas_include_path + "/cublas_api.h", + ], + outs = [ + "cuda/include/cublas.h", + "cuda/include/cublas_v2.h", + "cuda/include/cublas_api.h", + ], + )) + else: + copy_rules.append("filegroup(name = 'cublas-include')\n") + cuda_libs = _find_libs(repository_ctx, cuda_config) cuda_lib_srcs = [] cuda_lib_outs = [] @@ -1340,7 +1066,9 @@ def _create_local_cuda_repository(repository_ctx): "%{cusparse_lib}": cuda_libs["cusparse"].basename, "%{copy_rules}": "\n".join(copy_rules), "%{cuda_headers}": ( - '":cuda-include",\n' + ' ":cudnn-include",' + '":cuda-include",\n' + + ' ":cublas-include",' + + ' ":cudnn-include",' ), }, "cuda/BUILD", @@ -1423,8 +1151,8 @@ def _create_local_cuda_repository(repository_ctx): "flag: \"-fno-canonical-system-headers\"" ) nvcc_path = str( - repository_ctx.path("%s/bin/nvcc%s" % ( - cuda_config.cuda_toolkit_path, + repository_ctx.path("%s/nvcc%s" % 
( + cuda_config.config["cuda_binary_dir"], ".exe" if _is_windows(repository_ctx) else "", )), ) @@ -1472,6 +1200,7 @@ def _create_local_cuda_repository(repository_ctx): "cuda:cuda_config.h", { "%{cuda_version}": cuda_config.cuda_version, + "%{cuda_lib_version}": cuda_config.cuda_lib_version, "%{cudnn_version}": cuda_config.cudnn_version, "%{cuda_compute_capabilities}": ",".join([ "CudaVersion(\"%s\")" % c @@ -1546,6 +1275,7 @@ cuda_configure = repository_rule( _PYTHON_BIN_PATH, "TMP", "TMPDIR", + "TF_CUDA_PATHS", ], ) diff --git a/third_party/gpus/find_cuda_config.py b/third_party/gpus/find_cuda_config.py new file mode 100644 index 00000000000..87be0b28f5f --- /dev/null +++ b/third_party/gpus/find_cuda_config.py @@ -0,0 +1,457 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Prints CUDA library and header directories and versions found on the system. + +The script searches for CUDA library and header files on the system, inspects +them to determine their version and prints the configuration to stdout. +The paths to inspect and the required versions are specified through environment +variables. If no valid configuration is found, the script prints to stderr and +returns an error code. + +The list of libraries to find is specified as arguments. Supported libraries are +CUDA (includes cuBLAS), cuDNN, NCCL, and TensorRT. 
+ +The script takes a list of base directories specified by the TF_CUDA_PATHS +environment variable as comma-separated glob list. The script looks for headers +and library files in a hard-coded set of subdirectories from these base paths. +If TF_CUDA_PATHS is not specified, a OS specific default is used: + + Linux: /usr/local/cuda, /usr, and paths from 'ldconfig -p'. + Windows: CUDA_PATH environment variable, or + C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\* + +For backwards compatibility, some libraries also use alternative base +directories from other environment variables if they are specified. List of +library-specific environment variables: + + Library Version env variable Additional base directories + ---------------------------------------------------------------- + CUDA TF_CUDA_VERSION CUDA_TOOLKIT_PATH + cuBLAS TF_CUBLAS_VERSION CUDA_TOOLKIT_PATH + cuDNN TF_CUDNN_VERSION CUDNN_INSTALL_PATH + NCCL TF_NCCL_VERSION NCCL_INSTALL_PATH, NCCL_HDR_PATH + TensorRT TF_TENSORRT_VERSION TENSORRT_INSTALL_PATH + +Versions environment variables can be of the form 'x' or 'x.y' to request a +specific version, empty or unspecified to accept any version. + +The output of a found library is of the form: +tf__version: x.y.z +tf__header_dir: ... +tf__library_dir: ... +""" + +import os +import glob +import platform +import re +import subprocess +import sys + +# pylint: disable=g-import-not-at-top +try: + from shutil import which +except ImportError: + from distutils.spawn import find_executable as which +# pylint: enable=g-import-not-at-top + + +class ConfigError(Exception): + pass + + +def _is_linux(): + return platform.system() == "Linux" + + +def _is_windows(): + return platform.system() == "Windows" + + +def _is_macos(): + return platform.system() == "Darwin" + + +def _matches_version(actual_version, required_version): + """Checks whether some version meets the requirements. + + All elements of the required_version need to be present in the + actual_version. 
+ + required_version actual_version result + ----------------------------------------- + 1 1.1 True + 1.2 1 False + 1.2 1.3 False + 1 True + + Args: + required_version: The version specified by the user. + actual_version: The version detected from the CUDA installation. + Returns: Whether the actual version matches the required one. + """ + if actual_version is None: + return False + return actual_version.startswith(required_version) + + +def _at_least_version(actual_version, required_version): + actual = [int(v) for v in actual_version.split(".")] + required = [int(v) for v in required_version.split(".")] + return actual >= required + + +def _get_header_version(path, name): + """Returns preprocessor defines in C header file.""" + for line in open(path, "r").readlines(): + match = re.match("#define %s (\d+)" % name, line) + if match: + return match.group(1) + return "" + + +def _cartesian_product(first, second): + """Returns all path combinations of first and second.""" + return [os.path.join(f, s) for f in first for s in second] + + +def _get_ld_config_paths(): + """Returns all directories from 'ldconfig -p'.""" + if not _is_linux(): + return [] + ldconfig_path = which("ldconfig") or "/sbin/ldconfig" + output = subprocess.check_output([ldconfig_path, "-p"]) + pattern = re.compile(".* => (.*)") + result = set() + for line in output.splitlines(): + match = pattern.match(line.decode("ascii")) + if match: + result.add(os.path.dirname(match.group(1))) + return list(result) + + +def _get_default_cuda_paths(cuda_version): + if not cuda_version: + cuda_version = "*" + elif not "." 
in cuda_version: + cuda_version = cuda_version + ".*" + + if _is_windows(): + return [ + os.environ.get( + "CUDA_PATH", + "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v%s\\" % + cuda_version) + ] + return ["/usr/local/cuda-%s" % cuda_version, "/usr"] + _get_ld_config_paths() + + +def _header_paths(base_paths): + return _cartesian_product(base_paths, [ + "", + "include", + "include/cuda", + "include/*-linux-gnu", + "extras/CUPTI/include", + "include/cuda/CUPTI", + ]) + + +def _library_paths(base_paths): + return _cartesian_product(base_paths, [ + "", + "lib64", + "lib", + "lib/*-linux-gnu", + "lib/x64", + "extras/CUPTI/*", + ]) + + +def _not_found_error(paths, filepattern): + return ConfigError( + "Could not find any %s in:%s" % + (filepattern, "".join(["\n %s" % path for path in sorted(paths)]))) + + +def _find_file(paths, filepattern): + for path in paths: + for file in glob.glob(os.path.join(path, filepattern)): + return file + raise _not_found_error(paths, filepattern) + + +def _find_library(base_paths, library_name, required_version): + """Returns first valid path to the requested library.""" + if _is_windows(): + filepattern = library_name + ".lib" + elif _is_macos(): + filepattern = "%s*.dylib" % (".".join(["lib" + library_name] + + required_version.split(".")[:1])) + else: + filepattern = ".".join(["lib" + library_name, "so"] + + required_version.split(".")[:1]) + "*" + return _find_file(_library_paths(base_paths), filepattern) + + +def _find_versioned_file(paths, filepattern, required_version, get_version): + """Returns first valid path to a file that matches the requested version.""" + for path in paths: + for file in glob.glob(os.path.join(path, filepattern)): + actual_version = get_version(file) + if _matches_version(actual_version, required_version): + return file, actual_version + raise _not_found_error( + paths, filepattern + " matching version '%s'" % required_version) + + +def _find_header(base_paths, header_name, required_version, 
get_version): + """Returns first valid path to a header that matches the requested version.""" + return _find_versioned_file( + _header_paths(base_paths), header_name, required_version, get_version) + + +def _find_cuda_config(base_paths, required_version): + + def get_header_version(path): + version = int(_get_header_version(path, "CUDA_VERSION")) + if not version: + return None + return "%d.%d" % (version // 1000, version % 1000 // 10) + + cuda_header_path, header_version = _find_header(base_paths, "cuda.h", + required_version, + get_header_version) + cuda_version = header_version # x.y, see above. + + cuda_library_path = _find_library(base_paths, "cudart", cuda_version) + + def get_nvcc_version(path): + pattern = "Cuda compilation tools, release \d+\.\d+, V(\d+\.\d+\.\d+)" + for line in subprocess.check_output([path, "--version"]).splitlines(): + match = re.match(pattern, line.decode("ascii")) + if match: + return match.group(1) + return None + + nvcc_name = "nvcc.exe" if _is_windows() else "nvcc" + nvcc_path, nvcc_version = _find_versioned_file( + _cartesian_product(base_paths, [ + "", + "bin", + ]), nvcc_name, cuda_version, get_nvcc_version) + + nvvm_path = _find_file( + _cartesian_product(base_paths, [ + "nvvm/libdevice", + "share/cuda", + "lib/nvidia-cuda-toolkit/libdevice", + ]), "libdevice*.10.bc") + + cupti_header_path = _find_file(_header_paths(base_paths), "cupti.h") + cupti_library_path = _find_library(base_paths, "cupti", required_version) + + cuda_binary_dir = os.path.dirname(nvcc_path) + nvvm_library_dir = os.path.dirname(nvvm_path) + + # XLA requires the toolkit path to find ptxas and libdevice. + # TODO(csigg): pass in both directories instead. 
+ cuda_toolkit_paths = ( + os.path.normpath(os.path.join(cuda_binary_dir, "..")), + os.path.normpath(os.path.join(nvvm_library_dir, "../..")), + ) + if cuda_toolkit_paths[0] != cuda_toolkit_paths[1]: + raise ConfigError("Inconsistent CUDA toolkit path: %s vs %s" % + cuda_toolkit_paths) + + return { + "cuda_version": cuda_version, + "cuda_include_dir": os.path.dirname(cuda_header_path), + "cuda_library_dir": os.path.dirname(cuda_library_path), + "cuda_binary_dir": cuda_binary_dir, + "nvvm_library_dir": nvvm_library_dir, + "cupti_include_dir": os.path.dirname(cupti_header_path), + "cupti_library_dir": os.path.dirname(cupti_library_path), + "cuda_toolkit_path": cuda_toolkit_paths[0], + } + + +def _find_cublas_config(base_paths, required_version, cuda_version): + + if _at_least_version(cuda_version, "10.1"): + + def get_header_version(path): + version = ( + _get_header_version(path, name) + for name in ("CUBLAS_VER_MAJOR", "CUBLAS_VER_MINOR", + "CUBLAS_VER_PATCH")) + return ".".join(version) + + header_path, header_version = _find_header(base_paths, "cublas_api.h", + required_version, + get_header_version) + # cuBLAS uses the major version only. + cublas_version = header_version.split(".")[0] + + if not _matches_version(cuda_version, cublas_version): + raise ConfigError("cuBLAS version %s does not match CUDA version %s" % + (cublas_version, cuda_version)) + + else: + # There is no version info available before CUDA 10.1, just find the file. + header_path = _find_file(_header_paths(base_paths), "cublas_api.h") + # cuBLAS version is the same as CUDA version (x.y). 
+ cublas_version = required_version + + library_path = _find_library(base_paths, "cublas", cublas_version) + + return { + "cublas_include_dir": os.path.dirname(header_path), + "cublas_library_dir": os.path.dirname(library_path), + } + + +def _find_cudnn_config(base_paths, required_version): + + def get_header_version(path): + version = ( + _get_header_version(path, name) + for name in ("CUDNN_MAJOR", "CUDNN_MINOR", "CUDNN_PATCHLEVEL")) + return ".".join(version) + + header_path, header_version = _find_header(base_paths, "cudnn.h", + required_version, + get_header_version) + cudnn_version = header_version.split(".")[0] + + library_path = _find_library(base_paths, "cudnn", cudnn_version) + + return { + "cudnn_version": cudnn_version, + "cudnn_include_dir": os.path.dirname(header_path), + "cudnn_library_dir": os.path.dirname(library_path), + } + + +def _find_nccl_config(base_paths, required_version): + + def get_header_version(path): + version = ( + _get_header_version(path, name) + for name in ("NCCL_MAJOR", "NCCL_MINOR", "NCCL_PATCH")) + return ".".join(version) + + header_path, header_version = _find_header(base_paths, "nccl.h", + required_version, + get_header_version) + nccl_version = header_version.split(".")[0] + + library_path = _find_library(base_paths, "nccl", nccl_version) + + return { + "nccl_version": nccl_version, + "nccl_include_dir": os.path.dirname(header_path), + "nccl_library_dir": os.path.dirname(library_path), + } + + +def _find_tensorrt_config(base_paths, required_version): + + def get_header_version(path): + version = ( + _get_header_version(path, name) + for name in ("NV_TENSORRT_MAJOR", "NV_TENSORRT_MINOR", + "NV_TENSORRT_PATCH")) + return ".".join(version) + + header_path, header_version = _find_header(base_paths, "NvInfer.h", + required_version, + get_header_version) + tensorrt_version = header_version.split(".")[0] + + library_path = _find_library(base_paths, "nvinfer", tensorrt_version) + + return { + "tensorrt_version": tensorrt_version, 
+ "tensorrt_include_dir": os.path.dirname(header_path), + "tensorrt_library_dir": os.path.dirname(library_path), + } + + +def _list_from_env(env_name, default=[]): + """Returns comma-separated list from environment variable.""" + if env_name in os.environ: + return os.environ[env_name].split(",") + return default + + +def _normalize_path(path): + """Returns normalized path, with forward slashes on Windows.""" + path = os.path.normpath(path) + if _is_windows(): + path = path.replace("\\", "/") + return path + + +def find_cuda_config(): + """Returns a dictionary of CUDA library and header file paths.""" + libraries = [argv.lower() for argv in sys.argv[1:]] + cuda_version = os.environ.get("TF_CUDA_VERSION", "") + base_paths = _list_from_env("TF_CUDA_PATHS", + _get_default_cuda_paths(cuda_version)) + + result = {} + if "cuda" in libraries: + cuda_paths = _list_from_env("CUDA_TOOLKIT_PATH", base_paths) + result.update(_find_cuda_config(cuda_paths, cuda_version)) + + cuda_version = result["cuda_version"] + cublas_version = os.environ.get("TF_CUBLAS_VERSION", "") + result.update(_find_cublas_config(cuda_paths, cublas_version, cuda_version)) + + if "cudnn" in libraries: + cudnn_paths = _list_from_env("CUDNN_INSTALL_PATH", base_paths) + cudnn_version = os.environ.get("TF_CUDNN_VERSION", "") + result.update(_find_cudnn_config(cudnn_paths, cudnn_version)) + + if "nccl" in libraries: + nccl_paths = _list_from_env("NCCL_INSTALL_PATH", + base_paths) + _list_from_env("NCCL_HDR_PATH") + nccl_version = os.environ.get("TF_NCCL_VERSION", "") + result.update(_find_nccl_config(nccl_paths, nccl_version)) + + if "tensorrt" in libraries: + tensorrt_paths = _list_from_env("TENSORRT_INSTALL_PATH", base_paths) + tensorrt_version = os.environ.get("TF_TENSORRT_VERSION", "") + result.update(_find_tensorrt_config(tensorrt_paths, tensorrt_version)) + + for k, v in result.items(): + if k.endswith("_dir") or k.endswith("_path"): + result[k] = _normalize_path(v) + + return result + + +def main(): + 
try: + for key, value in sorted(find_cuda_config().items()): + print("%s: %s" % (key, value)) + except ConfigError as e: + sys.stderr.write(str(e)) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl index 07e4ad7beac..cfb2599ae44 100644 --- a/third_party/nccl/nccl_configure.bzl +++ b/third_party/nccl/nccl_configure.bzl @@ -4,19 +4,20 @@ `nccl_configure` depends on the following environment variables: * `TF_NCCL_VERSION`: Installed NCCL version or empty to build from source. - * `NCCL_INSTALL_PATH`: The installation path of the NCCL library. - * `NCCL_HDR_PATH`: The installation path of the NCCL header files. + * `NCCL_INSTALL_PATH` (deprecated): The installation path of the NCCL library. + * `NCCL_HDR_PATH` (deprecated): The installation path of the NCCL header + files. + * `TF_CUDA_PATHS`: The base paths to look for CUDA and cuDNN. Default is + `/usr/local/cuda,usr/`. + """ load( "//third_party/gpus:cuda_configure.bzl", - "auto_configure_fail", "compute_capabilities", - "cuda_toolkit_path", "enable_cuda", - "find_cuda_define", + "find_cuda_config", "get_cpu_value", - "matches_version", ) _CUDA_TOOLKIT_PATH = "CUDA_TOOLKIT_PATH" @@ -59,63 +60,6 @@ alias( def _label(file): return Label("//third_party/nccl:{}".format(file)) -def _find_nccl_header(repository_ctx, nccl_install_path): - """Finds the NCCL header on the system. - - Args: - repository_ctx: The repository context. - nccl_install_path: The NCCL library install directory. - - Returns: - The path to the NCCL header. - """ - header_path = repository_ctx.path("%s/include/nccl.h" % nccl_install_path) - if not header_path.exists: - auto_configure_fail("Cannot find %s" % str(header_path)) - return header_path - -def _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version): - """Checks whether the header file matches the specified version of NCCL. - - Args: - repository_ctx: The repository context. 
- nccl_install_path: The NCCL library install directory. - nccl_hdr_path: The NCCL header path. - nccl_version: The expected NCCL version. - - Returns: - A string containing the library version of NCCL. - """ - header_path = repository_ctx.path("%s/nccl.h" % nccl_hdr_path) - if not header_path.exists: - header_path = _find_nccl_header(repository_ctx, nccl_install_path) - header_dir = str(header_path.realpath.dirname) - major_version = find_cuda_define( - repository_ctx, - header_dir, - "nccl.h", - _DEFINE_NCCL_MAJOR, - ) - minor_version = find_cuda_define( - repository_ctx, - header_dir, - "nccl.h", - _DEFINE_NCCL_MINOR, - ) - patch_version = find_cuda_define( - repository_ctx, - header_dir, - "nccl.h", - _DEFINE_NCCL_PATCH, - ) - header_version = "%s.%s.%s" % (major_version, minor_version, patch_version) - if not matches_version(nccl_version, header_version): - auto_configure_fail( - ("NCCL library version detected from %s/nccl.h (%s) does not " + - "match TF_NCCL_VERSION (%s). To fix this rerun configure again.") % - (header_dir, header_version, nccl_version), - ) - def _nccl_configure_impl(repository_ctx): """Implementation of the nccl_configure repository rule.""" if (not enable_cuda(repository_ctx) or @@ -146,14 +90,8 @@ def _nccl_configure_impl(repository_ctx): }) else: # Create target for locally installed NCCL. 
- nccl_install_path = repository_ctx.os.environ[_NCCL_INSTALL_PATH].strip() - nccl_hdr_path = repository_ctx.os.environ[_NCCL_HDR_PATH].strip() - _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version) - repository_ctx.template("BUILD", _label("system.BUILD.tpl"), { - "%{version}": nccl_version, - "%{install_path}": nccl_install_path, - "%{hdr_path}": nccl_hdr_path, - }) + config = find_cuda_config(repository_ctx, ["nccl"]) + repository_ctx.template("BUILD", _label("system.BUILD.tpl"), config) nccl_configure = repository_rule( implementation = _nccl_configure_impl, @@ -164,6 +102,7 @@ nccl_configure = repository_rule( _TF_NCCL_VERSION, _TF_CUDA_COMPUTE_CAPABILITIES, _TF_NEED_CUDA, + "TF_CUDA_PATHS", ], ) """Detects and configures the NCCL configuration. diff --git a/third_party/nccl/system.BUILD.tpl b/third_party/nccl/system.BUILD.tpl index 970dddb1178..b970aeec7ba 100644 --- a/third_party/nccl/system.BUILD.tpl +++ b/third_party/nccl/system.BUILD.tpl @@ -5,7 +5,7 @@ filegroup( cc_library( name = "nccl", - srcs = ["libnccl.so.%{version}"], + srcs = ["libnccl.so.%{nccl_version}"], hdrs = ["nccl.h"], include_prefix = "third_party/nccl", visibility = ["//visibility:public"], @@ -17,9 +17,12 @@ cc_library( genrule( name = "nccl-files", outs = [ - "libnccl.so.%{version}", + "libnccl.so.%{nccl_version}", "nccl.h", ], - cmd = """cp "%{hdr_path}/nccl.h" "$(@D)/nccl.h" && - cp "%{install_path}/libnccl.so.%{version}" "$(@D)/libnccl.so.%{version}" """, + cmd = """ +cp "%{nccl_header_dir}/nccl.h" "$(@D)/nccl.h" && +cp "%{nccl_library_dir}/libnccl.so.%{nccl_version}" \ + "$(@D)/libnccl.so.%{nccl_version}" +""", ) diff --git a/third_party/tensorrt/BUILD.tpl b/third_party/tensorrt/BUILD.tpl index 4ff7da9ecc7..1db590389e0 100644 --- a/third_party/tensorrt/BUILD.tpl +++ b/third_party/tensorrt/BUILD.tpl @@ -11,15 +11,15 @@ exports_files(["LICENSE"]) cc_library( name = "tensorrt_headers", - hdrs = [%{tensorrt_headers}], + hdrs = [":tensorrt_include"], 
visibility = ["//visibility:public"], ) cc_library( name = "tensorrt", - srcs = %{tensorrt_libs}, + srcs = [":tensorrt_lib"], copts = cuda_default_copts(), - data = %{tensorrt_libs}, + data = [":tensorrt_lib"], linkstatic = 1, visibility = ["//visibility:public"], deps = [ diff --git a/third_party/tensorrt/tensorrt_configure.bzl b/third_party/tensorrt/tensorrt_configure.bzl index 9edcd60a764..004cc0e443a 100644 --- a/third_party/tensorrt/tensorrt_configure.bzl +++ b/third_party/tensorrt/tensorrt_configure.bzl @@ -9,14 +9,10 @@ load( "//third_party/gpus:cuda_configure.bzl", - "auto_configure_fail", - "find_cuda_define", - "find_lib", + "find_cuda_config", "get_cpu_value", "lib_name", - "make_copy_dir_rule", "make_copy_files_rule", - "matches_version", ) _TENSORRT_INSTALL_PATH = "TENSORRT_INSTALL_PATH" @@ -30,108 +26,6 @@ _DEFINE_TENSORRT_SONAME_MAJOR = "#define NV_TENSORRT_SONAME_MAJOR" _DEFINE_TENSORRT_SONAME_MINOR = "#define NV_TENSORRT_SONAME_MINOR" _DEFINE_TENSORRT_SONAME_PATCH = "#define NV_TENSORRT_SONAME_PATCH" -def _headers_exist(repository_ctx, path): - """Returns whether all TensorRT header files could be found in 'path'. - - Args: - repository_ctx: The repository context. - path: The TensorRT include path to check. - - Returns: - True if all TensorRT header files can be found in the path. - """ - for h in _TF_TENSORRT_HEADERS: - if not repository_ctx.path("%s/%s" % (path, h)).exists: - return False - return True - -def _find_trt_header_dir(repository_ctx, trt_install_path): - """Returns the path to the directory containing headers of TensorRT. - - Args: - repository_ctx: The repository context. - trt_install_path: The TensorRT library install directory. - - Returns: - The path of the directory containing the TensorRT header. 
- """ - if trt_install_path == "/usr/lib/x86_64-linux-gnu": - path = "/usr/include/x86_64-linux-gnu" - if _headers_exist(repository_ctx, path): - return path - if trt_install_path == "/usr/lib/aarch64-linux-gnu": - path = "/usr/include/aarch64-linux-gnu" - if _headers_exist(repository_ctx, path): - return path - path = str(repository_ctx.path("%s/../include" % trt_install_path).realpath) - if _headers_exist(repository_ctx, path): - return path - auto_configure_fail( - "Cannot find NvInfer.h with TensorRT install path %s" % trt_install_path, - ) - -def _trt_lib_version(repository_ctx, trt_install_path): - """Detects the library (e.g. libnvinfer) version of TensorRT. - - Args: - repository_ctx: The repository context. - trt_install_path: The TensorRT library install directory. - - Returns: - A string containing the library version of TensorRT. - """ - trt_header_dir = _find_trt_header_dir(repository_ctx, trt_install_path) - major_version = find_cuda_define( - repository_ctx, - trt_header_dir, - "NvInfer.h", - _DEFINE_TENSORRT_SONAME_MAJOR, - ) - minor_version = find_cuda_define( - repository_ctx, - trt_header_dir, - "NvInfer.h", - _DEFINE_TENSORRT_SONAME_MINOR, - ) - patch_version = find_cuda_define( - repository_ctx, - trt_header_dir, - "NvInfer.h", - _DEFINE_TENSORRT_SONAME_PATCH, - ) - full_version = "%s.%s.%s" % (major_version, minor_version, patch_version) - environ_version = repository_ctx.os.environ[_TF_TENSORRT_VERSION].strip() - if not matches_version(environ_version, full_version): - auto_configure_fail( - ("TensorRT library version detected from %s/%s (%s) does not match " + - "TF_TENSORRT_VERSION (%s). To fix this rerun configure again.") % - (trt_header_dir, "NvInfer.h", full_version, environ_version), - ) - - # Only use the major version to match the SONAME of the library. - return major_version - -def _find_trt_libs(repository_ctx, cpu_value, trt_install_path, trt_lib_version): - """Finds the given TensorRT library on the system. 
- - Adapted from code contributed by Sami Kama (https://github.com/samikama). - - Args: - repository_ctx: The repository context. - trt_install_path: The TensorRT library installation directory. - trt_lib_version: The version of TensorRT library files as returned - by _trt_lib_version. - - Returns: - The path to the library. - """ - result = {} - for lib in _TF_TENSORRT_LIBS: - file_name = lib_name(lib, cpu_value, trt_lib_version) - path = find_lib(repository_ctx, ["%s/%s" % (trt_install_path, file_name)]) - result[file_name] = path - return result - def _tpl(repository_ctx, tpl, substitutions): repository_ctx.template( tpl, @@ -162,54 +56,33 @@ def _tensorrt_configure_impl(repository_ctx): ) return - if _TENSORRT_INSTALL_PATH not in repository_ctx.os.environ: + if _TF_TENSORRT_VERSION not in repository_ctx.os.environ: _create_dummy_repository(repository_ctx) return + config = find_cuda_config(repository_ctx, ["tensorrt"]) + trt_version = config["tensorrt_version"] cpu_value = get_cpu_value(repository_ctx) - if (cpu_value != "Linux"): - auto_configure_fail("TensorRT is supported only on Linux.") - if _TF_TENSORRT_VERSION not in repository_ctx.os.environ: - auto_configure_fail("TensorRT library (libnvinfer) version is not set.") - trt_install_path = repository_ctx.os.environ[_TENSORRT_INSTALL_PATH].strip() - if not repository_ctx.path(trt_install_path).exists: - auto_configure_fail( - "Cannot find TensorRT install path %s." % trt_install_path, - ) - # Copy the library files. - trt_lib_version = _trt_lib_version(repository_ctx, trt_install_path) - trt_libs = _find_trt_libs(repository_ctx, cpu_value, trt_install_path, trt_lib_version) - trt_lib_srcs = [] - trt_lib_outs = [] - for path in trt_libs.values(): - trt_lib_srcs.append(str(path)) - trt_lib_outs.append("tensorrt/lib/" + path.basename) - copy_rules = [make_copy_files_rule( - repository_ctx, - name = "tensorrt_lib", - srcs = trt_lib_srcs, - outs = trt_lib_outs, - )] - - # Copy the header files header files. 
-    trt_header_dir = _find_trt_header_dir(repository_ctx, trt_install_path)
-    trt_header_srcs = [
-        "%s/%s" % (trt_header_dir, header)
-        for header in _TF_TENSORRT_HEADERS
-    ]
-    trt_header_outs = [
-        "tensorrt/include/" + header
-        for header in _TF_TENSORRT_HEADERS
-    ]
-    copy_rules.append(
+    # Copy the library and header files.
+    libraries = [lib_name(lib, cpu_value, trt_version) for lib in _TF_TENSORRT_LIBS]
+    library_dir = config["tensorrt_library_dir"] + "/"
+    headers = _TF_TENSORRT_HEADERS
+    include_dir = config["tensorrt_include_dir"] + "/"
+    copy_rules = [
+        make_copy_files_rule(
+            repository_ctx,
+            name = "tensorrt_lib",
+            srcs = [library_dir + library for library in libraries],
+            outs = ["tensorrt/lib/" + library for library in libraries],
+        ),
         make_copy_files_rule(
             repository_ctx,
             name = "tensorrt_include",
-            srcs = trt_header_srcs,
-            outs = trt_header_outs,
+            srcs = [include_dir + header for header in headers],
+            outs = ["tensorrt/include/" + header for header in headers],
         ),
-    )
+    ]
 
     # Set up config file.
     _tpl(repository_ctx, "build_defs.bzl", {"%{if_tensorrt}": "if_true"})
@@ -217,8 +90,7 @@ def _tensorrt_configure_impl(repository_ctx):
     # Set up BUILD file.
     _tpl(repository_ctx, "BUILD", {
         "%{copy_rules}": "\n".join(copy_rules),
-        "%{tensorrt_headers}": '":tensorrt_include"',
-        "%{tensorrt_libs}": str(trt_lib_outs),
+        "%{tensorrt_libs}": str(libraries),
     })
 
 tensorrt_configure = repository_rule(
@@ -226,6 +98,8 @@ tensorrt_configure = repository_rule(
     environ = [
         _TENSORRT_INSTALL_PATH,
         _TF_TENSORRT_VERSION,
+        _TF_TENSORRT_CONFIG_REPO,
+        "TF_CUDA_PATHS",
     ],
 )
 """Detects and configures the local CUDA toolchain.
diff --git a/third_party/toolchains/preconfig/centos6/cuda10.0-cudnn7/cuda/cuda/cuda_config.h b/third_party/toolchains/preconfig/centos6/cuda10.0-cudnn7/cuda/cuda/cuda_config.h index 783d6784176..72a7cf77346 100644 --- a/third_party/toolchains/preconfig/centos6/cuda10.0-cudnn7/cuda/cuda/cuda_config.h +++ b/third_party/toolchains/preconfig/centos6/cuda10.0-cudnn7/cuda/cuda/cuda_config.h @@ -19,6 +19,7 @@ limitations under the License. #define TF_CUDA_CAPABILITIES CudaVersion("3.0"), CudaVersion("6.0") #define TF_CUDA_VERSION "10.0" +#define TF_CUDA_LIB_VERSION "10.0" #define TF_CUDNN_VERSION "7" #define TF_CUDA_TOOLKIT_PATH "/usr/local/cuda-10.0" diff --git a/third_party/toolchains/preconfig/centos7/cuda10.0-cudnn7/cuda/cuda/cuda_config.h b/third_party/toolchains/preconfig/centos7/cuda10.0-cudnn7/cuda/cuda/cuda_config.h index 783d6784176..72a7cf77346 100644 --- a/third_party/toolchains/preconfig/centos7/cuda10.0-cudnn7/cuda/cuda/cuda_config.h +++ b/third_party/toolchains/preconfig/centos7/cuda10.0-cudnn7/cuda/cuda/cuda_config.h @@ -19,6 +19,7 @@ limitations under the License. #define TF_CUDA_CAPABILITIES CudaVersion("3.0"), CudaVersion("6.0") #define TF_CUDA_VERSION "10.0" +#define TF_CUDA_LIB_VERSION "10.0" #define TF_CUDNN_VERSION "7" #define TF_CUDA_TOOLKIT_PATH "/usr/local/cuda-10.0" diff --git a/third_party/toolchains/preconfig/ubuntu14.04/cuda10.0-cudnn7/cuda/cuda/cuda_config.h b/third_party/toolchains/preconfig/ubuntu14.04/cuda10.0-cudnn7/cuda/cuda/cuda_config.h index 783d6784176..72a7cf77346 100644 --- a/third_party/toolchains/preconfig/ubuntu14.04/cuda10.0-cudnn7/cuda/cuda/cuda_config.h +++ b/third_party/toolchains/preconfig/ubuntu14.04/cuda10.0-cudnn7/cuda/cuda/cuda_config.h @@ -19,6 +19,7 @@ limitations under the License. 
#define TF_CUDA_CAPABILITIES CudaVersion("3.0"), CudaVersion("6.0") #define TF_CUDA_VERSION "10.0" +#define TF_CUDA_LIB_VERSION "10.0" #define TF_CUDNN_VERSION "7" #define TF_CUDA_TOOLKIT_PATH "/usr/local/cuda-10.0" diff --git a/third_party/toolchains/preconfig/ubuntu14.04/cuda9.0-cudnn7/cuda/cuda/cuda_config.h b/third_party/toolchains/preconfig/ubuntu14.04/cuda9.0-cudnn7/cuda/cuda/cuda_config.h index 5d0d3013a98..09776bd0709 100755 --- a/third_party/toolchains/preconfig/ubuntu14.04/cuda9.0-cudnn7/cuda/cuda/cuda_config.h +++ b/third_party/toolchains/preconfig/ubuntu14.04/cuda9.0-cudnn7/cuda/cuda/cuda_config.h @@ -19,6 +19,7 @@ limitations under the License. #define TF_CUDA_CAPABILITIES CudaVersion("3.0") #define TF_CUDA_VERSION "9.0" +#define TF_CUDA_LIB_VERSION "9.0" #define TF_CUDNN_VERSION "7" #define TF_CUDA_TOOLKIT_PATH "/usr/local/cuda-9.0"