diff --git a/.gitignore b/.gitignore
index 95ff5b1eca5..e34d5a168fe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,13 +9,6 @@ node_modules
 /bazel-testlogs
 /bazel-tf
 /tensorflow/contrib/cmake/build
-/third_party/gpus/cuda/bin
-/third_party/gpus/cuda/cuda.config
-/third_party/gpus/cuda/extras
-/third_party/gpus/cuda/include
-/third_party/gpus/cuda/lib
-/third_party/gpus/cuda/lib64
-/third_party/gpus/cuda/nvvm
 /third_party/py/numpy/numpy_include
 /tools/bazel.rc
 /tools/python_bin_path.sh
@@ -25,3 +18,4 @@ node_modules
 /_python_build
 *.pyc
 __pycache__
+*.swp
diff --git a/configure b/configure
index 9ab6ea6b1cd..bcef37bd26b 100755
--- a/configure
+++ b/configure
@@ -80,6 +80,7 @@ while [ "$TF_NEED_CUDA" == "" ]; do
   esac
 done
 
+export TF_NEED_CUDA
 if [ "$TF_NEED_CUDA" == "0" ]; then
   echo "Configuration finished"
   exit
@@ -97,6 +98,7 @@ while true; do
     fi
   fi
   if [ -e "$GCC_HOST_COMPILER_PATH" ]; then
+    export CC=$GCC_HOST_COMPILER_PATH
     break
   fi
   echo "Invalid gcc path. ${GCC_HOST_COMPILER_PATH} cannot be found" 1>&2
@@ -107,7 +109,6 @@ while true; do
   # Retry
 done
 
-
 # Find out where the CUDA toolkit is installed
 OSNAME=`uname -s`
 
@@ -140,6 +141,8 @@ while true; do
   fi
 
   if [ -e "${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH}" ]; then
+    export CUDA_TOOLKIT_PATH
+    export CUDA_VERSION=$TF_CUDA_VERSION
     break
   fi
   echo "Invalid path to CUDA $TF_CUDA_VERSION toolkit. ${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH} cannot be found"
@@ -200,13 +203,16 @@ while true; do
   fi
 
   if [ -e "$CUDNN_INSTALL_PATH/${CUDA_DNN_LIB_ALT_PATH}" -o -e "$CUDNN_INSTALL_PATH/${CUDA_DNN_LIB_PATH}" ]; then
+    export CUDNN_VERSION=$TF_CUDNN_VERSION
+    export CUDNN_INSTALL_PATH
     break
   fi
 
   if [ "$OSNAME" == "Linux" ]; then
     CUDNN_PATH_FROM_LDCONFIG="$(ldconfig -p | sed -n 's/.*libcudnn.so .* => \(.*\)/\1/p')"
     if [ -e "${CUDNN_PATH_FROM_LDCONFIG}${TF_CUDNN_EXT}" ]; then
-      CUDNN_INSTALL_PATH="$(dirname ${CUDNN_PATH_FROM_LDCONFIG})"
+      export CUDNN_VERSION=$TF_CUDNN_VERSION
+      export CUDNN_INSTALL_PATH="$(dirname "${CUDNN_PATH_FROM_LDCONFIG}")"  # inner quotes keep a path with spaces as one dirname operand
       break
     fi
   fi
@@ -225,42 +231,11 @@ while true; do
   CUDNN_INSTALL_PATH=""
 done
 
-cat > third_party/gpus/cuda/cuda.config <<EOF
-# CUDA_TOOLKIT_PATH refers to the CUDA toolkit.
-CUDA_TOOLKIT_PATH="$CUDA_TOOLKIT_PATH"
-# CUDNN_INSTALL_PATH refers to the cuDNN toolkit. The cuDNN header and library
-# files can be either in this directory, or under include/ and lib64/
-# directories separately.
-CUDNN_INSTALL_PATH="$CUDNN_INSTALL_PATH"
-
-# The Cuda SDK version that should be used in this build (empty to use libcudart.so symlink)
-TF_CUDA_VERSION=$TF_CUDA_VERSION
-
-# The Cudnn version that should be used in this build
-TF_CUDNN_VERSION=$TF_CUDNN_VERSION
-EOF
-
-# Configure the gcc host compiler to use
-export WARNING=$DO_NOT_SUBMIT_WARNING
-perl -pi -e "s,CPU_COMPILER = \('.*'\),# \$ENV{WARNING}\nCPU_COMPILER = ('$GCC_HOST_COMPILER_PATH'),s" third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc
-perl -pi -e "s,GCC_HOST_COMPILER_PATH = \('.*'\),# \$ENV{WARNING}\nGCC_HOST_COMPILER_PATH = ('$GCC_HOST_COMPILER_PATH'),s" third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc
-
-# Configure the platform name.
-perl -pi -e "s,PLATFORM = \".*\",PLATFORM = \"$OSNAME\",s" third_party/gpus/cuda/platform.bzl
-
-# Configure the Cuda toolkit version to work with.
-perl -pi -e "s,(GetCudaVersion.*return )\"[0-9\.]*\",\1\"$TF_CUDA_VERSION\",s" tensorflow/stream_executor/dso_loader.cc
-perl -pi -e "s,CUDA_VERSION = \"[0-9\.]*\",CUDA_VERSION = \"$TF_CUDA_VERSION\",s" third_party/gpus/cuda/platform.bzl
-
-# Configure the Cudnn version to work with.
-perl -pi -e "s,(GetCudnnVersion.*return )\"[0-9\.]*\",\1\"$TF_CUDNN_VERSION\",s" tensorflow/stream_executor/dso_loader.cc
-perl -pi -e "s,CUDNN_VERSION = \"[0-9\.]*\",CUDNN_VERSION = \"$TF_CUDNN_VERSION\",s" third_party/gpus/cuda/platform.bzl
-
-
 # Configure the compute capabilities that TensorFlow builds for.
 # Since Cuda toolkit is not backward-compatible, this is not guaranteed to work.
 while true; do
   fromuser=""
+  default_cuda_compute_capabilities="3.5,5.2"
   if [ -z "$TF_CUDA_COMPUTE_CAPABILITIES" ]; then
 cat << EOF
 Please specify a list of comma-separated Cuda compute capabilities you want to build with.
@@ -270,6 +245,9 @@ EOF
     read -p "[Default is: \"3.5,5.2\"]: " TF_CUDA_COMPUTE_CAPABILITIES
     fromuser=1
   fi
+  # Nothing supplied via the environment or the prompt: fall back to the
+  # default capability list declared at the top of this loop.
+  : "${TF_CUDA_COMPUTE_CAPABILITIES:=$default_cuda_compute_capabilities}"
   # Check whether all capabilities from the input is valid
   COMPUTE_CAPABILITIES=${TF_CUDA_COMPUTE_CAPABILITIES//,/ }
   ALL_VALID=1
@@ -285,34 +263,13 @@ EOF
       exit 1
     fi
   else
+    export CUDA_COMPUTE_CAPABILITIES=$TF_CUDA_COMPUTE_CAPABILITIES
     break
   fi
   TF_CUDA_COMPUTE_CAPABILITIES=""
 done
 
-if [ ! -z "$TF_CUDA_COMPUTE_CAPABILITIES" ]; then
-  export WARNING=$DO_NOT_SUBMIT_WARNING
-  function CudaGenCodeOpts() {
-    OUTPUT=""
-    for CAPABILITY in $@; do
-      OUTPUT=${OUTPUT}"   \"${CAPABILITY}\",     "
-    done
-    echo $OUTPUT
-  }
-  export CUDA_GEN_CODES_OPTS=$(CudaGenCodeOpts ${TF_CUDA_COMPUTE_CAPABILITIES//,/ })
-  perl -pi -0 -e 's,\n( *)([^\n]*supported_cuda_compute_capabilities\s*=\s*\[).*?(\]),\n\1# $ENV{WARNING}\n\1\2$ENV{CUDA_GEN_CODES_OPTS}\3,s' third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc
-  function CudaVersionOpts() {
-    OUTPUT=""
-    for CAPABILITY in $@; do
-      OUTPUT=$OUTPUT"CudaVersion(\"${CAPABILITY}\"), "
-    done
-    echo $OUTPUT
-  }
-  export CUDA_VERSION_OPTS=$(CudaVersionOpts ${TF_CUDA_COMPUTE_CAPABILITIES//,/ })
-  perl -pi -0 -e 's,\n( *)([^\n]*supported_cuda_compute_capabilities\s*=\s*\{).*?(\}),\n\1// $ENV{WARNING}\n\1\2$ENV{CUDA_VERSION_OPTS}\3,s' tensorflow/core/common_runtime/gpu/gpu_device.cc
-fi
-
-# Invoke the cuda_config.sh and set up the TensorFlow's canonical view of the Cuda libraries
-(cd third_party/gpus/cuda; ./cuda_config.sh;) || exit -1
+bazel clean --expunge || exit 1  # drop state generated under the previous CUDA settings; abort if bazel is unusable
+bazel fetch //... || exit 1  # re-fetch external repos with the exported settings; do not report success on failure
 
 echo "Configuration finished"
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index c9163d6e081..72973b28b66 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -759,10 +759,9 @@ struct CudaVersion {
   int minor_part = -1;
 };
 
-// "configure" uses the specific name to substitute the following string.
-// If you change it, make sure you modify "configure" as well.
 std::vector<CudaVersion> supported_cuda_compute_capabilities = {
-    CudaVersion("3.5"), CudaVersion("5.2")};
+  TF_CUDA_CAPABILITIES,
+};
 
 std::vector<CudaVersion> GetSupportedCudaComputeCapabilities() {
   auto cuda_caps = supported_cuda_compute_capabilities;
diff --git a/tensorflow/core/kernels/lrn_op.cc b/tensorflow/core/kernels/lrn_op.cc
index 7b99aee6c6f..3435486c953 100644
--- a/tensorflow/core/kernels/lrn_op.cc
+++ b/tensorflow/core/kernels/lrn_op.cc
@@ -31,7 +31,7 @@ limitations under the License.
 #endif
 
 #if GOOGLE_CUDA
-#include "third_party/gpus/cuda/include/cuda.h"
+#include "cuda/include/cuda.h"
 #include "tensorflow/core/platform/stream_executor.h"
 #include "tensorflow/core/util/stream_executor_util.h"
 #endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/matmul_op.cc b/tensorflow/core/kernels/matmul_op.cc
index 7852b90c73e..03a6d298395 100644
--- a/tensorflow/core/kernels/matmul_op.cc
+++ b/tensorflow/core/kernels/matmul_op.cc
@@ -25,7 +25,7 @@ limitations under the License.
 #include "tensorflow/core/kernels/fill_functor.h"
 
 #if GOOGLE_CUDA
-#include "third_party/gpus/cuda/include/cuda.h"
+#include "cuda/include/cuda.h"
 #include "tensorflow/core/platform/stream_executor.h"
 #endif  // GOOGLE_CUDA
 
diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD
index f372d2ef0da..158c42b5ad1 100644
--- a/tensorflow/core/platform/default/build_config/BUILD
+++ b/tensorflow/core/platform/default/build_config/BUILD
@@ -9,7 +9,7 @@ exports_files(["LICENSE"])
 
 load("//tensorflow:tensorflow.bzl", "tf_copts")
 load("//tensorflow:tensorflow.bzl", "tf_cuda_library")
-load("//third_party/gpus/cuda:platform.bzl", "cuda_library_path")
+load("@local_config_cuda//cuda:platform.bzl", "cuda_library_path")
 
 cc_library(
     name = "gtest",
@@ -32,7 +32,7 @@ tf_cuda_library(
     deps = [
         "//tensorflow/stream_executor",
     ] + select({
-        "//third_party/gpus/cuda:darwin": ["IOKit"],
+        "@local_config_cuda//cuda:darwin": ["IOKit"],
         "//conditions:default": [],
     }),
 )
@@ -91,20 +91,20 @@ filegroup(
 cc_library(
     name = "cuda",
     data = [
-        "//third_party/gpus/cuda:{}".format(cuda_library_path("cudart")),
+        "@local_config_cuda//cuda:{}".format(cuda_library_path("cudart")),
     ],
     linkopts = select({
-        "//third_party/gpus/cuda:darwin": [
-            "-Wl,-rpath,third_party/gpus/cuda/lib",
-            "-Wl,-rpath,third_party/gpus/cuda/extras/CUPTI/lib",
+        "@local_config_cuda//cuda:darwin": [
+            "-Wl,-rpath,../local_config_cuda/cuda/lib",
+            "-Wl,-rpath,../local_config_cuda/cuda/extras/CUPTI/lib",
         ],
         "//conditions:default": [
-            "-Wl,-rpath,third_party/gpus/cuda/lib64",
-            "-Wl,-rpath,third_party/gpus/cuda/extras/CUPTI/lib64",
+            "-Wl,-rpath,../local_config_cuda/cuda/lib64",
+            "-Wl,-rpath,../local_config_cuda/cuda/extras/CUPTI/lib64",
         ],
     }),
     deps = [
-        "//third_party/gpus/cuda:cudart",
+        "@local_config_cuda//cuda:cudart",
     ],
 )
 
diff --git a/tensorflow/core/platform/default/gpu/BUILD b/tensorflow/core/platform/default/gpu/BUILD
index 93b62278480..6b0c919a89a 100644
--- a/tensorflow/core/platform/default/gpu/BUILD
+++ b/tensorflow/core/platform/default/gpu/BUILD
@@ -15,9 +15,9 @@ tf_cuda_library(
     copts = tf_copts(),
     cuda_deps = [
         "//tensorflow/core:stream_executor",
-        "//third_party/gpus/cuda:cuda_headers",
-        "//third_party/gpus/cuda:cupti_headers",
+        "@local_config_cuda//cuda:cuda_headers",
+        "@local_config_cuda//cuda:cupti_headers",
     ],
-    data = ["//third_party/gpus/cuda:cupti_dsos"],
+    data = ["@local_config_cuda//cuda:cupti_dsos"],
     visibility = ["//visibility:public"],
 )
diff --git a/tensorflow/core/platform/default/gpu/cupti_wrapper.h b/tensorflow/core/platform/default/gpu/cupti_wrapper.h
index 5829172c474..e482f8607f4 100644
--- a/tensorflow/core/platform/default/gpu/cupti_wrapper.h
+++ b/tensorflow/core/platform/default/gpu/cupti_wrapper.h
@@ -21,7 +21,7 @@ limitations under the License.
 #include <stddef.h>
 #include <stdint.h>
 
-#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h"
+#include "cuda/extras/CUPTI/include/cupti.h"
 
 namespace perftools {
 namespace gputools {
diff --git a/tensorflow/core/util/port.cc b/tensorflow/core/util/port.cc
index 42375770f4b..d93b971f856 100644
--- a/tensorflow/core/util/port.cc
+++ b/tensorflow/core/util/port.cc
@@ -16,7 +16,7 @@ limitations under the License.
 #include "tensorflow/core/util/port.h"
 
 #if GOOGLE_CUDA
-#include "third_party/gpus/cuda/include/cuda.h"
+#include "cuda/include/cuda.h"
 #endif
 
 namespace tensorflow {
diff --git a/tensorflow/stream_executor/BUILD b/tensorflow/stream_executor/BUILD
index 24e8305d315..256b1287506 100644
--- a/tensorflow/stream_executor/BUILD
+++ b/tensorflow/stream_executor/BUILD
@@ -27,9 +27,10 @@ cc_library(
     ]),
     data = [
         "//tensorflow/core:cuda",
-        "//third_party/gpus/cuda:cublas",
-        "//third_party/gpus/cuda:cudnn",
-        "//third_party/gpus/cuda:cufft",
+        "@local_config_cuda//cuda:cublas",
+        "@local_config_cuda//cuda:cudnn",
+        "@local_config_cuda//cuda:cufft",
+        "@local_config_cuda//cuda:curand",
     ],
     linkopts = [
         "-ldl",
@@ -37,7 +38,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         "//tensorflow/core:lib",
-        "//third_party/gpus/cuda:cuda_headers",
+        "@local_config_cuda//cuda:cuda_headers",
     ],
     alwayslink = 1,
 )
diff --git a/tensorflow/stream_executor/cuda/cuda_blas.cc b/tensorflow/stream_executor/cuda/cuda_blas.cc
index a9dd2953e51..e2611cd3d04 100644
--- a/tensorflow/stream_executor/cuda/cuda_blas.cc
+++ b/tensorflow/stream_executor/cuda/cuda_blas.cc
@@ -18,8 +18,8 @@ limitations under the License.
 // cuda.h). This ensures that Eigen's Half.h does not attempt to make its own
 // __half typedef if CUDA has already defined one (and conversely, that we do
 // not include <cuda_fp16.h> after Half.h has made its typedef).
-#include "third_party/gpus/cuda/include/cuda.h"
-#include "third_party/gpus/cuda/include/cublas_v2.h"
+#include "cuda/include/cuda.h"
+#include "cuda/include/cublas_v2.h"
 
 #if CUDA_VERSION >= 7050
 #define EIGEN_HAS_CUDA_FP16
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index b042dda29f0..e9e6d531c25 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -39,7 +39,7 @@ limitations under the License.
 #include "tensorflow/stream_executor/stream.h"
 #include "tensorflow/stream_executor/stream_executor_pimpl.h"
 // clang-format off
-#include "third_party/gpus/cuda/include/cudnn.h"
+#include "cuda/include/cudnn.h"
 // clang-format on
 
 namespace {
diff --git a/tensorflow/stream_executor/cuda/cuda_driver.h b/tensorflow/stream_executor/cuda/cuda_driver.h
index a5de5d0f597..ab118e5d40a 100644
--- a/tensorflow/stream_executor/cuda/cuda_driver.h
+++ b/tensorflow/stream_executor/cuda/cuda_driver.h
@@ -25,7 +25,7 @@ limitations under the License.
 #include "tensorflow/stream_executor/lib/status.h"
 #include "tensorflow/stream_executor/lib/statusor.h"
 #include "tensorflow/stream_executor/platform/port.h"
-#include "third_party/gpus/cuda/include/cuda.h"
+#include "cuda/include/cuda.h"
 
 namespace perftools {
 namespace gputools {
diff --git a/tensorflow/stream_executor/cuda/cuda_fft.h b/tensorflow/stream_executor/cuda/cuda_fft.h
index 0c7aa34df38..95b3e8de63d 100644
--- a/tensorflow/stream_executor/cuda/cuda_fft.h
+++ b/tensorflow/stream_executor/cuda/cuda_fft.h
@@ -23,7 +23,7 @@ limitations under the License.
 #include "tensorflow/stream_executor/fft.h"
 #include "tensorflow/stream_executor/platform/port.h"
 #include "tensorflow/stream_executor/plugin_registry.h"
-#include "third_party/gpus/cuda/include/cufft.h"
+#include "cuda/include/cufft.h"
 
 namespace perftools {
 namespace gputools {
diff --git a/tensorflow/stream_executor/cuda/cuda_helpers.h b/tensorflow/stream_executor/cuda/cuda_helpers.h
index 7753866560d..6a6134bf881 100644
--- a/tensorflow/stream_executor/cuda/cuda_helpers.h
+++ b/tensorflow/stream_executor/cuda/cuda_helpers.h
@@ -24,8 +24,8 @@ limitations under the License.
 #include <stddef.h>
 #include <complex>
 
-#include "third_party/gpus/cuda/include/cuComplex.h"
-#include "third_party/gpus/cuda/include/cuda.h"
+#include "cuda/include/cuComplex.h"
+#include "cuda/include/cuda.h"
 
 namespace perftools {
 namespace gputools {
diff --git a/tensorflow/stream_executor/cuda/cuda_kernel.h b/tensorflow/stream_executor/cuda/cuda_kernel.h
index 412e7d9a402..88d29fddd06 100644
--- a/tensorflow/stream_executor/cuda/cuda_kernel.h
+++ b/tensorflow/stream_executor/cuda/cuda_kernel.h
@@ -28,7 +28,7 @@ limitations under the License.
 #include "tensorflow/stream_executor/lib/casts.h"
 #include "tensorflow/stream_executor/platform/port.h"
 #include "tensorflow/stream_executor/platform/logging.h"
-#include "third_party/gpus/cuda/include/cuda.h"
+#include "cuda/include/cuda.h"
 
 #ifdef PLATFORMS_GPUS_CUDA_DYNAMIC_LIBCUDA_DYNAMIC_LIBCUDA_H_
 #error \
diff --git a/tensorflow/stream_executor/cuda/cuda_rng.cc b/tensorflow/stream_executor/cuda/cuda_rng.cc
index 334c6af9703..367eba4d519 100644
--- a/tensorflow/stream_executor/cuda/cuda_rng.cc
+++ b/tensorflow/stream_executor/cuda/cuda_rng.cc
@@ -28,7 +28,7 @@ limitations under the License.
 #include "tensorflow/stream_executor/lib/status.h"
 #include "tensorflow/stream_executor/platform/logging.h"
 #include "tensorflow/stream_executor/rng.h"
-#include "third_party/gpus/cuda/include/curand.h"
+#include "cuda/include/curand.h"
 
 // Formats curandStatus_t to output prettified values into a log stream.
 std::ostream &operator<<(std::ostream &in, const curandStatus_t &status) {
diff --git a/tensorflow/stream_executor/dso_loader.cc b/tensorflow/stream_executor/dso_loader.cc
index cce31ef4dcf..5113f2febe2 100644
--- a/tensorflow/stream_executor/dso_loader.cc
+++ b/tensorflow/stream_executor/dso_loader.cc
@@ -28,23 +28,22 @@ limitations under the License.
 #include "tensorflow/core/platform/load_library.h"
 #include "tensorflow/stream_executor/lib/error.h"
 #include "tensorflow/stream_executor/lib/str_util.h"
+#include "tensorflow/stream_executor/lib/str_util.h"
 #include "tensorflow/stream_executor/lib/strcat.h"
 #include "tensorflow/stream_executor/lib/stringprintf.h"
 #include "tensorflow/stream_executor/platform/logging.h"
 #include "tensorflow/stream_executor/platform/port.h"
-#include "tensorflow/stream_executor/lib/str_util.h"
 
 namespace perftools {
 namespace gputools {
 namespace internal {
 
-// TensorFlow OSS configure uses the following lines to configure versions. For
-// any modifications of the format, please make sure the script still works.
-string GetCudaVersion() { return ""; }
-string GetCudnnVersion() { return ""; }
+string GetCudaVersion() { return TF_CUDA_VERSION; }
+string GetCudnnVersion() { return TF_CUDNN_VERSION; }
 
 /* static */ port::Status DsoLoader::GetCublasDsoHandle(void** dso_handle) {
-  return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName("cublas", GetCudaVersion()),
+  return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
+                                      "cublas", GetCudaVersion()),
                                   GetCudaLibraryDirPath()),
                       dso_handle);
 }
@@ -53,35 +52,38 @@ string GetCudnnVersion() { return ""; }
   // libcudnn is versioned differently than the other libraries and may have a
   // different version number than other CUDA libraries.  See b/22397368 for
   // some details about the complications surrounding this.
-  return GetDsoHandle(
-      FindDsoPath(tensorflow::internal::FormatLibraryFileName("cudnn", GetCudnnVersion()),
-                              GetCudaLibraryDirPath()),
+  return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
+                                      "cudnn", GetCudnnVersion()),
+                                  GetCudaLibraryDirPath()),
                       dso_handle);
 }
 
 /* static */ port::Status DsoLoader::GetCufftDsoHandle(void** dso_handle) {
-  return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName("cufft", GetCudaVersion()),
+  return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
+                                      "cufft", GetCudaVersion()),
                                   GetCudaLibraryDirPath()),
                       dso_handle);
 }
 
 /* static */ port::Status DsoLoader::GetCurandDsoHandle(void** dso_handle) {
-  return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName("curand", GetCudaVersion()),
+  return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
+                                      "curand", GetCudaVersion()),
                                   GetCudaLibraryDirPath()),
                       dso_handle);
 }
 
 /* static */ port::Status DsoLoader::GetLibcudaDsoHandle(void** dso_handle) {
-  return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName("cuda", "1"),
-                                  GetCudaDriverLibraryPath()),
-                      dso_handle);
+  return GetDsoHandle(
+      FindDsoPath(tensorflow::internal::FormatLibraryFileName("cuda", "1"),
+                  GetCudaDriverLibraryPath()),
+      dso_handle);
 }
 
 /* static */ port::Status DsoLoader::GetLibcuptiDsoHandle(void** dso_handle) {
-  return GetDsoHandle(
-      FindDsoPath(tensorflow::internal::FormatLibraryFileName("cupti", GetCudaVersion()),
-                  GetCudaCuptiLibraryPath()),
-      dso_handle);
+  return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
+                                      "cupti", GetCudaVersion()),
+                                  GetCudaCuptiLibraryPath()),
+                      dso_handle);
 }
 
 /* static */ void DsoLoader::RegisterRpath(port::StringPiece path) {
@@ -89,11 +91,9 @@ string GetCudnnVersion() { return ""; }
   GetRpaths()->push_back(path.ToString());
 }
 
-
 /* static */ port::Status DsoLoader::GetDsoHandle(port::StringPiece path,
                                                   void** dso_handle,
                                                   LoadKind load_kind) {
-
   int dynload_flags =
       RTLD_LAZY | (load_kind == LoadKind::kLocal ? RTLD_LOCAL : RTLD_GLOBAL);
   string path_string = path.ToString();
@@ -138,9 +138,9 @@ string GetCudnnVersion() { return ""; }
 static std::vector<string>* CreatePrimordialRpaths() {
   auto rpaths = new std::vector<string>;
 #if defined(__APPLE__)
-  rpaths->push_back("driver/driver_sh.runfiles/org_tensorflow/third_party/gpus/cuda/lib");
+  rpaths->push_back("driver/driver_sh.runfiles/local_config_cuda/cuda/lib");
 #else
-  rpaths->push_back("driver/driver_sh.runfiles/org_tensorflow/third_party/gpus/cuda/lib64");
+  rpaths->push_back("driver/driver_sh.runfiles/local_config_cuda/cuda/lib64");
 #endif
   return rpaths;
 }
@@ -165,7 +165,6 @@ static std::vector<string>* CreatePrimordialRpaths() {
 
 /* static */ string DsoLoader::FindDsoPath(port::StringPiece library_name,
                                            port::StringPiece runfiles_relpath) {
-
   // Keep a record of the paths we attempted so we can dump out meaningful
   // diagnostics if no path is found.
   std::vector<string> attempted;
@@ -191,29 +190,28 @@ static std::vector<string>* CreatePrimordialRpaths() {
 
 /* static */ string DsoLoader::GetCudaLibraryDirPath() {
 #if defined(__APPLE__)
-  return "third_party/gpus/cuda/lib";
+  return "external/local_config_cuda/cuda/lib";
 #else
-  return "third_party/gpus/cuda/lib64";
+  return "external/local_config_cuda/cuda/lib64";
 #endif
 }
 
 /* static */ string DsoLoader::GetCudaDriverLibraryPath() {
 #if defined(__APPLE__)
-  return "third_party/gpus/cuda/driver/lib";
+  return "external/local_config_cuda/cuda/driver/lib";
 #else
-  return "third_party/gpus/cuda/driver/lib64";
+  return "external/local_config_cuda/cuda/driver/lib64";
 #endif
 }
 
 /* static */ string DsoLoader::GetCudaCuptiLibraryPath() {
 #if defined(__APPLE__)
-  return "third_party/gpus/cuda/extras/CUPTI/lib";
+  return "external/local_config_cuda/cuda/extras/CUPTI/lib";
 #else
-  return "third_party/gpus/cuda/extras/CUPTI/lib64";
+  return "external/local_config_cuda/cuda/extras/CUPTI/lib64";
 #endif
 }
 
-
 // -- CachedDsoLoader
 
 /* static */ port::StatusOr<void*> CachedDsoLoader::GetCublasDsoHandle() {
diff --git a/tensorflow/stream_executor/dso_loader.h b/tensorflow/stream_executor/dso_loader.h
index a3d46782555..64419e46f9e 100644
--- a/tensorflow/stream_executor/dso_loader.h
+++ b/tensorflow/stream_executor/dso_loader.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/stream_executor/platform/port.h"
 #include <vector>
 
+#include "cuda/cuda_config.h"
 #include "tensorflow/stream_executor/lib/status.h"
 #include "tensorflow/stream_executor/lib/statusor.h"
 #include "tensorflow/stream_executor/lib/stringpiece.h"
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 194309b134b..f4a846ad5a5 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -32,7 +32,7 @@ load(
     "tf_cuda_tests_tags",
 )
 load(
-    "//third_party/gpus/cuda:build_defs.bzl",
+    "@local_config_cuda//cuda:build_defs.bzl",
     "if_cuda",
 )
 
@@ -295,11 +295,11 @@ def tf_cc_tests(tests, deps, linkstatic=0, tags=[], size="medium", args=None,
     tf_cc_test(t, deps, linkstatic, tags=tags, size=size, args=args,
                linkopts=linkopts)
 
-def tf_cc_tests_gpu(tests, deps, linkstatic=0, tags=[], size="medium", args=None):
+def tf_cc_tests_gpu(tests, deps, linkstatic=0, tags=[], size="medium",
+                    args=None):
   tf_cc_tests(tests, deps, linkstatic, tags=tags, size=size, args=args)
 
 
-
 def tf_cuda_cc_tests(tests, deps, tags=[], size="medium", linkstatic=0,
                      args=None, linkopts=[]):
   for t in tests:
@@ -316,29 +316,29 @@ def _cuda_copts():
     common_cuda_opts = ["-x", "cuda", "-DGOOGLE_CUDA=1"]
     return select({
         "//conditions:default": [],
-        "//third_party/gpus/cuda:using_nvcc": (
+        "@local_config_cuda//cuda:using_nvcc": (
             common_cuda_opts +
             [
                 "-nvcc_options=relaxed-constexpr",
                 "-nvcc_options=ftz=true",
             ]
         ),
-        "//third_party/gpus/cuda:using_gcudacc": (
+        "@local_config_cuda//cuda:using_gcudacc": (
             common_cuda_opts +
             ["--gcudacc_flag=-ftz=true"]
         ),
-        "//third_party/gpus/cuda:using_clang": (
+        "@local_config_cuda//cuda:using_clang": (
             common_cuda_opts +
             [
                 "-fcuda-flush-denormals-to-zero",
-                "--cuda-path=third_party/gpus/cuda",
+                "--cuda-path=external/local_config_cuda/cuda",
                 "--cuda-gpu-arch=sm_35",
             ]
         ),
     }) + select({
         # Pass -O3 when building CUDA code with clang; some important
         # optimizations are not enabled at O2.
-        "//third_party/gpus/cuda:using_clang_opt": ["-O3"],
+        "@local_config_cuda//cuda:using_clang_opt": ["-O3"],
         "//conditions:default": [],
     })
 
@@ -409,7 +409,8 @@ def tf_kernel_library(name, prefix=None, srcs=None, gpu_srcs=None, hdrs=None,
     * srcs = ["cwise_op_abs.cc", ..., "cwise_op_tanh.cc"],
     * hdrs = ["cwise_ops.h", "cwise_ops_common.h"],
     * gpu_srcs = ["cwise_op_gpu_abs.cu.cc", ..., "cwise_op_gpu_tanh.cu.cc",
-                  "cwise_ops.h", "cwise_ops_common.h", "cwise_ops_gpu_common.cu.h"]
+                  "cwise_ops.h", "cwise_ops_common.h",
+                  "cwise_ops_gpu_common.cu.h"]
     * "cwise_ops_test.cc" is excluded
   """
   if not srcs:
@@ -613,7 +614,7 @@ check_deps = rule(
 def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[]):
   cuda_deps = [
       "//tensorflow/core:stream_executor_headers_lib",
-      "//third_party/gpus/cuda:cudart_static",
+      "@local_config_cuda//cuda:cudart_static",
   ]
   deps = deps + tf_custom_op_library_additional_deps()
   if gpu_srcs:
@@ -663,7 +664,7 @@ def tf_py_wrap_cc(name, srcs, swig_includes=[], deps=[], copts=[], **kwargs):
               module_name=module_name,
               py_module_name=name)
   extra_linkopts = select({
-      "//third_party/gpus/cuda:darwin": [
+      "@local_config_cuda//cuda:darwin": [
           "-Wl,-exported_symbols_list",
           "//tensorflow:tf_exported_symbols.lds"
       ],
@@ -672,7 +673,7 @@ def tf_py_wrap_cc(name, srcs, swig_includes=[], deps=[], copts=[], **kwargs):
           "//tensorflow:tf_version_script.lds"
       ]})
   extra_deps += select({
-      "//third_party/gpus/cuda:darwin": [
+      "@local_config_cuda//cuda:darwin": [
         "//tensorflow:tf_exported_symbols.lds"
       ],
       "//conditions:default": [
@@ -746,13 +747,14 @@ def py_tests(name,
                data=data,
                additional_deps=additional_deps)
 
-def cuda_py_tests(name, srcs, size="medium", additional_deps=[], data=[], shard_count=1, tags=[], prefix=""):
+def cuda_py_tests(name, srcs, size="medium", additional_deps=[], data=[],
+                  shard_count=1, tags=[], prefix=""):
   test_tags = tags + tf_cuda_tests_tags()
   py_tests(name=name, size=size, srcs=srcs, additional_deps=additional_deps,
            data=data, tags=test_tags, shard_count=shard_count,prefix=prefix)
 
-# Creates a genrule named <name> for running tools/proto_text's generator to make
-# the proto_text functions, for the protos passed in <srcs>.
+# Creates a genrule named <name> for running tools/proto_text's generator to
+# make the proto_text functions, for the protos passed in <srcs>.
 #
 # Return a struct with fields (hdrs, srcs) containing the names of the
 # generated files.
diff --git a/tensorflow/tools/ci_build/Dockerfile.gpu b/tensorflow/tools/ci_build/Dockerfile.gpu
index 619f003d5b7..7f169165e5e 100644
--- a/tensorflow/tools/ci_build/Dockerfile.gpu
+++ b/tensorflow/tools/ci_build/Dockerfile.gpu
@@ -22,5 +22,6 @@ ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64
 
 # Configure the build for our CUDA configuration.
 ENV CUDA_TOOLKIT_PATH /usr/local/cuda
-ENV CUDNN_INSTALL_PATH /usr/local/cuda
+ENV CUDNN_INSTALL_PATH /usr/lib/x86_64-linux-gnu
 ENV TF_NEED_CUDA 1
+ENV CUDA_COMPUTE_CAPABILITIES 3.0,5.2
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 05787aa3abe..b54db7d474d 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -1,9 +1,12 @@
 # TensorFlow external dependencies that can be loaded in WORKSPACE files.
 
+load("//third_party/gpus:cuda_configure.bzl", "cuda_configure")
+
 # If TensorFlow is linked as a submodule, path_prefix is TensorFlow's directory
 # within the workspace (e.g. "tensorflow/"), and tf_repo_name is the name of the
 # local_repository rule (e.g. "@tf").
 def tf_workspace(path_prefix = "", tf_repo_name = ""):
+  cuda_configure(name = "local_config_cuda")
 
   # These lines need to be changed when updating Eigen. They are parsed from
   # this file by the cmake and make builds to determine the eigen version and hash.
diff --git a/third_party/gpus/BUILD b/third_party/gpus/BUILD
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/third_party/gpus/crosstool/BUILD b/third_party/gpus/crosstool/BUILD
index 7c9c8ab884e..e69de29bb2d 100644
--- a/third_party/gpus/crosstool/BUILD
+++ b/third_party/gpus/crosstool/BUILD
@@ -1,42 +0,0 @@
-licenses(["restricted"])
-
-package(default_visibility = ["//visibility:public"])
-
-filegroup(
-    name = "crosstool",
-    srcs = ["CROSSTOOL"],
-    output_licenses = ["unencumbered"],
-)
-
-cc_toolchain(
-    name = "cc-compiler-local",
-    all_files = ":empty",
-    compiler_files = ":empty",
-    cpu = "local",
-    dwp_files = ":empty",
-    dynamic_runtime_libs = [":empty"],
-    linker_files = ":empty",
-    objcopy_files = ":empty",
-    static_runtime_libs = [":empty"],
-    strip_files = ":empty",
-    supports_param_files = 0,
-)
-
-cc_toolchain(
-    name = "cc-compiler-darwin",
-    all_files = ":empty",
-    compiler_files = ":empty",
-    cpu = "darwin",
-    dwp_files = ":empty",
-    dynamic_runtime_libs = [":empty"],
-    linker_files = ":empty",
-    objcopy_files = ":empty",
-    static_runtime_libs = [":empty"],
-    strip_files = ":empty",
-    supports_param_files = 0,
-)
-
-filegroup(
-    name = "empty",
-    srcs = [],
-)
diff --git a/third_party/gpus/crosstool/BUILD.tpl b/third_party/gpus/crosstool/BUILD.tpl
new file mode 100644
index 00000000000..7c9c8ab884e
--- /dev/null
+++ b/third_party/gpus/crosstool/BUILD.tpl
@@ -0,0 +1,42 @@
+licenses(["restricted"])
+
+package(default_visibility = ["//visibility:public"])
+
+filegroup(
+    name = "crosstool",
+    srcs = ["CROSSTOOL"],
+    output_licenses = ["unencumbered"],
+)
+
+cc_toolchain(
+    name = "cc-compiler-local",
+    all_files = ":empty",
+    compiler_files = ":empty",
+    cpu = "local",
+    dwp_files = ":empty",
+    dynamic_runtime_libs = [":empty"],
+    linker_files = ":empty",
+    objcopy_files = ":empty",
+    static_runtime_libs = [":empty"],
+    strip_files = ":empty",
+    supports_param_files = 0,
+)
+
+cc_toolchain(
+    name = "cc-compiler-darwin",
+    all_files = ":empty",
+    compiler_files = ":empty",
+    cpu = "darwin",
+    dwp_files = ":empty",
+    dynamic_runtime_libs = [":empty"],
+    linker_files = ":empty",
+    objcopy_files = ":empty",
+    static_runtime_libs = [":empty"],
+    strip_files = ":empty",
+    supports_param_files = 0,
+)
+
+filegroup(
+    name = "empty",
+    srcs = [],
+)
diff --git a/third_party/gpus/crosstool/CROSSTOOL b/third_party/gpus/crosstool/CROSSTOOL.tpl
similarity index 96%
rename from third_party/gpus/crosstool/CROSSTOOL
rename to third_party/gpus/crosstool/CROSSTOOL.tpl
index f72bb9321a7..a367aa8f661 100644
--- a/third_party/gpus/crosstool/CROSSTOOL
+++ b/third_party/gpus/crosstool/CROSSTOOL.tpl
@@ -47,7 +47,7 @@ toolchain {
   tool_path { name: "cpp" path: "/usr/bin/cpp" }
   tool_path { name: "dwp" path: "/usr/bin/dwp" }
   # As part of the TensorFlow release, we place some cuda-related compilation
-  # files in third_party/gpus/crosstool/clang/bin, and this relative
+  # files in @local_config_cuda//crosstool/clang/bin, and this relative
   # path, combined with the rest of our Bazel configuration causes our
   # compilation to use those files.
   tool_path { name: "gcc" path: "clang/bin/crosstool_wrapper_driver_is_not_gcc" }
@@ -125,6 +125,9 @@ toolchain {
   # linker_flag: "-Wl,--warn-execstack"
   # linker_flag: "-Wl,--detect-odr-violations"
 
+  # Include directory for cuda headers.
+  cxx_builtin_include_directory: "/usr/local/cuda%{cuda_version}/include"
+
   compilation_mode_flags {
     mode: DBG
     # Enable debug symbols.
@@ -221,6 +224,9 @@ toolchain {
   # Anticipated future default.
   linker_flag: "-no-canonical-prefixes"
 
+  # Include directory for cuda headers.
+  cxx_builtin_include_directory: "/usr/local/cuda%{cuda_version}/include"
+
   compilation_mode_flags {
     mode: DBG
     # Enable debug symbols.
diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl
similarity index 96%
rename from third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc
rename to third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl
index 389444e731b..20449a1137d 100755
--- a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc
+++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl
@@ -45,10 +45,9 @@ import re
 import sys
 import pipes
 
-# "configure" uses the specific format to substitute the following string.
-# If you change it, make sure you modify "configure" as well.
-CPU_COMPILER = ('/usr/bin/gcc')
-GCC_HOST_COMPILER_PATH = ('/usr/bin/gcc')
+# Template values set by cuda_autoconf.
+CPU_COMPILER = ('%{cpu_compiler}')
+GCC_HOST_COMPILER_PATH = ('%{gcc_host_compiler_path}')
 
 CURRENT_DIR = os.path.dirname(sys.argv[0])
 NVCC_PATH = CURRENT_DIR + '/../../../cuda/bin/nvcc'
@@ -229,9 +228,7 @@ def InvokeNvcc(argv, log=False):
   srcs = ' '.join(src_files)
   out = ' -o ' + out_file[0]
 
-  # "configure" uses the specific format to substitute the following string.
-  # If you change it, make sure you modify "configure" as well.
-  supported_cuda_compute_capabilities = [ "3.5", "5.2" ]
+  supported_cuda_compute_capabilities = [ %{cuda_compute_capabilities} ]
   nvccopts = ''
   for capability in supported_cuda_compute_capabilities:
     capability = capability.replace('.', '')
diff --git a/third_party/gpus/cuda/BUILD b/third_party/gpus/cuda/BUILD
index 79c6227687b..e69de29bb2d 100644
--- a/third_party/gpus/cuda/BUILD
+++ b/third_party/gpus/cuda/BUILD
@@ -1,224 +0,0 @@
-licenses(["restricted"])  # MPL2, portions GPL v3, LGPL v3, BSD-like
-
-load("//third_party/gpus/cuda:build_defs.bzl", "if_cuda")
-load("platform", "cuda_library_path")
-load("platform", "cuda_static_library_path")
-load("platform", "cudnn_library_path")
-load("platform", "cupti_library_path")
-load("platform", "readlink_command")
-
-package(default_visibility = ["//visibility:public"])
-
-config_setting(
-    name = "using_gcudacc",
-    values = {
-        "define": "using_cuda_gcudacc=true",
-    },
-    visibility = ["//visibility:public"],
-)
-
-config_setting(
-    name = "using_nvcc",
-    values = {
-        "define": "using_cuda_nvcc=true",
-    },
-)
-
-config_setting(
-    name = "using_clang",
-    values = {
-        "define": "using_cuda_clang=true",
-    },
-)
-
-# Equivalent to using_clang && -c opt.
-config_setting(
-    name = "using_clang_opt",
-    values = {
-        "define": "using_cuda_clang=true",
-        "compilation_mode": "opt",
-    },
-)
-
-config_setting(
-    name = "darwin",
-    values = {"cpu": "darwin"},
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "cuda_headers",
-    hdrs = glob([
-        "**/*.h",
-    ]),
-    includes = [
-        ".",
-        "include",
-    ],
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "cudart_static",
-    srcs = [
-        cuda_static_library_path("cudart"),
-    ],
-    includes = ["include/"],
-    linkopts = [
-        "-ldl",
-        "-lpthread",
-    ] + select({
-        "//tensorflow:darwin": [],
-        "//conditions:default": ["-lrt"],
-    }),
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "cudart",
-    srcs = [
-        cuda_library_path("cudart"),
-    ],
-    data = [
-        cuda_library_path("cudart"),
-    ],
-    includes = ["include/"],
-    linkstatic = 1,
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "cublas",
-    srcs = [
-        cuda_library_path("cublas"),
-    ],
-    data = [
-        cuda_library_path("cublas"),
-    ],
-    includes = ["include/"],
-    linkstatic = 1,
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "cudnn",
-    srcs = [
-        cudnn_library_path(),
-    ],
-    data = [
-        cudnn_library_path(),
-    ],
-    includes = ["include/"],
-    linkstatic = 1,
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "cufft",
-    srcs = [
-        cuda_library_path("cufft"),
-    ],
-    data = [
-        cuda_library_path("cufft"),
-    ],
-    includes = ["include/"],
-    linkstatic = 1,
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "cuda",
-    visibility = ["//visibility:public"],
-    deps = [
-        ":cublas",
-        ":cuda_headers",
-        ":cudart",
-        ":cudnn",
-        ":cufft",
-    ],
-)
-
-cc_library(
-    name = "cupti_headers",
-    hdrs = glob([
-        "**/*.h",
-    ]),
-    includes = [
-        ".",
-        "extras/CUPTI/include/",
-    ],
-    visibility = ["//visibility:public"],
-)
-
-cc_library(
-    name = "cupti_dsos",
-    data = [
-        cupti_library_path(),
-    ],
-    visibility = ["//visibility:public"],
-)
-
-# TODO(opensource): for now, we have to invoke the cuda_config.sh manually in the source tree.
-# This rule checks if Cuda libraries in the source tree has been properly configured.
-# The output list makes bazel runs this rule first if the Cuda files are missing.
-# This gives us an opportunity to check and print a meaningful error message.
-# But we will need to create the output file list to make bazel happy in a successful run.
-genrule(
-    name = "cuda_check",
-    srcs = [
-        "cuda.config",
-        "cuda_config.sh",
-    ],
-    outs = [
-        "include/cuda.h",
-        "include/cublas.h",
-        "include/cudnn.h",
-        "extras/CUPTI/include/cupti.h",
-        cuda_static_library_path("cudart"),
-        cuda_library_path("cublas"),
-        cudnn_library_path(),
-        cuda_library_path("cudart"),
-        cuda_library_path("cufft"),
-        cupti_library_path(),
-    ],
-    cmd = if_cuda(
-        # Under cuda config, create all the symbolic links to the actual cuda files
-        "OUTPUTDIR=`{} -f $(@D)/../../..`; cd `dirname $(location :cuda_config.sh)`; OUTPUTDIR=$$OUTPUTDIR ./cuda_config.sh --check;".format(readlink_command()),
-
-        # Under non-cuda config, create all dummy files to make the build go through
-        ";".join([
-            "mkdir -p $(@D)/include",
-            "mkdir -p $(@D)/lib64",
-            "mkdir -p $(@D)/extras/CUPTI/include",
-            "mkdir -p $(@D)/extras/CUPTI/lib64",
-            "touch $(@D)/include/cuda.h",
-            "touch $(@D)/include/cublas.h",
-            "touch $(@D)/include/cudnn.h",
-            "touch $(@D)/extras/CUPTI/include/cupti.h",
-            "touch $(@D)/{}".format(cuda_static_library_path("cudart")),
-            "touch $(@D)/{}".format(cuda_library_path("cublas")),
-            "touch $(@D)/{}".format(cudnn_library_path()),
-            "touch $(@D)/{}".format(cuda_library_path("cudart")),
-            "touch $(@D)/{}".format(cuda_library_path("cufft")),
-            "touch $(@D)/{}".format(cupti_library_path()),
-        ]),
-    ),
-    local = 1,
-)
-
-genrule(
-    name = "cuda_config_check",
-    outs = [
-        "cuda.config",
-    ],
-    cmd = if_cuda(
-        # Under cuda config, create the symbolic link to the actual cuda.config
-        "configfile=$(location :cuda.config); ln -sf `{} -f $${{configfile#*/*/*/}}` $(@D)/;".format(readlink_command()),
-
-        # Under non-cuda config, create the dummy file
-        ";".join([
-            "touch $(@D)/cuda.config",
-        ]),
-    ),
-    local = 1,
-)
diff --git a/third_party/gpus/cuda/BUILD.tpl b/third_party/gpus/cuda/BUILD.tpl
new file mode 100644
index 00000000000..db6db87162f
--- /dev/null
+++ b/third_party/gpus/cuda/BUILD.tpl
@@ -0,0 +1,172 @@
+licenses(["restricted"])  # MPL2, portions GPL v3, LGPL v3, BSD-like
+
+load("@local_config_cuda//cuda:platform.bzl", "cuda_library_path")
+load("@local_config_cuda//cuda:platform.bzl", "cuda_static_library_path")
+load("@local_config_cuda//cuda:platform.bzl", "cudnn_library_path")
+load("@local_config_cuda//cuda:platform.bzl", "cupti_library_path")
+load("@local_config_cuda//cuda:platform.bzl", "readlink_command")
+
+package(default_visibility = ["//visibility:public"])
+
+config_setting(
+    name = "using_gcudacc",
+    values = {
+        "define": "using_cuda_gcudacc=true",
+    },
+    visibility = ["//visibility:public"],
+)
+
+config_setting(
+    name = "using_nvcc",
+    values = {
+        "define": "using_cuda_nvcc=true",
+    },
+)
+
+config_setting(
+    name = "using_clang",
+    values = {
+        "define": "using_cuda_clang=true",
+    },
+)
+
+# Equivalent to using_clang && -c opt.
+config_setting(
+    name = "using_clang_opt",
+    values = {
+        "define": "using_cuda_clang=true",
+        "compilation_mode": "opt",
+    },
+)
+
+config_setting(
+    name = "darwin",
+    values = {"cpu": "darwin"},
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "cuda_headers",
+    hdrs = glob([
+        "**/*.h",
+    ]),
+    includes = [
+        ".",
+        "include",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "cudart_static",
+    srcs = [
+        cuda_static_library_path("cudart"),
+    ],
+    includes = ["include/"],
+    linkopts = [
+        "-ldl",
+        "-lpthread",
+    ] + select({
+        "@//tensorflow:darwin": [],
+        "//conditions:default": ["-lrt"],
+    }),
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "cudart",
+    srcs = [
+        cuda_library_path("cudart"),
+    ],
+    data = [
+        cuda_library_path("cudart"),
+    ],
+    includes = ["include/"],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "cublas",
+    srcs = [
+        cuda_library_path("cublas"),
+    ],
+    data = [
+        cuda_library_path("cublas"),
+    ],
+    includes = ["include/"],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "cudnn",
+    srcs = [
+        cudnn_library_path(),
+    ],
+    data = [
+        cudnn_library_path(),
+    ],
+    includes = ["include/"],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "cufft",
+    srcs = [
+        cuda_library_path("cufft"),
+    ],
+    data = [
+        cuda_library_path("cufft"),
+    ],
+    includes = ["include/"],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "curand",
+    srcs = [
+        cuda_library_path("curand"),
+    ],
+    data = [
+        cuda_library_path("curand"),
+    ],
+    includes = ["include/"],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "cuda",
+    deps = [
+        ":cuda_headers",
+        ":cudart",
+        ":cublas",
+        ":cudnn",
+        ":cufft",
+        ":curand",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "cupti_headers",
+    hdrs = glob([
+        "**/*.h",
+    ]),
+    includes = [
+        ".",
+        "extras/CUPTI/include/",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "cupti_dsos",
+    data = [
+        cupti_library_path(),
+    ],
+    visibility = ["//visibility:public"],
+)
diff --git a/third_party/gpus/cuda/build_defs.bzl b/third_party/gpus/cuda/build_defs.bzl.tpl
similarity index 76%
rename from third_party/gpus/cuda/build_defs.bzl
rename to third_party/gpus/cuda/build_defs.bzl.tpl
index 49b977e93e8..8f7dc006309 100644
--- a/third_party/gpus/cuda/build_defs.bzl
+++ b/third_party/gpus/cuda/build_defs.bzl.tpl
@@ -8,7 +8,7 @@ def if_cuda(if_true, if_false = []):
 
     """
     return select({
-        "//third_party/gpus/cuda:using_nvcc": if_true,
-        "//third_party/gpus/cuda:using_gcudacc": if_true,
+        "@local_config_cuda//cuda:using_nvcc": if_true,
+        "@local_config_cuda//cuda:using_gcudacc": if_true,
         "//conditions:default": if_false
     })
diff --git a/third_party/gpus/cuda/cuda_config.h.tpl b/third_party/gpus/cuda/cuda_config.h.tpl
new file mode 100644
index 00000000000..ea51fbb26f0
--- /dev/null
+++ b/third_party/gpus/cuda/cuda_config.h.tpl
@@ -0,0 +1,24 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef CUDA_CUDA_CONFIG_H_
+#define CUDA_CUDA_CONFIG_H_
+
+#define TF_CUDA_CAPABILITIES %{cuda_compute_capabilities}
+
+#define TF_CUDA_VERSION "%{cuda_version}"
+#define TF_CUDNN_VERSION "%{cudnn_version}"
+
+#endif  // CUDA_CUDA_CONFIG_H_
diff --git a/third_party/gpus/cuda/cuda_config.sh b/third_party/gpus/cuda/cuda_config.sh
deleted file mode 100755
index 8a0c70cbbd3..00000000000
--- a/third_party/gpus/cuda/cuda_config.sh
+++ /dev/null
@@ -1,234 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-
-# A simple script to configure the Cuda tree needed for the TensorFlow GPU
-# build. We need both Cuda toolkit $TF_CUDA_VERSION and Cudnn $TF_CUDNN_VERSION.
-# Useage:
-#    * User edit cuda.config to point both Cuda toolkit and Cudnn libraries to their local path
-#    * run cuda_config.sh to generate symbolic links in the source tree to reflect
-#    * the file organizations needed by TensorFlow.
-
-print_usage() {
-cat << EOF
-Usage: $0 [--check]
-  Configure TensorFlow's canonical view of Cuda libraries using cuda.config.
-Arguments:
-  --check: Only check that the proper Cuda dependencies has already been
-       properly configured in the source tree. It also creates symbolic links to
-       the files in the gen-tree to make bazel happy.
-EOF
-}
-
-CHECK_ONLY=0
-# Parse the arguments. Add more arguments as the "case" line when needed.
-while [[ $# -gt 0 ]]; do
-  argument="$1"
-  shift
-  case $argument in
-    --check)
-      CHECK_ONLY=1
-      ;;
-    *)
-      echo "Error: unknown arguments"
-      print_usage
-      exit -1
-      ;;
-  esac
-done
-
-source cuda.config || exit -1
-
-OUTPUTDIR=${OUTPUTDIR:-../../..}
-CUDA_TOOLKIT_PATH=${CUDA_TOOLKIT_PATH:-/usr/local/cuda}
-CUDNN_INSTALL_BASEDIR=${CUDNN_INSTALL_PATH:-/usr/local/cuda}
-
-if [[ -z "$TF_CUDA_VERSION" ]]; then
-  TF_CUDA_EXT=""
-else
-  TF_CUDA_EXT=".$TF_CUDA_VERSION"
-fi
-
-if [[ -z "$TF_CUDNN_VERSION" ]]; then
-  TF_CUDNN_EXT=""
-else
-  TF_CUDNN_EXT=".$TF_CUDNN_VERSION"
-fi
-
-# An error message when the Cuda toolkit is not found
-function CudaError {
-  echo ERROR: $1
-cat << EOF
-##############################################################################
-##############################################################################
-Cuda $TF_CUDA_VERSION toolkit is missing.
-1. Download and install the CUDA $TF_CUDA_VERSION toolkit and CUDNN $TF_CUDNN_VERSION library;
-2. Run configure from the root of the source tree, before rerunning bazel;
-Please refer to README.md for more details.
-##############################################################################
-##############################################################################
-EOF
-  exit -1
-}
-
-# An error message when CUDNN is not found
-function CudnnError {
-  echo ERROR: $1
-cat << EOF
-##############################################################################
-##############################################################################
-Cudnn $TF_CUDNN_VERSION is missing.
-1. Download and install the CUDA $TF_CUDA_VERSION toolkit and CUDNN $TF_CUDNN_VERSION library;
-2. Run configure from the root of the source tree, before rerunning bazel;
-Please refer to README.md for more details.
-##############################################################################
-##############################################################################
-EOF
-  exit -1
-}
-
-# Check that Cuda libraries has already been properly configured in the source tree.
-# We still need to create links to the gen-tree to make bazel happy.
-function CheckAndLinkToSrcTree {
-  ERROR_FUNC=$1
-  FILE=$2
-  if test ! -e $FILE; then
-    $ERROR_FUNC "$PWD/$FILE cannot be found"
-  fi
-
-  # Link the output file to the source tree, avoiding self links if they are
-  # the same. This could happen if invoked from the source tree by accident.
-  if [ ! $($READLINK_CMD -f $PWD) == $($READLINK_CMD -f $OUTPUTDIR/third_party/gpus/cuda) ]; then
-    mkdir -p $(dirname $OUTPUTDIR/third_party/gpus/cuda/$FILE)
-    ln -sf $PWD/$FILE $OUTPUTDIR/third_party/gpus/cuda/$FILE
-  fi
-}
-
-OSNAME=`uname -s`
-if [ "$OSNAME" == "Linux" ]; then
-  CUDA_LIB_PATH="lib64"
-  CUDA_CUPTI_LIB_DIR="extras/CUPTI/lib64"
-  CUDA_RT_LIB_PATH="lib64/libcudart.so${TF_CUDA_EXT}"
-  CUDA_RT_LIB_STATIC_PATH="lib64/libcudart_static.a"
-  CUDA_BLAS_LIB_PATH="lib64/libcublas.so${TF_CUDA_EXT}"
-  CUDA_DNN_LIB_PATH="lib64/libcudnn.so${TF_CUDNN_EXT}"
-  CUDA_DNN_LIB_ALT_PATH="libcudnn.so${TF_CUDNN_EXT}"
-  CUDA_FFT_LIB_PATH="lib64/libcufft.so${TF_CUDA_EXT}"
-  CUDA_CUPTI_LIB_PATH="extras/CUPTI/lib64/libcupti.so${TF_CUDA_EXT}"
-  READLINK_CMD="readlink"
-elif [ "$OSNAME" == "Darwin" ]; then
-  CUDA_LIB_PATH="lib"
-  CUDA_CUPTI_LIB_DIR="extras/CUPTI/lib"
-  CUDA_RT_LIB_PATH="lib/libcudart${TF_CUDA_EXT}.dylib"
-  CUDA_RT_LIB_STATIC_PATH="lib/libcudart_static.a"
-  CUDA_BLAS_LIB_PATH="lib/libcublas${TF_CUDA_EXT}.dylib"
-  CUDA_DNN_LIB_PATH="lib/libcudnn${TF_CUDNN_EXT}.dylib"
-  CUDA_DNN_LIB_ALT_PATH="libcudnn${TF_CUDNN_EXT}.dylib"
-  CUDA_FFT_LIB_PATH="lib/libcufft${TF_CUDA_EXT}.dylib"
-  CUDA_CUPTI_LIB_PATH="extras/CUPTI/lib/libcupti${TF_CUDA_EXT}.dylib"
-  READLINK_CMD="greadlink"
-fi
-
-if [ "$CHECK_ONLY" == "1" ]; then
-  CheckAndLinkToSrcTree CudaError include/cuda.h
-  CheckAndLinkToSrcTree CudaError include/cublas.h
-  CheckAndLinkToSrcTree CudnnError include/cudnn.h
-  CheckAndLinkToSrcTree CudaError extras/CUPTI/include/cupti.h
-  CheckAndLinkToSrcTree CudaError $CUDA_RT_LIB_STATIC_PATH
-  CheckAndLinkToSrcTree CudaError $CUDA_BLAS_LIB_PATH
-  CheckAndLinkToSrcTree CudnnError $CUDA_DNN_LIB_PATH
-  CheckAndLinkToSrcTree CudaError $CUDA_RT_LIB_PATH
-  CheckAndLinkToSrcTree CudaError $CUDA_FFT_LIB_PATH
-  CheckAndLinkToSrcTree CudaError $CUDA_CUPTI_LIB_PATH
-  exit 0
-fi
-
-# Actually configure the source tree for TensorFlow's canonical view of Cuda
-# libraries.
-
-if test ! -e ${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH}; then
-  CudaError "cannot find ${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH}"
-fi
-
-if test ! -e ${CUDA_TOOLKIT_PATH}/${CUDA_CUPTI_LIB_PATH}; then
-  CudaError "cannot find ${CUDA_TOOLKIT_PATH}/${CUDA_CUPTI_LIB_PATH}"
-fi
-
-if test ! -d ${CUDNN_INSTALL_BASEDIR}; then
-  CudnnError "cannot find dir: ${CUDNN_INSTALL_BASEDIR}"
-fi
-
-# Locate cudnn.h
-if test -e ${CUDNN_INSTALL_BASEDIR}/cudnn.h; then
-  CUDNN_HEADER_DIR=${CUDNN_INSTALL_BASEDIR}
-elif test -e ${CUDNN_INSTALL_BASEDIR}/include/cudnn.h; then
-  CUDNN_HEADER_DIR=${CUDNN_INSTALL_BASEDIR}/include
-elif test -e /usr/include/cudnn.h; then
-  CUDNN_HEADER_DIR=/usr/include
-else
-  CudnnError "cannot find cudnn.h under: ${CUDNN_INSTALL_BASEDIR}"
-fi
-
-# Locate libcudnn
-if test -e ${CUDNN_INSTALL_BASEDIR}/${CUDA_DNN_LIB_PATH}; then
-  CUDNN_LIB_INSTALL_PATH=${CUDNN_INSTALL_BASEDIR}/${CUDA_DNN_LIB_PATH}
-elif test -e ${CUDNN_INSTALL_BASEDIR}/${CUDA_DNN_LIB_ALT_PATH}; then
-  CUDNN_LIB_INSTALL_PATH=${CUDNN_INSTALL_BASEDIR}/${CUDA_DNN_LIB_ALT_PATH}
-else
-  CudnnError "cannot find ${CUDA_DNN_LIB_PATH} or ${CUDA_DNN_LIB_ALT_PATH} under: ${CUDNN_INSTALL_BASEDIR}"
-fi
-
-# Helper function to build symbolic links for all files under a directory.
-function LinkOneDir {
-  SRC_PREFIX=$1
-  DST_PREFIX=$2
-  SRC_DIR=$3
-  DST_DIR=$(echo $SRC_DIR | sed "s,^$SRC_PREFIX,$DST_PREFIX,")
-  mkdir -p $DST_DIR
-  FILE_LIST=$(find -L $SRC_DIR -maxdepth 1 -type f)
-  if test "$FILE_LIST" != ""; then
-    ln -sf $FILE_LIST $DST_DIR/ || exit -1
-  fi
-}
-export -f LinkOneDir
-
-# Build links for all files under the directory, including subdirectoreis.
-function LinkAllFiles {
-  SRC_DIR=$1
-  DST_DIR=$2
-  find -L $SRC_DIR -type d | xargs -I {} bash -c "LinkOneDir $SRC_DIR $DST_DIR {}" || exit -1
-}
-
-# Set up the symbolic links for cuda toolkit. We link at individual file level,
-# not at the directory level.
-# This is because the external library may have different file layout from our desired structure.
-mkdir -p $OUTPUTDIR/third_party/gpus/cuda
-echo "Setting up Cuda include"
-LinkAllFiles ${CUDA_TOOLKIT_PATH}/include $OUTPUTDIR/third_party/gpus/cuda/include || exit -1
-echo "Setting up Cuda ${CUDA_LIB_PATH}"
-LinkAllFiles ${CUDA_TOOLKIT_PATH}/${CUDA_LIB_PATH} $OUTPUTDIR/third_party/gpus/cuda/${CUDA_LIB_PATH} || exit -1
-echo "Setting up Cuda bin"
-LinkAllFiles ${CUDA_TOOLKIT_PATH}/bin $OUTPUTDIR/third_party/gpus/cuda/bin || exit -1
-echo "Setting up Cuda nvvm"
-LinkAllFiles ${CUDA_TOOLKIT_PATH}/nvvm $OUTPUTDIR/third_party/gpus/cuda/nvvm || exit -1
-echo "Setting up CUPTI include"
-LinkAllFiles ${CUDA_TOOLKIT_PATH}/extras/CUPTI/include $OUTPUTDIR/third_party/gpus/cuda/extras/CUPTI/include || exit -1
-echo "Setting up CUPTI lib64"
-LinkAllFiles ${CUDA_TOOLKIT_PATH}/${CUDA_CUPTI_LIB_DIR} $OUTPUTDIR/third_party/gpus/cuda/${CUDA_CUPTI_LIB_DIR} || exit -1
-
-# Set up symbolic link for cudnn
-ln -sf $CUDNN_HEADER_DIR/cudnn.h $OUTPUTDIR/third_party/gpus/cuda/include/cudnn.h || exit -1
-ln -sf $CUDNN_LIB_INSTALL_PATH $OUTPUTDIR/third_party/gpus/cuda/$CUDA_DNN_LIB_PATH || exit -1
diff --git a/third_party/gpus/cuda/platform.bzl b/third_party/gpus/cuda/platform.bzl.tpl
similarity index 93%
rename from third_party/gpus/cuda/platform.bzl
rename to third_party/gpus/cuda/platform.bzl.tpl
index 06f3d0cff4f..7565dfc1294 100644
--- a/third_party/gpus/cuda/platform.bzl
+++ b/third_party/gpus/cuda/platform.bzl.tpl
@@ -1,6 +1,6 @@
-CUDA_VERSION = ""
-CUDNN_VERSION = ""
-PLATFORM = ""
+CUDA_VERSION = "%{cuda_version}"
+CUDNN_VERSION = "%{cudnn_version}"
+PLATFORM = "%{platform}"
 
 def cuda_sdk_version():
   return CUDA_VERSION
diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
new file mode 100644
index 00000000000..3682cb305de
--- /dev/null
+++ b/third_party/gpus/cuda_configure.bzl
@@ -0,0 +1,423 @@
+# -*- Python -*-
+"""Repository rule for CUDA autoconfiguration.
+
+`cuda_configure` depends on the following environment variables:
+
+  * `TF_NEED_CUDA`: Whether to enable building with CUDA (`1` enables it).
+  * `CC`: The GCC host compiler path
+  * `CUDA_TOOLKIT_PATH`: The path to the CUDA toolkit. Default is
+    `/usr/local/cuda`.
+  * `CUDA_VERSION`: The version of the CUDA toolkit. If this is blank, then
+    use the system default.
+  * `CUDNN_VERSION`: The version of the cuDNN library.
+  * `CUDNN_INSTALL_PATH`: The path to the cuDNN library. Default is
+    `/usr/local/cuda`.
+  * `CUDA_COMPUTE_CAPABILITIES`: The CUDA compute capabilities. Default is
+    `3.5,5.2`.
+"""
+
+
+_DEFAULT_CUDA_VERSION = ""
+_DEFAULT_CUDNN_VERSION = ""
+_DEFAULT_CUDA_TOOLKIT_PATH = "/usr/local/cuda"
+_DEFAULT_CUDNN_INSTALL_PATH = "/usr/local/cuda"
+_DEFAULT_CUDA_COMPUTE_CAPABILITIES = ["3.5", "5.2"]
+
+
+# TODO(dzc): Once these functions have been factored out of Bazel's
+# cc_configure.bzl, load them from @bazel_tools instead.
+# BEGIN cc_configure common functions.
+def find_cc(repository_ctx):
+  """Find the C++ compiler."""
+  cc_name = "gcc"
+  if "CC" in repository_ctx.os.environ:
+    cc_name = repository_ctx.os.environ["CC"].strip()
+    if not cc_name:
+      cc_name = "gcc"
+  if cc_name.startswith("/"):
+    # Absolute path; maybe our which function should support this directly.
+    return cc_name
+  cc = repository_ctx.which(cc_name)
+  if cc == None:
+    fail(
+        "Cannot find gcc, either correct your path or set the CC" +
+        " environment variable")
+  return cc
+
+
+_INC_DIR_MARKER_BEGIN = "#include <...>"
+
+
+# OSX adds " (framework directory)" at the end of the line; strip it.
+_OSX_FRAMEWORK_SUFFIX = " (framework directory)"
+_OSX_FRAMEWORK_SUFFIX_LEN =  len(_OSX_FRAMEWORK_SUFFIX)
+def _cxx_inc_convert(path):
+  """Convert a path returned by cc -E -xc++ into a complete path."""
+  path = path.strip()
+  if path.endswith(_OSX_FRAMEWORK_SUFFIX):
+    path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip()
+  return path
+
+
+def get_cxx_inc_directories(repository_ctx, cc):
+  """Compute the list of default C++ include directories."""
+  result = repository_ctx.execute([cc, "-E", "-xc++", "-", "-v"])
+  index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN)
+  if index1 == -1:
+    return []
+  index1 = result.stderr.find("\n", index1)
+  if index1 == -1:
+    return []
+  index2 = result.stderr.rfind("\n ")
+  if index2 == -1 or index2 < index1:
+    return []
+  index2 = result.stderr.find("\n", index2 + 1)
+  if index2 == -1:
+    inc_dirs = result.stderr[index1 + 1:]
+  else:
+    inc_dirs = result.stderr[index1 + 1:index2].strip()
+
+  return [repository_ctx.path(_cxx_inc_convert(p))
+          for p in inc_dirs.split("\n")]
+
+# END cc_configure common functions (see TODO above).
+
+
+def _enable_cuda(repository_ctx):
+  if "TF_NEED_CUDA" in repository_ctx.os.environ:
+    enable_cuda = repository_ctx.os.environ["TF_NEED_CUDA"].strip()
+    return enable_cuda == "1"
+  return False
+
+
+def _cuda_toolkit_path(repository_ctx):
+  """Finds the cuda toolkit directory."""
+  cuda_toolkit_path = _DEFAULT_CUDA_TOOLKIT_PATH
+  if "CUDA_TOOLKIT_PATH" in repository_ctx.os.environ:
+    cuda_toolkit_path = repository_ctx.os.environ["CUDA_TOOLKIT_PATH"].strip()
+  if not repository_ctx.path(cuda_toolkit_path).exists:
+    fail("Cannot find cuda toolkit path.")
+  return cuda_toolkit_path
+
+
+def _cudnn_install_basedir(repository_ctx):
+  """Finds the cudnn install directory."""
+  cudnn_install_path = _DEFAULT_CUDNN_INSTALL_PATH
+  if "CUDNN_INSTALL_PATH" in repository_ctx.os.environ:
+    cudnn_install_path = repository_ctx.os.environ["CUDNN_INSTALL_PATH"].strip()
+  if not repository_ctx.path(cudnn_install_path).exists:
+    fail("Cannot find cudnn install path.")
+  return cudnn_install_path
+
+
+def _cuda_version(repository_ctx):
+  """Returns the cuda version read from the CUDA_VERSION env variable."""
+  if "CUDA_VERSION" in repository_ctx.os.environ:
+    return repository_ctx.os.environ["CUDA_VERSION"].strip()
+  else:
+    return ""
+
+
+def _cudnn_version(repository_ctx):
+  """Returns the cudnn version read from the CUDNN_VERSION env variable."""
+  if "CUDNN_VERSION" in repository_ctx.os.environ:
+    return repository_ctx.os.environ["CUDNN_VERSION"].strip()
+  else:
+    return ""
+
+
+def _compute_capabilities(repository_ctx):
+  """Returns a list of strings representing cuda compute capabilities."""
+  if "CUDA_COMPUTE_CAPABILITIES" not in repository_ctx.os.environ:
+    return _DEFAULT_CUDA_COMPUTE_CAPABILITIES
+  capabilities_str = repository_ctx.os.environ["CUDA_COMPUTE_CAPABILITIES"]
+  capabilities = capabilities_str.split(",")
+  for capability in capabilities:
+    # Workaround for Skylark's lack of support for regex. This check should
+    # be equivalent to checking:
+    #     if re.match(r"^[0-9]+\.[0-9]+$", capability) == None:
+    parts = capability.split(".")
+    if len(parts) != 2 or not parts[0].isdigit() or not parts[1].isdigit():
+      fail("Invalid compute capability: %s" % capability)
+  return capabilities
+
+
+def _cpu_value(repository_ctx):
+  result = repository_ctx.execute(["uname", "-s"])
+  return result.stdout.strip()
+
+
+def _cuda_symlink_files(cpu_value, cuda_version, cudnn_version):
+  """Returns a struct containing platform-specific paths.
+
+  Args:
+    cpu_value: The string representing the host OS.
+    cuda_version: The cuda version as returned by _cuda_version
+    cudnn_version: The cudnn version as returned by _cudnn_version
+  """
+  cuda_ext = ".%s" % cuda_version if cuda_version else ""
+  cudnn_ext = ".%s" % cudnn_version if cudnn_version else ""
+  if cpu_value == "Linux":
+    return struct(
+        cuda_lib_path = "lib64",
+        cuda_rt_lib = "lib64/libcudart.so%s" % cuda_ext,
+        cuda_rt_lib_static = "lib64/libcudart_static.a",
+        cuda_blas_lib = "lib64/libcublas.so%s" % cuda_ext,
+        cuda_dnn_lib = "lib64/libcudnn.so%s" % cudnn_ext,
+        cuda_dnn_lib_alt = "libcudnn.so%s" % cudnn_ext,
+        cuda_rand_lib = "lib64/libcurand.so%s" % cuda_ext,
+        cuda_fft_lib = "lib64/libcufft.so%s" % cuda_ext,
+        cuda_cupti_lib = "extras/CUPTI/lib64/libcupti.so%s" % cuda_ext)
+  elif cpu_value == "Darwin":
+    return struct(
+        cuda_lib_path = "lib",
+        cuda_rt_lib = "lib/libcudart%s.dylib" % cuda_ext,
+        cuda_rt_lib_static = "lib/libcudart_static.a",
+        cuda_blas_lib = "lib/libcublas%s.dylib" % cuda_ext,
+        cuda_dnn_lib = "lib/libcudnn%s.dylib" % cudnn_ext,
+        cuda_dnn_lib_alt = "libcudnn%s.dylib" % cudnn_ext,
+        cuda_rand_lib = "lib/libcurand%s.dylib" % cuda_ext,
+        cuda_fft_lib = "lib/libcufft%s.dylib" % cuda_ext,
+        cuda_cupti_lib = "extras/CUPTI/lib/libcupti%s.dylib" % cuda_ext)
+  else:
+    fail("Not supported CPU value %s" % cpu_value)
+
+
+def _check_lib(repository_ctx, cuda_toolkit_path, cuda_lib):
+  """Checks if cuda_lib exists under cuda_toolkit_path; fails if it doesn't.
+
+  Args:
+    repository_ctx: The repository context.
+    cuda_toolkit_path: The cuda toolkit directory containing the cuda libraries.
+    cuda_lib: The library to look for under cuda_toolkit_path.
+  """
+  lib_path = cuda_toolkit_path + "/" + cuda_lib
+  if not repository_ctx.path(lib_path).exists:
+    fail("Cannot find %s" % lib_path)
+
+
+def _check_dir(repository_ctx, directory):
+  """Checks whether the directory exists and fails if it does not.
+
+  Args:
+    repository_ctx: The repository context.
+    directory: The directory to check the existence of.
+  """
+  if not repository_ctx.path(directory).exists:
+    fail("Cannot find dir: %s" % directory)
+
+
+def _find_cudnn_header_dir(repository_ctx, cudnn_install_basedir):
+  """Returns the path to the directory containing cudnn.h
+
+  Args:
+    repository_ctx: The repository context.
+    cudnn_install_basedir: The cudnn install directory as returned by
+      _cudnn_install_basedir.
+
+  Returns:
+    The path of the directory containing the cudnn header.
+  """
+  if repository_ctx.path(cudnn_install_basedir + "/cudnn.h").exists:
+    return cudnn_install_basedir
+  if repository_ctx.path(cudnn_install_basedir + "/include/cudnn.h").exists:
+    return cudnn_install_basedir + "/include"
+  if repository_ctx.path("/usr/include/cudnn.h").exists:
+    return "/usr/include"
+  fail("Cannot find cudnn.h under %s" % cudnn_install_basedir)
+
+
+def _find_cudnn_lib_path(repository_ctx, cudnn_install_basedir, symlink_files):
+  """Returns the path to the libcudnn library file.
+
+  Args:
+    repository_ctx: The repository context.
+    cudnn_install_basedir: The cudnn install dir as returned by
+      _cudnn_install_basedir.
+    symlink_files: The symlink files as returned by _cuda_symlink_files.
+
+  Returns:
+    The path to the cudnn library file that was found.
+  """
+  lib_dir = cudnn_install_basedir + "/" + symlink_files.cuda_dnn_lib
+  if repository_ctx.path(lib_dir).exists:
+    return lib_dir
+  alt_lib_dir = cudnn_install_basedir + "/" + symlink_files.cuda_dnn_lib_alt
+  if repository_ctx.path(alt_lib_dir).exists:
+    return alt_lib_dir
+
+  fail("Cannot find %s or %s under %s" %
+       (symlink_files.cuda_dnn_lib, symlink_files.cuda_dnn_lib_alt,
+        cudnn_install_basedir))
+
+
+def _tpl(repository_ctx, tpl, substitutions={}, out=None):
+  if not out:
+    out = tpl.replace(":", "/")
+  repository_ctx.template(
+      out,
+      Label("//third_party/gpus/%s.tpl" % tpl),
+      substitutions)
+
+
+def _file(repository_ctx, label):
+  repository_ctx.template(
+      label.replace(":", "/"),
+      Label("//third_party/gpus/%s.tpl" % label),
+      {})
+
+
+def _create_dummy_repository(repository_ctx):
+  cpu_value = _cpu_value(repository_ctx)
+  symlink_files = _cuda_symlink_files(cpu_value, _DEFAULT_CUDA_VERSION,
+                                      _DEFAULT_CUDNN_VERSION)
+
+  # Set up BUILD file for cuda/.
+  _file(repository_ctx, "cuda:BUILD")
+  _file(repository_ctx, "cuda:build_defs.bzl")
+  _tpl(repository_ctx, "cuda:platform.bzl",
+       {
+           "%{cuda_version}": _DEFAULT_CUDA_VERSION,
+           "%{cudnn_version}": _DEFAULT_CUDNN_VERSION,
+           "%{platform}": cpu_value,
+       })
+
+  # Create dummy files for the CUDA toolkit since they are still required by
+  # tensorflow/core/platform/default/build_config:cuda.
+  repository_ctx.file("cuda/include/cuda.h", "")
+  repository_ctx.file("cuda/include/cublas.h", "")
+  repository_ctx.file("cuda/include/cudnn.h", "")
+  repository_ctx.file("cuda/extras/CUPTI/include/cupti.h", "")
+  repository_ctx.file("cuda/%s" % symlink_files.cuda_rt_lib, "")
+  repository_ctx.file("cuda/%s" % symlink_files.cuda_rt_lib_static, "")
+  repository_ctx.file("cuda/%s" % symlink_files.cuda_blas_lib, "")
+  repository_ctx.file("cuda/%s" % symlink_files.cuda_dnn_lib, "")
+  repository_ctx.file("cuda/%s" % symlink_files.cuda_rand_lib, "")
+  repository_ctx.file("cuda/%s" % symlink_files.cuda_fft_lib, "")
+  repository_ctx.file("cuda/%s" % symlink_files.cuda_cupti_lib, "")
+
+  # Set up cuda_config.h, which is used by
+  # tensorflow/stream_executor/dso_loader.cc.
+  _tpl(repository_ctx, "cuda:cuda_config.h",
+       {
+           "%{cuda_version}": _DEFAULT_CUDA_VERSION,
+           "%{cudnn_version}": _DEFAULT_CUDNN_VERSION,
+           "%{cuda_compute_capabilities}": ",".join([
+               "CudaVersion(\"%s\")" % c
+               for c in _DEFAULT_CUDA_COMPUTE_CAPABILITIES]),
+       })
+
+
+def _symlink_dir(repository_ctx, src_dir, dest_dir):
+  """Symlinks all the files in a directory.
+
+  Args:
+    repository_ctx: The repository context.
+    src_dir: The source directory.
+    dest_dir: The destination directory to create the symlinks in.
+  """
+  files = repository_ctx.path(src_dir).readdir()
+  for src_file in files:
+    repository_ctx.symlink(src_file, dest_dir + "/" + src_file.basename)
+
+
+def _create_cuda_repository(repository_ctx):
+  """Creates the repository containing files set up to build with CUDA."""
+  cuda_toolkit_path = _cuda_toolkit_path(repository_ctx)
+  cuda_version = _cuda_version(repository_ctx)
+  cudnn_install_basedir = _cudnn_install_basedir(repository_ctx)
+  cudnn_version = _cudnn_version(repository_ctx)
+  compute_capabilities = _compute_capabilities(repository_ctx)
+
+  cpu_value = _cpu_value(repository_ctx)
+  symlink_files = _cuda_symlink_files(cpu_value, cuda_version, cudnn_version)
+  _check_lib(repository_ctx, cuda_toolkit_path, symlink_files.cuda_rt_lib)
+  _check_lib(repository_ctx, cuda_toolkit_path, symlink_files.cuda_cupti_lib)
+  _check_dir(repository_ctx, cudnn_install_basedir)
+
+  cudnn_header_dir = _find_cudnn_header_dir(repository_ctx,
+                                            cudnn_install_basedir)
+  cudnn_lib_path = _find_cudnn_lib_path(repository_ctx, cudnn_install_basedir,
+                                        symlink_files)
+
+  # Set up symbolic links for the cuda toolkit. We link at the individual file
+  # level not at the directory level. This is because the external library may
+  # have a different file layout from our desired structure.
+  _symlink_dir(repository_ctx, cuda_toolkit_path + "/include", "cuda/include")
+  _symlink_dir(repository_ctx,
+               cuda_toolkit_path + "/" + symlink_files.cuda_lib_path,
+               "cuda/" + symlink_files.cuda_lib_path)
+  _symlink_dir(repository_ctx, cuda_toolkit_path + "/bin", "cuda/bin")
+  _symlink_dir(repository_ctx, cuda_toolkit_path + "/nvvm", "cuda/nvvm")
+  _symlink_dir(repository_ctx, cuda_toolkit_path + "/extras/CUPTI/include",
+               "cuda/extras/CUPTI/include")
+  repository_ctx.symlink(cuda_toolkit_path + "/" + symlink_files.cuda_cupti_lib,
+                         "cuda/" + symlink_files.cuda_cupti_lib)
+
+  # Set up the symbolic links for cudnn if cudnn was not installed to
+  # CUDA_TOOLKIT_PATH.
+  if not repository_ctx.path("cuda/include/cudnn.h").exists:
+    repository_ctx.symlink(cudnn_header_dir + "/cudnn.h",
+                           "cuda/include/cudnn.h")
+  if not repository_ctx.path("cuda/" + symlink_files.cuda_dnn_lib).exists:
+    repository_ctx.symlink(cudnn_lib_path, "cuda/" + symlink_files.cuda_dnn_lib)
+
+  # Set up BUILD file for cuda/
+  _file(repository_ctx, "cuda:BUILD")
+  _file(repository_ctx, "cuda:build_defs.bzl")
+  _tpl(repository_ctx, "cuda:platform.bzl",
+       {
+           "%{cuda_version}": cuda_version,
+           "%{cudnn_version}": cudnn_version,
+           "%{platform}": cpu_value,
+       })
+
+  # Set up crosstool/
+  _file(repository_ctx, "crosstool:BUILD")
+  _tpl(repository_ctx, "crosstool:CROSSTOOL",
+       {
+           "%{cuda_version}": ("-%s" % cuda_version) if cuda_version else "",
+       })
+  _tpl(repository_ctx,
+       "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
+       {
+           "%{cpu_compiler}": str(find_cc(repository_ctx)),
+           "%{gcc_host_compiler_path}": str(find_cc(repository_ctx)),
+           "%{cuda_compute_capabilities}": ", ".join(
+               ["\"%s\"" % c for c in compute_capabilities]),
+       })
+
+  # Set up cuda_config.h, which is used by
+  # tensorflow/stream_executor/dso_loader.cc.
+  _tpl(repository_ctx, "cuda:cuda_config.h",
+       {
+           "%{cuda_version}": cuda_version,
+           "%{cudnn_version}": cudnn_version,
+           "%{cuda_compute_capabilities}": ",".join(
+               ["CudaVersion(\"%s\")" % c for c in compute_capabilities]),
+       })
+
+
+def _cuda_autoconf_impl(repository_ctx):
+  """Implementation of the cuda_configure repository rule."""
+  if not _enable_cuda(repository_ctx):
+    _create_dummy_repository(repository_ctx)
+  else:
+    _create_cuda_repository(repository_ctx)
+
+
+cuda_configure = repository_rule(
+    implementation = _cuda_autoconf_impl,
+    local = True,
+)
+"""Detects and configures the local CUDA toolchain.
+
+Add the following to your WORKSPACE file:
+
+```python
+cuda_configure(name = "local_config_cuda")
+```
+
+Args:
+  name: A unique name for this workspace rule.
+"""
diff --git a/tools/bazel.rc.template b/tools/bazel.rc.template
index 02856822c95..9a69cac1f65 100644
--- a/tools/bazel.rc.template
+++ b/tools/bazel.rc.template
@@ -1,4 +1,4 @@
-build:cuda --crosstool_top=//third_party/gpus/crosstool
+build:cuda --crosstool_top=@local_config_cuda//crosstool
 build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true
 
 build --force_python=py$PYTHON_MAJOR_VERSION