Merge pull request #20277 from ROCmSoftwarePlatform:upstream-staging

PiperOrigin-RevId: 214793113
2018-09-27 10:22:55 -07:00 · 2018-09-27 10:22:55 -07:00 · 62e60166de
commit 62e60166de
parent cd1bdeafec 69d3b8faf4
32 changed files with 1731 additions and 18 deletions
--- a/configure.py
+++ b/configure.py
@ -1540,6 +1540,13 @@ def main():
    else:
      set_trisycl_include_dir(environ_cp)

+  set_action_env_var(environ_cp, 'TF_NEED_ROCM', 'ROCm', False)
+  if (environ_cp.get('TF_NEED_ROCM') == '1' and
+      'LD_LIBRARY_PATH' in environ_cp and
+      environ_cp.get('LD_LIBRARY_PATH') != '1'):
+    write_action_env_to_bazelrc('LD_LIBRARY_PATH',
+                                environ_cp.get('LD_LIBRARY_PATH'))
+
  set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False)
  if (environ_cp.get('TF_NEED_CUDA') == '1' and
      'TF_CUDA_CONFIG_REPO' not in environ_cp):
@ -1580,6 +1587,19 @@ def main():
      write_to_bazelrc('build --config=download_clang')
      write_to_bazelrc('test --config=download_clang')

+  # SYCL / ROCm / CUDA are mutually exclusive.
+  # At most 1 GPU platform can be configured.
+  gpu_platform_count = 0
+  if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1':
+    gpu_platform_count += 1
+  if environ_cp.get('TF_NEED_ROCM') == '1':
+    gpu_platform_count += 1
+  if environ_cp.get('TF_NEED_CUDA') == '1':
+    gpu_platform_count += 1
+  if gpu_platform_count >= 2:
+    raise UserInputError('SYCL / CUDA / ROCm are mututally exclusive. '
+                         'At most 1 GPU platform can be configured.')
+
  set_build_var(environ_cp, 'TF_NEED_MPI', 'MPI', 'with_mpi_support', False)
  if environ_cp.get('TF_NEED_MPI') == '1':
    set_mpi_home(environ_cp)
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@ -149,6 +149,7 @@ load(
    "tf_cuda_tests_tags",
 )
 load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
+load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured")
 load("@io_bazel_rules_closure//closure:defs.bzl", "closure_proto_library")
 load(
    "//third_party/mkl:build_defs.bzl",
@ -3006,7 +3007,7 @@ tf_cuda_library(
        "platform/device_tracer.h",
    ],
    copts = tf_copts(),
-    cuda_deps = tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps(),
+    cuda_deps = if_cuda_is_configured(tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps()),
    visibility = ["//visibility:private"],
    deps = [
        ":core_cpu_internal",
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@ -19,6 +19,14 @@ load(
    "@local_config_cuda//cuda:build_defs.bzl",
    "cuda_default_copts",
    "if_cuda",
+    "if_cuda_is_configured",
+)
+load(
+    "@local_config_rocm//rocm:build_defs.bzl",
+    "if_rocm",
+    "if_rocm_is_configured",
+    "rocm_copts",
+    "rocm_default_copts",
 )
 load(
    "//third_party/mkl:build_defs.bzl",
@ -39,6 +47,8 @@ load(
 def register_extension_info(**kwargs):
    pass

+# if_cuda_is_configured def placeholder
+
 # Given a source file, generate a test name.
 # i.e. "common_runtime/direct_session_test.cc" becomes
 #      "common_runtime_direct_session_test"
@ -863,12 +873,16 @@ def tf_cuda_only_cc_test(
        srcs = srcs + tf_binary_additional_srcs(),
        size = size,
        args = args,
-        copts = _cuda_copts() + tf_copts(),
+        copts = _cuda_copts() + rocm_copts() + tf_copts(),
        data = data + tf_binary_dynamic_kernel_dsos(kernels),
-        deps = deps + tf_binary_dynamic_kernel_deps(kernels) + if_cuda([
-            clean_dep("//tensorflow/core:cuda"),
-            clean_dep("//tensorflow/core:gpu_lib"),
-        ]),
+        deps = deps + tf_binary_dynamic_kernel_deps(kernels) +
+               if_cuda_is_configured([
+                   clean_dep("//tensorflow/core:cuda"),
+                   clean_dep("//tensorflow/core:gpu_lib"),
+               ]) +
+               if_rocm_is_configured([
+                   clean_dep("//tensorflow/core:gpu_lib"),
+               ]),
        linkopts = if_not_windows(["-lpthread", "-lm"]) + linkopts + _rpath_linkopts(name),
        linkstatic = linkstatic or select({
            # cc_tests with ".so"s in srcs incorrectly link on Darwin
@ -1003,7 +1017,7 @@ register_extension_info(
    label_regex_for_dep = "{extension_name}",
 )

-def _cuda_copts():
+def _cuda_copts(opts = []):
    """Gets the appropriate set of copts for (maybe) CUDA compilation.

      If we're doing CUDA compilation, returns copts for our particular CUDA
@ -1019,13 +1033,17 @@ def _cuda_copts():
        "@local_config_cuda//cuda:using_clang": ([
            "-fcuda-flush-denormals-to-zero",
        ]),
-    })
+    }) + if_cuda_is_configured(opts)

 # Build defs for TensorFlow kernels

 # When this target is built using --config=cuda, a cc_library is built
 # that passes -DGOOGLE_CUDA=1 and '-x cuda', linking in additional
 # libraries needed by GPU kernels.
+#
+# When this target is built using --config=rocm, a cc_library is built
+# that passes -DTENSORFLOW_USE_ROCM and '-x rocm', linking in additional
+# libraries needed by GPU kernels.
 def tf_gpu_kernel_library(
        srcs,
        copts = [],
@ -1033,16 +1051,18 @@ def tf_gpu_kernel_library(
        deps = [],
        hdrs = [],
        **kwargs):
-    copts = copts + _cuda_copts() + if_cuda(cuda_copts) + tf_copts()
+    copts = copts + tf_copts() + _cuda_copts(opts = cuda_copts) + rocm_copts(opts = cuda_copts)
    kwargs["features"] = kwargs.get("features", []) + ["-use_header_modules"]

    native.cc_library(
        srcs = srcs,
        hdrs = hdrs,
        copts = copts,
-        deps = deps + if_cuda([
+        deps = deps + if_cuda_is_configured([
            clean_dep("//tensorflow/core:cuda"),
            clean_dep("//tensorflow/core:gpu_lib"),
+        ]) + if_rocm_is_configured([
+            clean_dep("//tensorflow/core:gpu_lib"),
        ]),
        alwayslink = 1,
        **kwargs
@ -1081,8 +1101,10 @@ def tf_cuda_library(deps = None, cuda_deps = None, copts = tf_copts(), **kwargs)
        deps = deps + if_cuda(cuda_deps + [
            clean_dep("//tensorflow/core:cuda"),
            "@local_config_cuda//cuda:cuda_headers",
+        ]) + if_rocm_is_configured(cuda_deps + [
+            # rocm_header placeholder
        ]),
-        copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_mkl(["-DINTEL_MKL=1"]) +
+        copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_rocm(["-DTENSORFLOW_USE_ROCM=1"]) + if_mkl(["-DINTEL_MKL=1"]) +
                 if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) +
                 if_enable_mkl(["-DENABLE_MKL"]) +
                 if_tensorrt(["-DGOOGLE_TENSORRT=1"])),
@ -1465,6 +1487,9 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
        "@local_config_cuda//cuda:cuda_headers",
        "@local_config_cuda//cuda:cudart_static",
    ]
+    rocm_deps = [
+        clean_dep("//tensorflow/core:stream_executor_headers_lib"),
+    ]
    deps = deps + tf_custom_op_library_additional_deps()
    if gpu_srcs:
        basename = name.split(".")[0]
@ -1473,13 +1498,14 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
            srcs = gpu_srcs,
            copts = _cuda_copts() + if_tensorrt(["-DGOOGLE_TENSORRT=1"]),
            features = if_cuda(["-use_header_modules"]),
-            deps = deps + if_cuda(cuda_deps),
+            deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps),
        )
        cuda_deps.extend([":" + basename + "_gpu"])
+        rocm_deps.extend([":" + basename + "_gpu"])

    check_deps(
        name = name + "_check_deps",
-        deps = deps + if_cuda(cuda_deps),
+        deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps),
        disallowed_deps = [
            clean_dep("//tensorflow/core:framework"),
            clean_dep("//tensorflow/core:lib"),
@ -1488,7 +1514,7 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
    tf_cc_shared_object(
        name = name,
        srcs = srcs,
-        deps = deps + if_cuda(cuda_deps),
+        deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps),
        data = if_static([name + "_check_deps"]),
        copts = tf_copts(is_external = True),
        features = ["windows_export_all_symbols"],
--- a/tensorflow/tools/ci_build/Dockerfile.rocm
+++ b/tensorflow/tools/ci_build/Dockerfile.rocm
@ -0,0 +1,97 @@
+# This Dockerfile provides a starting point for a ROCm installation of 
+# MIOpen and tensorflow.  
+FROM ubuntu:xenial
+MAINTAINER Jeff Poznanovic <jeffrey.poznanovic@amd.com>
+
+ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/debian/
+ARG ROCM_PATH=/opt/rocm
+
+ENV DEBIAN_FRONTEND noninteractive
+ENV TF_NEED_ROCM 1
+ENV HOME /root/
+RUN apt update && apt install -y wget software-properties-common 
+
+# Add rocm repository
+RUN apt-get clean all
+RUN wget -qO - $DEB_ROCM_REPO/rocm.gpg.key | apt-key add -
+RUN sh -c  "echo deb [arch=amd64] $DEB_ROCM_REPO xenial main > /etc/apt/sources.list.d/rocm.list"
+
+# Install misc pkgs
+RUN apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+  build-essential \
+  clang-3.8 \
+  clang-format-3.8 \
+  clang-tidy-3.8 \
+  cmake \
+  cmake-qt-gui \
+  ssh \
+  curl \
+  apt-utils \
+  pkg-config \
+  g++-multilib \
+  git \
+  libunwind-dev \
+  libfftw3-dev \
+  libelf-dev \
+  libncurses5-dev \
+  libpthread-stubs0-dev \
+  vim \
+  gfortran \
+  libboost-program-options-dev \
+  libssl-dev \
+  libboost-dev \
+  libboost-system-dev \
+  libboost-filesystem-dev \
+  rpm \
+  libnuma-dev \
+  virtualenv \
+  python-pip \
+  python3-pip \
+  wget && \
+  apt-get clean && \
+  rm -rf /var/lib/apt/lists/*
+
+# Install rocm pkgs
+RUN apt-get update --allow-insecure-repositories && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
+    rocm-dev rocm-libs rocm-utils \
+    rocfft miopen-hip miopengemm rocblas hipblas rocrand \
+    rocm-profiler cxlactivitylogger && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN cd ~ && git clone https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP.git
+RUN cd ~/HIP && mkdir -p build && cd build && cmake .. && make package -j && dpkg -i *.deb
+
+ENV HCC_HOME=$ROCM_PATH/hcc
+ENV HIP_PATH=$ROCM_PATH/hip
+ENV OPENCL_ROOT=$ROCM_PATH/opencl
+ENV PATH="$HCC_HOME/bin:$HIP_PATH/bin:${PATH}"
+ENV PATH="$ROCM_PATH/bin:${PATH}"
+ENV PATH="$OPENCL_ROOT/bin:${PATH}"
+
+# Add target file to help determine which device(s) to build for
+RUN echo -e "gfx803\ngfx900" >> /opt/rocm/bin/target.lst
+
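+# Set up the HCC/HIP build-time variables and put their bin directories on PATH.
+# NOTE(review): this comment originally said the variables are also appended to ~/.bashrc, but no step here does that — confirm.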
+ARG HCC_HOME=/opt/rocm/hcc
+ARG HIP_PATH=/opt/rocm/hip
+ARG PATH=$HCC_HOME/bin:$HIP_PATH/bin:$PATH
+
+# Copy and run the install scripts.
+COPY install/*.sh /install/
+ARG DEBIAN_FRONTEND=noninteractive
+RUN /install/install_bootstrap_deb_packages.sh
+RUN add-apt-repository -y ppa:openjdk-r/ppa && \
+    add-apt-repository -y ppa:george-edison55/cmake-3.x
+RUN /install/install_deb_packages.sh
+RUN /install/install_pip_packages.sh
+RUN /install/install_bazel.sh
+RUN /install/install_golang.sh
+
+# Set up the master bazelrc configuration file.
+COPY install/.bazelrc /etc/bazel.bazelrc
+
+# Configure the build for our ROCm configuration.
+ENV TF_NEED_ROCM 1
+
--- a/tensorflow/tools/ci_build/builds/docker_test.sh
+++ b/tensorflow/tools/ci_build/builds/docker_test.sh
@ -19,7 +19,7 @@
 #
 # Usage: docker_test.sh <IMAGE_TYPE> <TAG> <WHL_PATH>
 # Arguments:
-#   IMAGE_TYPE : Type of the image: (CPU|GPU)
+#   IMAGE_TYPE : Type of the image: (CPU|GPU|ROCM)
 #   TAG        : Docker image tag
 #   WHL_PATH   : Path to the whl file to be installed inside the docker image
 #
@ -60,6 +60,8 @@ if [[ "${IMAGE_TYPE}" == "cpu" ]]; then
  DOCKERFILE="tensorflow/tools/docker/Dockerfile"
 elif [[ "${IMAGE_TYPE}" == "gpu" ]]; then
  DOCKERFILE="tensorflow/tools/docker/Dockerfile.gpu"
+elif [[ "${IMAGE_TYPE}" == "rocm" ]]; then
+  DOCKERFILE="tensorflow/tools/docker/Dockerfile.rocm"
 else
  die "Unrecognized image type: $1"
 fi
@ -106,13 +108,16 @@ if [ "${IMAGE_TYPE}" == "gpu" ]; then
  devices=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
  libs=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}')
  GPU_EXTRA_PARAMS="${devices} ${libs}"
+elif [ "${IMAGE_TYPE}" == "rocm" ]; then
+  ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video"
 else
  GPU_EXTRA_PARAMS=""
+  ROCM_EXTRA_PARAMS=""
 fi

 # Run docker image with source directory mapped
 docker run -v ${BASE_DIR}:/tensorflow-src -w /tensorflow-src \
-${GPU_EXTRA_PARAMS} \
+${GPU_EXTRA_PARAMS} ${ROCM_EXTRA_PARAMS} \
 "${DOCKER_IMG_TAG}" \
 /bin/bash -c "tensorflow/tools/ci_build/builds/run_pip_tests.sh && "\
 "tensorflow/tools/ci_build/builds/test_tutorials.sh && "\
--- a/tensorflow/tools/ci_build/builds/pip.sh
+++ b/tensorflow/tools/ci_build/builds/pip.sh
@ -132,6 +132,7 @@ echo "Using Bazel flags: ${BAZEL_FLAGS}"
 PIP_BUILD_TARGET="//tensorflow/tools/pip_package:build_pip_package"
 GPU_FLAG=""
 if [[ ${CONTAINER_TYPE} == "cpu" ]] || \
+   [[ ${CONTAINER_TYPE} == "rocm" ]] || \
   [[ ${CONTAINER_TYPE} == "debian.jessie.cpu" ]]; then
  bazel build ${BAZEL_FLAGS} ${PIP_BUILD_TARGET} || \
      die "Build failed."
@ -255,7 +256,8 @@ if [[ $(uname) == "Linux" ]]; then
      die "ERROR: Cannot find repaired wheel."
    fi
  # Copy and rename for gpu manylinux as we do not want auditwheel to package in libcudart.so
-  elif [[ ${CONTAINER_TYPE} == "gpu" ]]; then
+  elif [[ ${CONTAINER_TYPE} == "gpu" ]] || \
+       [[ ${CONTAINER_TYPE} == "rocm" ]]; then
    WHL_PATH=${AUDITED_WHL_NAME}
    cp ${WHL_DIR}/${WHL_BASE_NAME} ${WHL_PATH}
    echo "Copied manylinx1 wheel file at ${WHL_PATH}"
--- a/tensorflow/tools/ci_build/builds/with_the_same_user
+++ b/tensorflow/tools/ci_build/builds/with_the_same_user
@ -48,6 +48,12 @@ getent passwd "${CI_BUILD_UID}" || adduser ${ADDUSER_OPTS} \
 usermod -a -G sudo "${CI_BUILD_USER}"
 echo "${CI_BUILD_USER} ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-nopasswd-sudo

+if [[ "${TF_NEED_ROCM}" -eq 1 ]]; then
+  # ROCm requires the video group in order to use the GPU for compute. If it
+  # exists on the host, add it to the container.
+  getent group video || addgroup video && adduser "${CI_BUILD_USER}" video
+fi
+
 if [ -e /root/.bazelrc ]; then
  cp /root/.bazelrc "${CI_BUILD_HOME}/.bazelrc"
  chown "${CI_BUILD_UID}:${CI_BUILD_GID}" "${CI_BUILD_HOME}/.bazelrc"
--- a/tensorflow/tools/ci_build/ci_build.sh
+++ b/tensorflow/tools/ci_build/ci_build.sh
@ -18,7 +18,7 @@
 #                    <COMMAND>
 #
 # CONTAINER_TYPE: Type of the docker container used the run the build:
-#                 e.g., (cpu | gpu | android | tensorboard)
+#                 e.g., (cpu | gpu | rocm | android | tensorboard)
 #
 # DOCKERFILE_PATH: (Optional) Path to the Dockerfile used for docker build.
 #                  If this optional value is not supplied (via the
@ -103,6 +103,14 @@ if [[ "${CONTAINER_TYPE}" != gpu* ]]; then
  GPU_EXTRA_PARAMS=""
 fi

+# Add extra params exposing the ROCm devices (and the video group) to the ROCm container.
+if [[ "${CONTAINER_TYPE}" == "rocm" ]]; then
+  ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video"
+else
+  ROCM_EXTRA_PARAMS=""
+fi
+
+
 # Determine the docker image name
 DOCKER_IMG_NAME="${BUILD_TAG}.${CONTAINER_TYPE}"

@ -159,6 +167,7 @@ ${DOCKER_BINARY} run --rm --pid=host \
    -v ${WORKSPACE}:/workspace \
    -w /workspace \
    ${GPU_EXTRA_PARAMS} \
+    ${ROCM_EXTRA_PARAMS} \
    ${CI_DOCKER_EXTRA_PARAMS[@]} \
    "${DOCKER_IMG_NAME}" \
    ${CI_COMMAND_PREFIX[@]} \
--- a/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh
+++ b/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh
@ -26,6 +26,7 @@ echo ""

 # Run configure.
 export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
 export CC_OPT_FLAGS='-mavx'
 # Only running cc tests, python version does not matter.
 export PYTHON_BIN_PATH=`which python`
--- a/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh
+++ b/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh
@ -26,6 +26,7 @@ echo ""

 # Run configure.
 export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
 export CC_OPT_FLAGS='-mavx'
 export PYTHON_BIN_PATH=`which python2`
 yes "" | $PYTHON_BIN_PATH configure.py
--- a/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh
+++ b/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh
@ -26,6 +26,7 @@ echo ""

 # Run configure.
 export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
 export CC_OPT_FLAGS='-mavx'
 export PYTHON_BIN_PATH=`which python3`
 yes "" | $PYTHON_BIN_PATH configure.py
--- a/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh
+++ b/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh
@ -26,6 +26,7 @@ echo ""

 # Run configure.
 export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
 export CC_OPT_FLAGS='-mavx'
 export PYTHON_BIN_PATH=`which python3`
 yes "" | $PYTHON_BIN_PATH configure.py
--- a/tensorflow/tools/ci_build/linux/libtensorflow.sh
+++ b/tensorflow/tools/ci_build/linux/libtensorflow.sh
@ -27,5 +27,8 @@ SUFFIX="-cpu-linux-"
 if [ "${TF_NEED_CUDA}" == "1" ]; then
  SUFFIX="-gpu-linux-"
 fi
+if [ "${TF_NEED_ROCM}" == "1" ]; then
+  SUFFIX="-rocm-linux-"
+fi

 build_libtensorflow_tarball "${SUFFIX}$(uname -m)"
--- a/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh
+++ b/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh
@ -19,4 +19,5 @@
 set -ex
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
 "${SCRIPT_DIR}/libtensorflow_docker.sh"
--- a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
+++ b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
@ -38,6 +38,11 @@ if [ "${TF_NEED_CUDA}" == "1" ]; then
  DOCKER_BINARY="nvidia-docker"
  DOCKER_FILE="Dockerfile.gpu"
 fi
+if [ "${TF_NEED_ROCM}" == "1" ]; then
+  DOCKER_IMAGE="tf-tensorflow-rocm"
+  DOCKER_BINARY="docker"
+  DOCKER_FILE="Dockerfile.rocm"
+fi

 docker build \
  -t "${DOCKER_IMAGE}" \
@ -53,6 +58,7 @@ ${DOCKER_BINARY} run \
  -e "TF_NEED_HDFS=0" \
  -e "TF_NEED_CUDA=${TF_NEED_CUDA}" \
  -e "TF_NEED_TENSORRT=${TF_NEED_CUDA}" \
+  -e "TF_NEED_ROCM=${TF_NEED_ROCM}" \
  -e "TF_NEED_OPENCL_SYCL=0" \
  "${DOCKER_IMAGE}" \
  "/workspace/tensorflow/tools/ci_build/linux/libtensorflow.sh"
--- a/tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh
+++ b/tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh
@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Script to build a binary release of libtensorflow with ROCm GPU support.
+
+set -ex
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+export TF_NEED_ROCM=1
+"${SCRIPT_DIR}/libtensorflow_docker.sh"
--- a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
+++ b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ==============================================================================
+
+set -e
+set -x
+
+N_JOBS=$(grep -c ^processor /proc/cpuinfo)
+
+echo ""
+echo "Bazel will use ${N_JOBS} concurrent job(s)."
+echo ""
+
+# Run configure.
+export PYTHON_BIN_PATH=`which python3`
+export CC_OPT_FLAGS='-mavx'
+
+export TF_NEED_ROCM=1
+
+yes "" | $PYTHON_BIN_PATH configure.py
+
+# Run bazel test command. Double test timeouts to avoid flakes.
+bazel test --config=rocm --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-benchmark-test -k \
+    --test_lang_filters=cc --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \
+    --build_tests_only --test_output=errors --local_test_jobs=1 --config=opt \
+    //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/...
--- a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
+++ b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ==============================================================================
+
+set -e
+set -x
+
+N_JOBS=$(grep -c ^processor /proc/cpuinfo)
+
+echo ""
+echo "Bazel will use ${N_JOBS} concurrent job(s)."
+echo ""
+
+# Run configure.
+export PYTHON_BIN_PATH=`which python3`
+export CC_OPT_FLAGS='-mavx'
+
+export TF_NEED_ROCM=1
+
+yes "" | $PYTHON_BIN_PATH configure.py
+
+# Run bazel test command. Double test timeouts to avoid flakes.
+bazel test --config=rocm --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-benchmark-test -k \
+    --test_lang_filters=py --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \
+    --build_tests_only --test_output=errors --local_test_jobs=1 --config=opt \
+    //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/...
--- a/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh
+++ b/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh
@ -27,6 +27,7 @@ echo ""

 # Run configure.
 export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
 export CC_OPT_FLAGS='-mavx'
 export PYTHON_BIN_PATH=$(which python2)
 yes "" | $PYTHON_BIN_PATH configure.py
--- a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
+++ b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
@ -26,6 +26,7 @@ source "${SCRIPT_DIR}/../builds/libtensorflow.sh"
 export PYTHON_BIN_PATH="/usr/bin/python"
 export TF_NEED_HDFS=0
 export TF_NEED_CUDA=0
+export TF_NEED_ROCM=0
 export TF_NEED_OPENCL_SYCL=0
 export TF_NEED_MKL=0
 export COMPUTECPP_PATH="/usr/local"
--- a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
+++ b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
@ -27,6 +27,7 @@ export TF_NEED_CUDA=1
 export LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/extras/CUPTI/lib:${LD_LIBRARY_PATH}"
 export PYTHON_BIN_PATH="/usr/bin/python"
 export TF_NEED_HDFS=0
+export TF_NEED_ROCM=0
 export TF_NEED_OPENCL_SYCL=0
 export TF_NEED_MKL=0
 export COMPUTECPP_PATH="/usr/local"
--- a/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh
+++ b/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh
@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Script to produce binary release of libtensorflow (C API, Java jars etc.).
+
+set -ex
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# See comments at the top of this file for details.
+source "${SCRIPT_DIR}/../builds/libtensorflow.sh"
+
+# Configure script
+export TF_NEED_ROCM=1
+export PYTHON_BIN_PATH="/usr/bin/python"
+export TF_NEED_GCP=0
+export TF_NEED_HDFS=0
+export TF_NEED_CUDA=0
+export TF_NEED_OPENCL_SYCL=0
+export TF_NEED_MKL=0
+export COMPUTECPP_PATH="/usr/local"
+
+export PATH="/usr/local/cuda/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin"
+build_libtensorflow_tarball "-gpu-darwin-$(uname -m)"
--- a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
+++ b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ==============================================================================
+
+set -e
+set -x
+
+N_JOBS=$(grep -c ^processor /proc/cpuinfo)
+
+echo ""
+echo "Bazel will use ${N_JOBS} concurrent job(s)."
+echo ""
+
+# Run configure.
+export PYTHON_BIN_PATH=`which python3`
+
+export TF_NEED_ROCM=1
+
+yes "" | $PYTHON_BIN_PATH configure.py
+echo "build --distinct_host_configuration=false" >> .tf_configure.bazelrc
+
+bazel clean
+# Run bazel test command. Double test timeouts to avoid flakes.
+bazel test --config=rocm --test_tag_filters=-no_gpu,-benchmark-test,-no_oss -k \
+    --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \
+    --build_tests_only --test_output=errors --local_test_jobs=1 \
+    --config=xla -- \
+    //tensorflow/compiler/...
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@ -1,6 +1,7 @@
 # TensorFlow external dependencies that can be loaded in WORKSPACE files.

 load("//third_party/gpus:cuda_configure.bzl", "cuda_configure")
+load("//third_party/gpus:rocm_configure.bzl", "rocm_configure")
 load("//third_party/tensorrt:tensorrt_configure.bzl", "tensorrt_configure")
 load("//third_party:nccl/nccl_configure.bzl", "nccl_configure")
 load("//third_party/mkl:build_defs.bzl", "mkl_repository")
@ -43,6 +44,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
    sycl_configure(name = "local_config_sycl")
    syslibs_configure(name = "local_config_syslibs")
    python_configure(name = "local_config_python")
+    rocm_configure(name = "local_config_rocm")

    initialize_third_party()

--- a/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
+++ b/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
@ -0,0 +1,158 @@
+major_version: "local"
+minor_version: ""
+default_target_cpu: "same_as_host"
+
+default_toolchain {
+  cpu: "k8"
+  toolchain_identifier: "local_linux"
+}
+default_toolchain {
+  cpu: "piii"
+  toolchain_identifier: "local_linux"
+}
+default_toolchain {
+  cpu: "arm"
+  toolchain_identifier: "local_linux"
+}
+default_toolchain {
+  cpu: "ppc"
+  toolchain_identifier: "local_linux"
+}
+
+toolchain {
+  abi_version: "local"
+  abi_libc_version: "local"
+  builtin_sysroot: ""
+  compiler: "compiler"
+  host_system_name: "local"
+  needsPic: true
+  supports_gold_linker: false
+  supports_incremental_linker: false
+  supports_fission: false
+  supports_interface_shared_objects: false
+  supports_normalizing_ar: false
+  supports_start_end_lib: false
+  supports_thin_archives: false
+  target_libc: "local"
+  target_cpu: "local"
+  target_system_name: "local"
+  toolchain_identifier: "local_linux"
+
+  tool_path { name: "ar" path: "/usr/bin/ar" }
+  tool_path { name: "compat-ld" path: "/usr/bin/ld" }
+  tool_path { name: "cpp" path: "/usr/bin/cpp" }
+  tool_path { name: "dwp" path: "/usr/bin/dwp" }
+  # As part of the TensorFlow release, we place some ROCm-related compilation
+  # files in @local_config_rocm//crosstool/clang/bin, and this relative
+  # path, combined with the rest of our Bazel configuration causes our
+  # compilation to use those files.
+  tool_path { name: "gcc" path: "clang/bin/crosstool_wrapper_driver_rocm" }
+  # Use "-std=c++11" for hipcc. For consistency, force both the host compiler
+  # and the device compiler to use "-std=c++11".
+  cxx_flag: "-std=c++11"
+  linker_flag: "-Wl,-no-as-needed"
+  linker_flag: "-lstdc++"
+  #linker_flag: "-B/usr/bin/"
+  linker_flag: "-B/opt/rocm/hcc/compiler/bin"
+
+%{host_compiler_includes}
+  tool_path { name: "gcov" path: "/usr/bin/gcov" }
+
+  # C(++) compiles invoke the compiler (as that is the one knowing where
+  # to find libraries), but we provide LD so other rules can invoke the linker.
+  tool_path { name: "ld" path: "/usr/bin/ld" }
+
+  tool_path { name: "nm" path: "/usr/bin/nm" }
+  tool_path { name: "objcopy" path: "/usr/bin/objcopy" }
+  objcopy_embed_flag: "-I"
+  objcopy_embed_flag: "binary"
+  tool_path { name: "objdump" path: "/usr/bin/objdump" }
+  tool_path { name: "strip" path: "/usr/bin/strip" }
+
+  # Anticipated future default.
+  unfiltered_cxx_flag: "-no-canonical-prefixes"
+
+  # Make C++ compilation deterministic. Use linkstamping instead of these
+  # compiler symbols.
+  unfiltered_cxx_flag: "-Wno-builtin-macro-redefined"
+  unfiltered_cxx_flag: "-D__DATE__=\"redacted\""
+  unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\""
+  unfiltered_cxx_flag: "-D__TIME__=\"redacted\""
+  unfiltered_cxx_flag: "-D__HIP_PLATFORM_HCC__"
+  # The macro EIGEN_USE_HIP tells Eigen to use the HIP platform headers.
+  # It must always be set when compiling Eigen headers (irrespective of
+  # whether the source file is being compiled via HIPCC), so -DEIGEN_USE_HIP
+  # is added as a default CXX flag here.
+  unfiltered_cxx_flag: "-DEIGEN_USE_HIP"
+
+    
+  # Security hardening on by default.
+  # Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases.
+  # We need to undef it before redefining it as some distributions now have
+  # it enabled by default.
+  #compiler_flag: "-U_FORTIFY_SOURCE"
+  #compiler_flag: "-D_FORTIFY_SOURCE=1"
+  #compiler_flag: "-fstack-protector"
+  #compiler_flag: "-fPIE"
+  #linker_flag: "-pie"
+  #linker_flag: "-Wl,-z,relro,-z,now"
+
+  # Enable coloring even if there's no attached terminal. Bazel removes the
+  # escape sequences if --nocolor is specified. This isn't supported by gcc
+  # on Ubuntu 14.04.
+  # compiler_flag: "-fcolor-diagnostics"
+
+  # All warnings are enabled. Maybe enable -Werror as well?
+  compiler_flag: "-Wall"
+  # Enable a few more warnings that aren't part of -Wall.
+  compiler_flag: "-Wunused-but-set-parameter"
+  # But disable some that are problematic.
+  compiler_flag: "-Wno-free-nonheap-object" # has false positives
+
+  # Keep stack frames for debugging, even in opt mode.
+  compiler_flag: "-fno-omit-frame-pointer"
+
+  # Anticipated future default.
+  linker_flag: "-no-canonical-prefixes"
+  unfiltered_cxx_flag: "-fno-canonical-system-headers"
+  # Have gcc return the exit code from ld.
+  linker_flag: "-pass-exit-codes"
+  # Stamp the binary with a unique identifier.
+  linker_flag: "-Wl,--build-id=md5"
+  linker_flag: "-Wl,--hash-style=gnu"
+  # Gold linker only? Can we enable this by default?
+  # linker_flag: "-Wl,--warn-execstack"
+  # linker_flag: "-Wl,--detect-odr-violations"
+
+  # Include directory for ROCm headers.
+%{rocm_include_path}
+
+  compilation_mode_flags {
+    mode: DBG
+    # Enable debug symbols.
+    compiler_flag: "-g"
+  }
+  compilation_mode_flags {
+    mode: OPT
+
+    # No debug symbols.
+    # Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or
+    # even generally? However, that can't happen here, as it requires special
+    # handling in Bazel.
+    compiler_flag: "-g0"
+
+    # Conservative choice for -O
+    # -O3 can increase binary size and even slow down the resulting binaries.
+    # Profile first and / or use FDO if you need better performance than this.
+    compiler_flag: "-O2"
+
+    # Disable assertions
+    compiler_flag: "-DNDEBUG"
+
+    # Removal of unused code and data at link time (can this increase binary size in some cases?).
+    compiler_flag: "-ffunction-sections"
+    compiler_flag: "-fdata-sections"
+    linker_flag: "-Wl,--gc-sections"
+  }
+  linking_mode_flags { mode: DYNAMIC }
+}
--- a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
+++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
@ -0,0 +1,241 @@
+#!/usr/bin/env python
+"""Crosstool wrapper for compiling ROCm programs.
+
+SYNOPSIS:
+  crosstool_wrapper_driver_rocm [options passed in by cc_library()
+                                or cc_binary() rule]
+
+DESCRIPTION:
+  This script is expected to be called by the cc_library() or cc_binary() bazel
+  rules. When the option "-x rocm" is present in the list of arguments passed
+  to this script, it invokes the hipcc compiler. Most arguments are passed
+  as is as a string to --compiler-options of hipcc. When "-x rocm" is not
+  present, this wrapper invokes gcc with the input arguments as is.
+"""
+
+from __future__ import print_function
+
+__author__ = 'whchung@gmail.com (Wen-Heng (Jack) Chung)'
+
+from argparse import ArgumentParser
+import os
+import subprocess
+import re
+import sys
+import pipes
+
+# Template values set by rocm_configure.bzl.
+CPU_COMPILER = ('%{cpu_compiler}')
+GCC_HOST_COMPILER_PATH = ('%{gcc_host_compiler_path}')
+
+HIPCC_PATH = '%{hipcc_path}'
+PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH)
+
+def Log(s):
+  print('gpus/crosstool: {0}'.format(s))
+
+
+def GetOptionValue(argv, option):
+  """Extract the list of values for option from the argv list.
+
+  Args:
+    argv: A list of strings, possibly the argv passed to main().
+    option: The option whose value to extract, without the leading '-'.
+
+  Returns:
+    A list of values, either directly following the option,
+    (eg., -opt val1 val2) or values collected from multiple occurrences of
+    the option (eg., -opt val1 -opt val2).
+  """
+
+  parser = ArgumentParser()
+  parser.add_argument('-' + option, nargs='*', action='append')
+  args, _ = parser.parse_known_args(argv)
+  if not args or not vars(args)[option]:
+    return []
+  else:
+    return sum(vars(args)[option], [])
+
+
+def GetHostCompilerOptions(argv):
+  """Collect the -isystem, -iquote, and --sysroot option values from argv.
+
+  Args:
+    argv: A list of strings, possibly the argv passed to main().
+
+  Returns:
+    The string that can be used as the --compiler-options to hipcc.
+  """
+
+  parser = ArgumentParser()
+  parser.add_argument('-isystem', nargs='*', action='append')
+  parser.add_argument('-iquote', nargs='*', action='append')
+  parser.add_argument('--sysroot', nargs=1)
+  parser.add_argument('-g', nargs='*', action='append')
+  parser.add_argument('-fno-canonical-system-headers', action='store_true')
+
+  args, _ = parser.parse_known_args(argv)
+
+  opts = ''
+
+  if args.isystem:
+    opts += ' -isystem ' + ' -isystem '.join(sum(args.isystem, []))
+  if args.iquote:
+    opts += ' -iquote ' + ' -iquote '.join(sum(args.iquote, []))
+  if args.g:
+    opts += ' -g' + ' -g'.join(sum(args.g, []))
+  #if args.fno_canonical_system_headers:
+  #  opts += ' -fno-canonical-system-headers'
+  if args.sysroot:
+    opts += ' --sysroot ' + args.sysroot[0]
+
+  return opts
+
+def GetHipccOptions(argv):
+  """Collect the -hipcc_options values from argv.
+
+  Args:
+    argv: A list of strings, possibly the argv passed to main().
+
+  Returns:
+    The string that can be passed directly to hipcc.
+  """
+
+  parser = ArgumentParser()
+  parser.add_argument('-hipcc_options', nargs='*', action='append')
+
+  args, _ = parser.parse_known_args(argv)
+
+  if args.hipcc_options:
+    options = _update_options(sum(args.hipcc_options, []))
+    return ' '.join(['--'+a for a in options])
+  return ''
+
+
+def InvokeHipcc(argv, log=False):
+  """Call hipcc with arguments assembled from argv.
+
+  Builds up to two shell command lines and runs them with os.system(): an
+  optional dependency-file generation step (only when -MF is present) and
+  the compile step itself.
+
+  Args:
+    argv: A list of strings, possibly the argv passed to main().
+    log: True if logging is requested.
+
+  Returns:
+    1 when argv carries no -c values or not exactly one -o value; the
+    non-zero os.system() status of the dependency-file step if that step
+    fails; otherwise the os.system() status of the compile command.
+  """
+
+  host_compiler_options = GetHostCompilerOptions(argv)
+  hipcc_compiler_options = GetHipccOptions(argv)
+  opt_option = GetOptionValue(argv, 'O')
+  m_options = GetOptionValue(argv, 'm')
+  # Only -m32 and -m64 are forwarded; other -m values are dropped.
+  m_options = ''.join([' -m' + m for m in m_options if m in ['32', '64']])
+  include_options = GetOptionValue(argv, 'I')
+  out_file = GetOptionValue(argv, 'o')
+  depfiles = GetOptionValue(argv, 'MF')
+  defines = GetOptionValue(argv, 'D')
+  defines = ''.join([' -D' + define for define in defines])
+  undefines = GetOptionValue(argv, 'U')
+  undefines = ''.join([' -U' + define for define in undefines])
+  std_options = GetOptionValue(argv, 'std')
+  # Only the -std values listed here are forwarded to hipcc.
+  hipcc_allowed_std_options = ["c++11"]
+  std_options = ''.join([' -std=' + define
+      for define in std_options if define in hipcc_allowed_std_options])
+
+  # The list of source files get passed after the -c option. I don't know of
+  # any other reliable way to just get the list of source files to be compiled.
+  src_files = GetOptionValue(argv, 'c')
+
+  # Bail out unless there is something to compile and exactly one output.
+  if len(src_files) == 0:
+    return 1
+  if len(out_file) != 1:
+    return 1
+
+  # Any positive numeric -O level is mapped to -O2; everything else to -g.
+  # NOTE(review): int() raises ValueError for a non-numeric level such as
+  # -Os -- confirm such flags never reach this wrapper.
+  opt = (' -O2' if (len(opt_option) > 0 and int(opt_option[0]) > 0)
+         else ' -g')
+
+  includes = (' -I ' + ' -I '.join(include_options)
+              if len(include_options) > 0
+              else '')
+
+  # Unfortunately, there are other options that have -c prefix too.
+  # So allowing only those look like C/C++ files.
+  # NOTE(review): the pattern is not a raw string, so '\.' relies on Python
+  # keeping unknown escape sequences as-is.
+  src_files = [f for f in src_files if
+               re.search('\.cpp$|\.cc$|\.c$|\.cxx$|\.C$', f)]
+  srcs = ' '.join(src_files)
+  out = ' -o ' + out_file[0]
+
+  # Options shared by the dependency-file and the compile command lines.
+  hipccopts = ' '
+  hipccopts += ' ' + hipcc_compiler_options
+  hipccopts += undefines
+  hipccopts += defines
+  hipccopts += std_options
+  hipccopts += m_options
+
+  if depfiles:
+    # Generate the dependency file
+    depfile = depfiles[0]
+    cmd = (HIPCC_PATH + ' ' + hipccopts +
+           host_compiler_options +
+           ' ' + GCC_HOST_COMPILER_PATH +
+           ' -I .' + includes + ' ' + srcs + ' -M -o ' + depfile)
+    if log: Log(cmd)
+    exit_status = os.system(cmd)
+    # A failed dependency step aborts before the compile step is attempted.
+    if exit_status != 0:
+      return exit_status
+
+  # The compile command: same options plus -fPIC, the optimization flag,
+  # -c with the sources and the single output file.
+  cmd = (HIPCC_PATH + ' ' + hipccopts +
+         host_compiler_options + ' -fPIC' +
+         ' ' + GCC_HOST_COMPILER_PATH +
+         ' -I .' + opt + includes + ' -c ' + srcs + out)
+
+  # TODO(zhengxq): for some reason, 'gcc' needs this help to find 'as'.
+  # Need to investigate and fix.
+  cmd = 'PATH=' + PREFIX_DIR + ':$PATH ' + cmd
+  if log: Log(cmd)
+  return os.system(cmd)
+
+
+def main():
+  # ignore PWD env var
+  os.environ['PWD']=''
+
+  parser = ArgumentParser()
+  parser.add_argument('-x', nargs=1)
+  parser.add_argument('--rocm_log', action='store_true')
+  parser.add_argument('-pass-exit-codes', action='store_true')
+  args, leftover = parser.parse_known_args(sys.argv[1:])
+
+  if args.x and args.x[0] == 'rocm':
+    if args.rocm_log: Log('-x rocm')
+    leftover = [pipes.quote(s) for s in leftover]
+    if args.rocm_log: Log('using hipcc')
+    return InvokeHipcc(leftover, log=args.rocm_log)
+
+  # XXX use hipcc to link
+  if args.pass_exit_codes:
+    gpu_compiler_flags = [flag for flag in sys.argv[1:]
+                               if not flag.startswith(('-pass-exit-codes'))]
+
+    # special handling for $ORIGIN
+    # - guard every argument with ''
+    modified_gpu_compiler_flags = []
+    for flag in gpu_compiler_flags:
+      modified_gpu_compiler_flags.append("'" + flag + "'")
+
+    if args.rocm_log: Log('Link with hipcc: %s' % (' '.join([HIPCC_PATH] + modified_gpu_compiler_flags)))
+    return subprocess.call([HIPCC_PATH] + modified_gpu_compiler_flags)
+
+  # Strip our flags before passing through to the CPU compiler for files which
+  # are not -x rocm. We can't just pass 'leftover' because it also strips -x.
+  # We not only want to pass -x to the CPU compiler, but also keep it in its
+  # relative location in the argv list (the compiler is actually sensitive to
+  # this).
+  cpu_compiler_flags = [flag for flag in sys.argv[1:]
+                             if not flag.startswith(('--rocm_log'))]
+
+  # XXX: SE codes need to be built with gcc, but need this macro defined
+  cpu_compiler_flags.append("-D__HIP_PLATFORM_HCC__")
+
+  return subprocess.call([CPU_COMPILER] + cpu_compiler_flags)
+
+if __name__ == '__main__':
+  sys.exit(main())
--- a/third_party/gpus/rocm/BUILD
+++ b/third_party/gpus/rocm/BUILD
--- a/third_party/gpus/rocm/BUILD.tpl
+++ b/third_party/gpus/rocm/BUILD.tpl
@ -0,0 +1,99 @@
+# BUILD template for the generated ROCm repository. Every %{...} token is a
+# placeholder that is substituted when the template is instantiated.
+licenses(["restricted"])  # MPL2, portions GPL v3, LGPL v3, BSD-like
+
+package(default_visibility = ["//visibility:public"])
+
+# Matches builds invoked with --define=using_rocm_hipcc=true.
+config_setting(
+    name = "using_hipcc",
+    values = {
+        "define": "using_rocm_hipcc=true",
+    },
+)
+
+# Header-only target: rocm_config.h plus the headers listed by the
+# %{rocm_headers} placeholder.
+cc_library(
+    name = "rocm_headers",
+    hdrs = [
+        "rocm/rocm_config.h",
+        %{rocm_headers}
+    ],
+    includes = [
+        ".",
+        "rocm/include",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+# One target per ROCm library. Each lists the same library file under both
+# srcs and data.
+cc_library(
+    name = "hip",
+    srcs = ["rocm/lib/%{hip_lib}"],
+    data = ["rocm/lib/%{hip_lib}"],
+    includes = [
+        ".",
+        "rocm/include",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "rocblas",
+    srcs = ["rocm/lib/%{rocblas_lib}"],
+    data = ["rocm/lib/%{rocblas_lib}"],
+    includes = [
+        ".",
+        "rocm/include",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "rocfft",
+    srcs = ["rocm/lib/%{rocfft_lib}"],
+    data = ["rocm/lib/%{rocfft_lib}"],
+    includes = [
+        ".",
+        "rocm/include",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+# hiprand additionally exposes rocm/include/rocrand on the include path.
+cc_library(
+    name = "hiprand",
+    srcs = ["rocm/lib/%{hiprand_lib}"],
+    data = ["rocm/lib/%{hiprand_lib}"],
+    includes = [
+        ".",
+        "rocm/include",
+        "rocm/include/rocrand",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "miopen",
+    srcs = ["rocm/lib/%{miopen_lib}"],
+    data = ["rocm/lib/%{miopen_lib}"],
+    includes = [
+        ".",
+        "rocm/include",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+)
+
+# Umbrella target that pulls in the headers and all of the libraries above.
+cc_library(
+    name = "rocm",
+    visibility = ["//visibility:public"],
+    deps = [
+        ":rocm_headers",
+        ":hip",
+        ":rocblas",
+        ":rocfft",
+        ":hiprand",
+        ":miopen",
+    ],
+)
+
+# Placeholder for generated genrules; it may be substituted with an empty
+# string.
+%{rocm_include_genrules}
--- a/third_party/gpus/rocm/build_defs.bzl.tpl
+++ b/third_party/gpus/rocm/build_defs.bzl.tpl
@ -0,0 +1,45 @@
+# Macros for building ROCm code.
+def if_rocm(if_true, if_false = []):
+    """Choose between two values depending on whether hipcc is in use.
+
+    Returns a select() that resolves to if_true when the
+    @local_config_rocm//rocm:using_hipcc config_setting matches, and to
+    if_false in every other configuration.
+
+    """
+    branches = {
+        "@local_config_rocm//rocm:using_hipcc": if_true,
+        "//conditions:default": if_false,
+    }
+    return select(branches)
+
+
+def rocm_default_copts():
+    """Default options for all ROCm compilations.
+
+    Yields "-x rocm" followed by the templated extra copts when building
+    with hipcc, and an empty list otherwise.
+    """
+    hipcc_copts = ["-x", "rocm"] + %{rocm_extra_copts}
+    return if_rocm(hipcc_copts)
+
+def rocm_copts(opts = []):
+    """Gets the appropriate set of copts for (maybe) ROCm compilation.
+
+      The result is the default ROCm copts, followed by a hipcc-only select
+      (currently a single empty string when hipcc is in use), followed by
+      `opts`. `opts` is appended whenever ROCm was enabled at configure time,
+      regardless of whether hipcc is in use for the current build.
+
+      """
+    hipcc_only_copts = select({
+        "//conditions:default": [],
+        "@local_config_rocm//rocm:using_hipcc": [""],
+    })
+    configured_opts = if_rocm_is_configured(opts)
+    return rocm_default_copts() + hipcc_only_copts + configured_opts
+
+def rocm_is_configured():
+    """Returns true if ROCm was enabled during the configure process.
+
+    The returned expression is a template placeholder; it is replaced with a
+    literal value when this file is generated from the template.
+    """
+    return %{rocm_is_configured}
+
+def if_rocm_is_configured(x):
+    """Returns `x` if ROCm was enabled during the configure process, else [].
+
+    Unlike if_rocm(), this does not require that we are building with
+    --config=rocm. Used to allow non-ROCm code to depend on ROCm libraries.
+    """
+    return x if rocm_is_configured() else []
--- a/third_party/gpus/rocm/rocm_config.h.tpl
+++ b/third_party/gpus/rocm/rocm_config.h.tpl
@ -0,0 +1,21 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef ROCM_ROCM_CONFIG_H_
+#define ROCM_ROCM_CONFIG_H_
+
+// Root directory of the ROCm toolkit. The placeholder is filled in from the
+// "%{rocm_toolkit_path}" substitution supplied when this template is
+// instantiated, so a non-default toolkit location is honored.
+#define TF_ROCM_TOOLKIT_PATH "%{rocm_toolkit_path}"
+
+#endif  // ROCM_ROCM_CONFIG_H_
--- a/third_party/gpus/rocm_configure.bzl
+++ b/third_party/gpus/rocm_configure.bzl
@ -0,0 +1,784 @@
+# -*- Python -*-
+"""Repository rule for ROCm autoconfiguration.
+
+`rocm_configure` depends on the following environment variables:
+
+  * `TF_NEED_ROCM`: Whether to enable building with ROCm.
+  * `GCC_HOST_COMPILER_PATH`: The GCC host compiler path
+  * `ROCM_TOOLKIT_PATH`: The path to the ROCm toolkit. Default is
+    `/opt/rocm`.
+  * `TF_ROCM_VERSION`: The version of the ROCm toolkit. If this is blank, then
+    use the system default.
+  * `TF_MIOPEN_VERSION`: The version of the MIOpen library.
+  * `TF_ROCM_AMDGPU_TARGETS`: The AMDGPU targets. Default is
+    `gfx803,gfx900`.
+"""
+
+# Names of environment variables (see the module docstring).
+_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH"
+_ROCM_TOOLKIT_PATH = "ROCM_TOOLKIT_PATH"
+_TF_ROCM_VERSION = "TF_ROCM_VERSION"
+_TF_MIOPEN_VERSION = "TF_MIOPEN_VERSION"
+_TF_ROCM_AMDGPU_TARGETS = "TF_ROCM_AMDGPU_TARGETS"
+_TF_ROCM_CONFIG_REPO = "TF_ROCM_CONFIG_REPO"
+
+# Default values.
+_DEFAULT_ROCM_VERSION = ""
+_DEFAULT_MIOPEN_VERSION = ""
+_DEFAULT_ROCM_TOOLKIT_PATH = "/opt/rocm"
+_DEFAULT_ROCM_AMDGPU_TARGETS = ["gfx803", "gfx900"]
+
+def find_cc(repository_ctx):
+    """Locate the host C/C++ compiler.
+
+    The compiler name is taken from the GCC_HOST_COMPILER_PATH environment
+    variable when that is set to a non-blank value, and is "gcc" otherwise.
+    A name starting with "/" is returned unchanged; any other name is
+    resolved with repository_ctx.which(), and the build fails if it cannot
+    be found.
+    """
+
+    # Return a dummy value for GCC detection here to avoid error
+    default_name = "gcc"
+    env_var = _GCC_HOST_COMPILER_PATH
+
+    override = repository_ctx.os.environ.get(env_var, "").strip()
+    compiler = override if override else default_name
+
+    if compiler.startswith("/"):
+        # Absolute path, maybe we should make this supported by our which function.
+        return compiler
+
+    resolved = repository_ctx.which(compiler)
+    if resolved != None:
+        return resolved
+    fail(("Cannot find {}, either correct your path or set the {}" +
+          " environment variable").format(default_name, env_var))
+
+_INC_DIR_MARKER_BEGIN = "#include <...>"
+
+def _cxx_inc_convert(path):
+    """Return `path` with surrounding whitespace removed."""
+    return path.strip()
+
+def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp):
+    """Compute the list of default C or C++ include directories.
+
+    Runs `cc -E -x<lang> - -v` and extracts the directory listing that
+    follows the "#include <...>" marker in the compiler's stderr. Returns an
+    empty list when the listing cannot be located.
+    """
+    lang = "c++" if lang_is_cpp else "c"
+
+    # TODO: We pass -no-canonical-prefixes here to match the compiler flags,
+    #       but in rocm_clang CROSSTOOL file that is a `feature` and we should
+    #       handle the case when it's disabled and no flag is passed
+    compiler_log = repository_ctx.execute([
+        cc,
+        "-no-canonical-prefixes",
+        "-E",
+        "-x" + lang,
+        "-",
+        "-v",
+    ]).stderr
+
+    # The listing starts on the line after the marker ...
+    marker = compiler_log.find(_INC_DIR_MARKER_BEGIN)
+    if marker == -1:
+        return []
+    start = compiler_log.find("\n", marker)
+    if start == -1:
+        return []
+
+    # ... and ends with the last line that begins with a space.
+    last_entry = compiler_log.rfind("\n ")
+    if last_entry == -1 or last_entry < start:
+        return []
+    end = compiler_log.find("\n", last_entry + 1)
+    if end == -1:
+        listing = compiler_log[start + 1:]
+    else:
+        listing = compiler_log[start + 1:end].strip()
+
+    return [
+        str(repository_ctx.path(_cxx_inc_convert(entry)))
+        for entry in listing.split("\n")
+    ]
+
+def get_cxx_inc_directories(repository_ctx, cc):
+    """Compute the list of default C and C++ include directories.
+
+    Args:
+      repository_ctx: The repository context.
+      cc: The path to the compiler to query.
+
+    Returns:
+      The C++ include directories, followed by those C include directories
+      that are not already in the C++ list.
+    """
+
+    # For some reason `clang -xc` sometimes returns include paths that are
+    # different from the ones from `clang -xc++`. (Symlink and a dir)
+    # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
+    includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True)
+    includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False)
+
+    # Membership is tested against the plain list: it is short, and this
+    # avoids relying on the `in` operator for depsets.
+    return includes_cpp + [
+        inc
+        for inc in includes_c
+        if inc not in includes_cpp
+    ]
+
+def auto_configure_fail(msg):
+    """Abort with `msg` under a red "ROCm Configuration Error:" heading."""
+    fail("\n%sROCm Configuration Error:%s %s\n" % ("\033[0;31m", "\033[0m", msg))
+
+# END cc_configure common functions.
+
+def _host_compiler_includes(repository_ctx, cc):
+    """Generates the CROSSTOOL entries for the host compiler include dirs.
+
+    Args:
+      repository_ctx: The repository context.
+      cc: The path to the gcc host compiler.
+
+    Returns:
+      A newline-separated string with one cxx_builtin_include_directory entry
+      per host compiler include directory (plus a fixed numpy header
+      directory), followed by an unfiltered_cxx_flag entry that defines
+      TENSORFLOW_USE_ROCM. The string can be added to the CROSSTOOL file.
+    """
+
+    # Add numpy headers
+    numpy_include = "/usr/lib/python2.7/dist-packages/numpy/core/include"
+    inc_dirs = get_cxx_inc_directories(repository_ctx, cc) + [numpy_include]
+
+    entries = [
+        "  cxx_builtin_include_directory: \"%s\"" % inc_dir
+        for inc_dir in inc_dirs
+    ]
+
+    # define TENSORFLOW_USE_ROCM
+    entries.append("  unfiltered_cxx_flag: \"-DTENSORFLOW_USE_ROCM\"")
+
+    return "\n".join(entries)
+
+def _rocm_include_path(repository_ctx, rocm_config):
+    """Generates the cxx_builtin_include_directory entries for rocm inc dirs.
+
+    Args:
+      repository_ctx: The repository context (not used here).
+      rocm_config: A struct providing `rocm_toolkit_path`.
+
+    Returns:
+      A newline-separated string with one cxx_builtin_include_directory entry
+      per ROCm include directory, which can be added to the CROSSTOOL file.
+    """
+
+    # NOTE(review): only the first entry follows rocm_toolkit_path; all the
+    # others are fixed under /opt/rocm -- confirm this is intended for
+    # non-default toolkit locations.
+    inc_dirs = [
+        # general ROCm include path
+        rocm_config.rocm_toolkit_path + "/include",
+        # Add HSA headers
+        "/opt/rocm/hsa/include",
+        # Add HIP headers
+        "/opt/rocm/include/hip",
+        "/opt/rocm/include/hip/hcc_detail",
+        # Add rocrand and hiprand headers
+        "/opt/rocm/rocrand/include",
+        "/opt/rocm/hiprand/include",
+        # Add rocfft headers
+        "/opt/rocm/rocfft/include",
+        # Add rocBLAS headers
+        "/opt/rocm/rocblas/include",
+        # Add MIOpen headers
+        "/opt/rocm/miopen/include",
+        # Add hcc headers
+        "/opt/rocm/hcc/include",
+        "/opt/rocm/hcc/compiler/lib/clang/7.0.0/include/",
+        "/opt/rocm/hcc/lib/clang/7.0.0/include",
+        # Newer hcc builds use/are based off of clang 8.0.0.
+        "/opt/rocm/hcc/compiler/lib/clang/8.0.0/include/",
+        "/opt/rocm/hcc/lib/clang/8.0.0/include",
+    ]
+
+    return "\n".join([
+        "  cxx_builtin_include_directory: \"%s\"" % inc_dir
+        for inc_dir in inc_dirs
+    ])
+
+def _enable_rocm(repository_ctx):
+    """Returns True iff the TF_NEED_ROCM environment variable is "1"."""
+    return repository_ctx.os.environ.get("TF_NEED_ROCM", "").strip() == "1"
+
+def _rocm_toolkit_path(repository_ctx):
+    """Finds the rocm toolkit directory.
+
+    Uses the ROCM_TOOLKIT_PATH environment variable when it is set, and the
+    default toolkit path otherwise. Fails the configuration if the directory
+    does not exist.
+
+    Args:
+      repository_ctx: The repository context.
+
+    Returns:
+      A speculative real path of the rocm toolkit install directory.
+    """
+    env = repository_ctx.os.environ
+    if _ROCM_TOOLKIT_PATH in env:
+        configured = env[_ROCM_TOOLKIT_PATH].strip()
+    else:
+        configured = _DEFAULT_ROCM_TOOLKIT_PATH
+
+    toolkit_dir = repository_ctx.path(configured)
+    if not toolkit_dir.exists:
+        auto_configure_fail("Cannot find rocm toolkit path.")
+    return str(toolkit_dir.realpath)
+
+def _amdgpu_targets(repository_ctx):
+    """Returns a list of strings representing AMDGPU targets.
+
+    The list is the comma-separated value of TF_ROCM_AMDGPU_TARGETS, or the
+    default targets when that variable is not set. Every entry must be "gfx"
+    followed by digits only; anything else fails the configuration.
+    """
+    env = repository_ctx.os.environ
+    if _TF_ROCM_AMDGPU_TARGETS not in env:
+        return _DEFAULT_ROCM_AMDGPU_TARGETS
+
+    targets = env[_TF_ROCM_AMDGPU_TARGETS].split(",")
+    for target in targets:
+        well_formed = target.startswith("gfx") and target[3:].isdigit()
+        if not well_formed:
+            auto_configure_fail("Invalid AMDGPU target: %s" % target)
+    return targets
+
+def _cpu_value(repository_ctx):
+    """Returns the name of the host operating system.
+
+    Args:
+      repository_ctx: The repository context.
+
+    Returns:
+      "Darwin" or "Windows" when the OS name reported by the repository
+      context identifies them; otherwise the stripped output of `uname -s`.
+    """
+    reported_name = repository_ctx.os.name.lower()
+    if reported_name.startswith("mac os"):
+        return "Darwin"
+    if "windows" in reported_name:
+        return "Windows"
+    return repository_ctx.execute(["uname", "-s"]).stdout.strip()
+
+def _lib_name(lib, cpu_value, version = "", static = False):
+    """Constructs the platform-specific name of a library.
+
+    Args:
+      lib: The name of the library, such as "hip"
+      cpu_value: The name of the host operating system; one of "Linux",
+        "Windows" or "Darwin".
+      version: The version of the library.
+      static: True the library is static or False if it is a shared object.
+
+    Returns:
+      The platform-specific name of the library. Any other cpu_value fails
+      the configuration.
+    """
+
+    # Exact comparison: `cpu_value in ("Linux")` would be a substring test
+    # against the string "Linux" and would also accept "" or "Lin".
+    if cpu_value == "Linux":
+        if static:
+            return "lib%s.a" % lib
+        else:
+            if version:
+                version = ".%s" % version
+            return "lib%s.so%s" % (lib, version)
+    elif cpu_value == "Windows":
+        return "%s.lib" % lib
+    elif cpu_value == "Darwin":
+        if static:
+            return "lib%s.a" % lib
+        elif version:
+            version = ".%s" % version
+        return "lib%s%s.dylib" % (lib, version)
+    else:
+        auto_configure_fail("Invalid cpu_value: %s" % cpu_value)
+
+def _find_rocm_lib(
+        lib,
+        repository_ctx,
+        cpu_value,
+        basedir,
+        version = "",
+        static = False):
+    """Finds the given ROCm libraries on the system.
+
+    Candidate locations under `basedir` are probed in a fixed order and the
+    first existing one wins; the configuration fails if none exists.
+
+    Args:
+      lib: The name of the library, such as "hip"
+      repository_ctx: The repository context.
+      cpu_value: The name of the host operating system.
+      basedir: The install directory of ROCm.
+      version: The version of the library.
+      static: True if static library, False if shared object.
+
+    Returns:
+      Returns a struct with the following fields:
+        file_name: The basename of the library found on the system.
+        path: The full path to the library.
+    """
+    file_name = _lib_name(lib, cpu_value, version, static)
+
+    # Linux-only directories are searched before the generic ones.
+    search_dirs = []
+    if cpu_value == "Linux":
+        search_dirs += ["lib64", "lib64/stubs", "lib/x86_64-linux-gnu"]
+    search_dirs.append("lib")
+
+    candidates = [
+        "%s/%s/%s" % (basedir, search_dir, file_name)
+        for search_dir in search_dirs
+    ]
+
+    # Last resort: the library sits directly in basedir.
+    candidates.append("%s/%s" % (basedir, file_name))
+
+    for candidate in candidates:
+        path = repository_ctx.path(candidate)
+        if path.exists:
+            return struct(file_name = file_name, path = str(path.realpath))
+
+    auto_configure_fail("Cannot find rocm library %s" % file_name)
+
+def _find_libs(repository_ctx, rocm_config):
+    """Returns the ROCm libraries on the system.
+
+    Args:
+      repository_ctx: The repository context.
+      rocm_config: The ROCm config as returned by _get_rocm_config
+
+    Returns:
+      Map of library names to structs of filename and path as returned by
+      _find_rocm_lib.
+    """
+    cpu_value = rocm_config.cpu_value
+    toolkit = rocm_config.rocm_toolkit_path
+
+    # (result key, library base name, directory relative to the toolkit root)
+    lib_specs = [
+        ("hip", "hip_hcc", ""),
+        ("rocblas", "rocblas", "/rocblas"),
+        ("rocfft", "rocfft", "/rocfft"),
+        ("hiprand", "hiprand", "/hiprand"),
+        ("miopen", "MIOpen", "/miopen"),
+    ]
+    return {
+        key: _find_rocm_lib(lib, repository_ctx, cpu_value, toolkit + subdir)
+        for key, lib, subdir in lib_specs
+    }
+
+def _get_rocm_config(repository_ctx):
+    """Detects and returns information about the ROCm installation on the system.
+
+    Args:
+      repository_ctx: The repository context.
+
+    Returns:
+      A struct containing the following fields:
+        rocm_toolkit_path: The ROCm toolkit installation directory.
+        amdgpu_targets: The AMDGPU targets as returned by _amdgpu_targets.
+        cpu_value: The name of the host operating system.
+    """
+    host_os = _cpu_value(repository_ctx)
+    toolkit = _rocm_toolkit_path(repository_ctx)
+    targets = _amdgpu_targets(repository_ctx)
+    return struct(
+        rocm_toolkit_path = toolkit,
+        amdgpu_targets = targets,
+        cpu_value = host_os,
+    )
+
+def _tpl(repository_ctx, tpl, substitutions = {}, out = None):
+    """Instantiates //third_party/gpus/<tpl>.tpl into the repository.
+
+    The output path is `out` when given; otherwise it is `tpl` with every
+    ":" replaced by "/".
+    """
+    destination = out if out else tpl.replace(":", "/")
+    template = Label("//third_party/gpus/%s.tpl" % tpl)
+    repository_ctx.template(destination, template, substitutions)
+
+def _file(repository_ctx, label):
+    """Copies //third_party/gpus/<label>.tpl into the repository unchanged.
+
+    The output path is `label` with every ":" replaced by "/"; no
+    substitutions are applied.
+    """
+    destination = label.replace(":", "/")
+    template = Label("//third_party/gpus/%s.tpl" % label)
+    repository_ctx.template(destination, template, {})
+
+# Contents of the .bzl file written into the dummy repository. It defines
+# error_gpu_disabled(), which calls fail() with a message asking the user to
+# re-run ./configure.
+_DUMMY_CROSSTOOL_BZL_FILE = """
+def error_gpu_disabled():
+  fail("ERROR: Building with --config=rocm but TensorFlow is not configured " +
+       "to build with GPU support. Please re-run ./configure and enter 'Y' " +
+       "at the prompt to build with GPU support.")
+
+  native.genrule(
+      name = "error_gen_crosstool",
+      outs = ["CROSSTOOL"],
+      cmd = "echo 'Should not be run.' && exit 1",
+  )
+
+  native.filegroup(
+      name = "crosstool",
+      srcs = [":CROSSTOOL"],
+      output_licenses = ["unencumbered"],
+  )
+"""
+
+# Contents of the BUILD file written next to it; it loads and calls
+# error_gpu_disabled().
+_DUMMY_CROSSTOOL_BUILD_FILE = """
+load("//crosstool:error_gpu_disabled.bzl", "error_gpu_disabled")
+
+error_gpu_disabled()
+"""
+
+def _create_dummy_repository(repository_ctx):
+    """Creates the placeholder repository used when ROCm is not configured.
+
+    Writes build_defs.bzl and BUILD for rocm/, an empty hip_runtime.h, a
+    rocm_config.h, and a crosstool package whose BUILD file fails with an
+    actionable error message.
+    """
+    cpu_value = _cpu_value(repository_ctx)
+
+    # Set up BUILD file for rocm/.
+    build_defs_substitutions = {
+        "%{rocm_is_configured}": "False",
+        "%{rocm_extra_copts}": "[]",
+    }
+    _tpl(repository_ctx, "rocm:build_defs.bzl", build_defs_substitutions)
+
+    build_substitutions = {
+        "%{hip_lib}": _lib_name("hip", cpu_value),
+        "%{rocblas_lib}": _lib_name("rocblas", cpu_value),
+        "%{miopen_lib}": _lib_name("miopen", cpu_value),
+        "%{rocfft_lib}": _lib_name("rocfft", cpu_value),
+        "%{hiprand_lib}": _lib_name("hiprand", cpu_value),
+        "%{rocm_include_genrules}": "",
+        "%{rocm_headers}": "",
+    }
+    _tpl(repository_ctx, "rocm:BUILD", build_substitutions)
+
+    # Create dummy files for the ROCm toolkit since they are still required by
+    # tensorflow/core/platform/default/build_config:rocm.
+    repository_ctx.file("rocm/hip/include/hip/hip_runtime.h", "")
+
+    # Set up rocm_config.h, which is used by
+    # tensorflow/stream_executor/dso_loader.cc.
+    config_substitutions = {
+        "%{rocm_toolkit_path}": _DEFAULT_ROCM_TOOLKIT_PATH,
+    }
+    _tpl(
+        repository_ctx,
+        "rocm:rocm_config.h",
+        config_substitutions,
+        "rocm/rocm/rocm_config.h",
+    )
+
+    # If rocm_configure is not configured to build with GPU support, and the user
+    # attempts to build with --config=rocm, add a dummy build rule to intercept
+    # this and fail with an actionable error message.
+    repository_ctx.file(
+        "crosstool/error_gpu_disabled.bzl",
+        _DUMMY_CROSSTOOL_BZL_FILE,
+    )
+    repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
+
+def _execute(
+        repository_ctx,
+        cmdline,
+        error_msg = None,
+        error_details = None,
+        empty_stdout_fine = False):
+    """Executes a command and fails the configuration if it looks unsuccessful.
+
+    The command counts as failed when it wrote anything to stderr, or when
+    its stdout is empty and empty_stdout_fine is False.
+
+    Args:
+      repository_ctx: the repository_ctx object
+      cmdline: list of strings, the command to execute
+      error_msg: string, a summary of the error if the command fails
+      error_details: string, details about the error or steps to fix it
+      empty_stdout_fine: bool, if True, an empty stdout result is fine, otherwise
+        it's an error
+    Return:
+      the result of repository_ctx.execute(cmdline)
+    """
+    result = repository_ctx.execute(cmdline)
+    stdout_acceptable = empty_stdout_fine or result.stdout
+    if result.stderr or not stdout_acceptable:
+        summary = error_msg.strip() if error_msg else "Repository command failed"
+        details = error_details if error_details else ""
+        auto_configure_fail(
+            "\n".join([summary, result.stderr.strip(), details]),
+        )
+    return result
+
+def _norm_path(path):
+    """Returns a path with '/' and remove the trailing slash.
+
+    Backslashes are converted to forward slashes and a single trailing slash,
+    if any, is dropped. An empty path is returned unchanged.
+    """
+    path = path.replace("\\", "/")
+
+    # endswith() is safe on an empty string, unlike indexing with path[-1].
+    if path.endswith("/"):
+        path = path[:-1]
+    return path
+
+def _symlink_genrule_for_dir(
+        repository_ctx,
+        src_dir,
+        dest_dir,
+        genrule_name,
+        src_files = [],
+        dest_files = []):
+    """Returns a genrule that symlinks a set of files.
+
+    If src_dir is passed, files will be read from the given directory; otherwise
+    we assume files are in src_files and dest_files
+    """
+    if src_dir != None:
+        src_dir = _norm_path(src_dir)
+        dest_dir = _norm_path(dest_dir)
+        listing = _read_dir(repository_ctx, src_dir)
+        src_files = listing.splitlines()
+
+        # Create a list with the src_dir stripped to use for outputs.
+        dest_files = listing.replace(src_dir, "").splitlines()
+
+    # We clear folders that might have been generated previously to avoid
+    # undesired inclusions
+    commands = [
+        'if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi',
+        'if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi',
+    ]
+    outs = []
+
+    # If we have only one file to link we do not want to use the dest_dir, as
+    # $(@D) will include the full path to the file.
+    single_output = len(dest_files) == 1
+    for index, dest_file in enumerate(dest_files):
+        if dest_file == "":
+            continue
+        if single_output:
+            dest = "$(@D)/" + dest_file
+        else:
+            dest = "$(@D)/" + dest_dir + dest_file
+
+        # Files are always symlinked here.
+        commands.append('ln -s "%s" "%s"' % (src_files[index], dest))
+        outs.append('        "' + dest_dir + dest_file + '",')
+
+    return _genrule(
+        src_dir,
+        genrule_name,
+        " && ".join(commands),
+        "\n".join(outs),
+    )
+
+def _genrule(src_dir, genrule_name, command, outs):
+    """Renders a genrule definition as a string.
+
+    Args:
+      src_dir: Not used when rendering the rule.
+      genrule_name: Value of the genrule's `name` attribute.
+      command: Shell command placed inside the triple-quoted `cmd` attribute.
+      outs: Pre-formatted text placed between the brackets of `outs`.
+
+    Returns:
+      The genrule text, terminated by a newline.
+    """
+    rule_lines = [
+        "genrule(",
+        '    name = "' + genrule_name + '",',
+        "    outs = [",
+        outs,
+        "    ],",
+        '    cmd = """',
+        command,
+        '   """,',
+        ")",
+        # The empty last element produces the trailing newline.
+        "",
+    ]
+    return "\n".join(rule_lines)
+
+def _read_dir(repository_ctx, src_dir):
+    """Lists every regular file under src_dir.
+
+    Runs `find` on src_dir with `-follow -type f`, so subfolders are traversed
+    and symlinks are followed.
+
+    Args:
+      repository_ctx: The repository context used to run the command.
+      src_dir: Directory to list.
+
+    Returns:
+      The stdout of `find`: one file path per line. An empty listing is
+      accepted.
+    """
+    find_command = ["find", src_dir, "-follow", "-type", "f"]
+    return _execute(
+        repository_ctx,
+        find_command,
+        empty_stdout_fine = True,
+    ).stdout
+
+def _compute_rocm_extra_copts(repository_ctx, amdgpu_targets):
+    """Returns the extra copts for ROCm compilation as a stringified list.
+
+    Args:
+      repository_ctx: The repository context (currently unused).
+      amdgpu_targets: The configured AMDGPU targets (currently unused).
+
+    Returns:
+      The string "[]": no extra copts are emitted.
+    """
+
+    # AMDGPU targets are handled in the "crosstool_wrapper_driver_is_not_gcc",
+    # so no per-target "--amdgpu-target=<target>" flags are generated here.
+    amdgpu_target_flags = []
+    return str(amdgpu_target_flags)
+
+def _create_local_rocm_repository(repository_ctx):
+    """Creates the repository containing files set up to build with ROCm.
+
+    Generates, from templates: rocm/build_defs.bzl, the rocm BUILD file (with
+    genrules that symlink the ROCm headers and libraries), the crosstool BUILD
+    and CROSSTOOL files, the hipcc wrapper driver and rocm_config.h.
+
+    Args:
+      repository_ctx: The repository context.
+    """
+    rocm_config = _get_rocm_config(repository_ctx)
+
+    # Set up symbolic links for the rocm toolkit by creating genrules to do
+    # symlinking. We create one genrule for each directory we want to track under
+    # rocm_toolkit_path
+    rocm_toolkit_path = rocm_config.rocm_toolkit_path
+    rocm_include_path = rocm_toolkit_path + "/include"
+    genrules = [_symlink_genrule_for_dir(
+        repository_ctx,
+        rocm_include_path,
+        "rocm/include",
+        "rocm-include",
+    )]
+    genrules.append(_symlink_genrule_for_dir(
+        repository_ctx,
+        rocm_toolkit_path + "/rocfft/include",
+        "rocm/include/rocfft",
+        "rocfft-include",
+    ))
+    genrules.append(_symlink_genrule_for_dir(
+        repository_ctx,
+        rocm_toolkit_path + "/rocblas/include",
+        "rocm/include/rocblas",
+        "rocblas-include",
+    ))
+    genrules.append(_symlink_genrule_for_dir(
+        repository_ctx,
+        rocm_toolkit_path + "/miopen/include",
+        "rocm/include/miopen",
+        "miopen-include",
+    ))
+
+    # The libraries come from explicit source/destination lists rather than
+    # from a directory listing, so src_dir is None and dest_dir is empty.
+    rocm_libs = _find_libs(repository_ctx, rocm_config)
+    rocm_lib_src = []
+    rocm_lib_dest = []
+    for lib in rocm_libs.values():
+        rocm_lib_src.append(lib.path)
+        rocm_lib_dest.append("rocm/lib/" + lib.file_name)
+    genrules.append(_symlink_genrule_for_dir(
+        repository_ctx,
+        None,
+        "",
+        "rocm-lib",
+        rocm_lib_src,
+        rocm_lib_dest,
+    ))
+
+    # Set up BUILD file for rocm/
+    _tpl(
+        repository_ctx,
+        "rocm:build_defs.bzl",
+        {
+            "%{rocm_is_configured}": "True",
+            "%{rocm_extra_copts}": _compute_rocm_extra_copts(
+                repository_ctx,
+                rocm_config.amdgpu_targets,
+            ),
+        },
+    )
+    _tpl(
+        repository_ctx,
+        "rocm:BUILD",
+        {
+            "%{hip_lib}": rocm_libs["hip"].file_name,
+            "%{rocblas_lib}": rocm_libs["rocblas"].file_name,
+            "%{rocfft_lib}": rocm_libs["rocfft"].file_name,
+            "%{hiprand_lib}": rocm_libs["hiprand"].file_name,
+            "%{miopen_lib}": rocm_libs["miopen"].file_name,
+            "%{rocm_include_genrules}": "\n".join(genrules),
+            "%{rocm_headers}": ('":rocm-include",\n' +
+                                '":rocfft-include",\n' +
+                                '":rocblas-include",\n' +
+                                '":miopen-include",'),
+        },
+    )
+
+    # Set up crosstool/
+    _tpl(repository_ctx, "crosstool:BUILD", {"%{linker_files}": ":empty", "%{win_linker_files}": ":empty"})
+    cc = find_cc(repository_ctx)
+    host_compiler_includes = _host_compiler_includes(repository_ctx, cc)
+    rocm_defines = {
+        "%{rocm_include_path}": _rocm_include_path(
+            repository_ctx,
+            rocm_config,
+        ),
+        "%{host_compiler_includes}": host_compiler_includes,
+        "%{clang_path}": str(cc),
+    }
+
+    _tpl(repository_ctx, "crosstool:CROSSTOOL_hipcc", rocm_defines, out = "crosstool/CROSSTOOL")
+
+    # The AMDGPU targets are rendered as a comma-separated list of quoted names.
+    quoted_amdgpu_targets = ",".join(
+        ["\"%s\"" % c for c in rocm_config.amdgpu_targets],
+    )
+
+    _tpl(
+        repository_ctx,
+        "crosstool:clang/bin/crosstool_wrapper_driver_rocm",
+        {
+            "%{cpu_compiler}": str(cc),
+            # Derive the hipcc location from the configured toolkit path
+            # instead of hard-coding "/opt/rocm", so that a non-default
+            # installation is honored like every other path above.
+            "%{hipcc_path}": rocm_toolkit_path + "/bin/hipcc",
+            "%{gcc_host_compiler_path}": str(cc),
+            "%{rocm_amdgpu_targets}": quoted_amdgpu_targets,
+        },
+    )
+
+    # Set up rocm_config.h, which is used by
+    # tensorflow/stream_executor/dso_loader.cc.
+    _tpl(
+        repository_ctx,
+        "rocm:rocm_config.h",
+        {
+            "%{rocm_amdgpu_targets}": quoted_amdgpu_targets,
+            "%{rocm_toolkit_path}": rocm_config.rocm_toolkit_path,
+        },
+        "rocm/rocm/rocm_config.h",
+    )
+
+def _create_remote_rocm_repository(repository_ctx, remote_config_repo):
+    """Creates pointers to a remotely configured repo set up to build with ROCm.
+
+    Args:
+      repository_ctx: The repository context.
+      remote_config_repo: Value substituted for %{remote_rocm_repo} in the
+        rocm and crosstool remote.BUILD templates.
+    """
+    _tpl(
+        repository_ctx,
+        "rocm:build_defs.bzl",
+        {
+            "%{rocm_is_configured}": "True",
+            # _compute_rocm_extra_copts takes two required arguments; calling
+            # it with only repository_ctx fails. No AMDGPU targets are read
+            # in this code path, so an empty list is passed.
+            "%{rocm_extra_copts}": _compute_rocm_extra_copts(
+                repository_ctx,
+                [],
+            ),
+        },
+    )
+    _tpl(
+        repository_ctx,
+        "rocm:remote.BUILD",
+        {
+            "%{remote_rocm_repo}": remote_config_repo,
+        },
+        "rocm/BUILD",
+    )
+    _tpl(repository_ctx, "crosstool:remote.BUILD", {
+        "%{remote_rocm_repo}": remote_config_repo,
+    }, "crosstool/BUILD")
+
+def _rocm_autoconf_impl(repository_ctx):
+    """Implementation of the rocm_autoconf repository rule.
+
+    Picks one of three setups: a dummy repository when ROCm is not enabled, a
+    remote repository when _TF_ROCM_CONFIG_REPO is present in the environment,
+    and a local repository otherwise.
+
+    Args:
+      repository_ctx: The repository context.
+    """
+    if not _enable_rocm(repository_ctx):
+        _create_dummy_repository(repository_ctx)
+        return
+
+    env = repository_ctx.os.environ
+    if _TF_ROCM_CONFIG_REPO in env:
+        _create_remote_rocm_repository(repository_ctx, env[_TF_ROCM_CONFIG_REPO])
+        return
+
+    _create_local_rocm_repository(repository_ctx)
+
+# Repository rule implemented by _rocm_autoconf_impl. The names listed in
+# `environ` are the environment variables this rule declares a dependency on
+# (per the Bazel repository_rule documentation, a change to one of them causes
+# the repository to be re-fetched).
+rocm_configure = repository_rule(
+    implementation = _rocm_autoconf_impl,
+    environ = [
+        _GCC_HOST_COMPILER_PATH,
+        "TF_NEED_ROCM",
+        _ROCM_TOOLKIT_PATH,
+        _TF_ROCM_VERSION,
+        _TF_MIOPEN_VERSION,
+        _TF_ROCM_AMDGPU_TARGETS,
+        _TF_ROCM_CONFIG_REPO,
+    ],
+)
+
+"""Detects and configures the local ROCm toolchain.
+
+Add the following to your WORKSPACE file:
+
+```python
+rocm_configure(name = "local_config_rocm")
+```
+
+Args:
+  name: A unique name for this workspace rule.
+"""
--- a/tools/bazel.rc
+++ b/tools/bazel.rc
@ -43,6 +43,9 @@ build:download_clang_use_lld --linkopt='-fuse-ld=lld'
 build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
 build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true

+build:rocm --crosstool_top=@local_config_rocm//crosstool:toolchain
+build:rocm --define=using_rocm=true --define=using_rocm_hipcc=true
+
 build:cuda_clang --crosstool_top=@local_config_cuda//crosstool:toolchain
 build:cuda_clang --define=using_cuda=true --define=using_cuda_clang=true --define=using_clang=true