Merge pull request #20277 from ROCmSoftwarePlatform:upstream-staging

PiperOrigin-RevId: 214793113
This commit is contained in:
TensorFlower Gardener 2018-09-27 10:22:55 -07:00
commit 62e60166de
32 changed files with 1731 additions and 18 deletions

View File

@ -1540,6 +1540,13 @@ def main():
else:
set_trisycl_include_dir(environ_cp)
set_action_env_var(environ_cp, 'TF_NEED_ROCM', 'ROCm', False)
if (environ_cp.get('TF_NEED_ROCM') == '1' and
'LD_LIBRARY_PATH' in environ_cp and
environ_cp.get('LD_LIBRARY_PATH') != '1'):
write_action_env_to_bazelrc('LD_LIBRARY_PATH',
environ_cp.get('LD_LIBRARY_PATH'))
set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False)
if (environ_cp.get('TF_NEED_CUDA') == '1' and
'TF_CUDA_CONFIG_REPO' not in environ_cp):
@ -1580,6 +1587,19 @@ def main():
write_to_bazelrc('build --config=download_clang')
write_to_bazelrc('test --config=download_clang')
# SYCL / ROCm / CUDA are mutually exclusive.
# At most 1 GPU platform can be configured.
gpu_platform_count = 0
if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1':
gpu_platform_count += 1
if environ_cp.get('TF_NEED_ROCM') == '1':
gpu_platform_count += 1
if environ_cp.get('TF_NEED_CUDA') == '1':
gpu_platform_count += 1
if gpu_platform_count >= 2:
raise UserInputError('SYCL / CUDA / ROCm are mututally exclusive. '
'At most 1 GPU platform can be configured.')
set_build_var(environ_cp, 'TF_NEED_MPI', 'MPI', 'with_mpi_support', False)
if environ_cp.get('TF_NEED_MPI') == '1':
set_mpi_home(environ_cp)

View File

@ -149,6 +149,7 @@ load(
"tf_cuda_tests_tags",
)
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured")
load("@io_bazel_rules_closure//closure:defs.bzl", "closure_proto_library")
load(
"//third_party/mkl:build_defs.bzl",
@ -3006,7 +3007,7 @@ tf_cuda_library(
"platform/device_tracer.h",
],
copts = tf_copts(),
cuda_deps = tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps(),
cuda_deps = if_cuda_is_configured(tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps()),
visibility = ["//visibility:private"],
deps = [
":core_cpu_internal",

View File

@ -19,6 +19,14 @@ load(
"@local_config_cuda//cuda:build_defs.bzl",
"cuda_default_copts",
"if_cuda",
"if_cuda_is_configured",
)
load(
"@local_config_rocm//rocm:build_defs.bzl",
"if_rocm",
"if_rocm_is_configured",
"rocm_copts",
"rocm_default_copts",
)
load(
"//third_party/mkl:build_defs.bzl",
@ -39,6 +47,8 @@ load(
def register_extension_info(**kwargs):
pass
# if_cuda_is_configured def placeholder
# Given a source file, generate a test name.
# i.e. "common_runtime/direct_session_test.cc" becomes
# "common_runtime_direct_session_test"
@ -863,12 +873,16 @@ def tf_cuda_only_cc_test(
srcs = srcs + tf_binary_additional_srcs(),
size = size,
args = args,
copts = _cuda_copts() + tf_copts(),
copts = _cuda_copts() + rocm_copts() + tf_copts(),
data = data + tf_binary_dynamic_kernel_dsos(kernels),
deps = deps + tf_binary_dynamic_kernel_deps(kernels) + if_cuda([
clean_dep("//tensorflow/core:cuda"),
clean_dep("//tensorflow/core:gpu_lib"),
]),
deps = deps + tf_binary_dynamic_kernel_deps(kernels) +
if_cuda_is_configured([
clean_dep("//tensorflow/core:cuda"),
clean_dep("//tensorflow/core:gpu_lib"),
]) +
if_rocm_is_configured([
clean_dep("//tensorflow/core:gpu_lib"),
]),
linkopts = if_not_windows(["-lpthread", "-lm"]) + linkopts + _rpath_linkopts(name),
linkstatic = linkstatic or select({
# cc_tests with ".so"s in srcs incorrectly link on Darwin
@ -1003,7 +1017,7 @@ register_extension_info(
label_regex_for_dep = "{extension_name}",
)
def _cuda_copts():
def _cuda_copts(opts = []):
"""Gets the appropriate set of copts for (maybe) CUDA compilation.
If we're doing CUDA compilation, returns copts for our particular CUDA
@ -1019,13 +1033,17 @@ def _cuda_copts():
"@local_config_cuda//cuda:using_clang": ([
"-fcuda-flush-denormals-to-zero",
]),
})
}) + if_cuda_is_configured(opts)
# Build defs for TensorFlow kernels
# When this target is built using --config=cuda, a cc_library is built
# that passes -DGOOGLE_CUDA=1 and '-x cuda', linking in additional
# libraries needed by GPU kernels.
#
# When this target is built using --config=rocm, a cc_library is built
# that passes -DTENSORFLOW_USE_ROCM and '-x rocm', linking in additional
# libraries needed by GPU kernels.
def tf_gpu_kernel_library(
srcs,
copts = [],
@ -1033,16 +1051,18 @@ def tf_gpu_kernel_library(
deps = [],
hdrs = [],
**kwargs):
copts = copts + _cuda_copts() + if_cuda(cuda_copts) + tf_copts()
copts = copts + tf_copts() + _cuda_copts(opts = cuda_copts) + rocm_copts(opts = cuda_copts)
kwargs["features"] = kwargs.get("features", []) + ["-use_header_modules"]
native.cc_library(
srcs = srcs,
hdrs = hdrs,
copts = copts,
deps = deps + if_cuda([
deps = deps + if_cuda_is_configured([
clean_dep("//tensorflow/core:cuda"),
clean_dep("//tensorflow/core:gpu_lib"),
]) + if_rocm_is_configured([
clean_dep("//tensorflow/core:gpu_lib"),
]),
alwayslink = 1,
**kwargs
@ -1081,8 +1101,10 @@ def tf_cuda_library(deps = None, cuda_deps = None, copts = tf_copts(), **kwargs)
deps = deps + if_cuda(cuda_deps + [
clean_dep("//tensorflow/core:cuda"),
"@local_config_cuda//cuda:cuda_headers",
]) + if_rocm_is_configured(cuda_deps + [
# rocm_header placeholder
]),
copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_mkl(["-DINTEL_MKL=1"]) +
copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_rocm(["-DTENSORFLOW_USE_ROCM=1"]) + if_mkl(["-DINTEL_MKL=1"]) +
if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) +
if_enable_mkl(["-DENABLE_MKL"]) +
if_tensorrt(["-DGOOGLE_TENSORRT=1"])),
@ -1465,6 +1487,9 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
"@local_config_cuda//cuda:cuda_headers",
"@local_config_cuda//cuda:cudart_static",
]
rocm_deps = [
clean_dep("//tensorflow/core:stream_executor_headers_lib"),
]
deps = deps + tf_custom_op_library_additional_deps()
if gpu_srcs:
basename = name.split(".")[0]
@ -1473,13 +1498,14 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
srcs = gpu_srcs,
copts = _cuda_copts() + if_tensorrt(["-DGOOGLE_TENSORRT=1"]),
features = if_cuda(["-use_header_modules"]),
deps = deps + if_cuda(cuda_deps),
deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps),
)
cuda_deps.extend([":" + basename + "_gpu"])
rocm_deps.extend([":" + basename + "_gpu"])
check_deps(
name = name + "_check_deps",
deps = deps + if_cuda(cuda_deps),
deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps),
disallowed_deps = [
clean_dep("//tensorflow/core:framework"),
clean_dep("//tensorflow/core:lib"),
@ -1488,7 +1514,7 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
tf_cc_shared_object(
name = name,
srcs = srcs,
deps = deps + if_cuda(cuda_deps),
deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps),
data = if_static([name + "_check_deps"]),
copts = tf_copts(is_external = True),
features = ["windows_export_all_symbols"],

View File

@ -0,0 +1,97 @@
# This Dockerfile provides a starting point for a ROCm installation of
# MIOpen and tensorflow.
FROM ubuntu:xenial
MAINTAINER Jeff Poznanovic <jeffrey.poznanovic@amd.com>
ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/debian/
ARG ROCM_PATH=/opt/rocm
ENV DEBIAN_FRONTEND noninteractive
ENV TF_NEED_ROCM 1
ENV HOME /root/
RUN apt update && apt install -y wget software-properties-common
# Add rocm repository
RUN apt-get clean all
RUN wget -qO - $DEB_ROCM_REPO/rocm.gpg.key | apt-key add -
RUN sh -c "echo deb [arch=amd64] $DEB_ROCM_REPO xenial main > /etc/apt/sources.list.d/rocm.list"
# Install misc pkgs
RUN apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y \
build-essential \
clang-3.8 \
clang-format-3.8 \
clang-tidy-3.8 \
cmake \
cmake-qt-gui \
ssh \
curl \
apt-utils \
pkg-config \
g++-multilib \
git \
libunwind-dev \
libfftw3-dev \
libelf-dev \
libncurses5-dev \
libpthread-stubs0-dev \
vim \
gfortran \
libboost-program-options-dev \
libssl-dev \
libboost-dev \
libboost-system-dev \
libboost-filesystem-dev \
rpm \
libnuma-dev \
virtualenv \
python-pip \
python3-pip \
wget && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Install rocm pkgs
RUN apt-get update --allow-insecure-repositories && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
rocm-dev rocm-libs rocm-utils \
rocfft miopen-hip miopengemm rocblas hipblas rocrand \
rocm-profiler cxlactivitylogger && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
RUN cd ~ && git clone https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP.git
RUN cd ~/HIP && mkdir -p build && cd build && cmake .. && make package -j && dpkg -i *.deb
ENV HCC_HOME=$ROCM_PATH/hcc
ENV HIP_PATH=$ROCM_PATH/hip
ENV OPENCL_ROOT=$ROCM_PATH/opencl
ENV PATH="$HCC_HOME/bin:$HIP_PATH/bin:${PATH}"
ENV PATH="$ROCM_PATH/bin:${PATH}"
ENV PATH="$OPENCL_ROOT/bin:${PATH}"
# Add target file to help determine which device(s) to build for.
# RUN executes under /bin/sh, which is dash on Ubuntu; dash's builtin echo has
# no -e option and would write a literal "-e " into the file. printf behaves
# the same in every POSIX shell, so use it to emit one GPU target per line.
RUN printf 'gfx803\ngfx900\n' >> /opt/rocm/bin/target.lst
# Setup environment variables, and add those environment variables at the end of ~/.bashrc
ARG HCC_HOME=/opt/rocm/hcc
ARG HIP_PATH=/opt/rocm/hip
ARG PATH=$HCC_HOME/bin:$HIP_PATH/bin:$PATH
# Copy and run the install scripts.
COPY install/*.sh /install/
ARG DEBIAN_FRONTEND=noninteractive
RUN /install/install_bootstrap_deb_packages.sh
RUN add-apt-repository -y ppa:openjdk-r/ppa && \
add-apt-repository -y ppa:george-edison55/cmake-3.x
RUN /install/install_deb_packages.sh
RUN /install/install_pip_packages.sh
RUN /install/install_bazel.sh
RUN /install/install_golang.sh
# Set up the master bazelrc configuration file.
COPY install/.bazelrc /etc/bazel.bazelrc
# Configure the build for our ROCm configuration.
ENV TF_NEED_ROCM 1

View File

@ -19,7 +19,7 @@
#
# Usage: docker_test.sh <IMAGE_TYPE> <TAG> <WHL_PATH>
# Arguments:
# IMAGE_TYPE : Type of the image: (CPU|GPU)
# IMAGE_TYPE : Type of the image: (CPU|GPU|ROCM)
# TAG : Docker image tag
# WHL_PATH : Path to the whl file to be installed inside the docker image
#
@ -60,6 +60,8 @@ if [[ "${IMAGE_TYPE}" == "cpu" ]]; then
DOCKERFILE="tensorflow/tools/docker/Dockerfile"
elif [[ "${IMAGE_TYPE}" == "gpu" ]]; then
DOCKERFILE="tensorflow/tools/docker/Dockerfile.gpu"
elif [[ "${IMAGE_TYPE}" == "rocm" ]]; then
DOCKERFILE="tensorflow/tools/docker/Dockerfile.rocm"
else
die "Unrecognized image type: $1"
fi
@ -106,13 +108,16 @@ if [ "${IMAGE_TYPE}" == "gpu" ]; then
devices=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
libs=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}')
GPU_EXTRA_PARAMS="${devices} ${libs}"
elif [ "${IMAGE_TYPE}" == "rocm" ]; then
ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video"
else
GPU_EXTRA_PARAMS=""
ROCM_EXTRA_PARAMS=""
fi
# Run docker image with source directory mapped
docker run -v ${BASE_DIR}:/tensorflow-src -w /tensorflow-src \
${GPU_EXTRA_PARAMS} \
${GPU_EXTRA_PARAMS} ${ROCM_EXTRA_PARAMS} \
"${DOCKER_IMG_TAG}" \
/bin/bash -c "tensorflow/tools/ci_build/builds/run_pip_tests.sh && "\
"tensorflow/tools/ci_build/builds/test_tutorials.sh && "\

View File

@ -132,6 +132,7 @@ echo "Using Bazel flags: ${BAZEL_FLAGS}"
PIP_BUILD_TARGET="//tensorflow/tools/pip_package:build_pip_package"
GPU_FLAG=""
if [[ ${CONTAINER_TYPE} == "cpu" ]] || \
[[ ${CONTAINER_TYPE} == "rocm" ]] || \
[[ ${CONTAINER_TYPE} == "debian.jessie.cpu" ]]; then
bazel build ${BAZEL_FLAGS} ${PIP_BUILD_TARGET} || \
die "Build failed."
@ -255,7 +256,8 @@ if [[ $(uname) == "Linux" ]]; then
die "ERROR: Cannot find repaired wheel."
fi
# Copy and rename for gpu manylinux as we do not want auditwheel to package in libcudart.so
elif [[ ${CONTAINER_TYPE} == "gpu" ]]; then
elif [[ ${CONTAINER_TYPE} == "gpu" ]] || \
[[ ${CONTAINER_TYPE} == "rocm" ]]; then
WHL_PATH=${AUDITED_WHL_NAME}
cp ${WHL_DIR}/${WHL_BASE_NAME} ${WHL_PATH}
echo "Copied manylinx1 wheel file at ${WHL_PATH}"

View File

@ -48,6 +48,12 @@ getent passwd "${CI_BUILD_UID}" || adduser ${ADDUSER_OPTS} \
usermod -a -G sudo "${CI_BUILD_USER}"
echo "${CI_BUILD_USER} ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-nopasswd-sudo
if [[ "${TF_NEED_ROCM}" -eq 1 ]]; then
# ROCm requires the video group in order to use the GPU for compute. If it
# exists on the host, add it to the container.
getent group video || addgroup video && adduser "${CI_BUILD_USER}" video
fi
if [ -e /root/.bazelrc ]; then
cp /root/.bazelrc "${CI_BUILD_HOME}/.bazelrc"
chown "${CI_BUILD_UID}:${CI_BUILD_GID}" "${CI_BUILD_HOME}/.bazelrc"

View File

@ -18,7 +18,7 @@
# <COMMAND>
#
# CONTAINER_TYPE: Type of the docker container used to run the build:
# e.g., (cpu | gpu | android | tensorboard)
# e.g., (cpu | gpu | rocm | android | tensorboard)
#
# DOCKERFILE_PATH: (Optional) Path to the Dockerfile used for docker build.
# If this optional value is not supplied (via the
@ -103,6 +103,14 @@ if [[ "${CONTAINER_TYPE}" != gpu* ]]; then
GPU_EXTRA_PARAMS=""
fi
# Add extra params for rocm devices and libraries for ROCm container.
if [[ "${CONTAINER_TYPE}" == "rocm" ]]; then
ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video"
else
ROCM_EXTRA_PARAMS=""
fi
# Determine the docker image name
DOCKER_IMG_NAME="${BUILD_TAG}.${CONTAINER_TYPE}"
@ -159,6 +167,7 @@ ${DOCKER_BINARY} run --rm --pid=host \
-v ${WORKSPACE}:/workspace \
-w /workspace \
${GPU_EXTRA_PARAMS} \
${ROCM_EXTRA_PARAMS} \
${CI_DOCKER_EXTRA_PARAMS[@]} \
"${DOCKER_IMG_NAME}" \
${CI_COMMAND_PREFIX[@]} \

View File

@ -26,6 +26,7 @@ echo ""
# Run configure.
export TF_NEED_CUDA=0
export TF_NEED_ROCM=0
export CC_OPT_FLAGS='-mavx'
# Only running cc tests, python version does not matter.
export PYTHON_BIN_PATH=`which python`

View File

@ -26,6 +26,7 @@ echo ""
# Run configure.
export TF_NEED_CUDA=0
export TF_NEED_ROCM=0
export CC_OPT_FLAGS='-mavx'
export PYTHON_BIN_PATH=`which python2`
yes "" | $PYTHON_BIN_PATH configure.py

View File

@ -26,6 +26,7 @@ echo ""
# Run configure.
export TF_NEED_CUDA=0
export TF_NEED_ROCM=0
export CC_OPT_FLAGS='-mavx'
export PYTHON_BIN_PATH=`which python3`
yes "" | $PYTHON_BIN_PATH configure.py

View File

@ -26,6 +26,7 @@ echo ""
# Run configure.
export TF_NEED_CUDA=0
export TF_NEED_ROCM=0
export CC_OPT_FLAGS='-mavx'
export PYTHON_BIN_PATH=`which python3`
yes "" | $PYTHON_BIN_PATH configure.py

View File

@ -27,5 +27,8 @@ SUFFIX="-cpu-linux-"
if [ "${TF_NEED_CUDA}" == "1" ]; then
SUFFIX="-gpu-linux-"
fi
if [ "${TF_NEED_ROCM}" == "1" ]; then
SUFFIX="-rocm-linux-"
fi
build_libtensorflow_tarball "${SUFFIX}$(uname -m)"

View File

@ -19,4 +19,5 @@
set -ex
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
export TF_NEED_CUDA=0
export TF_NEED_ROCM=0
"${SCRIPT_DIR}/libtensorflow_docker.sh"

View File

@ -38,6 +38,11 @@ if [ "${TF_NEED_CUDA}" == "1" ]; then
DOCKER_BINARY="nvidia-docker"
DOCKER_FILE="Dockerfile.gpu"
fi
if [ "${TF_NEED_ROCM}" == "1" ]; then
DOCKER_IMAGE="tf-tensorflow-rocm"
DOCKER_BINARY="docker"
DOCKER_FILE="Dockerfile.rocm"
fi
docker build \
-t "${DOCKER_IMAGE}" \
@ -53,6 +58,7 @@ ${DOCKER_BINARY} run \
-e "TF_NEED_HDFS=0" \
-e "TF_NEED_CUDA=${TF_NEED_CUDA}" \
-e "TF_NEED_TENSORRT=${TF_NEED_CUDA}" \
-e "TF_NEED_ROCM=${TF_NEED_ROCM}" \
-e "TF_NEED_OPENCL_SYCL=0" \
"${DOCKER_IMAGE}" \
"/workspace/tensorflow/tools/ci_build/linux/libtensorflow.sh"

View File

@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
# Script to build a binary release of libtensorflow with ROCm GPU support.
# Abort on the first failing command and echo every command as it runs.
set -ex
# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Request a ROCm build; exported so the script invoked below inherits it.
export TF_NEED_ROCM=1
# Delegate the actual work to the docker wrapper that sits next to this script.
"${SCRIPT_DIR}/libtensorflow_docker.sh"

View File

@ -0,0 +1,39 @@
#!/usr/bin/env bash
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ==============================================================================
# Stop at the first failing command and trace each command as it executes.
set -ex

# One Bazel job per "processor" entry listed in /proc/cpuinfo.
N_JOBS=$(grep -c ^processor /proc/cpuinfo)

echo ""
echo "Bazel will use ${N_JOBS} concurrent job(s)."
echo ""

# Configure for ROCm; `yes ""` answers every configure prompt with an empty
# line.
export PYTHON_BIN_PATH=$(which python3)
export CC_OPT_FLAGS='-mavx'
export TF_NEED_ROCM=1
yes "" | $PYTHON_BIN_PATH configure.py

# Run the C++ tests under the ROCm config. Test timeouts are doubled to avoid
# flakes.
bazel test \
    --config=rocm \
    --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-benchmark-test \
    -k \
    --test_lang_filters=cc \
    --jobs=${N_JOBS} \
    --test_timeout 300,450,1200,3600 \
    --build_tests_only \
    --test_output=errors \
    --local_test_jobs=1 \
    --config=opt \
    //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/...

View File

@ -0,0 +1,39 @@
#!/usr/bin/env bash
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ==============================================================================
# Stop at the first failing command and trace each command as it executes.
set -ex

# One Bazel job per "processor" entry listed in /proc/cpuinfo.
N_JOBS=$(grep -c ^processor /proc/cpuinfo)

echo ""
echo "Bazel will use ${N_JOBS} concurrent job(s)."
echo ""

# Configure for ROCm; `yes ""` answers every configure prompt with an empty
# line.
export PYTHON_BIN_PATH=$(which python3)
export CC_OPT_FLAGS='-mavx'
export TF_NEED_ROCM=1
yes "" | $PYTHON_BIN_PATH configure.py

# Run the Python tests under the ROCm config. Test timeouts are doubled to
# avoid flakes.
bazel test \
    --config=rocm \
    --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-benchmark-test \
    -k \
    --test_lang_filters=py \
    --jobs=${N_JOBS} \
    --test_timeout 300,450,1200,3600 \
    --build_tests_only \
    --test_output=errors \
    --local_test_jobs=1 \
    --config=opt \
    //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/...

View File

@ -27,6 +27,7 @@ echo ""
# Run configure.
export TF_NEED_CUDA=0
export TF_NEED_ROCM=0
export CC_OPT_FLAGS='-mavx'
export PYTHON_BIN_PATH=$(which python2)
yes "" | $PYTHON_BIN_PATH configure.py

View File

@ -26,6 +26,7 @@ source "${SCRIPT_DIR}/../builds/libtensorflow.sh"
export PYTHON_BIN_PATH="/usr/bin/python"
export TF_NEED_HDFS=0
export TF_NEED_CUDA=0
export TF_NEED_ROCM=0
export TF_NEED_OPENCL_SYCL=0
export TF_NEED_MKL=0
export COMPUTECPP_PATH="/usr/local"

View File

@ -27,6 +27,7 @@ export TF_NEED_CUDA=1
export LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/extras/CUPTI/lib:${LD_LIBRARY_PATH}"
export PYTHON_BIN_PATH="/usr/bin/python"
export TF_NEED_HDFS=0
export TF_NEED_ROCM=0
export TF_NEED_OPENCL_SYCL=0
export TF_NEED_MKL=0
export COMPUTECPP_PATH="/usr/local"

View File

@ -0,0 +1,36 @@
#!/usr/bin/env bash
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
# Script to produce binary release of libtensorflow (C API, Java jars etc.).
# Abort on the first failing command and echo every command as it runs.
set -ex
# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# See comments at the top of this file for details.
# Sourced (not executed) so that build_libtensorflow_tarball, called at the end
# of this script, is available in this shell.
source "${SCRIPT_DIR}/../builds/libtensorflow.sh"
# Configure script
# The exports below pre-answer configure options: ROCm is requested while
# GCP, HDFS, CUDA, OpenCL SYCL and MKL are switched off.
export TF_NEED_ROCM=1
export PYTHON_BIN_PATH="/usr/bin/python"
export TF_NEED_GCP=0
export TF_NEED_HDFS=0
export TF_NEED_CUDA=0
export TF_NEED_OPENCL_SYCL=0
export TF_NEED_MKL=0
export COMPUTECPP_PATH="/usr/local"
# NOTE(review): PATH still lists /usr/local/cuda/bin although TF_NEED_CUDA=0;
# presumably carried over from the CUDA variant of this script -- confirm.
export PATH="/usr/local/cuda/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin"
# NOTE(review): the tarball suffix says "gpu-darwin" while this script sets
# TF_NEED_ROCM=1; verify that this artifact name is intended for a ROCm build.
build_libtensorflow_tarball "-gpu-darwin-$(uname -m)"

View File

@ -0,0 +1,41 @@
#!/usr/bin/env bash
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ==============================================================================
# Stop at the first failing command and trace each command as it executes.
set -ex

# One Bazel job per "processor" entry listed in /proc/cpuinfo.
N_JOBS=$(grep -c ^processor /proc/cpuinfo)

echo ""
echo "Bazel will use ${N_JOBS} concurrent job(s)."
echo ""

# Configure for ROCm; `yes ""` answers every configure prompt with an empty
# line.
export PYTHON_BIN_PATH=$(which python3)
export TF_NEED_ROCM=1
yes "" | $PYTHON_BIN_PATH configure.py

# Append an extra build flag to the generated bazelrc, then start from a clean
# output tree.
echo "build --distinct_host_configuration=false" >> .tf_configure.bazelrc
bazel clean

# Run the compiler (XLA) tests under the ROCm config. Test timeouts are doubled
# to avoid flakes.
bazel test \
    --config=rocm \
    --test_tag_filters=-no_gpu,-benchmark-test,-no_oss \
    -k \
    --jobs=${N_JOBS} \
    --test_timeout 300,450,1200,3600 \
    --build_tests_only \
    --test_output=errors \
    --local_test_jobs=1 \
    --config=xla \
    -- \
    //tensorflow/compiler/...

View File

@ -1,6 +1,7 @@
# TensorFlow external dependencies that can be loaded in WORKSPACE files.
load("//third_party/gpus:cuda_configure.bzl", "cuda_configure")
load("//third_party/gpus:rocm_configure.bzl", "rocm_configure")
load("//third_party/tensorrt:tensorrt_configure.bzl", "tensorrt_configure")
load("//third_party:nccl/nccl_configure.bzl", "nccl_configure")
load("//third_party/mkl:build_defs.bzl", "mkl_repository")
@ -43,6 +44,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
sycl_configure(name = "local_config_sycl")
syslibs_configure(name = "local_config_syslibs")
python_configure(name = "local_config_python")
rocm_configure(name = "local_config_rocm")
initialize_third_party()

View File

@ -0,0 +1,158 @@
major_version: "local"
minor_version: ""
default_target_cpu: "same_as_host"
default_toolchain {
cpu: "k8"
toolchain_identifier: "local_linux"
}
default_toolchain {
cpu: "piii"
toolchain_identifier: "local_linux"
}
default_toolchain {
cpu: "arm"
toolchain_identifier: "local_linux"
}
default_toolchain {
cpu: "ppc"
toolchain_identifier: "local_linux"
}
toolchain {
abi_version: "local"
abi_libc_version: "local"
builtin_sysroot: ""
compiler: "compiler"
host_system_name: "local"
needsPic: true
supports_gold_linker: false
supports_incremental_linker: false
supports_fission: false
supports_interface_shared_objects: false
supports_normalizing_ar: false
supports_start_end_lib: false
supports_thin_archives: false
target_libc: "local"
target_cpu: "local"
target_system_name: "local"
toolchain_identifier: "local_linux"
tool_path { name: "ar" path: "/usr/bin/ar" }
tool_path { name: "compat-ld" path: "/usr/bin/ld" }
tool_path { name: "cpp" path: "/usr/bin/cpp" }
tool_path { name: "dwp" path: "/usr/bin/dwp" }
# As part of the TensorFlow release, we place some ROCm-related compilation
# files in @local_config_rocm//crosstool/clang/bin, and this relative
# path, combined with the rest of our Bazel configuration causes our
# compilation to use those files.
tool_path { name: "gcc" path: "clang/bin/crosstool_wrapper_driver_rocm" }
# Use "-std=c++11" for hipcc. For consistency, force both the host compiler
# and the device compiler to use "-std=c++11".
cxx_flag: "-std=c++11"
linker_flag: "-Wl,-no-as-needed"
linker_flag: "-lstdc++"
#linker_flag: "-B/usr/bin/"
linker_flag: "-B/opt/rocm/hcc/compiler/bin"
%{host_compiler_includes}
tool_path { name: "gcov" path: "/usr/bin/gcov" }
# C(++) compiles invoke the compiler (as that is the one knowing where
# to find libraries), but we provide LD so other rules can invoke the linker.
tool_path { name: "ld" path: "/usr/bin/ld" }
tool_path { name: "nm" path: "/usr/bin/nm" }
tool_path { name: "objcopy" path: "/usr/bin/objcopy" }
objcopy_embed_flag: "-I"
objcopy_embed_flag: "binary"
tool_path { name: "objdump" path: "/usr/bin/objdump" }
tool_path { name: "strip" path: "/usr/bin/strip" }
# Anticipated future default.
unfiltered_cxx_flag: "-no-canonical-prefixes"
# Make C++ compilation deterministic. Use linkstamping instead of these
# compiler symbols.
unfiltered_cxx_flag: "-Wno-builtin-macro-redefined"
unfiltered_cxx_flag: "-D__DATE__=\"redacted\""
unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\""
unfiltered_cxx_flag: "-D__TIME__=\"redacted\""
unfiltered_cxx_flag: "-D__HIP_PLATFORM_HCC__"
# The macro EIGEN_USE_HIP is used to tell Eigen to use the HIP platform headers
# It needs to be always set when compiling Eigen headers
# (irrespective of whether the source file is being compiled via HIPCC)
# so adding -DEIGEN_USE_HIP as a default CXX flag here
unfiltered_cxx_flag: "-DEIGEN_USE_HIP"
# Security hardening on by default.
# Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases.
# We need to undef it before redefining it as some distributions now have
# it enabled by default.
#compiler_flag: "-U_FORTIFY_SOURCE"
#compiler_flag: "-D_FORTIFY_SOURCE=1"
#compiler_flag: "-fstack-protector"
#compiler_flag: "-fPIE"
#linker_flag: "-pie"
#linker_flag: "-Wl,-z,relro,-z,now"
# Enable coloring even if there's no attached terminal. Bazel removes the
# escape sequences if --nocolor is specified. This isn't supported by gcc
# on Ubuntu 14.04.
# compiler_flag: "-fcolor-diagnostics"
# All warnings are enabled. Maybe enable -Werror as well?
compiler_flag: "-Wall"
# Enable a few more warnings that aren't part of -Wall.
compiler_flag: "-Wunused-but-set-parameter"
# But disable some that are problematic.
compiler_flag: "-Wno-free-nonheap-object" # has false positives
# Keep stack frames for debugging, even in opt mode.
compiler_flag: "-fno-omit-frame-pointer"
# Anticipated future default.
linker_flag: "-no-canonical-prefixes"
unfiltered_cxx_flag: "-fno-canonical-system-headers"
# Have gcc return the exit code from ld.
linker_flag: "-pass-exit-codes"
# Stamp the binary with a unique identifier.
linker_flag: "-Wl,--build-id=md5"
linker_flag: "-Wl,--hash-style=gnu"
# Gold linker only? Can we enable this by default?
# linker_flag: "-Wl,--warn-execstack"
# linker_flag: "-Wl,--detect-odr-violations"
# Include directory for ROCm headers.
%{rocm_include_path}
compilation_mode_flags {
mode: DBG
# Enable debug symbols.
compiler_flag: "-g"
}
compilation_mode_flags {
mode: OPT
# No debug symbols.
# Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or
# even generally? However, that can't happen here, as it requires special
# handling in Bazel.
compiler_flag: "-g0"
# Conservative choice for -O
# -O3 can increase binary size and even slow down the resulting binaries.
# Profile first and / or use FDO if you need better performance than this.
compiler_flag: "-O2"
# Disable assertions
compiler_flag: "-DNDEBUG"
# Removal of unused code and data at link time (can this increase binary size in some cases?).
compiler_flag: "-ffunction-sections"
compiler_flag: "-fdata-sections"
linker_flag: "-Wl,--gc-sections"
}
linking_mode_flags { mode: DYNAMIC }
}

View File

@ -0,0 +1,241 @@
#!/usr/bin/env python
"""Crosstool wrapper for compiling ROCm programs.
SYNOPSIS:
crosstool_wrapper_driver_rocm [options passed in by cc_library()
or cc_binary() rule]
DESCRIPTION:
This script is expected to be called by the cc_library() or cc_binary() bazel
rules. When the option "-x rocm" is present in the list of arguments passed
to this script, it invokes the hipcc compiler. Most arguments are passed
as is as a string to --compiler-options of hipcc. When "-x rocm" is not
present, this wrapper invokes gcc with the input arguments as is.
"""
from __future__ import print_function
__author__ = 'whchung@gmail.com (Wen-Heng (Jack) Chung)'
from argparse import ArgumentParser
import os
import subprocess
import re
import sys
import pipes
# Template values set by rocm_configure.bzl.
CPU_COMPILER = ('%{cpu_compiler}')
GCC_HOST_COMPILER_PATH = ('%{gcc_host_compiler_path}')
HIPCC_PATH = '%{hipcc_path}'
PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH)
def Log(s):
  """Print `s` to stdout, prefixed with the 'gpus/crosstool: ' tag.

  Args:
    s: The value to log; it is rendered with str.format().
  """
  message = 'gpus/crosstool: {0}'.format(s)
  print(message)
def GetOptionValue(argv, option):
  """Return every value supplied for a single-dash option in argv.

  Args:
    argv: A list of strings, possibly the argv passed to main().
    option: Name of the option to look up, without the leading '-'.

  Returns:
    A flat list of the values found. Values may follow one occurrence of the
    option (e.g. -opt val1 val2) or be spread over repeated occurrences
    (e.g. -opt val1 -opt val2). An empty list is returned when the option is
    absent or carries no values.
  """
  option_parser = ArgumentParser()
  option_parser.add_argument('-' + option, nargs='*', action='append')
  # Arguments the parser does not recognise are ignored, not rejected.
  parsed, _unrecognised = option_parser.parse_known_args(argv)

  occurrences = vars(parsed)[option] if parsed else None
  if not occurrences:
    return []

  # Each occurrence contributes its own list of values; concatenate them in
  # command-line order.
  values = []
  for group in occurrences:
    values = values + group
  return values
def GetHostCompilerOptions(argv):
  """Build the host-compiler option string from selected flags in argv.

  Only -isystem, -iquote, -g and --sysroot contribute to the result;
  -fno-canonical-system-headers is recognised by the parser but is not
  forwarded.

  Args:
    argv: A list of strings, possibly the argv passed to main().

  Returns:
    The string that can be used as the --compiler-options to hipcc. Every
    emitted option is preceded by a single space; the string is empty when
    none of the handled flags are present.
  """
  parser = ArgumentParser()
  parser.add_argument('-isystem', nargs='*', action='append')
  parser.add_argument('-iquote', nargs='*', action='append')
  parser.add_argument('--sysroot', nargs=1)
  parser.add_argument('-g', nargs='*', action='append')
  parser.add_argument('-fno-canonical-system-headers', action='store_true')
  parsed, _unrecognised = parser.parse_known_args(argv)

  def _flatten(groups):
    # One list per occurrence of the flag -> one flat list of values.
    return [value for group in groups for value in group]

  pieces = []
  if parsed.isystem:
    pieces.append(' -isystem ' + ' -isystem '.join(_flatten(parsed.isystem)))
  if parsed.iquote:
    pieces.append(' -iquote ' + ' -iquote '.join(_flatten(parsed.iquote)))
  if parsed.g:
    # A bare -g yields ' -g'; -g<level> yields ' -g<level>'.
    pieces.append(' -g' + ' -g'.join(_flatten(parsed.g)))
  if parsed.sysroot:
    pieces.append(' --sysroot ' + parsed.sysroot[0])
  return ''.join(pieces)
def GetHipccOptions(argv):
  """Assemble the hipcc-specific options requested via -hipcc_options.

  Args:
    argv: A list of strings, possibly the argv passed to main().

  Returns:
    The string that can be passed directly to hipcc: each option returned by
    _update_options is prefixed with '--' and the results are joined with
    single spaces. An empty string is returned when no -hipcc_options values
    are present.
  """
  parser = ArgumentParser()
  parser.add_argument('-hipcc_options', nargs='*', action='append')
  parsed, _unrecognised = parser.parse_known_args(argv)

  if not parsed.hipcc_options:
    return ''

  # Merge the values of every -hipcc_options occurrence into one list.
  requested = sum(parsed.hipcc_options, [])
  # NOTE(review): _update_options is not defined in the visible part of this
  # file -- confirm it exists before relying on this path.
  options = _update_options(requested)
  return ' '.join(['--' + name for name in options])
def InvokeHipcc(argv, log=False):
  """Call hipcc with arguments assembled from argv.

  Translates a gcc-style command line into up to two hipcc shell commands: an
  optional dependency-file generation step (when -MF is present) followed by
  the actual compilation.

  Args:
    argv: A list of strings, possibly the argv passed to main().
    log: True if logging is requested.

  Returns:
    1 if argv names no source file after -c or does not name exactly one
    output file with -o. Otherwise the exit code of the last shell command
    that ran: the dependency step if it failed, else the compile step. A
    negative value means the command was terminated by a signal.
  """
  host_compiler_options = GetHostCompilerOptions(argv)
  hipcc_compiler_options = GetHipccOptions(argv)
  opt_option = GetOptionValue(argv, 'O')
  m_options = GetOptionValue(argv, 'm')
  m_options = ''.join([' -m' + m for m in m_options if m in ['32', '64']])
  include_options = GetOptionValue(argv, 'I')
  out_file = GetOptionValue(argv, 'o')
  depfiles = GetOptionValue(argv, 'MF')
  defines = GetOptionValue(argv, 'D')
  defines = ''.join([' -D' + define for define in defines])
  undefines = GetOptionValue(argv, 'U')
  undefines = ''.join([' -U' + define for define in undefines])
  std_options = GetOptionValue(argv, 'std')
  hipcc_allowed_std_options = ["c++11"]
  std_options = ''.join([' -std=' + define
      for define in std_options if define in hipcc_allowed_std_options])

  # The list of source files get passed after the -c option. I don't know of
  # any other reliable way to just get the list of source files to be compiled.
  src_files = GetOptionValue(argv, 'c')

  if not src_files:
    return 1
  if len(out_file) != 1:
    return 1

  # Any optimization level above zero is collapsed to -O2; no level (or level
  # zero) selects a debug build instead.
  opt_level = opt_option[0] if opt_option else '0'
  try:
    optimized = int(opt_level) > 0
  except ValueError:
    # Non-numeric levels (-Os, -Ofast, -Og, ...) also request optimization;
    # they must not crash the wrapper.
    optimized = True
  opt = ' -O2' if optimized else ' -g'

  includes = (' -I ' + ' -I '.join(include_options)
              if include_options
              else '')

  # Unfortunately, there are other options that have -c prefix too.
  # So allowing only those look like C/C++ files.
  src_files = [f for f in src_files if
               re.search(r'\.cpp$|\.cc$|\.c$|\.cxx$|\.C$', f)]
  srcs = ' '.join(src_files)
  out = ' -o ' + out_file[0]

  hipccopts = ' '
  hipccopts += ' ' + hipcc_compiler_options
  hipccopts += undefines
  hipccopts += defines
  hipccopts += std_options
  hipccopts += m_options

  if depfiles:
    # Generate the dependency file
    depfile = depfiles[0]
    cmd = (HIPCC_PATH + ' ' + hipccopts +
           host_compiler_options +
           ' ' + GCC_HOST_COMPILER_PATH +
           ' -I .' + includes + ' ' + srcs + ' -M -o ' + depfile)
    if log: Log(cmd)
    # subprocess.call yields the real exit code. os.system returns a wait
    # status (exit code << 8 on POSIX), which sys.exit() would truncate to 0
    # and so report a failed compile as a success.
    exit_status = subprocess.call(cmd, shell=True)
    if exit_status != 0:
      return exit_status

  cmd = (HIPCC_PATH + ' ' + hipccopts +
         host_compiler_options + ' -fPIC' +
         ' ' + GCC_HOST_COMPILER_PATH +
         ' -I .' + opt + includes + ' -c ' + srcs + out)

  # TODO(zhengxq): for some reason, 'gcc' needs this help to find 'as'.
  # Need to investigate and fix.
  cmd = 'PATH=' + PREFIX_DIR + ':$PATH ' + cmd
  if log: Log(cmd)
  return subprocess.call(cmd, shell=True)
def main():
  """Route one compiler invocation to hipcc or to the CPU compiler.

  Three cases are distinguished from sys.argv:
    * "-x rocm": the remaining arguments are shell-quoted and compiled
      through InvokeHipcc.
    * "-pass-exit-codes": the command is run through hipcc with that flag
      removed and every argument wrapped in single quotes.
    * anything else: the command is passed to the CPU compiler with
      --rocm_log removed and -D__HIP_PLATFORM_HCC__ appended.

  Returns:
    The status returned by InvokeHipcc or subprocess.call for the command run.
  """
  # ignore PWD env var
  os.environ['PWD'] = ''

  parser = ArgumentParser()
  parser.add_argument('-x', nargs=1)
  parser.add_argument('--rocm_log', action='store_true')
  parser.add_argument('-pass-exit-codes', action='store_true')
  cli_args = sys.argv[1:]
  args, leftover = parser.parse_known_args(cli_args)
  verbose = args.rocm_log

  if args.x and args.x[0] == 'rocm':
    if verbose:
      Log('-x rocm')
    quoted = [pipes.quote(s) for s in leftover]
    if verbose:
      Log('using hipcc')
    return InvokeHipcc(quoted, log=verbose)

  # XXX use hipcc to link
  if args.pass_exit_codes:
    # special handling for $ORIGIN
    # - guard every argument with ''
    link_cmd = [HIPCC_PATH] + [
        "'" + flag + "'"
        for flag in cli_args
        if not flag.startswith('-pass-exit-codes')
    ]
    if verbose:
      Log('Link with hipcc: %s' % (' '.join(link_cmd)))
    return subprocess.call(link_cmd)

  # Strip our flags before passing through to the CPU compiler for files which
  # are not -x rocm. We can't just pass 'leftover' because it also strips -x.
  # We not only want to pass -x to the CPU compiler, but also keep it in its
  # relative location in the argv list (the compiler is actually sensitive to
  # this).
  cpu_cmd = [CPU_COMPILER]
  cpu_cmd.extend(
      flag for flag in cli_args if not flag.startswith('--rocm_log'))
  # XXX: SE codes need to be built with gcc, but need this macro defined
  cpu_cmd.append("-D__HIP_PLATFORM_HCC__")
  return subprocess.call(cpu_cmd)


if __name__ == '__main__':
  sys.exit(main())

0
third_party/gpus/rocm/BUILD vendored Normal file
View File

99
third_party/gpus/rocm/BUILD.tpl vendored Normal file
View File

@ -0,0 +1,99 @@
# BUILD template for the generated ROCm repository. The percent-brace
# placeholders in this file are filled in by the rocm_configure repository
# rule before the file is written out.
licenses(["restricted"]) # MPL2, portions GPL v3, LGPL v3, BSD-like
package(default_visibility = ["//visibility:public"])
# Matches builds invoked with --define=using_rocm_hipcc=true.
config_setting(
name = "using_hipcc",
values = {
"define": "using_rocm_hipcc=true",
},
)
# Header-only target: the generated config header plus whatever header
# targets the repository rule substitutes into hdrs.
cc_library(
name = "rocm_headers",
hdrs = [
"rocm/rocm_config.h",
%{rocm_headers}
],
includes = [
".",
"rocm/include",
],
visibility = ["//visibility:public"],
)
# One target per ROCm library. Each exposes the library file under rocm/lib
# both as a source and as runtime data; the file name is substituted by the
# repository rule.
cc_library(
name = "hip",
srcs = ["rocm/lib/%{hip_lib}"],
data = ["rocm/lib/%{hip_lib}"],
includes = [
".",
"rocm/include",
],
linkstatic = 1,
visibility = ["//visibility:public"],
)
cc_library(
name = "rocblas",
srcs = ["rocm/lib/%{rocblas_lib}"],
data = ["rocm/lib/%{rocblas_lib}"],
includes = [
".",
"rocm/include",
],
linkstatic = 1,
visibility = ["//visibility:public"],
)
cc_library(
name = "rocfft",
srcs = ["rocm/lib/%{rocfft_lib}"],
data = ["rocm/lib/%{rocfft_lib}"],
includes = [
".",
"rocm/include",
],
linkstatic = 1,
visibility = ["//visibility:public"],
)
# Unlike the other libraries, hiprand additionally puts the rocrand header
# directory on the include path.
cc_library(
name = "hiprand",
srcs = ["rocm/lib/%{hiprand_lib}"],
data = ["rocm/lib/%{hiprand_lib}"],
includes = [
".",
"rocm/include",
"rocm/include/rocrand",
],
linkstatic = 1,
visibility = ["//visibility:public"],
)
cc_library(
name = "miopen",
srcs = ["rocm/lib/%{miopen_lib}"],
data = ["rocm/lib/%{miopen_lib}"],
includes = [
".",
"rocm/include",
],
linkstatic = 1,
visibility = ["//visibility:public"],
)
# Umbrella target that pulls in the headers and every library above.
cc_library(
name = "rocm",
visibility = ["//visibility:public"],
deps = [
":rocm_headers",
":hip",
":rocblas",
":rocfft",
":hiprand",
":miopen",
],
)
# Replaced by the generated symlink genrules (or by nothing in the dummy
# repository).
%{rocm_include_genrules}

View File

@ -0,0 +1,45 @@
# Macros for building ROCm code.
def if_rocm(if_true, if_false = []):
    """Chooses between two values depending on whether hipcc is in use.

    Returns a select() that evaluates to if_true when the
    @local_config_rocm//rocm:using_hipcc config setting matches, and to
    if_false in every other configuration.
    """
    branches = {
        "@local_config_rocm//rocm:using_hipcc": if_true,
        "//conditions:default": if_false,
    }
    return select(branches)
def rocm_default_copts():
    """Returns the copts applied to every ROCm compilation.

    When building with hipcc this is "-x rocm" followed by the extra copts
    that were substituted into this template; otherwise it is empty.
    """
    hipcc_copts = ["-x", "rocm"] + %{rocm_extra_copts}
    return if_rocm(hipcc_copts)
def rocm_copts(opts = []):
    """Assembles the copts for a target that may be compiled for ROCm.

    The result is the default ROCm copts, then the hipcc-specific copts, then
    opts. opts is only included when ROCm was enabled during configuration.
    """

    # The hipcc branch currently contributes a single empty entry only.
    hipcc_specific_copts = select({
        "//conditions:default": [],
        "@local_config_rocm//rocm:using_hipcc": ([
            "",
        ]),
    })
    configured_opts = if_rocm_is_configured(opts)
    return rocm_default_copts() + hipcc_specific_copts + configured_opts
def rocm_is_configured():
    """Tells whether ROCm was enabled when the repository was configured.

    The returned value is written into this template by the repository rule.
    """
    configured = %{rocm_is_configured}
    return configured
def if_rocm_is_configured(x):
    """Returns x if ROCm was enabled during configuration, else an empty list.

    In contrast to if_rocm(), the outcome does not depend on building with
    --config=rocm, which lets non-ROCm code depend on ROCm libraries.
    """
    return x if rocm_is_configured() else []

21
third_party/gpus/rocm/rocm_config.h.tpl vendored Normal file
View File

@ -0,0 +1,21 @@
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef ROCM_ROCM_CONFIG_H_
#define ROCM_ROCM_CONFIG_H_

// Root directory of the ROCm toolkit installation. The placeholder in the
// string below is replaced by the rocm_configure repository rule when it
// instantiates this template, so a non-default toolkit location is honored.
#define TF_ROCM_TOOLKIT_PATH "%{rocm_toolkit_path}"

#endif  // ROCM_ROCM_CONFIG_H_

784
third_party/gpus/rocm_configure.bzl vendored Normal file
View File

@ -0,0 +1,784 @@
# -*- Python -*-
"""Repository rule for ROCm autoconfiguration.
`rocm_configure` depends on the following environment variables:
* `TF_NEED_ROCM`: Whether to enable building with ROCm.
* `GCC_HOST_COMPILER_PATH`: The GCC host compiler path
* `ROCM_TOOLKIT_PATH`: The path to the ROCm toolkit. Default is
`/opt/rocm`.
* `TF_ROCM_VERSION`: The version of the ROCm toolkit. If this is blank, then
use the system default.
* `TF_MIOPEN_VERSION`: The version of the MIOpen library.
* `TF_ROCM_AMDGPU_TARGETS`: The AMDGPU targets. Default is
`gfx803,gfx900`.
* `TF_ROCM_CONFIG_REPO`: If set while ROCm is enabled, the generated
repository only points at this remotely configured repository instead of
inspecting the local ROCm installation.
"""
# Names of the environment variables consulted by this rule.
_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH"
_ROCM_TOOLKIT_PATH = "ROCM_TOOLKIT_PATH"
# NOTE(review): the two version variables are only listed in the rule's
# `environ` below; no function in this file appears to read them - confirm.
_TF_ROCM_VERSION = "TF_ROCM_VERSION"
_TF_MIOPEN_VERSION = "TF_MIOPEN_VERSION"
_TF_ROCM_AMDGPU_TARGETS = "TF_ROCM_AMDGPU_TARGETS"
_TF_ROCM_CONFIG_REPO = "TF_ROCM_CONFIG_REPO"
# Fallback values used when the corresponding variable is not set.
# NOTE(review): the two version defaults appear unused in this file - confirm.
_DEFAULT_ROCM_VERSION = ""
_DEFAULT_MIOPEN_VERSION = ""
_DEFAULT_ROCM_TOOLKIT_PATH = "/opt/rocm"
_DEFAULT_ROCM_AMDGPU_TARGETS = ["gfx803", "gfx900"]
def find_cc(repository_ctx):
    """Finds the host C++ compiler.

    The compiler named by the GCC_HOST_COMPILER_PATH environment variable is
    used when that variable holds a non-blank value; otherwise "gcc" is used.

    Args:
      repository_ctx: The repository context.

    Returns:
      The configured value itself when it is an absolute path (no existence
      check is made), otherwise the result of looking the name up with
      repository_ctx.which(). Fails when the lookup finds nothing.
    """
    cc_name = "gcc"
    env = repository_ctx.os.environ
    if _GCC_HOST_COMPILER_PATH in env:
        cc_name_from_env = env[_GCC_HOST_COMPILER_PATH].strip()
        if cc_name_from_env:
            cc_name = cc_name_from_env
    if cc_name.startswith("/"):
        # Absolute path, maybe we should make this supported by our which function.
        return cc_name
    cc = repository_ctx.which(cc_name)
    if cc == None:
        # Report the name that was actually searched for, which is not
        # necessarily "gcc" when the environment variable overrides it.
        fail(("Cannot find {}, either correct your path or set the {}" +
              " environment variable").format(cc_name, _GCC_HOST_COMPILER_PATH))
    return cc
# Text searched for in the compiler's verbose stderr output to find where the
# list of default include directories begins.
_INC_DIR_MARKER_BEGIN = "#include <...>"
def _cxx_inc_convert(path):
    """Cleans up one include path as printed by the compiler.

    Only leading and trailing whitespace is removed.
    """
    return path.strip()
def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp):
    """Asks the compiler for its default include directories for one language.

    Runs the compiler in verbose preprocess-only mode and extracts the include
    directory listing from what it prints on stderr.

    Args:
      repository_ctx: The repository context.
      cc: The compiler to run.
      lang_is_cpp: True to query for C++, False to query for C.

    Returns:
      A list of include directory paths as strings. The list is empty when
      the expected listing cannot be located in the compiler output.
    """
    lang = "c++" if lang_is_cpp else "c"

    # TODO: We pass -no-canonical-prefixes here to match the compiler flags,
    #       but in rocm_clang CROSSTOOL file that is a `feature` and we should
    #       handle the case when it's disabled and no flag is passed
    verbose_output = repository_ctx.execute([
        cc,
        "-no-canonical-prefixes",
        "-E",
        "-x" + lang,
        "-",
        "-v",
    ]).stderr

    marker_pos = verbose_output.find(_INC_DIR_MARKER_BEGIN)
    if marker_pos == -1:
        return []

    # The listing starts on the line following the marker.
    listing_start = verbose_output.find("\n", marker_pos)
    if listing_start == -1:
        return []

    # The last line that begins with a space is taken as the final entry.
    last_entry = verbose_output.rfind("\n ")
    if last_entry == -1 or last_entry < listing_start:
        return []

    listing_end = verbose_output.find("\n", last_entry + 1)
    if listing_end == -1:
        listing = verbose_output[listing_start + 1:]
    else:
        listing = verbose_output[listing_start + 1:listing_end].strip()

    return [
        str(repository_ctx.path(_cxx_inc_convert(entry)))
        for entry in listing.split("\n")
    ]
def get_cxx_inc_directories(repository_ctx, cc):
    """Computes the list of default C and C++ include directories.

    Args:
      repository_ctx: The repository context.
      cc: The compiler to query.

    Returns:
      The C++ include directories, followed by those C include directories
      that are not already among them.
    """

    # For some reason `clang -xc` sometimes returns include paths that are
    # different from the ones from `clang -xc++`. (Symlink and a dir)
    # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
    includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True)
    includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False)

    # Membership is tested against the list itself: depsets do not support
    # the `in` operator in current Bazel, and the result is the same.
    return includes_cpp + [
        inc
        for inc in includes_c
        if inc not in includes_cpp
    ]
def auto_configure_fail(msg):
    """Aborts with msg behind a red "ROCm Configuration Error:" banner."""

    # ANSI escape sequences: switch to red, then reset.
    colors = ("\033[0;31m", "\033[0m")
    fail("\n%sROCm Configuration Error:%s %s\n" % (colors[0], colors[1], msg))
# END cc_configure common functions (see TODO above).
def _host_compiler_includes(repository_ctx, cc):
    """Generates the CROSSTOOL entries derived from the host compiler.

    Args:
      repository_ctx: The repository context.
      cc: The path to the gcc host compiler.

    Returns:
      A newline-separated string with one cxx_builtin_include_directory entry
      per host compiler include directory and for a fixed numpy header
      directory, followed by an unfiltered_cxx_flag entry that defines
      TENSORFLOW_USE_ROCM. The string can be added to the CROSSTOOL file.
    """
    inc_dirs = get_cxx_inc_directories(repository_ctx, cc) + [
        # Add numpy headers
        "/usr/lib/python2.7/dist-packages/numpy/core/include",
    ]
    entries = [
        " cxx_builtin_include_directory: \"%s\"" % inc_dir
        for inc_dir in inc_dirs
    ]

    # define TENSORFLOW_USE_ROCM
    entries.append(" unfiltered_cxx_flag: \"-DTENSORFLOW_USE_ROCM\"")
    return "\n".join(entries)
def _rocm_include_path(repository_ctx, rocm_config):
    """Generates the cxx_builtin_include_directory entries for rocm inc dirs.

    Args:
      repository_ctx: The repository context (not used by this function).
      rocm_config: The ROCm config; only its rocm_toolkit_path is read.

    Returns:
      A newline-separated string containing one cxx_builtin_include_directory
      entry per ROCm include directory, which can be added to the CROSSTOOL
      file.
    """

    # NOTE: only the first entry follows the configured toolkit path; all the
    # others are fixed below /opt/rocm.
    inc_dirs = [
        # general ROCm include path
        rocm_config.rocm_toolkit_path + "/include",
        # Add HSA headers
        "/opt/rocm/hsa/include",
        # Add HIP headers
        "/opt/rocm/include/hip",
        "/opt/rocm/include/hip/hcc_detail",
        # Add rocrand and hiprand headers
        "/opt/rocm/rocrand/include",
        "/opt/rocm/hiprand/include",
        # Add rocfft headers
        "/opt/rocm/rocfft/include",
        # Add rocBLAS headers
        "/opt/rocm/rocblas/include",
        # Add MIOpen headers
        "/opt/rocm/miopen/include",
        # Add hcc headers
        "/opt/rocm/hcc/include",
        "/opt/rocm/hcc/compiler/lib/clang/7.0.0/include/",
        "/opt/rocm/hcc/lib/clang/7.0.0/include",
        # Newer hcc builds use/are based off of clang 8.0.0.
        "/opt/rocm/hcc/compiler/lib/clang/8.0.0/include/",
        "/opt/rocm/hcc/lib/clang/8.0.0/include",
    ]
    return "\n".join([
        " cxx_builtin_include_directory: \"%s\"" % inc_dir
        for inc_dir in inc_dirs
    ])
def _enable_rocm(repository_ctx):
    """Returns True iff TF_NEED_ROCM is set to "1", ignoring whitespace."""
    env = repository_ctx.os.environ
    if "TF_NEED_ROCM" not in env:
        return False
    return env["TF_NEED_ROCM"].strip() == "1"
def _rocm_toolkit_path(repository_ctx):
    """Locates the ROCm toolkit directory.

    The ROCM_TOOLKIT_PATH environment variable is used when set; otherwise
    the default toolkit location applies. Fails if the directory is missing.

    Args:
      repository_ctx: The repository context.

    Returns:
      The real path of the ROCm toolkit install directory, as a string.
    """
    env = repository_ctx.os.environ
    if _ROCM_TOOLKIT_PATH in env:
        configured = env[_ROCM_TOOLKIT_PATH].strip()
    else:
        configured = _DEFAULT_ROCM_TOOLKIT_PATH
    toolkit_dir = repository_ctx.path(configured)
    if not toolkit_dir.exists:
        auto_configure_fail("Cannot find rocm toolkit path.")
    return str(toolkit_dir.realpath)
def _amdgpu_targets(repository_ctx):
    """Returns a list of strings representing AMDGPU targets.

    The targets are read from the comma-separated TF_ROCM_AMDGPU_TARGETS
    environment variable. Whitespace around the value and around each entry
    is ignored. An unset or blank variable selects the default targets.

    Args:
      repository_ctx: The repository context.

    Returns:
      The list of target names. Fails on any entry that is not "gfx"
      followed by digits only.
    """
    env = repository_ctx.os.environ
    if _TF_ROCM_AMDGPU_TARGETS not in env:
        return _DEFAULT_ROCM_AMDGPU_TARGETS
    amdgpu_targets_str = env[_TF_ROCM_AMDGPU_TARGETS].strip()
    if not amdgpu_targets_str:
        # A blank value is treated like an unset one, as find_cc does for the
        # host compiler variable.
        return _DEFAULT_ROCM_AMDGPU_TARGETS
    amdgpu_targets = [
        target.strip()
        for target in amdgpu_targets_str.split(",")
    ]
    for amdgpu_target in amdgpu_targets:
        if amdgpu_target[:3] != "gfx" or not amdgpu_target[3:].isdigit():
            auto_configure_fail("Invalid AMDGPU target: %s" % amdgpu_target)
    return amdgpu_targets
def _cpu_value(repository_ctx):
    """Determines the name of the host operating system.

    Args:
      repository_ctx: The repository context.

    Returns:
      "Darwin" or "Windows" when the reported OS name indicates one of them,
      otherwise the stripped output of `uname -s`.
    """
    os_name = repository_ctx.os.name.lower()
    if os_name.startswith("mac os"):
        return "Darwin"
    if "windows" in os_name:
        return "Windows"
    return repository_ctx.execute(["uname", "-s"]).stdout.strip()
def _lib_name(lib, cpu_value, version = "", static = False):
    """Constructs the platform-specific name of a library.

    Args:
      lib: The name of the library, such as "hip"
      cpu_value: The name of the host operating system. Must be exactly
        "Linux", "Windows" or "Darwin"; anything else fails.
      version: The version of the library. Only used for shared objects on
        Linux and Darwin.
      static: True the library is static or False if it is a shared object.
        Ignored on Windows.

    Returns:
      The platform-specific name of the library.
    """
    version_suffix = ".%s" % version if version else ""

    # Exact comparison on purpose: `cpu_value in ("Linux")` is a substring
    # test against the string "Linux", which also accepts "" or "Lin".
    if cpu_value == "Linux":
        if static:
            return "lib%s.a" % lib
        return "lib%s.so%s" % (lib, version_suffix)
    if cpu_value == "Windows":
        return "%s.lib" % lib
    if cpu_value == "Darwin":
        if static:
            return "lib%s.a" % lib
        return "lib%s%s.dylib" % (lib, version_suffix)
    auto_configure_fail("Invalid cpu_value: %s" % cpu_value)
def _find_rocm_lib(
        lib,
        repository_ctx,
        cpu_value,
        basedir,
        version = "",
        static = False):
    """Looks for one ROCm library below basedir.

    On Linux the lib64, lib64/stubs, lib/x86_64-linux-gnu and lib
    subdirectories are probed in that order; basedir itself is probed last on
    every platform. Fails if the library is in none of these places.

    Args:
      lib: The name of the library, such as "hip"
      repository_ctx: The repository context.
      cpu_value: The name of the host operating system.
      basedir: The install directory of ROCm.
      version: The version of the library.
      static: True if static library, False if shared object.

    Returns:
      Returns a struct with the following fields:
        file_name: The basename of the library found on the system.
        path: The full path to the library.
    """
    file_name = _lib_name(lib, cpu_value, version, static)

    search_patterns = []
    if cpu_value == "Linux":
        search_patterns += [
            "%s/lib64/%s",
            "%s/lib64/stubs/%s",
            "%s/lib/x86_64-linux-gnu/%s",
            "%s/lib/%s",
        ]
    search_patterns.append("%s/%s")

    for pattern in search_patterns:
        candidate = repository_ctx.path(pattern % (basedir, file_name))
        if candidate.exists:
            return struct(file_name = file_name, path = str(candidate.realpath))
    auto_configure_fail("Cannot find rocm library %s" % file_name)
def _find_libs(repository_ctx, rocm_config):
    """Locates every ROCm library the build needs.

    Args:
      repository_ctx: The repository context.
      rocm_config: The ROCm config as returned by _get_rocm_config

    Returns:
      Map of library names to structs of filename and path as returned by
      _find_rocm_lib.
    """
    cpu_value = rocm_config.cpu_value
    toolkit_path = rocm_config.rocm_toolkit_path

    # (key in the result, library base name, directory below the toolkit root)
    wanted = [
        ("hip", "hip_hcc", ""),
        ("rocblas", "rocblas", "/rocblas"),
        ("rocfft", "rocfft", "/rocfft"),
        ("hiprand", "hiprand", "/hiprand"),
        ("miopen", "MIOpen", "/miopen"),
    ]
    return {
        key: _find_rocm_lib(
            lib_name,
            repository_ctx,
            cpu_value,
            toolkit_path + subdir,
        )
        for key, lib_name, subdir in wanted
    }
def _get_rocm_config(repository_ctx):
    """Gathers what is known about the ROCm installation on the system.

    Args:
      repository_ctx: The repository context.

    Returns:
      A struct containing the following fields:
        rocm_toolkit_path: The ROCm toolkit installation directory.
        amdgpu_targets: The list of AMDGPU targets to build for.
        cpu_value: The name of the host operating system.
    """
    host_os = _cpu_value(repository_ctx)
    toolkit_path = _rocm_toolkit_path(repository_ctx)
    targets = _amdgpu_targets(repository_ctx)
    return struct(
        rocm_toolkit_path = toolkit_path,
        amdgpu_targets = targets,
        cpu_value = host_os,
    )
def _tpl(repository_ctx, tpl, substitutions = {}, out = None):
    """Instantiates the template //third_party/gpus/<tpl>.tpl.

    Args:
      repository_ctx: The repository context.
      tpl: Template name relative to //third_party/gpus, without ".tpl".
      substitutions: Dict of placeholder to replacement text.
      out: Output path. Defaults to tpl with ":" replaced by "/".
    """
    destination = out if out else tpl.replace(":", "/")
    template_label = Label("//third_party/gpus/%s.tpl" % tpl)
    repository_ctx.template(destination, template_label, substitutions)
def _file(repository_ctx, label):
    """Copies //third_party/gpus/<label>.tpl without any substitutions.

    The output path is label with ":" replaced by "/".
    """
    destination = label.replace(":", "/")
    source_label = Label("//third_party/gpus/%s.tpl" % label)
    repository_ctx.template(destination, source_label, {})
# Content of crosstool/error_gpu_disabled.bzl in the dummy repository. It
# defines a macro whose only effect is to fail with a message asking the user
# to re-run ./configure.
_DUMMY_CROSSTOOL_BZL_FILE = """
def error_gpu_disabled():
fail("ERROR: Building with --config=rocm but TensorFlow is not configured " +
"to build with GPU support. Please re-run ./configure and enter 'Y' " +
"at the prompt to build with GPU support.")
native.genrule(
name = "error_gen_crosstool",
outs = ["CROSSTOOL"],
cmd = "echo 'Should not be run.' && exit 1",
)
native.filegroup(
name = "crosstool",
srcs = [":CROSSTOOL"],
output_licenses = ["unencumbered"],
)
"""
# Content of crosstool/BUILD in the dummy repository: loads and invokes the
# failing macro above.
_DUMMY_CROSSTOOL_BUILD_FILE = """
load("//crosstool:error_gpu_disabled.bzl", "error_gpu_disabled")
error_gpu_disabled()
"""
def _create_dummy_repository(repository_ctx):
    """Creates the placeholder repository used when ROCm is not enabled.

    The repository still provides build_defs.bzl, a BUILD file, a config
    header and a crosstool package, so references to it resolve, but its
    crosstool fails with an actionable message when it is used.

    Args:
      repository_ctx: The repository context.
    """
    cpu_value = _cpu_value(repository_ctx)

    # Set up BUILD file for rocm/.
    build_defs_substitutions = {
        "%{rocm_is_configured}": "False",
        "%{rocm_extra_copts}": "[]",
    }
    _tpl(repository_ctx, "rocm:build_defs.bzl", build_defs_substitutions)

    build_substitutions = {
        "%{hip_lib}": _lib_name("hip", cpu_value),
        "%{rocblas_lib}": _lib_name("rocblas", cpu_value),
        "%{miopen_lib}": _lib_name("miopen", cpu_value),
        "%{rocfft_lib}": _lib_name("rocfft", cpu_value),
        "%{hiprand_lib}": _lib_name("hiprand", cpu_value),
        "%{rocm_include_genrules}": "",
        "%{rocm_headers}": "",
    }
    _tpl(repository_ctx, "rocm:BUILD", build_substitutions)

    # Create dummy files for the ROCm toolkit since they are still required by
    # tensorflow/core/platform/default/build_config:rocm.
    repository_ctx.file("rocm/hip/include/hip/hip_runtime.h", "")

    # Set up rocm_config.h, which is used by
    # tensorflow/stream_executor/dso_loader.cc.
    config_header_substitutions = {
        "%{rocm_toolkit_path}": _DEFAULT_ROCM_TOOLKIT_PATH,
    }
    _tpl(
        repository_ctx,
        "rocm:rocm_config.h",
        config_header_substitutions,
        "rocm/rocm/rocm_config.h",
    )

    # If rocm_configure is not configured to build with GPU support, and the user
    # attempts to build with --config=rocm, add a dummy build rule to intercept
    # this and fail with an actionable error message.
    repository_ctx.file(
        "crosstool/error_gpu_disabled.bzl",
        _DUMMY_CROSSTOOL_BZL_FILE,
    )
    repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
def _execute(
        repository_ctx,
        cmdline,
        error_msg = None,
        error_details = None,
        empty_stdout_fine = False):
    """Runs a command and fails the configuration if it looks unsuccessful.

    A run counts as failed when it wrote anything to stderr, or when it wrote
    nothing to stdout and empty_stdout_fine is False. The command's return
    code is not inspected.

    Args:
      repository_ctx: the repository_ctx object
      cmdline: list of strings, the command to execute
      error_msg: string, a summary of the error if the command fails
      error_details: string, details about the error or steps to fix it
      empty_stdout_fine: bool, if True, an empty stdout result is fine, otherwise
        it's an error

    Return:
      the result of repository_ctx.execute(cmdline)
    """
    result = repository_ctx.execute(cmdline)
    stdout_acceptable = empty_stdout_fine or result.stdout
    if result.stderr or not stdout_acceptable:
        summary = error_msg.strip() if error_msg else "Repository command failed"
        details = error_details if error_details else ""
        auto_configure_fail(
            "\n".join([summary, result.stderr.strip(), details]),
        )
    return result
def _norm_path(path):
    """Returns a path with '/' and remove the trailing slash.

    Backslashes are converted to forward slashes and at most one trailing
    slash is dropped. An empty path is returned unchanged.
    """
    path = path.replace("\\", "/")

    # endswith() is safe on an empty string, unlike indexing path[-1].
    if path.endswith("/"):
        path = path[:-1]
    return path
def _symlink_genrule_for_dir(
        repository_ctx,
        src_dir,
        dest_dir,
        genrule_name,
        src_files = [],
        dest_files = []):
    """Returns the text of a genrule that symlinks a set of files.

    When src_dir is not None, the files are found by listing src_dir
    recursively, and each one is linked to the same relative location below
    dest_dir. Otherwise src_files and dest_files are used as given, pairwise.

    Args:
      repository_ctx: The repository context.
      src_dir: Directory to read the files from, or None.
      dest_dir: Directory prefix for the link targets and declared outputs.
      genrule_name: Name of the generated genrule.
      src_files: Link sources; only used when src_dir is None.
      dest_files: Link destinations; only used when src_dir is None.

    Returns:
      The genrule definition as a string.
    """
    if src_dir != None:
        src_dir = _norm_path(src_dir)
        dest_dir = _norm_path(dest_dir)
        listing = _read_dir(repository_ctx, src_dir)
        src_files = listing.splitlines()

        # Create a list with the src_dir stripped to use for outputs.
        dest_files = listing.replace(src_dir, "").splitlines()

    # We clear folders that might have been generated previously to avoid
    # undesired inclusions
    commands = [
        'if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi',
        'if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi',
    ]
    outs = []
    link_cmd = "ln -s"
    single_output = len(dest_files) == 1
    for index, dest_file in enumerate(dest_files):
        if dest_file == "":
            continue
        if single_output:
            # If we have only one file to link we do not want to use the
            # dest_dir, as $(@D) will include the full path to the file.
            link_path = "$(@D)/" + dest_file
        else:
            link_path = "$(@D)/" + dest_dir + dest_file
        commands.append(link_cmd + ' "%s" "%s"' % (src_files[index], link_path))
        outs.append(' "' + dest_dir + dest_file + '",')

    return _genrule(
        src_dir,
        genrule_name,
        " && ".join(commands),
        "\n".join(outs),
    )
def _genrule(src_dir, genrule_name, command, outs):
    """Renders a genrule definition as text.

    Args:
      src_dir: Not used.
      genrule_name: Value of the genrule's name attribute.
      command: Shell command placed in the genrule's cmd attribute.
      outs: Pre-formatted text placed inside the genrule's outs list.

    Returns:
      The genrule definition as a string.
    """
    name_attr = ' name = "' + genrule_name + '",\n'
    outs_attr = " outs = [\n" + outs + "\n ],\n"
    cmd_attr = ' cmd = """\n' + command + '\n """,\n'
    return "genrule(\n" + name_attr + outs_attr + cmd_attr + ")\n"
def _read_dir(repository_ctx, src_dir):
    """Lists all files below a directory.

    Runs `find` on src_dir, descending into subfolders and following
    symlinks. An empty listing is accepted.

    Returns:
      The stdout of `find`: the full path of every file, one per line.
    """
    return _execute(
        repository_ctx,
        ["find", src_dir, "-follow", "-type", "f"],
        empty_stdout_fine = True,
    ).stdout
def _compute_rocm_extra_copts(repository_ctx, amdgpu_targets):
    """Returns the extra ROCm copts as the text of a list literal.

    Args:
      repository_ctx: The repository context (not used).
      amdgpu_targets: The AMDGPU targets (currently not used, see below).

    Returns:
      The string form of the list of extra copts, which is always "[]".
    """

    # AMDGPU targets are handled in the "crosstool_wrapper_driver_is_not_gcc",
    # so no per-target "--amdgpu-target=<target>" flags are emitted here.
    amdgpu_target_flags = []
    return str(amdgpu_target_flags)
def _create_local_rocm_repository(repository_ctx):
    """Creates the repository containing files set up to build with ROCm.

    Generates, from the locally installed ROCm toolkit: the rocm package
    (build_defs.bzl, BUILD with symlink genrules for headers and libraries,
    rocm_config.h) and the crosstool package (BUILD, CROSSTOOL and the hipcc
    wrapper script).

    Args:
      repository_ctx: The repository context.
    """
    rocm_config = _get_rocm_config(repository_ctx)

    # Set up symbolic links for the rocm toolkit by creating genrules to do
    # symlinking. We create one genrule for each directory we want to track under
    # rocm_toolkit_path
    rocm_toolkit_path = rocm_config.rocm_toolkit_path
    rocm_include_path = rocm_toolkit_path + "/include"
    genrules = [_symlink_genrule_for_dir(
        repository_ctx,
        rocm_include_path,
        "rocm/include",
        "rocm-include",
    )]
    genrules.append(_symlink_genrule_for_dir(
        repository_ctx,
        rocm_toolkit_path + "/rocfft/include",
        "rocm/include/rocfft",
        "rocfft-include",
    ))
    genrules.append(_symlink_genrule_for_dir(
        repository_ctx,
        rocm_toolkit_path + "/rocblas/include",
        "rocm/include/rocblas",
        "rocblas-include",
    ))
    genrules.append(_symlink_genrule_for_dir(
        repository_ctx,
        rocm_toolkit_path + "/miopen/include",
        "rocm/include/miopen",
        "miopen-include",
    ))

    # All libraries are linked by one genrule that is given explicit
    # source/destination pairs instead of a directory to scan.
    rocm_libs = _find_libs(repository_ctx, rocm_config)
    rocm_lib_src = []
    rocm_lib_dest = []
    for lib in rocm_libs.values():
        rocm_lib_src.append(lib.path)
        rocm_lib_dest.append("rocm/lib/" + lib.file_name)
    genrules.append(_symlink_genrule_for_dir(
        repository_ctx,
        None,
        "",
        "rocm-lib",
        rocm_lib_src,
        rocm_lib_dest,
    ))

    # Set up BUILD file for rocm/
    _tpl(
        repository_ctx,
        "rocm:build_defs.bzl",
        {
            "%{rocm_is_configured}": "True",
            "%{rocm_extra_copts}": _compute_rocm_extra_copts(
                repository_ctx,
                rocm_config.amdgpu_targets,
            ),
        },
    )
    _tpl(
        repository_ctx,
        "rocm:BUILD",
        {
            "%{hip_lib}": rocm_libs["hip"].file_name,
            "%{rocblas_lib}": rocm_libs["rocblas"].file_name,
            "%{rocfft_lib}": rocm_libs["rocfft"].file_name,
            "%{hiprand_lib}": rocm_libs["hiprand"].file_name,
            "%{miopen_lib}": rocm_libs["miopen"].file_name,
            "%{rocm_include_genrules}": "\n".join(genrules),
            "%{rocm_headers}": ('":rocm-include",\n' +
                                '":rocfft-include",\n' +
                                '":rocblas-include",\n' +
                                '":miopen-include",'),
        },
    )

    # Set up crosstool/
    _tpl(repository_ctx, "crosstool:BUILD", {"%{linker_files}": ":empty", "%{win_linker_files}": ":empty"})
    cc = find_cc(repository_ctx)
    host_compiler_includes = _host_compiler_includes(repository_ctx, cc)
    rocm_defines = {
        "%{rocm_include_path}": _rocm_include_path(
            repository_ctx,
            rocm_config,
        ),
        "%{host_compiler_includes}": host_compiler_includes,
        "%{clang_path}": str(cc),
    }
    _tpl(repository_ctx, "crosstool:CROSSTOOL_hipcc", rocm_defines, out = "crosstool/CROSSTOOL")

    # NOTE: the hipcc location is fixed and does not follow rocm_toolkit_path.
    _tpl(
        repository_ctx,
        "crosstool:clang/bin/crosstool_wrapper_driver_rocm",
        {
            "%{cpu_compiler}": str(cc),
            "%{hipcc_path}": "/opt/rocm/bin/hipcc",
            "%{gcc_host_compiler_path}": str(cc),
            "%{rocm_amdgpu_targets}": ",".join(
                ["\"%s\"" % c for c in rocm_config.amdgpu_targets],
            ),
        },
    )

    # Set up rocm_config.h, which is used by
    # tensorflow/stream_executor/dso_loader.cc.
    _tpl(
        repository_ctx,
        "rocm:rocm_config.h",
        {
            "%{rocm_amdgpu_targets}": ",".join(
                ["\"%s\"" % c for c in rocm_config.amdgpu_targets],
            ),
            "%{rocm_toolkit_path}": rocm_config.rocm_toolkit_path,
        },
        "rocm/rocm/rocm_config.h",
    )
def _create_remote_rocm_repository(repository_ctx, remote_config_repo):
    """Creates pointers to a remotely configured repo set up to build with ROCm.

    Args:
      repository_ctx: The repository context.
      remote_config_repo: Value substituted for the remote repository
        placeholder in the rocm and crosstool BUILD templates.
    """
    _tpl(
        repository_ctx,
        "rocm:build_defs.bzl",
        {
            "%{rocm_is_configured}": "True",
            # _compute_rocm_extra_copts takes two positional arguments; the
            # targets are passed explicitly, as the local repository does.
            "%{rocm_extra_copts}": _compute_rocm_extra_copts(
                repository_ctx,
                _amdgpu_targets(repository_ctx),
            ),
        },
    )
    _tpl(
        repository_ctx,
        "rocm:remote.BUILD",
        {
            "%{remote_rocm_repo}": remote_config_repo,
        },
        "rocm/BUILD",
    )
    _tpl(repository_ctx, "crosstool:remote.BUILD", {
        "%{remote_rocm_repo}": remote_config_repo,
    }, "crosstool/BUILD")
def _rocm_autoconf_impl(repository_ctx):
    """Implementation of the rocm_autoconf repository rule.

    Creates a dummy repository when ROCm is not enabled, pointers to a
    remotely configured repository when TF_ROCM_CONFIG_REPO is set, and a
    locally configured repository otherwise.
    """
    if not _enable_rocm(repository_ctx):
        _create_dummy_repository(repository_ctx)
        return

    env = repository_ctx.os.environ
    if _TF_ROCM_CONFIG_REPO in env:
        _create_remote_rocm_repository(repository_ctx, env[_TF_ROCM_CONFIG_REPO])
        return

    _create_local_rocm_repository(repository_ctx)
# Repository rule that generates the ROCm configuration repository.
# `environ` lists the environment variables the rule declares a dependency
# on.
rocm_configure = repository_rule(
implementation = _rocm_autoconf_impl,
environ = [
_GCC_HOST_COMPILER_PATH,
"TF_NEED_ROCM",
_ROCM_TOOLKIT_PATH,
_TF_ROCM_VERSION,
_TF_MIOPEN_VERSION,
_TF_ROCM_AMDGPU_TARGETS,
_TF_ROCM_CONFIG_REPO,
],
)
"""Detects and configures the local ROCm toolchain.
Add the following to your WORKSPACE FILE:
```python
rocm_configure(name = "local_config_rocm")
```
Args:
name: A unique name for this workspace rule.
"""

View File

@ -43,6 +43,9 @@ build:download_clang_use_lld --linkopt='-fuse-ld=lld'
build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true
build:rocm --crosstool_top=@local_config_rocm//crosstool:toolchain
build:rocm --define=using_rocm=true --define=using_rocm_hipcc=true
build:cuda_clang --crosstool_top=@local_config_cuda//crosstool:toolchain
build:cuda_clang --define=using_cuda=true --define=using_cuda_clang=true --define=using_clang=true