Merge pull request #20277 from ROCmSoftwarePlatform:upstream-staging
PiperOrigin-RevId: 214793113
This commit is contained in:
commit
62e60166de
20
configure.py
20
configure.py
@ -1540,6 +1540,13 @@ def main():
|
||||
else:
|
||||
set_trisycl_include_dir(environ_cp)
|
||||
|
||||
set_action_env_var(environ_cp, 'TF_NEED_ROCM', 'ROCm', False)
|
||||
if (environ_cp.get('TF_NEED_ROCM') == '1' and
|
||||
'LD_LIBRARY_PATH' in environ_cp and
|
||||
environ_cp.get('LD_LIBRARY_PATH') != '1'):
|
||||
write_action_env_to_bazelrc('LD_LIBRARY_PATH',
|
||||
environ_cp.get('LD_LIBRARY_PATH'))
|
||||
|
||||
set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False)
|
||||
if (environ_cp.get('TF_NEED_CUDA') == '1' and
|
||||
'TF_CUDA_CONFIG_REPO' not in environ_cp):
|
||||
@ -1580,6 +1587,19 @@ def main():
|
||||
write_to_bazelrc('build --config=download_clang')
|
||||
write_to_bazelrc('test --config=download_clang')
|
||||
|
||||
# SYCL / ROCm / CUDA are mutually exclusive.
|
||||
# At most 1 GPU platform can be configured.
|
||||
gpu_platform_count = 0
|
||||
if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1':
|
||||
gpu_platform_count += 1
|
||||
if environ_cp.get('TF_NEED_ROCM') == '1':
|
||||
gpu_platform_count += 1
|
||||
if environ_cp.get('TF_NEED_CUDA') == '1':
|
||||
gpu_platform_count += 1
|
||||
if gpu_platform_count >= 2:
|
||||
raise UserInputError('SYCL / CUDA / ROCm are mututally exclusive. '
|
||||
'At most 1 GPU platform can be configured.')
|
||||
|
||||
set_build_var(environ_cp, 'TF_NEED_MPI', 'MPI', 'with_mpi_support', False)
|
||||
if environ_cp.get('TF_NEED_MPI') == '1':
|
||||
set_mpi_home(environ_cp)
|
||||
|
@ -149,6 +149,7 @@ load(
|
||||
"tf_cuda_tests_tags",
|
||||
)
|
||||
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
|
||||
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured")
|
||||
load("@io_bazel_rules_closure//closure:defs.bzl", "closure_proto_library")
|
||||
load(
|
||||
"//third_party/mkl:build_defs.bzl",
|
||||
@ -3006,7 +3007,7 @@ tf_cuda_library(
|
||||
"platform/device_tracer.h",
|
||||
],
|
||||
copts = tf_copts(),
|
||||
cuda_deps = tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps(),
|
||||
cuda_deps = if_cuda_is_configured(tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps()),
|
||||
visibility = ["//visibility:private"],
|
||||
deps = [
|
||||
":core_cpu_internal",
|
||||
|
@ -19,6 +19,14 @@ load(
|
||||
"@local_config_cuda//cuda:build_defs.bzl",
|
||||
"cuda_default_copts",
|
||||
"if_cuda",
|
||||
"if_cuda_is_configured",
|
||||
)
|
||||
load(
|
||||
"@local_config_rocm//rocm:build_defs.bzl",
|
||||
"if_rocm",
|
||||
"if_rocm_is_configured",
|
||||
"rocm_copts",
|
||||
"rocm_default_copts",
|
||||
)
|
||||
load(
|
||||
"//third_party/mkl:build_defs.bzl",
|
||||
@ -39,6 +47,8 @@ load(
|
||||
def register_extension_info(**kwargs):
|
||||
pass
|
||||
|
||||
# if_cuda_is_configured def placeholder
|
||||
|
||||
# Given a source file, generate a test name.
|
||||
# i.e. "common_runtime/direct_session_test.cc" becomes
|
||||
# "common_runtime_direct_session_test"
|
||||
@ -863,12 +873,16 @@ def tf_cuda_only_cc_test(
|
||||
srcs = srcs + tf_binary_additional_srcs(),
|
||||
size = size,
|
||||
args = args,
|
||||
copts = _cuda_copts() + tf_copts(),
|
||||
copts = _cuda_copts() + rocm_copts() + tf_copts(),
|
||||
data = data + tf_binary_dynamic_kernel_dsos(kernels),
|
||||
deps = deps + tf_binary_dynamic_kernel_deps(kernels) + if_cuda([
|
||||
clean_dep("//tensorflow/core:cuda"),
|
||||
clean_dep("//tensorflow/core:gpu_lib"),
|
||||
]),
|
||||
deps = deps + tf_binary_dynamic_kernel_deps(kernels) +
|
||||
if_cuda_is_configured([
|
||||
clean_dep("//tensorflow/core:cuda"),
|
||||
clean_dep("//tensorflow/core:gpu_lib"),
|
||||
]) +
|
||||
if_rocm_is_configured([
|
||||
clean_dep("//tensorflow/core:gpu_lib"),
|
||||
]),
|
||||
linkopts = if_not_windows(["-lpthread", "-lm"]) + linkopts + _rpath_linkopts(name),
|
||||
linkstatic = linkstatic or select({
|
||||
# cc_tests with ".so"s in srcs incorrectly link on Darwin
|
||||
@ -1003,7 +1017,7 @@ register_extension_info(
|
||||
label_regex_for_dep = "{extension_name}",
|
||||
)
|
||||
|
||||
def _cuda_copts():
|
||||
def _cuda_copts(opts = []):
|
||||
"""Gets the appropriate set of copts for (maybe) CUDA compilation.
|
||||
|
||||
If we're doing CUDA compilation, returns copts for our particular CUDA
|
||||
@ -1019,13 +1033,17 @@ def _cuda_copts():
|
||||
"@local_config_cuda//cuda:using_clang": ([
|
||||
"-fcuda-flush-denormals-to-zero",
|
||||
]),
|
||||
})
|
||||
}) + if_cuda_is_configured(opts)
|
||||
|
||||
# Build defs for TensorFlow kernels
|
||||
|
||||
# When this target is built using --config=cuda, a cc_library is built
|
||||
# that passes -DGOOGLE_CUDA=1 and '-x cuda', linking in additional
|
||||
# libraries needed by GPU kernels.
|
||||
#
|
||||
# When this target is built using --config=rocm, a cc_library is built
|
||||
# that passes -DTENSORFLOW_USE_ROCM and '-x rocm', linking in additional
|
||||
# libraries needed by GPU kernels.
|
||||
def tf_gpu_kernel_library(
|
||||
srcs,
|
||||
copts = [],
|
||||
@ -1033,16 +1051,18 @@ def tf_gpu_kernel_library(
|
||||
deps = [],
|
||||
hdrs = [],
|
||||
**kwargs):
|
||||
copts = copts + _cuda_copts() + if_cuda(cuda_copts) + tf_copts()
|
||||
copts = copts + tf_copts() + _cuda_copts(opts = cuda_copts) + rocm_copts(opts = cuda_copts)
|
||||
kwargs["features"] = kwargs.get("features", []) + ["-use_header_modules"]
|
||||
|
||||
native.cc_library(
|
||||
srcs = srcs,
|
||||
hdrs = hdrs,
|
||||
copts = copts,
|
||||
deps = deps + if_cuda([
|
||||
deps = deps + if_cuda_is_configured([
|
||||
clean_dep("//tensorflow/core:cuda"),
|
||||
clean_dep("//tensorflow/core:gpu_lib"),
|
||||
]) + if_rocm_is_configured([
|
||||
clean_dep("//tensorflow/core:gpu_lib"),
|
||||
]),
|
||||
alwayslink = 1,
|
||||
**kwargs
|
||||
@ -1081,8 +1101,10 @@ def tf_cuda_library(deps = None, cuda_deps = None, copts = tf_copts(), **kwargs)
|
||||
deps = deps + if_cuda(cuda_deps + [
|
||||
clean_dep("//tensorflow/core:cuda"),
|
||||
"@local_config_cuda//cuda:cuda_headers",
|
||||
]) + if_rocm_is_configured(cuda_deps + [
|
||||
# rocm_header placeholder
|
||||
]),
|
||||
copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_mkl(["-DINTEL_MKL=1"]) +
|
||||
copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_rocm(["-DTENSORFLOW_USE_ROCM=1"]) + if_mkl(["-DINTEL_MKL=1"]) +
|
||||
if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) +
|
||||
if_enable_mkl(["-DENABLE_MKL"]) +
|
||||
if_tensorrt(["-DGOOGLE_TENSORRT=1"])),
|
||||
@ -1465,6 +1487,9 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
|
||||
"@local_config_cuda//cuda:cuda_headers",
|
||||
"@local_config_cuda//cuda:cudart_static",
|
||||
]
|
||||
rocm_deps = [
|
||||
clean_dep("//tensorflow/core:stream_executor_headers_lib"),
|
||||
]
|
||||
deps = deps + tf_custom_op_library_additional_deps()
|
||||
if gpu_srcs:
|
||||
basename = name.split(".")[0]
|
||||
@ -1473,13 +1498,14 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
|
||||
srcs = gpu_srcs,
|
||||
copts = _cuda_copts() + if_tensorrt(["-DGOOGLE_TENSORRT=1"]),
|
||||
features = if_cuda(["-use_header_modules"]),
|
||||
deps = deps + if_cuda(cuda_deps),
|
||||
deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps),
|
||||
)
|
||||
cuda_deps.extend([":" + basename + "_gpu"])
|
||||
rocm_deps.extend([":" + basename + "_gpu"])
|
||||
|
||||
check_deps(
|
||||
name = name + "_check_deps",
|
||||
deps = deps + if_cuda(cuda_deps),
|
||||
deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps),
|
||||
disallowed_deps = [
|
||||
clean_dep("//tensorflow/core:framework"),
|
||||
clean_dep("//tensorflow/core:lib"),
|
||||
@ -1488,7 +1514,7 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [
|
||||
tf_cc_shared_object(
|
||||
name = name,
|
||||
srcs = srcs,
|
||||
deps = deps + if_cuda(cuda_deps),
|
||||
deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps),
|
||||
data = if_static([name + "_check_deps"]),
|
||||
copts = tf_copts(is_external = True),
|
||||
features = ["windows_export_all_symbols"],
|
||||
|
97
tensorflow/tools/ci_build/Dockerfile.rocm
Normal file
97
tensorflow/tools/ci_build/Dockerfile.rocm
Normal file
@ -0,0 +1,97 @@
|
||||
# This Dockerfile provides a starting point for a ROCm installation of
|
||||
# MIOpen and tensorflow.
|
||||
FROM ubuntu:xenial
|
||||
MAINTAINER Jeff Poznanovic <jeffrey.poznanovic@amd.com>
|
||||
|
||||
ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/debian/
|
||||
ARG ROCM_PATH=/opt/rocm
|
||||
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
ENV TF_NEED_ROCM 1
|
||||
ENV HOME /root/
|
||||
RUN apt update && apt install -y wget software-properties-common
|
||||
|
||||
# Add rocm repository
|
||||
RUN apt-get clean all
|
||||
RUN wget -qO - $DEB_ROCM_REPO/rocm.gpg.key | apt-key add -
|
||||
RUN sh -c "echo deb [arch=amd64] $DEB_ROCM_REPO xenial main > /etc/apt/sources.list.d/rocm.list"
|
||||
|
||||
# Install misc pkgs
|
||||
RUN apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
||||
build-essential \
|
||||
clang-3.8 \
|
||||
clang-format-3.8 \
|
||||
clang-tidy-3.8 \
|
||||
cmake \
|
||||
cmake-qt-gui \
|
||||
ssh \
|
||||
curl \
|
||||
apt-utils \
|
||||
pkg-config \
|
||||
g++-multilib \
|
||||
git \
|
||||
libunwind-dev \
|
||||
libfftw3-dev \
|
||||
libelf-dev \
|
||||
libncurses5-dev \
|
||||
libpthread-stubs0-dev \
|
||||
vim \
|
||||
gfortran \
|
||||
libboost-program-options-dev \
|
||||
libssl-dev \
|
||||
libboost-dev \
|
||||
libboost-system-dev \
|
||||
libboost-filesystem-dev \
|
||||
rpm \
|
||||
libnuma-dev \
|
||||
virtualenv \
|
||||
python-pip \
|
||||
python3-pip \
|
||||
wget && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install rocm pkgs
|
||||
RUN apt-get update --allow-insecure-repositories && \
|
||||
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
|
||||
rocm-dev rocm-libs rocm-utils \
|
||||
rocfft miopen-hip miopengemm rocblas hipblas rocrand \
|
||||
rocm-profiler cxlactivitylogger && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN cd ~ && git clone https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP.git
|
||||
RUN cd ~/HIP && mkdir -p build && cd build && cmake .. && make package -j && dpkg -i *.deb
|
||||
|
||||
ENV HCC_HOME=$ROCM_PATH/hcc
|
||||
ENV HIP_PATH=$ROCM_PATH/hip
|
||||
ENV OPENCL_ROOT=$ROCM_PATH/opencl
|
||||
ENV PATH="$HCC_HOME/bin:$HIP_PATH/bin:${PATH}"
|
||||
ENV PATH="$ROCM_PATH/bin:${PATH}"
|
||||
ENV PATH="$OPENCL_ROOT/bin:${PATH}"
|
||||
|
||||
# Add target file to help determine which device(s) to build for
|
||||
RUN echo -e "gfx803\ngfx900" >> /opt/rocm/bin/target.lst
|
||||
|
||||
# Setup environment variables, and add those environment variables at the end of ~/.bashrc
|
||||
ARG HCC_HOME=/opt/rocm/hcc
|
||||
ARG HIP_PATH=/opt/rocm/hip
|
||||
ARG PATH=$HCC_HOME/bin:$HIP_PATH/bin:$PATH
|
||||
|
||||
# Copy and run the install scripts.
|
||||
COPY install/*.sh /install/
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
RUN /install/install_bootstrap_deb_packages.sh
|
||||
RUN add-apt-repository -y ppa:openjdk-r/ppa && \
|
||||
add-apt-repository -y ppa:george-edison55/cmake-3.x
|
||||
RUN /install/install_deb_packages.sh
|
||||
RUN /install/install_pip_packages.sh
|
||||
RUN /install/install_bazel.sh
|
||||
RUN /install/install_golang.sh
|
||||
|
||||
# Set up the master bazelrc configuration file.
|
||||
COPY install/.bazelrc /etc/bazel.bazelrc
|
||||
|
||||
# Configure the build for our ROCm configuration.
|
||||
ENV TF_NEED_ROCM 1
|
||||
|
@ -19,7 +19,7 @@
|
||||
#
|
||||
# Usage: docker_test.sh <IMAGE_TYPE> <TAG> <WHL_PATH>
|
||||
# Arguments:
|
||||
# IMAGE_TYPE : Type of the image: (CPU|GPU)
|
||||
# IMAGE_TYPE : Type of the image: (CPU|GPU|ROCM)
|
||||
# TAG : Docker image tag
|
||||
# WHL_PATH : Path to the whl file to be installed inside the docker image
|
||||
#
|
||||
@ -60,6 +60,8 @@ if [[ "${IMAGE_TYPE}" == "cpu" ]]; then
|
||||
DOCKERFILE="tensorflow/tools/docker/Dockerfile"
|
||||
elif [[ "${IMAGE_TYPE}" == "gpu" ]]; then
|
||||
DOCKERFILE="tensorflow/tools/docker/Dockerfile.gpu"
|
||||
elif [[ "${IMAGE_TYPE}" == "rocm" ]]; then
|
||||
DOCKERFILE="tensorflow/tools/docker/Dockerfile.rocm"
|
||||
else
|
||||
die "Unrecognized image type: $1"
|
||||
fi
|
||||
@ -106,13 +108,16 @@ if [ "${IMAGE_TYPE}" == "gpu" ]; then
|
||||
devices=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
|
||||
libs=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}')
|
||||
GPU_EXTRA_PARAMS="${devices} ${libs}"
|
||||
elif [ "${IMAGE_TYPE}" == "rocm" ]; then
|
||||
ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video"
|
||||
else
|
||||
GPU_EXTRA_PARAMS=""
|
||||
ROCM_EXTRA_PARAMS=""
|
||||
fi
|
||||
|
||||
# Run docker image with source directory mapped
|
||||
docker run -v ${BASE_DIR}:/tensorflow-src -w /tensorflow-src \
|
||||
${GPU_EXTRA_PARAMS} \
|
||||
${GPU_EXTRA_PARAMS} ${ROCM_EXTRA_PARAMS} \
|
||||
"${DOCKER_IMG_TAG}" \
|
||||
/bin/bash -c "tensorflow/tools/ci_build/builds/run_pip_tests.sh && "\
|
||||
"tensorflow/tools/ci_build/builds/test_tutorials.sh && "\
|
||||
|
@ -132,6 +132,7 @@ echo "Using Bazel flags: ${BAZEL_FLAGS}"
|
||||
PIP_BUILD_TARGET="//tensorflow/tools/pip_package:build_pip_package"
|
||||
GPU_FLAG=""
|
||||
if [[ ${CONTAINER_TYPE} == "cpu" ]] || \
|
||||
[[ ${CONTAINER_TYPE} == "rocm" ]] || \
|
||||
[[ ${CONTAINER_TYPE} == "debian.jessie.cpu" ]]; then
|
||||
bazel build ${BAZEL_FLAGS} ${PIP_BUILD_TARGET} || \
|
||||
die "Build failed."
|
||||
@ -255,7 +256,8 @@ if [[ $(uname) == "Linux" ]]; then
|
||||
die "ERROR: Cannot find repaired wheel."
|
||||
fi
|
||||
# Copy and rename for gpu manylinux as we do not want auditwheel to package in libcudart.so
|
||||
elif [[ ${CONTAINER_TYPE} == "gpu" ]]; then
|
||||
elif [[ ${CONTAINER_TYPE} == "gpu" ]] || \
|
||||
[[ ${CONTAINER_TYPE} == "rocm" ]]; then
|
||||
WHL_PATH=${AUDITED_WHL_NAME}
|
||||
cp ${WHL_DIR}/${WHL_BASE_NAME} ${WHL_PATH}
|
||||
echo "Copied manylinx1 wheel file at ${WHL_PATH}"
|
||||
|
@ -48,6 +48,12 @@ getent passwd "${CI_BUILD_UID}" || adduser ${ADDUSER_OPTS} \
|
||||
usermod -a -G sudo "${CI_BUILD_USER}"
|
||||
echo "${CI_BUILD_USER} ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-nopasswd-sudo
|
||||
|
||||
if [[ "${TF_NEED_ROCM}" -eq 1 ]]; then
|
||||
# ROCm requires the video group in order to use the GPU for compute. If it
|
||||
# exists on the host, add it to the container.
|
||||
getent group video || addgroup video && adduser "${CI_BUILD_USER}" video
|
||||
fi
|
||||
|
||||
if [ -e /root/.bazelrc ]; then
|
||||
cp /root/.bazelrc "${CI_BUILD_HOME}/.bazelrc"
|
||||
chown "${CI_BUILD_UID}:${CI_BUILD_GID}" "${CI_BUILD_HOME}/.bazelrc"
|
||||
|
@ -18,7 +18,7 @@
|
||||
# <COMMAND>
|
||||
#
|
||||
# CONTAINER_TYPE: Type of the docker container used to run the build:
|
||||
# e.g., (cpu | gpu | android | tensorboard)
|
||||
# e.g., (cpu | gpu | rocm | android | tensorboard)
|
||||
#
|
||||
# DOCKERFILE_PATH: (Optional) Path to the Dockerfile used for docker build.
|
||||
# If this optional value is not supplied (via the
|
||||
@ -103,6 +103,14 @@ if [[ "${CONTAINER_TYPE}" != gpu* ]]; then
|
||||
GPU_EXTRA_PARAMS=""
|
||||
fi
|
||||
|
||||
# Add extra params for rocm devices and libraries for ROCm container.
|
||||
if [[ "${CONTAINER_TYPE}" == "rocm" ]]; then
|
||||
ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video"
|
||||
else
|
||||
ROCM_EXTRA_PARAMS=""
|
||||
fi
|
||||
|
||||
|
||||
# Determine the docker image name
|
||||
DOCKER_IMG_NAME="${BUILD_TAG}.${CONTAINER_TYPE}"
|
||||
|
||||
@ -159,6 +167,7 @@ ${DOCKER_BINARY} run --rm --pid=host \
|
||||
-v ${WORKSPACE}:/workspace \
|
||||
-w /workspace \
|
||||
${GPU_EXTRA_PARAMS} \
|
||||
${ROCM_EXTRA_PARAMS} \
|
||||
${CI_DOCKER_EXTRA_PARAMS[@]} \
|
||||
"${DOCKER_IMG_NAME}" \
|
||||
${CI_COMMAND_PREFIX[@]} \
|
||||
|
@ -26,6 +26,7 @@ echo ""
|
||||
|
||||
# Run configure.
|
||||
export TF_NEED_CUDA=0
|
||||
export TF_NEED_ROCM=0
|
||||
export CC_OPT_FLAGS='-mavx'
|
||||
# Only running cc tests, python version does not matter.
|
||||
export PYTHON_BIN_PATH=`which python`
|
||||
|
@ -26,6 +26,7 @@ echo ""
|
||||
|
||||
# Run configure.
|
||||
export TF_NEED_CUDA=0
|
||||
export TF_NEED_ROCM=0
|
||||
export CC_OPT_FLAGS='-mavx'
|
||||
export PYTHON_BIN_PATH=`which python2`
|
||||
yes "" | $PYTHON_BIN_PATH configure.py
|
||||
|
@ -26,6 +26,7 @@ echo ""
|
||||
|
||||
# Run configure.
|
||||
export TF_NEED_CUDA=0
|
||||
export TF_NEED_ROCM=0
|
||||
export CC_OPT_FLAGS='-mavx'
|
||||
export PYTHON_BIN_PATH=`which python3`
|
||||
yes "" | $PYTHON_BIN_PATH configure.py
|
||||
|
@ -26,6 +26,7 @@ echo ""
|
||||
|
||||
# Run configure.
|
||||
export TF_NEED_CUDA=0
|
||||
export TF_NEED_ROCM=0
|
||||
export CC_OPT_FLAGS='-mavx'
|
||||
export PYTHON_BIN_PATH=`which python3`
|
||||
yes "" | $PYTHON_BIN_PATH configure.py
|
||||
|
@ -27,5 +27,8 @@ SUFFIX="-cpu-linux-"
|
||||
if [ "${TF_NEED_CUDA}" == "1" ]; then
|
||||
SUFFIX="-gpu-linux-"
|
||||
fi
|
||||
if [ "${TF_NEED_ROCM}" == "1" ]; then
|
||||
SUFFIX="-rocm-linux-"
|
||||
fi
|
||||
|
||||
build_libtensorflow_tarball "${SUFFIX}$(uname -m)"
|
||||
|
@ -19,4 +19,5 @@
|
||||
set -ex
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
export TF_NEED_CUDA=0
|
||||
export TF_NEED_ROCM=0
|
||||
"${SCRIPT_DIR}/libtensorflow_docker.sh"
|
||||
|
@ -38,6 +38,11 @@ if [ "${TF_NEED_CUDA}" == "1" ]; then
|
||||
DOCKER_BINARY="nvidia-docker"
|
||||
DOCKER_FILE="Dockerfile.gpu"
|
||||
fi
|
||||
if [ "${TF_NEED_ROCM}" == "1" ]; then
|
||||
DOCKER_IMAGE="tf-tensorflow-rocm"
|
||||
DOCKER_BINARY="docker"
|
||||
DOCKER_FILE="Dockerfile.rocm"
|
||||
fi
|
||||
|
||||
docker build \
|
||||
-t "${DOCKER_IMAGE}" \
|
||||
@ -53,6 +58,7 @@ ${DOCKER_BINARY} run \
|
||||
-e "TF_NEED_HDFS=0" \
|
||||
-e "TF_NEED_CUDA=${TF_NEED_CUDA}" \
|
||||
-e "TF_NEED_TENSORRT=${TF_NEED_CUDA}" \
|
||||
-e "TF_NEED_ROCM=${TF_NEED_ROCM}" \
|
||||
-e "TF_NEED_OPENCL_SYCL=0" \
|
||||
"${DOCKER_IMAGE}" \
|
||||
"/workspace/tensorflow/tools/ci_build/linux/libtensorflow.sh"
|
||||
|
22
tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh
Executable file
22
tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh
Executable file
@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env bash
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
#
|
||||
# Script to build a binary release of libtensorflow with GPU support.
|
||||
|
||||
set -ex
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
export TF_NEED_ROCM=1
|
||||
"${SCRIPT_DIR}/libtensorflow_docker.sh"
|
39
tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
Executable file
39
tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
Executable file
@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env bash
|
||||
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# ==============================================================================
|
||||
|
||||
set -e
|
||||
set -x
|
||||
|
||||
N_JOBS=$(grep -c ^processor /proc/cpuinfo)
|
||||
|
||||
echo ""
|
||||
echo "Bazel will use ${N_JOBS} concurrent job(s)."
|
||||
echo ""
|
||||
|
||||
# Run configure.
|
||||
export PYTHON_BIN_PATH=`which python3`
|
||||
export CC_OPT_FLAGS='-mavx'
|
||||
|
||||
export TF_NEED_ROCM=1
|
||||
|
||||
yes "" | $PYTHON_BIN_PATH configure.py
|
||||
|
||||
# Run bazel test command. Double test timeouts to avoid flakes.
|
||||
bazel test --config=rocm --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-benchmark-test -k \
|
||||
--test_lang_filters=cc --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \
|
||||
--build_tests_only --test_output=errors --local_test_jobs=1 --config=opt \
|
||||
//tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/...
|
39
tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
Executable file
39
tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
Executable file
@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env bash
|
||||
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# ==============================================================================
|
||||
|
||||
set -e
|
||||
set -x
|
||||
|
||||
N_JOBS=$(grep -c ^processor /proc/cpuinfo)
|
||||
|
||||
echo ""
|
||||
echo "Bazel will use ${N_JOBS} concurrent job(s)."
|
||||
echo ""
|
||||
|
||||
# Run configure.
|
||||
export PYTHON_BIN_PATH=`which python3`
|
||||
export CC_OPT_FLAGS='-mavx'
|
||||
|
||||
export TF_NEED_ROCM=1
|
||||
|
||||
yes "" | $PYTHON_BIN_PATH configure.py
|
||||
|
||||
# Run bazel test command. Double test timeouts to avoid flakes.
|
||||
bazel test --config=rocm --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-benchmark-test -k \
|
||||
--test_lang_filters=py --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \
|
||||
--build_tests_only --test_output=errors --local_test_jobs=1 --config=opt \
|
||||
//tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/...
|
@ -27,6 +27,7 @@ echo ""
|
||||
|
||||
# Run configure.
|
||||
export TF_NEED_CUDA=0
|
||||
export TF_NEED_ROCM=0
|
||||
export CC_OPT_FLAGS='-mavx'
|
||||
export PYTHON_BIN_PATH=$(which python2)
|
||||
yes "" | $PYTHON_BIN_PATH configure.py
|
||||
|
@ -26,6 +26,7 @@ source "${SCRIPT_DIR}/../builds/libtensorflow.sh"
|
||||
export PYTHON_BIN_PATH="/usr/bin/python"
|
||||
export TF_NEED_HDFS=0
|
||||
export TF_NEED_CUDA=0
|
||||
export TF_NEED_ROCM=0
|
||||
export TF_NEED_OPENCL_SYCL=0
|
||||
export TF_NEED_MKL=0
|
||||
export COMPUTECPP_PATH="/usr/local"
|
||||
|
@ -27,6 +27,7 @@ export TF_NEED_CUDA=1
|
||||
export LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/extras/CUPTI/lib:${LD_LIBRARY_PATH}"
|
||||
export PYTHON_BIN_PATH="/usr/bin/python"
|
||||
export TF_NEED_HDFS=0
|
||||
export TF_NEED_ROCM=0
|
||||
export TF_NEED_OPENCL_SYCL=0
|
||||
export TF_NEED_MKL=0
|
||||
export COMPUTECPP_PATH="/usr/local"
|
||||
|
36
tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh
Executable file
36
tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh
Executable file
@ -0,0 +1,36 @@
|
||||
#!/usr/bin/env bash
|
||||
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
#
|
||||
# Script to produce binary release of libtensorflow (C API, Java jars etc.).
|
||||
|
||||
set -ex
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
|
||||
# See comments at the top of this file for details.
|
||||
source "${SCRIPT_DIR}/../builds/libtensorflow.sh"
|
||||
|
||||
# Configure script
|
||||
export TF_NEED_ROCM=1
|
||||
export PYTHON_BIN_PATH="/usr/bin/python"
|
||||
export TF_NEED_GCP=0
|
||||
export TF_NEED_HDFS=0
|
||||
export TF_NEED_CUDA=0
|
||||
export TF_NEED_OPENCL_SYCL=0
|
||||
export TF_NEED_MKL=0
|
||||
export COMPUTECPP_PATH="/usr/local"
|
||||
|
||||
export PATH="/usr/local/cuda/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin"
|
||||
build_libtensorflow_tarball "-gpu-darwin-$(uname -m)"
|
41
tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
Executable file
41
tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
Executable file
@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env bash
|
||||
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# ==============================================================================
|
||||
|
||||
set -e
|
||||
set -x
|
||||
|
||||
N_JOBS=$(grep -c ^processor /proc/cpuinfo)
|
||||
|
||||
echo ""
|
||||
echo "Bazel will use ${N_JOBS} concurrent job(s)."
|
||||
echo ""
|
||||
|
||||
# Run configure.
|
||||
export PYTHON_BIN_PATH=`which python3`
|
||||
|
||||
export TF_NEED_ROCM=1
|
||||
|
||||
yes "" | $PYTHON_BIN_PATH configure.py
|
||||
echo "build --distinct_host_configuration=false" >> .tf_configure.bazelrc
|
||||
|
||||
bazel clean
|
||||
# Run bazel test command. Double test timeouts to avoid flakes.
|
||||
bazel test --config=rocm --test_tag_filters=-no_gpu,-benchmark-test,-no_oss -k \
|
||||
--jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \
|
||||
--build_tests_only --test_output=errors --local_test_jobs=1 \
|
||||
--config=xla -- \
|
||||
//tensorflow/compiler/...
|
@ -1,6 +1,7 @@
|
||||
# TensorFlow external dependencies that can be loaded in WORKSPACE files.
|
||||
|
||||
load("//third_party/gpus:cuda_configure.bzl", "cuda_configure")
|
||||
load("//third_party/gpus:rocm_configure.bzl", "rocm_configure")
|
||||
load("//third_party/tensorrt:tensorrt_configure.bzl", "tensorrt_configure")
|
||||
load("//third_party:nccl/nccl_configure.bzl", "nccl_configure")
|
||||
load("//third_party/mkl:build_defs.bzl", "mkl_repository")
|
||||
@ -43,6 +44,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
|
||||
sycl_configure(name = "local_config_sycl")
|
||||
syslibs_configure(name = "local_config_syslibs")
|
||||
python_configure(name = "local_config_python")
|
||||
rocm_configure(name = "local_config_rocm")
|
||||
|
||||
initialize_third_party()
|
||||
|
||||
|
158
third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
vendored
Normal file
158
third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
vendored
Normal file
@ -0,0 +1,158 @@
|
||||
major_version: "local"
|
||||
minor_version: ""
|
||||
default_target_cpu: "same_as_host"
|
||||
|
||||
default_toolchain {
|
||||
cpu: "k8"
|
||||
toolchain_identifier: "local_linux"
|
||||
}
|
||||
default_toolchain {
|
||||
cpu: "piii"
|
||||
toolchain_identifier: "local_linux"
|
||||
}
|
||||
default_toolchain {
|
||||
cpu: "arm"
|
||||
toolchain_identifier: "local_linux"
|
||||
}
|
||||
default_toolchain {
|
||||
cpu: "ppc"
|
||||
toolchain_identifier: "local_linux"
|
||||
}
|
||||
|
||||
toolchain {
|
||||
abi_version: "local"
|
||||
abi_libc_version: "local"
|
||||
builtin_sysroot: ""
|
||||
compiler: "compiler"
|
||||
host_system_name: "local"
|
||||
needsPic: true
|
||||
supports_gold_linker: false
|
||||
supports_incremental_linker: false
|
||||
supports_fission: false
|
||||
supports_interface_shared_objects: false
|
||||
supports_normalizing_ar: false
|
||||
supports_start_end_lib: false
|
||||
supports_thin_archives: false
|
||||
target_libc: "local"
|
||||
target_cpu: "local"
|
||||
target_system_name: "local"
|
||||
toolchain_identifier: "local_linux"
|
||||
|
||||
tool_path { name: "ar" path: "/usr/bin/ar" }
|
||||
tool_path { name: "compat-ld" path: "/usr/bin/ld" }
|
||||
tool_path { name: "cpp" path: "/usr/bin/cpp" }
|
||||
tool_path { name: "dwp" path: "/usr/bin/dwp" }
|
||||
# As part of the TensorFlow release, we place some ROCm-related compilation
|
||||
# files in @local_config_rocm//crosstool/clang/bin, and this relative
|
||||
# path, combined with the rest of our Bazel configuration causes our
|
||||
# compilation to use those files.
|
||||
tool_path { name: "gcc" path: "clang/bin/crosstool_wrapper_driver_rocm" }
|
||||
# Use "-std=c++11" for hipcc. For consistency, force both the host compiler
|
||||
# and the device compiler to use "-std=c++11".
|
||||
cxx_flag: "-std=c++11"
|
||||
linker_flag: "-Wl,-no-as-needed"
|
||||
linker_flag: "-lstdc++"
|
||||
#linker_flag: "-B/usr/bin/"
|
||||
linker_flag: "-B/opt/rocm/hcc/compiler/bin"
|
||||
|
||||
%{host_compiler_includes}
|
||||
tool_path { name: "gcov" path: "/usr/bin/gcov" }
|
||||
|
||||
# C(++) compiles invoke the compiler (as that is the one knowing where
|
||||
# to find libraries), but we provide LD so other rules can invoke the linker.
|
||||
tool_path { name: "ld" path: "/usr/bin/ld" }
|
||||
|
||||
tool_path { name: "nm" path: "/usr/bin/nm" }
|
||||
tool_path { name: "objcopy" path: "/usr/bin/objcopy" }
|
||||
objcopy_embed_flag: "-I"
|
||||
objcopy_embed_flag: "binary"
|
||||
tool_path { name: "objdump" path: "/usr/bin/objdump" }
|
||||
tool_path { name: "strip" path: "/usr/bin/strip" }
|
||||
|
||||
# Anticipated future default.
|
||||
unfiltered_cxx_flag: "-no-canonical-prefixes"
|
||||
|
||||
# Make C++ compilation deterministic. Use linkstamping instead of these
|
||||
# compiler symbols.
|
||||
unfiltered_cxx_flag: "-Wno-builtin-macro-redefined"
|
||||
unfiltered_cxx_flag: "-D__DATE__=\"redacted\""
|
||||
unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\""
|
||||
unfiltered_cxx_flag: "-D__TIME__=\"redacted\""
|
||||
unfiltered_cxx_flag: "-D__HIP_PLATFORM_HCC__"
|
||||
# The macro EIGEN_USE_HIP is used to tell Eigen to use the HIP platform headers
|
||||
# It needs to be always set when compiling Eigen headers
|
||||
# (irrespective of whether the source file is being compiled via HIPCC)
|
||||
# so adding -DEIGEN_USE_HIP as a default CXX flag here
|
||||
unfiltered_cxx_flag: "-DEIGEN_USE_HIP"
|
||||
|
||||
|
||||
# Security hardening on by default.
|
||||
# Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases.
|
||||
# We need to undef it before redefining it as some distributions now have
|
||||
# it enabled by default.
|
||||
#compiler_flag: "-U_FORTIFY_SOURCE"
|
||||
#compiler_flag: "-D_FORTIFY_SOURCE=1"
|
||||
#compiler_flag: "-fstack-protector"
|
||||
#compiler_flag: "-fPIE"
|
||||
#linker_flag: "-pie"
|
||||
#linker_flag: "-Wl,-z,relro,-z,now"
|
||||
|
||||
# Enable coloring even if there's no attached terminal. Bazel removes the
|
||||
# escape sequences if --nocolor is specified. This isn't supported by gcc
|
||||
# on Ubuntu 14.04.
|
||||
# compiler_flag: "-fcolor-diagnostics"
|
||||
|
||||
# All warnings are enabled. Maybe enable -Werror as well?
|
||||
compiler_flag: "-Wall"
|
||||
# Enable a few more warnings that aren't part of -Wall.
|
||||
compiler_flag: "-Wunused-but-set-parameter"
|
||||
# But disable some that are problematic.
|
||||
compiler_flag: "-Wno-free-nonheap-object" # has false positives
|
||||
|
||||
# Keep stack frames for debugging, even in opt mode.
|
||||
compiler_flag: "-fno-omit-frame-pointer"
|
||||
|
||||
# Anticipated future default.
|
||||
linker_flag: "-no-canonical-prefixes"
|
||||
unfiltered_cxx_flag: "-fno-canonical-system-headers"
|
||||
# Have gcc return the exit code from ld.
|
||||
linker_flag: "-pass-exit-codes"
|
||||
# Stamp the binary with a unique identifier.
|
||||
linker_flag: "-Wl,--build-id=md5"
|
||||
linker_flag: "-Wl,--hash-style=gnu"
|
||||
# Gold linker only? Can we enable this by default?
|
||||
# linker_flag: "-Wl,--warn-execstack"
|
||||
# linker_flag: "-Wl,--detect-odr-violations"
|
||||
|
||||
# Include directory for ROCm headers.
|
||||
%{rocm_include_path}
|
||||
|
||||
compilation_mode_flags {
|
||||
mode: DBG
|
||||
# Enable debug symbols.
|
||||
compiler_flag: "-g"
|
||||
}
|
||||
compilation_mode_flags {
|
||||
mode: OPT
|
||||
|
||||
# No debug symbols.
|
||||
# Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or
|
||||
# even generally? However, that can't happen here, as it requires special
|
||||
# handling in Bazel.
|
||||
compiler_flag: "-g0"
|
||||
|
||||
# Conservative choice for -O
|
||||
# -O3 can increase binary size and even slow down the resulting binaries.
|
||||
# Profile first and / or use FDO if you need better performance than this.
|
||||
compiler_flag: "-O2"
|
||||
|
||||
# Disable assertions
|
||||
compiler_flag: "-DNDEBUG"
|
||||
|
||||
# Removal of unused code and data at link time (can this increase binary size in some cases?).
|
||||
compiler_flag: "-ffunction-sections"
|
||||
compiler_flag: "-fdata-sections"
|
||||
linker_flag: "-Wl,--gc-sections"
|
||||
}
|
||||
linking_mode_flags { mode: DYNAMIC }
|
||||
}
|
241
third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
vendored
Executable file
241
third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
vendored
Executable file
@ -0,0 +1,241 @@
|
||||
#!/usr/bin/env python
|
||||
"""Crosstool wrapper for compiling ROCm programs.
|
||||
|
||||
SYNOPSIS:
|
||||
crosstool_wrapper_driver_rocm [options passed in by cc_library()
|
||||
or cc_binary() rule]
|
||||
|
||||
DESCRIPTION:
|
||||
This script is expected to be called by the cc_library() or cc_binary() bazel
|
||||
rules. When the option "-x rocm" is present in the list of arguments passed
|
||||
to this script, it invokes the hipcc compiler. Most arguments are passed
|
||||
as is as a string to --compiler-options of hipcc. When "-x rocm" is not
|
||||
present, this wrapper invokes gcc with the input arguments as is.
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
__author__ = 'whchung@gmail.com (Wen-Heng (Jack) Chung)'
|
||||
|
||||
from argparse import ArgumentParser
|
||||
import os
|
||||
import subprocess
|
||||
import re
|
||||
import sys
|
||||
import pipes
|
||||
|
||||
# Template values set by rocm_configure.bzl.
|
||||
CPU_COMPILER = ('%{cpu_compiler}')
|
||||
GCC_HOST_COMPILER_PATH = ('%{gcc_host_compiler_path}')
|
||||
|
||||
HIPCC_PATH = '%{hipcc_path}'
|
||||
PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH)
|
||||
|
||||
def Log(s):
  """Print s to stdout behind the 'gpus/crosstool: ' tag."""
  tagged = 'gpus/crosstool: {0}'.format(s)
  print(tagged)
|
||||
|
||||
|
||||
def GetOptionValue(argv, option):
  """Gather every value supplied for a single-dash option in argv.

  Args:
    argv: A list of strings, possibly the argv passed to main().
    option: The option whose value to extract, without the leading '-'.

  Returns:
    A flat list of values. Values may follow one occurrence of the option
    (e.g. -opt val1 val2) or be spread over several occurrences
    (e.g. -opt val1 -opt val2). An empty list is returned when the option is
    absent or carries no values.
  """
  option_parser = ArgumentParser()
  option_parser.add_argument('-' + option, nargs='*', action='append')
  parsed, _ = option_parser.parse_known_args(argv)

  # action='append' yields one sub-list per occurrence (or None if absent).
  per_occurrence = vars(parsed)[option] if parsed else None
  if not per_occurrence:
    return []
  return sum(per_occurrence, [])
|
||||
|
||||
|
||||
def GetHostCompilerOptions(argv):
  """Build the host-compiler option string from argv.

  The -isystem, -iquote, -g and --sysroot values found in argv are re-emitted,
  in that order, as one space-separated string.

  Args:
    argv: A list of strings, possibly the argv passed to main().

  Returns:
    The string that can be used as the --compiler-options to hipcc.
  """
  host_parser = ArgumentParser()
  for repeatable in ('-isystem', '-iquote'):
    host_parser.add_argument(repeatable, nargs='*', action='append')
  host_parser.add_argument('--sysroot', nargs=1)
  host_parser.add_argument('-g', nargs='*', action='append')
  # Recognised so that it is parsed as a flag, but it is not forwarded.
  host_parser.add_argument('-fno-canonical-system-headers',
                           action='store_true')

  parsed, _ = host_parser.parse_known_args(argv)

  pieces = []
  if parsed.isystem:
    pieces.append(' -isystem ' + ' -isystem '.join(sum(parsed.isystem, [])))
  if parsed.iquote:
    pieces.append(' -iquote ' + ' -iquote '.join(sum(parsed.iquote, [])))
  if parsed.g:
    # A bare -g has no values and comes out as ' -g'; -g<level> keeps its level.
    pieces.append(' -g' + ' -g'.join(sum(parsed.g, [])))
  if parsed.sysroot:
    pieces.append(' --sysroot ' + parsed.sysroot[0])

  return ''.join(pieces)
|
||||
|
||||
def GetHipccOptions(argv):
  """Collect the -hipcc_options values from argv.

  Args:
    argv: A list of strings, possibly the argv passed to main().

  Returns:
    The string that can be passed directly to hipcc: every collected value
    prefixed with '--' and joined by single spaces, or '' when argv carries no
    -hipcc_options.
  """

  parser = ArgumentParser()
  parser.add_argument('-hipcc_options', nargs='*', action='append')

  args, _ = parser.parse_known_args(argv)

  if args.hipcc_options:
    # Values are forwarded unchanged. This file defines no option-renaming
    # helper, so calling one here raised NameError whenever -hipcc_options
    # was actually supplied.
    options = sum(args.hipcc_options, [])
    return ' '.join(['--' + a for a in options])
  return ''
|
||||
|
||||
|
||||
def InvokeHipcc(argv, log=False):
  """Call hipcc with arguments assembled from argv.

  Args:
    argv: A list of strings, possibly the argv passed to main().
    log: True if logging is requested.

  Returns:
    1 when argv carries no value for -c or does not carry exactly one -o
    value. Otherwise the os.system() status of the dependency-file command if
    that command fails, or else the status of the compile command.
  """

  host_compiler_options = GetHostCompilerOptions(argv)
  hipcc_compiler_options = GetHipccOptions(argv)
  opt_option = GetOptionValue(argv, 'O')
  m_options = GetOptionValue(argv, 'm')
  m_options = ''.join([' -m' + m for m in m_options if m in ['32', '64']])
  include_options = GetOptionValue(argv, 'I')
  out_file = GetOptionValue(argv, 'o')
  depfiles = GetOptionValue(argv, 'MF')
  defines = GetOptionValue(argv, 'D')
  defines = ''.join([' -D' + define for define in defines])
  undefines = GetOptionValue(argv, 'U')
  undefines = ''.join([' -U' + define for define in undefines])
  std_options = GetOptionValue(argv, 'std')
  # Only -std values on this allow-list are forwarded to hipcc.
  hipcc_allowed_std_options = ["c++11"]
  std_options = ''.join([' -std=' + define
      for define in std_options if define in hipcc_allowed_std_options])

  # The list of source files get passed after the -c option. I don't know of
  # any other reliable way to just get the list of source files to be compiled.
  src_files = GetOptionValue(argv, 'c')

  if len(src_files) == 0:
    return 1
  if len(out_file) != 1:
    return 1

  # Any positive -O level is mapped to -O2; everything else builds with -g.
  opt = (' -O2' if (len(opt_option) > 0 and int(opt_option[0]) > 0)
         else ' -g')

  includes = (' -I ' + ' -I '.join(include_options)
              if len(include_options) > 0
              else '')

  # Unfortunately, there are other options that have -c prefix too.
  # So allowing only those look like C/C++ files.
  # The pattern is a raw string so that '\.' is not an invalid str escape.
  src_files = [f for f in src_files if
               re.search(r'\.cpp$|\.cc$|\.c$|\.cxx$|\.C$', f)]
  srcs = ' '.join(src_files)
  out = ' -o ' + out_file[0]

  hipccopts = ' '
  hipccopts += ' ' + hipcc_compiler_options
  hipccopts += undefines
  hipccopts += defines
  hipccopts += std_options
  hipccopts += m_options

  if depfiles:
    # Generate the dependency file
    depfile = depfiles[0]
    cmd = (HIPCC_PATH + ' ' + hipccopts +
           host_compiler_options +
           ' ' + GCC_HOST_COMPILER_PATH +
           ' -I .' + includes + ' ' + srcs + ' -M -o ' + depfile)
    if log:
      Log(cmd)
    exit_status = os.system(cmd)
    if exit_status != 0:
      return exit_status

  cmd = (HIPCC_PATH + ' ' + hipccopts +
         host_compiler_options + ' -fPIC' +
         ' ' + GCC_HOST_COMPILER_PATH +
         ' -I .' + opt + includes + ' -c ' + srcs + out)

  # TODO(zhengxq): for some reason, 'gcc' needs this help to find 'as'.
  # Need to investigate and fix.
  cmd = 'PATH=' + PREFIX_DIR + ':$PATH ' + cmd
  if log:
    Log(cmd)
  return os.system(cmd)
|
||||
|
||||
|
||||
def main():
  """Dispatch one compiler invocation to hipcc or to the CPU compiler.

  '-x rocm' routes the remaining arguments through InvokeHipcc(). Otherwise,
  when -pass-exit-codes is present, hipcc is run with every argument wrapped
  in single quotes. Any other invocation goes to the CPU compiler.

  Returns:
    The exit status of whichever compiler was run.
  """
  # ignore PWD env var
  os.environ['PWD'] = ''

  raw_flags = sys.argv[1:]

  wrapper_parser = ArgumentParser()
  wrapper_parser.add_argument('-x', nargs=1)
  wrapper_parser.add_argument('--rocm_log', action='store_true')
  wrapper_parser.add_argument('-pass-exit-codes', action='store_true')
  args, leftover = wrapper_parser.parse_known_args(raw_flags)
  verbose = args.rocm_log

  if args.x and args.x[0] == 'rocm':
    if verbose:
      Log('-x rocm')
    quoted = [pipes.quote(s) for s in leftover]
    if verbose:
      Log('using hipcc')
    return InvokeHipcc(quoted, log=verbose)

  # XXX use hipcc to link
  if args.pass_exit_codes:
    # special handling for $ORIGIN
    # - guard every argument with ''
    link_cmd = [HIPCC_PATH] + [
        "'" + flag + "'"
        for flag in raw_flags
        if not flag.startswith(('-pass-exit-codes'))
    ]
    if verbose:
      Log('Link with hipcc: %s' % (' '.join(link_cmd)))
    return subprocess.call(link_cmd)

  # Strip our flags before passing through to the CPU compiler for files which
  # are not -x rocm. We can't just pass 'leftover' because it also strips -x.
  # We not only want to pass -x to the CPU compiler, but also keep it in its
  # relative location in the argv list (the compiler is actually sensitive to
  # this).
  cpu_compiler_flags = [flag for flag in raw_flags
                        if not flag.startswith(('--rocm_log'))]

  # XXX: SE codes need to be built with gcc, but need this macro defined
  cpu_compiler_flags.append("-D__HIP_PLATFORM_HCC__")

  return subprocess.call([CPU_COMPILER] + cpu_compiler_flags)
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
0
third_party/gpus/rocm/BUILD
vendored
Normal file
0
third_party/gpus/rocm/BUILD
vendored
Normal file
99
third_party/gpus/rocm/BUILD.tpl
vendored
Normal file
99
third_party/gpus/rocm/BUILD.tpl
vendored
Normal file
@ -0,0 +1,99 @@
|
||||
# BUILD template for the ROCm repository. The %{...} placeholders are
# substituted by rocm_configure.bzl when the repository is generated.
licenses(["restricted"])  # MPL2, portions GPL v3, LGPL v3, BSD-like

package(default_visibility = ["//visibility:public"])

# Matches builds invoked with --define=using_rocm_hipcc=true.
config_setting(
    name = "using_hipcc",
    values = {
        "define": "using_rocm_hipcc=true",
    },
)

# rocm_config.h plus whatever header list %{rocm_headers} expands to.
cc_library(
    name = "rocm_headers",
    hdrs = [
        "rocm/rocm_config.h",
        %{rocm_headers}
    ],
    includes = [
        ".",
        "rocm/include",
    ],
    visibility = ["//visibility:public"],
)

# Each library target below lists the same rocm/lib/ file both as a source
# and as a data dependency.
cc_library(
    name = "hip",
    srcs = ["rocm/lib/%{hip_lib}"],
    data = ["rocm/lib/%{hip_lib}"],
    includes = [
        ".",
        "rocm/include",
    ],
    linkstatic = 1,
    visibility = ["//visibility:public"],
)

cc_library(
    name = "rocblas",
    srcs = ["rocm/lib/%{rocblas_lib}"],
    data = ["rocm/lib/%{rocblas_lib}"],
    includes = [
        ".",
        "rocm/include",
    ],
    linkstatic = 1,
    visibility = ["//visibility:public"],
)

cc_library(
    name = "rocfft",
    srcs = ["rocm/lib/%{rocfft_lib}"],
    data = ["rocm/lib/%{rocfft_lib}"],
    includes = [
        ".",
        "rocm/include",
    ],
    linkstatic = 1,
    visibility = ["//visibility:public"],
)

# hiprand additionally exposes rocm/include/rocrand on the include path.
cc_library(
    name = "hiprand",
    srcs = ["rocm/lib/%{hiprand_lib}"],
    data = ["rocm/lib/%{hiprand_lib}"],
    includes = [
        ".",
        "rocm/include",
        "rocm/include/rocrand",
    ],
    linkstatic = 1,
    visibility = ["//visibility:public"],
)

cc_library(
    name = "miopen",
    srcs = ["rocm/lib/%{miopen_lib}"],
    data = ["rocm/lib/%{miopen_lib}"],
    includes = [
        ".",
        "rocm/include",
    ],
    linkstatic = 1,
    visibility = ["//visibility:public"],
)

# Umbrella target that depends on the headers and every library above.
cc_library(
    name = "rocm",
    visibility = ["//visibility:public"],
    deps = [
        ":rocm_headers",
        ":hip",
        ":rocblas",
        ":rocfft",
        ":hiprand",
        ":miopen",
    ],
)

%{rocm_include_genrules}
|
45
third_party/gpus/rocm/build_defs.bzl.tpl
vendored
Normal file
45
third_party/gpus/rocm/build_defs.bzl.tpl
vendored
Normal file
@ -0,0 +1,45 @@
|
||||
# Macros for building ROCm code.
|
||||
def if_rocm(if_true, if_false = []):
    """Shorthand for select()'ing on whether we're building with ROCm.

    Args:
      if_true: Value chosen when @local_config_rocm//rocm:using_hipcc matches.
      if_false: Value chosen otherwise (defaults to an empty list).

    Returns:
      A select statement over the two values.
    """
    branches = {
        "@local_config_rocm//rocm:using_hipcc": if_true,
        "//conditions:default": if_false,
    }
    return select(branches)
|
||||
|
||||
|
||||
def rocm_default_copts():
|
||||
"""Default options for all ROCm compilations."""
|
||||
return if_rocm(["-x", "rocm"] + %{rocm_extra_copts})
|
||||
|
||||
def rocm_copts(opts = []):
    """Gets the appropriate set of copts for (maybe) ROCm compilation.

    Args:
      opts: Extra copts, appended only when ROCm was enabled at configure time.

    Returns:
      rocm_default_copts(), followed by a select() that contributes one empty
      string under hipcc and nothing otherwise, followed by
      if_rocm_is_configured(opts).
    """
    defaults = rocm_default_copts()
    hipcc_only = select({
        "//conditions:default": [],
        "@local_config_rocm//rocm:using_hipcc": ([
            "",
        ]),
    })
    configured_opts = if_rocm_is_configured(opts)
    return defaults + hipcc_only + configured_opts
|
||||
|
||||
def rocm_is_configured():
|
||||
"""Returns true if ROCm was enabled during the configure process."""
|
||||
return %{rocm_is_configured}
|
||||
|
||||
def if_rocm_is_configured(x):
    """Returns x when ROCm was enabled during configure, else an empty list.

    Unlike if_rocm(), this does not require that we are building with
    --config=rocm. Used to allow non-ROCm code to depend on ROCm libraries.
    """
    return x if rocm_is_configured() else []
|
21
third_party/gpus/rocm/rocm_config.h.tpl
vendored
Normal file
21
third_party/gpus/rocm/rocm_config.h.tpl
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef ROCM_ROCM_CONFIG_H_
#define ROCM_ROCM_CONFIG_H_

// Root of the ROCm toolkit installation. The placeholder below is filled in
// by rocm_configure.bzl when it instantiates this template, so the value
// follows the configured toolkit path instead of being fixed to /opt/rocm.
#define TF_ROCM_TOOLKIT_PATH "%{rocm_toolkit_path}"

#endif  // ROCM_ROCM_CONFIG_H_
|
784
third_party/gpus/rocm_configure.bzl
vendored
Normal file
784
third_party/gpus/rocm_configure.bzl
vendored
Normal file
@ -0,0 +1,784 @@
|
||||
# -*- Python -*-
|
||||
"""Repository rule for ROCm autoconfiguration.
|
||||
|
||||
`rocm_configure` depends on the following environment variables:
|
||||
|
||||
* `TF_NEED_ROCM`: Whether to enable building with ROCm.
|
||||
* `GCC_HOST_COMPILER_PATH`: The GCC host compiler path
|
||||
* `ROCM_TOOLKIT_PATH`: The path to the ROCm toolkit. Default is
|
||||
`/opt/rocm`.
|
||||
* `TF_ROCM_VERSION`: The version of the ROCm toolkit. If this is blank, then
|
||||
use the system default.
|
||||
* `TF_MIOPEN_VERSION`: The version of the MIOpen library.
|
||||
* `TF_ROCM_AMDGPU_TARGETS`: The AMDGPU targets. Default is
|
||||
`gfx803,gfx900`.
|
||||
"""
|
||||
|
||||
_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH"
|
||||
_ROCM_TOOLKIT_PATH = "ROCM_TOOLKIT_PATH"
|
||||
_TF_ROCM_VERSION = "TF_ROCM_VERSION"
|
||||
_TF_MIOPEN_VERSION = "TF_MIOPEN_VERSION"
|
||||
_TF_ROCM_AMDGPU_TARGETS = "TF_ROCM_AMDGPU_TARGETS"
|
||||
_TF_ROCM_CONFIG_REPO = "TF_ROCM_CONFIG_REPO"
|
||||
|
||||
_DEFAULT_ROCM_VERSION = ""
|
||||
_DEFAULT_MIOPEN_VERSION = ""
|
||||
_DEFAULT_ROCM_TOOLKIT_PATH = "/opt/rocm"
|
||||
_DEFAULT_ROCM_AMDGPU_TARGETS = ["gfx803", "gfx900"]
|
||||
|
||||
def find_cc(repository_ctx):
    """Find the C++ compiler.

    Uses the stripped value of GCC_HOST_COMPILER_PATH when it is set and
    non-empty, and "gcc" otherwise. A name starting with "/" is returned
    unchanged; anything else is resolved with repository_ctx.which().

    Args:
      repository_ctx: The repository context.

    Returns:
      The compiler path. Fails the build when the name cannot be resolved.
    """
    default_cc_name = "gcc"
    env_var = _GCC_HOST_COMPILER_PATH
    env = repository_ctx.os.environ

    chosen = default_cc_name
    if env_var in env:
        override = env[env_var].strip()
        if override:
            chosen = override

    if chosen.startswith("/"):
        # Absolute path, maybe we should make this supported by our which function.
        return chosen

    resolved = repository_ctx.which(chosen)
    if resolved == None:
        fail(("Cannot find {}, either correct your path or set the {}" +
              " environment variable").format(default_cc_name, env_var))
    return resolved
|
||||
|
||||
_INC_DIR_MARKER_BEGIN = "#include <...>"
|
||||
|
||||
def _cxx_inc_convert(path):
|
||||
"""Convert path returned by cc -E xc++ in a complete path."""
|
||||
path = path.strip()
|
||||
return path
|
||||
|
||||
def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp):
    """Compute the list of default C or C++ include directories.

    Runs `cc -no-canonical-prefixes -E -x<lang> - -v` and reads the directory
    list from its stderr: the lines after the "#include <...>" marker line, up
    to the end of the last line that starts with a space.

    Args:
      repository_ctx: The repository context.
      cc: The compiler to query.
      lang_is_cpp: True to query for C++, False for C.

    Returns:
      A list of directory paths, or [] when the expected markers are missing.
    """
    lang = "c++" if lang_is_cpp else "c"

    # TODO: We pass -no-canonical-prefixes here to match the compiler flags,
    #       but in rocm_clang CROSSTOOL file that is a `feature` and we should
    #       handle the case when it's disabled and no flag is passed
    probe = repository_ctx.execute([
        cc,
        "-no-canonical-prefixes",
        "-E",
        "-x" + lang,
        "-",
        "-v",
    ])
    log = probe.stderr

    marker_pos = log.find(_INC_DIR_MARKER_BEGIN)
    if marker_pos == -1:
        return []

    # The directory list starts on the line after the marker line.
    list_start = log.find("\n", marker_pos)
    if list_start == -1:
        return []

    # The last line starting with a space is taken as the last directory.
    last_entry = log.rfind("\n ")
    if last_entry == -1 or last_entry < list_start:
        return []

    list_end = log.find("\n", last_entry + 1)
    if list_end == -1:
        inc_dirs = log[list_start + 1:]
    else:
        inc_dirs = log[list_start + 1:list_end].strip()

    return [
        str(repository_ctx.path(_cxx_inc_convert(entry)))
        for entry in inc_dirs.split("\n")
    ]
|
||||
|
||||
def get_cxx_inc_directories(repository_ctx, cc):
    """Compute the list of default C and C++ include directories.

    Args:
      repository_ctx: The repository context.
      cc: The compiler to query.

    Returns:
      The C++ include directories followed by those C include directories
      that are not already in the C++ list.
    """

    # For some reason `clang -xc` sometimes returns include paths that are
    # different from the ones from `clang -xc++`. (Symlink and a dir)
    # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
    includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True)
    includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False)

    # Membership is tested against the list itself; a depset does not support
    # the `in` operator in newer Bazel releases.
    return includes_cpp + [
        inc
        for inc in includes_c
        if inc not in includes_cpp
    ]
|
||||
|
||||
def auto_configure_fail(msg):
    """Abort with msg behind a red "ROCm Configuration Error:" banner.

    Args:
      msg: The failure description appended after the banner.
    """
    ansi_red = "\033[0;31m"
    ansi_reset = "\033[0m"
    message = "\n%sROCm Configuration Error:%s %s\n" % (ansi_red, ansi_reset, msg)
    fail(message)
|
||||
|
||||
# END cc_configure common functions (see TODO above).
|
||||
|
||||
def _host_compiler_includes(repository_ctx, cc):
    """Generates the cxx_builtin_include_directory entries for gcc inc dirs.

    Args:
      repository_ctx: The repository context.
      cc: The path to the gcc host compiler.

    Returns:
      A newline-joined string with one cxx_builtin_include_directory entry per
      host compiler include directory and for the numpy header directory,
      followed by the -DTENSORFLOW_USE_ROCM unfiltered_cxx_flag entry. It can
      be added to the CROSSTOOL file.
    """
    include_dirs = get_cxx_inc_directories(repository_ctx, cc)

    # Add numpy headers
    include_dirs.append("/usr/lib/python2.7/dist-packages/numpy/core/include")

    crosstool_lines = [
        " cxx_builtin_include_directory: \"%s\"" % include_dir
        for include_dir in include_dirs
    ]

    # define TENSORFLOW_USE_ROCM
    crosstool_lines.append(" unfiltered_cxx_flag: \"-DTENSORFLOW_USE_ROCM\"")

    return "\n".join(crosstool_lines)
|
||||
|
||||
def _rocm_include_path(repository_ctx, rocm_config):
|
||||
"""Generates the cxx_builtin_include_directory entries for rocm inc dirs.
|
||||
|
||||
Args:
|
||||
repository_ctx: The repository context.
|
||||
cc: The path to the gcc host compiler.
|
||||
|
||||
Returns:
|
||||
A string containing the cxx_builtin_include_directory for each of the gcc
|
||||
host compiler include directories, which can be added to the CROSSTOOL
|
||||
file.
|
||||
"""
|
||||
inc_dirs = []
|
||||
|
||||
# general ROCm include path
|
||||
inc_dirs.append(rocm_config.rocm_toolkit_path + "/include")
|
||||
|
||||
# Add HSA headers
|
||||
inc_dirs.append("/opt/rocm/hsa/include")
|
||||
|
||||
# Add HIP headers
|
||||
inc_dirs.append("/opt/rocm/include/hip")
|
||||
inc_dirs.append("/opt/rocm/include/hip/hcc_detail")
|
||||
|
||||
# Add rocrand and hiprand headers
|
||||
inc_dirs.append("/opt/rocm/rocrand/include")
|
||||
inc_dirs.append("/opt/rocm/hiprand/include")
|
||||
|
||||
# Add rocfft headers
|
||||
inc_dirs.append("/opt/rocm/rocfft/include")
|
||||
|
||||
# Add rocBLAS headers
|
||||
inc_dirs.append("/opt/rocm/rocblas/include")
|
||||
|
||||
# Add MIOpen headers
|
||||
inc_dirs.append("/opt/rocm/miopen/include")
|
||||
|
||||
# Add hcc headers
|
||||
inc_dirs.append("/opt/rocm/hcc/include")
|
||||
inc_dirs.append("/opt/rocm/hcc/compiler/lib/clang/7.0.0/include/")
|
||||
inc_dirs.append("/opt/rocm/hcc/lib/clang/7.0.0/include")
|
||||
|
||||
# Newer hcc builds use/are based off of clang 8.0.0.
|
||||
inc_dirs.append("/opt/rocm/hcc/compiler/lib/clang/8.0.0/include/")
|
||||
inc_dirs.append("/opt/rocm/hcc/lib/clang/8.0.0/include")
|
||||
|
||||
inc_entries = []
|
||||
for inc_dir in inc_dirs:
|
||||
inc_entries.append(" cxx_builtin_include_directory: \"%s\"" % inc_dir)
|
||||
return "\n".join(inc_entries)
|
||||
|
||||
def _enable_rocm(repository_ctx):
|
||||
if "TF_NEED_ROCM" in repository_ctx.os.environ:
|
||||
enable_rocm = repository_ctx.os.environ["TF_NEED_ROCM"].strip()
|
||||
return enable_rocm == "1"
|
||||
return False
|
||||
|
||||
def _rocm_toolkit_path(repository_ctx):
    """Finds the rocm toolkit directory.

    Reads ROCM_TOOLKIT_PATH from the environment (stripped) and falls back to
    the default toolkit path when the variable is not set.

    Args:
      repository_ctx: The repository context.

    Returns:
      The real path of the rocm toolkit install directory, as a string. Fails
      the configuration when the directory does not exist.
    """
    env = repository_ctx.os.environ
    if _ROCM_TOOLKIT_PATH in env:
        candidate = env[_ROCM_TOOLKIT_PATH].strip()
    else:
        candidate = _DEFAULT_ROCM_TOOLKIT_PATH

    if not repository_ctx.path(candidate).exists:
        auto_configure_fail("Cannot find rocm toolkit path.")
    return str(repository_ctx.path(candidate).realpath)
|
||||
|
||||
def _amdgpu_targets(repository_ctx):
    """Returns a list of strings representing AMDGPU targets.

    The list comes from the comma-separated TF_ROCM_AMDGPU_TARGETS variable,
    or from the default targets when the variable is not set. Every entry read
    from the variable must be "gfx" followed only by digits; anything else
    fails the configuration.
    """
    env = repository_ctx.os.environ
    if _TF_ROCM_AMDGPU_TARGETS not in env:
        return _DEFAULT_ROCM_AMDGPU_TARGETS

    targets = env[_TF_ROCM_AMDGPU_TARGETS].split(",")
    for target in targets:
        well_formed = target.startswith("gfx") and target[3:].isdigit()
        if not well_formed:
            auto_configure_fail("Invalid AMDGPU target: %s" % target)
    return targets
|
||||
|
||||
def _cpu_value(repository_ctx):
|
||||
"""Returns the name of the host operating system.
|
||||
|
||||
Args:
|
||||
repository_ctx: The repository context.
|
||||
|
||||
Returns:
|
||||
A string containing the name of the host operating system.
|
||||
"""
|
||||
os_name = repository_ctx.os.name.lower()
|
||||
if os_name.startswith("mac os"):
|
||||
return "Darwin"
|
||||
if os_name.find("windows") != -1:
|
||||
return "Windows"
|
||||
result = repository_ctx.execute(["uname", "-s"])
|
||||
return result.stdout.strip()
|
||||
|
||||
def _lib_name(lib, cpu_value, version = "", static = False):
|
||||
"""Constructs the platform-specific name of a library.
|
||||
|
||||
Args:
|
||||
lib: The name of the library, such as "hip"
|
||||
cpu_value: The name of the host operating system.
|
||||
version: The version of the library.
|
||||
static: True the library is static or False if it is a shared object.
|
||||
|
||||
Returns:
|
||||
The platform-specific name of the library.
|
||||
"""
|
||||
if cpu_value in ("Linux"):
|
||||
if static:
|
||||
return "lib%s.a" % lib
|
||||
else:
|
||||
if version:
|
||||
version = ".%s" % version
|
||||
return "lib%s.so%s" % (lib, version)
|
||||
elif cpu_value == "Windows":
|
||||
return "%s.lib" % lib
|
||||
elif cpu_value == "Darwin":
|
||||
if static:
|
||||
return "lib%s.a" % lib
|
||||
elif version:
|
||||
version = ".%s" % version
|
||||
return "lib%s%s.dylib" % (lib, version)
|
||||
else:
|
||||
auto_configure_fail("Invalid cpu_value: %s" % cpu_value)
|
||||
|
||||
def _find_rocm_lib(
        lib,
        repository_ctx,
        cpu_value,
        basedir,
        version = "",
        static = False):
    """Finds the given ROCm libraries on the system.

    Directories are probed in a fixed order and the first one containing the
    library wins. On Linux, lib64, lib64/stubs and lib/x86_64-linux-gnu are
    tried first; lib and basedir itself are tried on every OS.

    Args:
      lib: The name of the library, such as "hip"
      repository_ctx: The repository context.
      cpu_value: The name of the host operating system.
      basedir: The install directory of ROCm.
      version: The version of the library.
      static: True if static library, False if shared object.

    Returns:
      Returns a struct with the following fields:
        file_name: The basename of the library found on the system.
        path: The full path to the library.
      Fails the configuration when the library is in none of the directories.
    """
    file_name = _lib_name(lib, cpu_value, version, static)

    search_dirs = []
    if cpu_value == "Linux":
        search_dirs += [
            "%s/lib64" % basedir,
            "%s/lib64/stubs" % basedir,
            "%s/lib/x86_64-linux-gnu" % basedir,
        ]
    search_dirs += ["%s/lib" % basedir, basedir]

    for search_dir in search_dirs:
        candidate = repository_ctx.path("%s/%s" % (search_dir, file_name))
        if candidate.exists:
            return struct(file_name = file_name, path = str(candidate.realpath))

    auto_configure_fail("Cannot find rocm library %s" % file_name)
|
||||
|
||||
def _find_libs(repository_ctx, rocm_config):
    """Returns the ROCm libraries on the system.

    Args:
      repository_ctx: The repository context.
      rocm_config: The ROCm config as returned by _get_rocm_config

    Returns:
      Map of library names to structs of filename and path as returned by
      _find_rocm_lib.
    """
    cpu_value = rocm_config.cpu_value
    toolkit_path = rocm_config.rocm_toolkit_path

    # (map key, library base name, search root relative to the toolkit path)
    lib_specs = [
        ("hip", "hip_hcc", ""),
        ("rocblas", "rocblas", "/rocblas"),
        ("rocfft", "rocfft", "/rocfft"),
        ("hiprand", "hiprand", "/hiprand"),
        ("miopen", "MIOpen", "/miopen"),
    ]
    return {
        key: _find_rocm_lib(
            lib_name,
            repository_ctx,
            cpu_value,
            toolkit_path + subdir,
        )
        for key, lib_name, subdir in lib_specs
    }
|
||||
|
||||
def _get_rocm_config(repository_ctx):
    """Detects and returns information about the ROCm installation on the system.

    Args:
      repository_ctx: The repository context.

    Returns:
      A struct containing the following fields:
        rocm_toolkit_path: The ROCm toolkit installation directory.
        amdgpu_targets: The AMDGPU targets returned by _amdgpu_targets.
        cpu_value: The name of the host operating system.
    """
    host_os = _cpu_value(repository_ctx)
    toolkit_root = _rocm_toolkit_path(repository_ctx)
    targets = _amdgpu_targets(repository_ctx)
    return struct(
        rocm_toolkit_path = toolkit_root,
        amdgpu_targets = targets,
        cpu_value = host_os,
    )
|
||||
|
||||
def _tpl(repository_ctx, tpl, substitutions = {}, out = None):
    """Instantiates //third_party/gpus/<tpl>.tpl into the repository.

    Args:
      repository_ctx: The repository context.
      tpl: Template label fragment, such as "rocm:BUILD".
      substitutions: Map of placeholder to replacement text.
      out: Output path; defaults to tpl with ":" replaced by "/".
    """
    destination = out if out else tpl.replace(":", "/")
    template_label = Label("//third_party/gpus/%s.tpl" % tpl)
    repository_ctx.template(destination, template_label, substitutions)
|
||||
|
||||
def _file(repository_ctx, label):
    """Copies //third_party/gpus/<label>.tpl into the repository unchanged.

    Args:
      repository_ctx: The repository context.
      label: Template label fragment; the output path is label with ":"
        replaced by "/".
    """
    destination = label.replace(":", "/")
    template_label = Label("//third_party/gpus/%s.tpl" % label)
    repository_ctx.template(destination, template_label, {})
|
||||
|
||||
_DUMMY_CROSSTOOL_BZL_FILE = """
|
||||
def error_gpu_disabled():
|
||||
fail("ERROR: Building with --config=rocm but TensorFlow is not configured " +
|
||||
"to build with GPU support. Please re-run ./configure and enter 'Y' " +
|
||||
"at the prompt to build with GPU support.")
|
||||
|
||||
native.genrule(
|
||||
name = "error_gen_crosstool",
|
||||
outs = ["CROSSTOOL"],
|
||||
cmd = "echo 'Should not be run.' && exit 1",
|
||||
)
|
||||
|
||||
native.filegroup(
|
||||
name = "crosstool",
|
||||
srcs = [":CROSSTOOL"],
|
||||
output_licenses = ["unencumbered"],
|
||||
)
|
||||
"""
|
||||
|
||||
_DUMMY_CROSSTOOL_BUILD_FILE = """
|
||||
load("//crosstool:error_gpu_disabled.bzl", "error_gpu_disabled")
|
||||
|
||||
error_gpu_disabled()
|
||||
"""
|
||||
|
||||
def _create_dummy_repository(repository_ctx):
    """Creates the placeholder repository used when ROCm is not enabled.

    Args:
      repository_ctx: The repository context.
    """
    host_cpu = _cpu_value(repository_ctx)

    # rocm/build_defs.bzl: report ROCm as not configured.
    build_defs_substitutions = {
        "%{rocm_is_configured}": "False",
        "%{rocm_extra_copts}": "[]",
    }
    _tpl(repository_ctx, "rocm:build_defs.bzl", build_defs_substitutions)

    # rocm/BUILD: library names only, with no headers or include genrules.
    build_substitutions = {
        "%{hip_lib}": _lib_name("hip", host_cpu),
        "%{rocblas_lib}": _lib_name("rocblas", host_cpu),
        "%{miopen_lib}": _lib_name("miopen", host_cpu),
        "%{rocfft_lib}": _lib_name("rocfft", host_cpu),
        "%{hiprand_lib}": _lib_name("hiprand", host_cpu),
        "%{rocm_include_genrules}": "",
        "%{rocm_headers}": "",
    }
    _tpl(repository_ctx, "rocm:BUILD", build_substitutions)

    # An empty placeholder header for the ROCm toolkit, since it is still
    # required by tensorflow/core/platform/default/build_config:rocm.
    repository_ctx.file("rocm/hip/include/hip/hip_runtime.h", "")

    # rocm_config.h is used by tensorflow/stream_executor/dso_loader.cc; the
    # dummy variant points at the default toolkit location.
    config_header_substitutions = {
        "%{rocm_toolkit_path}": _DEFAULT_ROCM_TOOLKIT_PATH,
    }
    _tpl(
        repository_ctx,
        "rocm:rocm_config.h",
        config_header_substitutions,
        "rocm/rocm/rocm_config.h",
    )

    # When the user builds with --config=rocm although ROCm was not
    # configured, this dummy crosstool package intercepts the build and fails
    # with an actionable error message.
    repository_ctx.file(
        "crosstool/error_gpu_disabled.bzl",
        _DUMMY_CROSSTOOL_BZL_FILE,
    )
    repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
|
||||
|
||||
def _execute(
        repository_ctx,
        cmdline,
        error_msg = None,
        error_details = None,
        empty_stdout_fine = False):
    """Runs a command and fails the configuration if it looks unsuccessful.

    The command counts as failed when it wrote anything to stderr, or when it
    produced no stdout and `empty_stdout_fine` is False.

    Args:
      repository_ctx: the repository_ctx object
      cmdline: list of strings, the command to execute
      error_msg: string, a summary of the error if the command fails
      error_details: string, details about the error or steps to fix it
      empty_stdout_fine: bool, if True, an empty stdout result is fine,
        otherwise it's an error

    Returns:
      the result of repository_ctx.execute(cmdline)
    """
    result = repository_ctx.execute(cmdline)
    stdout_missing = not empty_stdout_fine and not result.stdout
    if result.stderr or stdout_missing:
        summary = error_msg.strip() if error_msg else "Repository command failed"
        details = error_details if error_details else ""
        auto_configure_fail("\n".join([summary, result.stderr.strip(), details]))
    return result
|
||||
|
||||
def _norm_path(path):
    """Returns `path` with '/' separators and without one trailing slash.

    Args:
      path: string, a file system path; may be empty.

    Returns:
      The path with every backslash replaced by '/' and, if the result ends
      with '/', that final character removed. An empty input yields "".
    """
    path = path.replace("\\", "/")

    # endswith() is safe on the empty string, unlike indexing path[-1].
    if path.endswith("/"):
        path = path[:-1]
    return path
|
||||
|
||||
def _symlink_genrule_for_dir(
        repository_ctx,
        src_dir,
        dest_dir,
        genrule_name,
        src_files = [],
        dest_files = []):
    """Returns the text of a genrule that symlinks a set of files.

    When `src_dir` is given, the files are discovered by listing that
    directory and `src_files` / `dest_files` are ignored; otherwise the two
    lists are used as passed, paired up by index.

    Args:
      repository_ctx: The repository context.
      src_dir: directory to list, or None to use the explicit file lists.
      dest_dir: output directory prefix for the generated outputs.
      genrule_name: name of the generated genrule.
      src_files: source paths, used only when `src_dir` is None.
      dest_files: output paths, used only when `src_dir` is None.

    Returns:
      A string containing the genrule definition.
    """
    if src_dir != None:
        src_dir = _norm_path(src_dir)
        dest_dir = _norm_path(dest_dir)
        listing = _read_dir(repository_ctx, src_dir)
        src_files = listing.splitlines()

        # Outputs are the listed paths with src_dir stripped out.
        dest_files = listing.replace(src_dir, "").splitlines()

    # Start by clearing folders that might have been generated previously, to
    # avoid undesired inclusions.
    commands = [
        'if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi',
        'if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi',
    ]
    out_entries = []

    # With exactly one output, $(@D) already includes the full path to the
    # file's directory, so dest_dir must not be prepended to the link target.
    single_output = len(dest_files) == 1
    for index, rel_path in enumerate(dest_files):
        if rel_path == "":
            continue
        if single_output:
            target = "$(@D)/" + rel_path
        else:
            target = "$(@D)/" + dest_dir + rel_path
        commands.append('ln -s "%s" "%s"' % (src_files[index], target))
        out_entries.append(' "' + dest_dir + rel_path + '",')

    return _genrule(
        src_dir,
        genrule_name,
        " && ".join(commands),
        "\n".join(out_entries),
    )
|
||||
|
||||
def _genrule(src_dir, genrule_name, command, outs):
    """Renders a genrule definition as a string.

    Args:
      src_dir: not used by this function.
      genrule_name: string, value of the genrule's `name` attribute.
      command: string, placed verbatim inside the triple-quoted `cmd`.
      outs: string, pre-formatted entries placed inside the `outs` list.

    Returns:
      The genrule text, ending with a newline.
    """
    pieces = [
        "genrule(\n",
        ' name = "',
        genrule_name,
        '",\n',
        " outs = [\n",
        outs,
        "\n ],\n",
        ' cmd = """\n',
        command,
        '\n """,\n',
        ")\n",
    ]
    return "".join(pieces)
|
||||
|
||||
def _read_dir(repository_ctx, src_dir):
    """Lists every regular file below `src_dir`.

    Runs `find <src_dir> -follow -type f`, so subfolders are traversed and
    symlinks are followed. An empty listing is accepted.

    Args:
      repository_ctx: The repository context.
      src_dir: directory to search.

    Returns:
      The stdout of the find command: one full path per line.
    """
    find_cmd = ["find", src_dir, "-follow", "-type", "f"]
    return _execute(repository_ctx, find_cmd, empty_stdout_fine = True).stdout
|
||||
|
||||
def _compute_rocm_extra_copts(repository_ctx, amdgpu_targets):
    """Returns the extra ROCm copts as the string form of a list.

    Args:
      repository_ctx: The repository context (not used).
      amdgpu_targets: list of AMDGPU targets (not used; see below).

    Returns:
      Always "[]": no per-target compiler flags are generated here.
    """

    # AMDGPU targets are not turned into --amdgpu-target flags here; they
    # reach the crosstool wrapper driver through its %{rocm_amdgpu_targets}
    # substitution instead.
    return str([])
|
||||
|
||||
def _create_local_rocm_repository(repository_ctx):
    """Creates the repository containing files set up to build with ROCm.

    Generates rocm/build_defs.bzl, rocm/BUILD (with genrules that symlink the
    toolkit headers and libraries), the crosstool package and
    rocm/rocm/rocm_config.h from the detected ROCm configuration.

    Args:
      repository_ctx: The repository context.
    """
    rocm_config = _get_rocm_config(repository_ctx)
    rocm_toolkit_path = rocm_config.rocm_toolkit_path

    # Set up symbolic links for the rocm toolkit by creating genrules to do
    # symlinking. We create one genrule for each directory we want to track
    # under rocm_toolkit_path.
    genrules = [_symlink_genrule_for_dir(
        repository_ctx,
        rocm_toolkit_path + "/include",
        "rocm/include",
        "rocm-include",
    )]
    for component in ["rocfft", "rocblas", "miopen"]:
        genrules.append(_symlink_genrule_for_dir(
            repository_ctx,
            rocm_toolkit_path + "/" + component + "/include",
            "rocm/include/" + component,
            component + "-include",
        ))

    # The libraries are passed as explicit source/destination lists
    # (src_dir = None) because they do not live in a single directory.
    rocm_libs = _find_libs(repository_ctx, rocm_config)
    rocm_lib_src = [lib.path for lib in rocm_libs.values()]
    rocm_lib_dest = ["rocm/lib/" + lib.file_name for lib in rocm_libs.values()]
    genrules.append(_symlink_genrule_for_dir(
        repository_ctx,
        None,
        "",
        "rocm-lib",
        rocm_lib_src,
        rocm_lib_dest,
    ))

    # Comma-separated, double-quoted AMDGPU targets; shared by the crosstool
    # wrapper driver and rocm_config.h.
    quoted_amdgpu_targets = ",".join(
        ["\"%s\"" % target for target in rocm_config.amdgpu_targets],
    )

    # Set up BUILD file for rocm/
    _tpl(
        repository_ctx,
        "rocm:build_defs.bzl",
        {
            "%{rocm_is_configured}": "True",
            "%{rocm_extra_copts}": _compute_rocm_extra_copts(
                repository_ctx,
                rocm_config.amdgpu_targets,
            ),
        },
    )
    _tpl(
        repository_ctx,
        "rocm:BUILD",
        {
            "%{hip_lib}": rocm_libs["hip"].file_name,
            "%{rocblas_lib}": rocm_libs["rocblas"].file_name,
            "%{rocfft_lib}": rocm_libs["rocfft"].file_name,
            "%{hiprand_lib}": rocm_libs["hiprand"].file_name,
            "%{miopen_lib}": rocm_libs["miopen"].file_name,
            "%{rocm_include_genrules}": "\n".join(genrules),
            "%{rocm_headers}": ('":rocm-include",\n' +
                                '":rocfft-include",\n' +
                                '":rocblas-include",\n' +
                                '":miopen-include",'),
        },
    )

    # Set up crosstool/
    _tpl(
        repository_ctx,
        "crosstool:BUILD",
        {"%{linker_files}": ":empty", "%{win_linker_files}": ":empty"},
    )
    cc = find_cc(repository_ctx)
    host_compiler_includes = _host_compiler_includes(repository_ctx, cc)
    rocm_defines = {
        "%{rocm_include_path}": _rocm_include_path(
            repository_ctx,
            rocm_config,
        ),
        "%{host_compiler_includes}": host_compiler_includes,
        "%{clang_path}": str(cc),
    }
    _tpl(
        repository_ctx,
        "crosstool:CROSSTOOL_hipcc",
        rocm_defines,
        out = "crosstool/CROSSTOOL",
    )

    _tpl(
        repository_ctx,
        "crosstool:clang/bin/crosstool_wrapper_driver_rocm",
        {
            "%{cpu_compiler}": str(cc),
            # Locate hipcc under the detected toolkit instead of assuming
            # the default /opt/rocm install location.
            "%{hipcc_path}": rocm_toolkit_path + "/bin/hipcc",
            "%{gcc_host_compiler_path}": str(cc),
            "%{rocm_amdgpu_targets}": quoted_amdgpu_targets,
        },
    )

    # Set up rocm_config.h, which is used by
    # tensorflow/stream_executor/dso_loader.cc.
    _tpl(
        repository_ctx,
        "rocm:rocm_config.h",
        {
            "%{rocm_amdgpu_targets}": quoted_amdgpu_targets,
            "%{rocm_toolkit_path}": rocm_toolkit_path,
        },
        "rocm/rocm/rocm_config.h",
    )
|
||||
|
||||
def _create_remote_rocm_repository(repository_ctx, remote_config_repo):
    """Creates pointers to a remotely configured repo set up to build with ROCm.

    Args:
      repository_ctx: The repository context.
      remote_config_repo: value substituted for %{remote_rocm_repo} in the
        generated rocm/BUILD and crosstool/BUILD files.
    """
    _tpl(
        repository_ctx,
        "rocm:build_defs.bzl",
        {
            "%{rocm_is_configured}": "True",
            # _compute_rocm_extra_copts requires the AMDGPU target list as
            # its second argument; calling it with repository_ctx alone
            # fails with a missing-argument error.
            "%{rocm_extra_copts}": _compute_rocm_extra_copts(
                repository_ctx,
                _amdgpu_targets(repository_ctx),
            ),
        },
    )
    _tpl(
        repository_ctx,
        "rocm:remote.BUILD",
        {
            "%{remote_rocm_repo}": remote_config_repo,
        },
        "rocm/BUILD",
    )
    _tpl(
        repository_ctx,
        "crosstool:remote.BUILD",
        {
            "%{remote_rocm_repo}": remote_config_repo,
        },
        "crosstool/BUILD",
    )
|
||||
|
||||
def _rocm_autoconf_impl(repository_ctx):
    """Implementation of the rocm_autoconf repository rule.

    Picks one of three repository layouts: a dummy repository when ROCm is
    not enabled, a remote one when the _TF_ROCM_CONFIG_REPO environment
    variable is set, and a locally detected one otherwise.

    Args:
      repository_ctx: The repository context.
    """
    if not _enable_rocm(repository_ctx):
        _create_dummy_repository(repository_ctx)
        return

    env = repository_ctx.os.environ
    if _TF_ROCM_CONFIG_REPO in env:
        _create_remote_rocm_repository(repository_ctx, env[_TF_ROCM_CONFIG_REPO])
        return

    _create_local_rocm_repository(repository_ctx)
|
||||
|
||||
# Repository rule that generates the ROCm configuration repository; all of the
# work is done by _rocm_autoconf_impl.
rocm_configure = repository_rule(
    implementation = _rocm_autoconf_impl,
    # Environment variables declared as inputs of this rule (Bazel's
    # repository_rule `environ` attribute).
    environ = [
        _GCC_HOST_COMPILER_PATH,
        "TF_NEED_ROCM",
        _ROCM_TOOLKIT_PATH,
        _TF_ROCM_VERSION,
        _TF_MIOPEN_VERSION,
        _TF_ROCM_AMDGPU_TARGETS,
        _TF_ROCM_CONFIG_REPO,
    ],
)
|
||||
|
||||
"""Detects and configures the local ROCm toolchain.
|
||||
|
||||
Add the following to your WORKSPACE FILE:
|
||||
|
||||
```python
|
||||
rocm_configure(name = "local_config_rocm")
|
||||
```
|
||||
|
||||
Args:
|
||||
name: A unique name for this workspace rule.
|
||||
"""
|
@ -43,6 +43,9 @@ build:download_clang_use_lld --linkopt='-fuse-ld=lld'
|
||||
build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
|
||||
build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true
|
||||
|
||||
build:rocm --crosstool_top=@local_config_rocm//crosstool:toolchain
|
||||
build:rocm --define=using_rocm=true --define=using_rocm_hipcc=true
|
||||
|
||||
build:cuda_clang --crosstool_top=@local_config_cuda//crosstool:toolchain
|
||||
build:cuda_clang --define=using_cuda=true --define=using_cuda_clang=true --define=using_clang=true
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user