[ROCm] Updating Dockerfile.rocm / CI scripts to use ROCm 3.3

This commit is contained in:
Deven Desai 2020-04-17 15:00:56 +00:00
parent 482e77f3cd
commit f0bf514df3
6 changed files with 40 additions and 9 deletions

View File

@ -1171,14 +1171,16 @@ def system_specific_test_config(environ_cp):
test_only_filters = ['-oss_serial']
if is_windows():
test_and_build_filters.append('-no_windows')
if environ_cp.get('TF_NEED_CUDA', None) == '1':
if ((environ_cp.get('TF_NEED_CUDA', None) == '1') or
(environ_cp.get('TF_NEED_ROCM', None) == '1')):
test_and_build_filters += ['-no_windows_gpu', '-no_gpu']
else:
test_and_build_filters.append('-gpu')
elif is_macos():
test_and_build_filters += ['-gpu', '-nomac', '-no_mac']
elif is_linux():
if environ_cp.get('TF_NEED_CUDA', None) == '1':
if ((environ_cp.get('TF_NEED_CUDA', None) == '1') or
(environ_cp.get('TF_NEED_ROCM', None) == '1')):
test_and_build_filters.append('-no_gpu')
write_to_bazelrc('test --test_env=LD_LIBRARY_PATH')
else:
@ -1416,6 +1418,10 @@ def main():
write_action_env_to_bazelrc('LD_LIBRARY_PATH',
environ_cp.get('LD_LIBRARY_PATH'))
if (environ_cp.get('TF_NEED_ROCM') == '1' and environ_cp.get('ROCM_PATH')):
write_action_env_to_bazelrc('ROCM_PATH',environ_cp.get('ROCM_PATH'))
write_action_env_to_bazelrc('ROCM_ROOT',environ_cp.get('ROCM_PATH'))
environ_cp['TF_NEED_CUDA'] = str(
int(get_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False)))
if (environ_cp.get('TF_NEED_CUDA') == '1' and

View File

@ -3,8 +3,8 @@
FROM ubuntu:xenial
MAINTAINER Jeff Poznanovic <jeffrey.poznanovic@amd.com>
ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/3.0/
ARG ROCM_PATH=/opt/rocm
ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/3.3/
ARG ROCM_PATH=/opt/rocm-3.3.0
ENV DEBIAN_FRONTEND noninteractive
ENV TF_NEED_ROCM 1
@ -71,7 +71,21 @@ ENV PATH="$ROCM_PATH/bin:${PATH}"
ENV PATH="$OPENCL_ROOT/bin:${PATH}"
# Add target file to help determine which device(s) to build for
RUN bash -c 'echo -e "gfx803\ngfx900\ngfx906" >> /opt/rocm/bin/target.lst'
RUN bash -c 'echo -e "gfx803\ngfx900\ngfx906" >> ${ROCM_PATH}/bin/target.lst'
# Need to explicitly create the $ROCM_PATH/.info/version file to work around what seems to be a bazel bug
# The env vars being set via --action_env in .bazelrc and .tf_configure.bazelrc files are sometimes
# not getting set in the build command being spawned by bazel (in theory this should not happen)
# As a consequence ROCM_PATH is sometimes not set for the hipcc commands.
# When hipcc invokes hcc, it specifies $ROCM_PATH/.../include dirs via the `-isystem` options
# If ROCM_PATH is not set, it defaults to /opt/rocm, and as a consequence a dependency is generated on the
# header files included within `/opt/rocm`, which then leads to bazel dependency errors
# Explicitly creating the $ROCM_PATH/.info/version allows ROCM path to be set correctly, even when ROCM_PATH
# is not explicitly set, and thus avoids the eventual bazel dependency error.
# The bazel bug needs to be root-caused and addressed, but that is out of our control and may take a long time
# to come to fruition, so implementing the workaround to make do till then
# Filed https://github.com/bazelbuild/bazel/issues/11163 for tracking this
RUN touch ${ROCM_PATH}/.info/version
# Copy and run the install scripts.
COPY install/*.sh /install/
@ -90,3 +104,7 @@ COPY install/.bazelrc /etc/bazel.bazelrc
# Configure the build for our ROCm configuration.
ENV TF_NEED_ROCM 1
# This is a temporary workaround to fix Out-Of-Memory errors we are running into with XLA perf tests
# By default, HIP runtime "hides" 256MB from the TF Runtime, but with recent changes (update to ROCm2.3, dynamic loading of roc* libs, et al)
# it seems that we need to up the threshold slightly to 320MB
ENV HIP_HIDDEN_FREE_MEM=320

View File

@ -30,6 +30,7 @@ export PYTHON_BIN_PATH=`which python3`
export CC_OPT_FLAGS='-mavx'
export TF_NEED_ROCM=1
export ROCM_PATH=/opt/rocm-3.3.0
export TF_GPU_COUNT=${N_GPUS}
yes "" | $PYTHON_BIN_PATH configure.py
@ -42,7 +43,7 @@ bazel test \
--test_lang_filters=cc \
--jobs=${N_JOBS} \
--local_test_jobs=${TF_GPU_COUNT}\
--test_timeout 300,450,1200,3600 \
--test_timeout 600,900,2400,7200 \
--build_tests_only \
--test_output=errors \
--test_sharding_strategy=disabled \

View File

@ -30,6 +30,7 @@ export PYTHON_BIN_PATH=`which python3`
export CC_OPT_FLAGS='-mavx'
export TF_NEED_ROCM=1
export ROCM_PATH=/opt/rocm-3.3.0
export TF_GPU_COUNT=${N_GPUS}
yes "" | $PYTHON_BIN_PATH configure.py
@ -38,12 +39,13 @@ yes "" | $PYTHON_BIN_PATH configure.py
bazel test \
--config=rocm \
-k \
--test_tag_filters=gpu,-no_gpu,-no_rocm,-benchmark-test,-no_oss,-oss_serial,-rocm_multi_gpu, \
--test_timeout 600,900,2400,7200 \
--test_output=errors \
--test_tag_filters=gpu,-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
--jobs=${N_JOBS} \
--local_test_jobs=${TF_GPU_COUNT} \
--test_timeout 600,900,2400,7200 \
--test_output=errors \
--test_sharding_strategy=disabled \
--test_size_filters=small,medium \
--run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \
-- \
//tensorflow/... \

View File

@ -30,6 +30,7 @@ export PYTHON_BIN_PATH=`which python3`
export CC_OPT_FLAGS='-mavx'
export TF_NEED_ROCM=1
export ROCM_PATH=/opt/rocm-3.3.0
export TF_GPU_COUNT=${N_GPUS}
yes "" | $PYTHON_BIN_PATH configure.py
@ -46,6 +47,7 @@ bazel test \
--build_tests_only \
--test_output=errors \
--test_sharding_strategy=disabled \
--test_size_filters=small,medium \
--run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \
-- \
//tensorflow/... \

View File

@ -30,6 +30,7 @@ export PYTHON_BIN_PATH=`which python3`
export CC_OPT_FLAGS='-mavx'
export TF_NEED_ROCM=1
export ROCM_PATH=/opt/rocm-3.3.0
export TF_GPU_COUNT=${N_GPUS}
yes "" | $PYTHON_BIN_PATH configure.py
@ -47,6 +48,7 @@ bazel test \
--build_tests_only \
--test_output=errors \
--test_sharding_strategy=disabled \
--test_size_filters=small,medium \
--run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \
-- \
//tensorflow/compiler/... \