Update ROCm CI scripts to use ROCm 3.7

This commit is contained in:
Deven Desai 2020-08-12 00:39:02 +00:00
parent 566d2a95c6
commit 9dcaad456e
4 changed files with 79 additions and 38 deletions

View File

@@ -18,20 +18,27 @@
set -e
set -x
N_JOBS=$(grep -c ^processor /proc/cpuinfo)
N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
# Number of concurrent build jobs: the count of 'processor' entries in
# /proc/cpuinfo.
N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
# GPU count: lspci lines that mention both 'controller' and 'AMD/ATI'.
# The count is taken with `wc -l` rather than `grep -c` on purpose: the
# pipeline's exit status is then that of wc, so a machine with no matching
# device yields 0 instead of a failing command (assuming pipefail is off).
TF_GPU_COUNT=$(lspci | grep 'controller' | grep 'AMD/ATI' | wc -l)
# Multiplier applied to the GPU count to obtain the concurrent test job count.
TF_TESTS_PER_GPU=1
# Total concurrent test jobs. Shell arithmetic is used instead of `expr`
# because `expr` exits with status 1 whenever its result is 0, which under
# `set -e` would terminate the script silently when no GPU is detected.
N_TEST_JOBS=$(( TF_GPU_COUNT * TF_TESTS_PER_GPU ))
echo ""
echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
echo ""
# Resolve the ROCm install directory: a non-empty first positional
# argument wins; otherwise fall back to the ROCm 3.7.0 default path.
ROCM_INSTALL_DIR=${1:-/opt/rocm-3.7.0}
# Run configure.
export PYTHON_BIN_PATH=`which python3`
export CC_OPT_FLAGS='-mavx'
export TF_NEED_ROCM=1
export ROCM_PATH=/opt/rocm-3.3.0
export TF_GPU_COUNT=${N_GPUS}
export ROCM_PATH=$ROCM_INSTALL_DIR
yes "" | $PYTHON_BIN_PATH configure.py
@@ -39,15 +46,17 @@ yes "" | $PYTHON_BIN_PATH configure.py
bazel test \
--config=rocm \
-k \
--test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
--test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-multi_gpu,-v1only \
--test_lang_filters=cc \
--jobs=${N_JOBS} \
--local_test_jobs=${TF_GPU_COUNT}\
--jobs=${N_BUILD_JOBS} \
--local_test_jobs=${N_TEST_JOBS} \
--test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
--test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
--test_timeout 600,900,2400,7200 \
--build_tests_only \
--test_output=errors \
--test_sharding_strategy=disabled \
--test_size_filters=small,medium \
--test_size_filters=small,medium,large \
--run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \
-- \
//tensorflow/... \
@@ -59,11 +68,14 @@ bazel test \
--config=rocm \
-k \
--test_tag_filters=gpu \
--jobs=${N_JOBS} \
--local_test_jobs=1 \
--jobs=${N_BUILD_JOBS} \
--local_test_jobs=${N_TEST_JOBS} \
--test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
--test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
--test_timeout 600,900,2400,7200 \
--build_tests_only \
--test_output=errors \
--test_sharding_strategy=disabled \
--test_size_filters=small,medium,large \
-- \
//tensorflow/core/nccl:nccl_manager_test

View File

@@ -18,20 +18,27 @@
set -e
set -x
N_JOBS=$(grep -c ^processor /proc/cpuinfo)
N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
# Number of concurrent build jobs: the count of 'processor' entries in
# /proc/cpuinfo.
N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
# GPU count: lspci lines that mention both 'controller' and 'AMD/ATI'.
# The count is taken with `wc -l` rather than `grep -c` on purpose: the
# pipeline's exit status is then that of wc, so a machine with no matching
# device yields 0 instead of a failing command (assuming pipefail is off).
TF_GPU_COUNT=$(lspci | grep 'controller' | grep 'AMD/ATI' | wc -l)
# Multiplier applied to the GPU count to obtain the concurrent test job count.
TF_TESTS_PER_GPU=1
# Total concurrent test jobs. Shell arithmetic is used instead of `expr`
# because `expr` exits with status 1 whenever its result is 0, which under
# `set -e` would terminate the script silently when no GPU is detected.
N_TEST_JOBS=$(( TF_GPU_COUNT * TF_TESTS_PER_GPU ))
echo ""
echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
echo ""
# Resolve the ROCm install directory: a non-empty first positional
# argument wins; otherwise fall back to the ROCm 3.7.0 default path.
ROCM_INSTALL_DIR=${1:-/opt/rocm-3.7.0}
# Run configure.
export PYTHON_BIN_PATH=`which python3`
export CC_OPT_FLAGS='-mavx'
export TF_NEED_ROCM=1
export ROCM_PATH=/opt/rocm-3.3.0
export TF_GPU_COUNT=${N_GPUS}
export ROCM_PATH=$ROCM_INSTALL_DIR
yes "" | $PYTHON_BIN_PATH configure.py
@@ -40,8 +47,10 @@ bazel test \
--config=rocm \
-k \
--test_tag_filters=gpu,-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
--jobs=${N_JOBS} \
--local_test_jobs=${TF_GPU_COUNT} \
--jobs=${N_BUILD_JOBS} \
--local_test_jobs=${N_TEST_JOBS} \
--test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
--test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
--test_timeout 600,900,2400,7200 \
--test_output=errors \
--test_sharding_strategy=disabled \
@@ -60,8 +69,8 @@ bazel test \
--test_tag_filters=gpu \
--test_timeout 600,900,2400,7200 \
--test_output=errors \
--jobs=${N_JOBS} \
--local_test_jobs=1 \
--jobs=${N_BUILD_JOBS} \
--local_test_jobs=${N_TEST_JOBS} \
--test_sharding_strategy=disabled \
-- \
//tensorflow/core/nccl:nccl_manager_test

View File

@@ -18,20 +18,27 @@
set -e
set -x
N_JOBS=$(grep -c ^processor /proc/cpuinfo)
N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
# Number of concurrent build jobs: the count of 'processor' entries in
# /proc/cpuinfo.
N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
# GPU count: lspci lines that mention both 'controller' and 'AMD/ATI'.
# The count is taken with `wc -l` rather than `grep -c` on purpose: the
# pipeline's exit status is then that of wc, so a machine with no matching
# device yields 0 instead of a failing command (assuming pipefail is off).
TF_GPU_COUNT=$(lspci | grep 'controller' | grep 'AMD/ATI' | wc -l)
# Multiplier applied to the GPU count to obtain the concurrent test job count.
TF_TESTS_PER_GPU=1
# Total concurrent test jobs. Shell arithmetic is used instead of `expr`
# because `expr` exits with status 1 whenever its result is 0, which under
# `set -e` would terminate the script silently when no GPU is detected.
N_TEST_JOBS=$(( TF_GPU_COUNT * TF_TESTS_PER_GPU ))
echo ""
echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
echo ""
# Resolve the ROCm install directory: a non-empty first positional
# argument wins; otherwise fall back to the ROCm 3.7.0 default path.
ROCM_INSTALL_DIR=${1:-/opt/rocm-3.7.0}
# Run configure.
export PYTHON_BIN_PATH=`which python3`
export CC_OPT_FLAGS='-mavx'
export TF_NEED_ROCM=1
export ROCM_PATH=/opt/rocm-3.3.0
export TF_GPU_COUNT=${N_GPUS}
export ROCM_PATH=$ROCM_INSTALL_DIR
yes "" | $PYTHON_BIN_PATH configure.py
@@ -41,8 +48,10 @@ bazel test \
-k \
--test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
--test_lang_filters=py \
--jobs=${N_JOBS} \
--local_test_jobs=${TF_GPU_COUNT} \
--jobs=${N_BUILD_JOBS} \
--local_test_jobs=${N_TEST_JOBS} \
--test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
--test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
--test_timeout 600,900,2400,7200 \
--build_tests_only \
--test_output=errors \

View File

@@ -18,20 +18,27 @@
set -e
set -x
N_JOBS=$(grep -c ^processor /proc/cpuinfo)
N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
# Number of concurrent build jobs: the count of 'processor' entries in
# /proc/cpuinfo.
N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
# GPU count: lspci lines that mention both 'controller' and 'AMD/ATI'.
# The count is taken with `wc -l` rather than `grep -c` on purpose: the
# pipeline's exit status is then that of wc, so a machine with no matching
# device yields 0 instead of a failing command (assuming pipefail is off).
TF_GPU_COUNT=$(lspci | grep 'controller' | grep 'AMD/ATI' | wc -l)
# Multiplier applied to the GPU count to obtain the concurrent test job count.
TF_TESTS_PER_GPU=1
# Total concurrent test jobs. Shell arithmetic is used instead of `expr`
# because `expr` exits with status 1 whenever its result is 0, which under
# `set -e` would terminate the script silently when no GPU is detected.
N_TEST_JOBS=$(( TF_GPU_COUNT * TF_TESTS_PER_GPU ))
echo ""
echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
echo ""
# Resolve the ROCm install directory: a non-empty first positional
# argument wins; otherwise fall back to the ROCm 3.7.0 default path.
ROCM_INSTALL_DIR=${1:-/opt/rocm-3.7.0}
# Run configure.
export PYTHON_BIN_PATH=`which python3`
export CC_OPT_FLAGS='-mavx'
export TF_NEED_ROCM=1
export ROCM_PATH=/opt/rocm-3.3.0
export TF_GPU_COUNT=${N_GPUS}
export ROCM_PATH=$ROCM_INSTALL_DIR
yes "" | $PYTHON_BIN_PATH configure.py
echo "build --distinct_host_configuration=false" >> .tf_configure.bazelrc
@@ -41,9 +48,11 @@ bazel test \
--config=rocm \
--config=xla \
-k \
--test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
--jobs=${N_JOBS} \
--local_test_jobs=${TF_GPU_COUNT} \
--test_tag_filters=-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
--jobs=${N_BUILD_JOBS} \
--local_test_jobs=${N_TEST_JOBS} \
--test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
--test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
--test_timeout 600,900,2400,7200 \
--build_tests_only \
--test_output=errors \
@@ -65,9 +74,11 @@ bazel test \
--config=rocm \
--config=xla \
-k \
--test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
--jobs=${N_JOBS} \
--local_test_jobs=${TF_GPU_COUNT} \
--test_tag_filters=-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
--jobs=${N_BUILD_JOBS} \
--local_test_jobs=${N_TEST_JOBS} \
--test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
--test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
--test_timeout 600,900,2400,7200 \
--build_tests_only \
--test_output=errors \