ROCm CI scripts: separate build/test job counts and make the ROCm path configurable.

Applies the same change to run_cc_core.sh, run_csb_tests.sh, run_py3_core.sh
and xla/linux/rocm/run_py3.sh:
  * N_JOBS becomes N_BUILD_JOBS and N_GPUS becomes TF_GPU_COUNT; the new
    N_TEST_JOBS is TF_GPU_COUNT * TF_TESTS_PER_GPU (TF_TESTS_PER_GPU=1).
  * ROCM_PATH is taken from the first positional argument when one is given,
    otherwise /opt/rocm-3.7.0 (previously hard-coded to /opt/rocm-3.3.0).
  * TF_GPU_COUNT is no longer exported; most bazel invocations receive it and
    TF_TESTS_PER_GPU through --test_env instead.
  * run_cc_core.sh additionally runs "large" tests and filters on -multi_gpu
    instead of -rocm_multi_gpu; xla run_py3.sh drops the -no_oss tag filter.

N_TEST_JOBS is computed with shell arithmetic rather than expr: expr exits
with status 1 whenever its result is 0, which under "set -e" would abort the
script on a host where the lspci pipeline counts zero GPUs.

NOTE(review): indentation widths and blank context lines were reconstructed
from a whitespace-collapsed copy of this patch; confirm against the target
files, or apply with "git apply --ignore-whitespace". The post-image hashes
on the index lines predate the N_TEST_JOBS edit.

diff --git a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
index 1f4a36f8de0..92d21cb133b 100755
--- a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
+++ b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
@@ -18,20 +18,27 @@
 set -e
 set -x
 
-N_JOBS=$(grep -c ^processor /proc/cpuinfo)
-N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
+TF_TESTS_PER_GPU=1
+N_TEST_JOBS=$(( TF_GPU_COUNT * TF_TESTS_PER_GPU ))
 
 echo ""
-echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
 echo ""
 
+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
+ROCM_INSTALL_DIR=/opt/rocm-3.7.0
+if [[ -n $1 ]]; then
+  ROCM_INSTALL_DIR=$1
+fi
+
 # Run configure.
 export PYTHON_BIN_PATH=`which python3`
 export CC_OPT_FLAGS='-mavx'
 
 export TF_NEED_ROCM=1
-export ROCM_PATH=/opt/rocm-3.3.0
-export TF_GPU_COUNT=${N_GPUS}
+export ROCM_PATH=$ROCM_INSTALL_DIR
 
 yes "" | $PYTHON_BIN_PATH configure.py
 
@@ -39,15 +46,17 @@ yes "" | $PYTHON_BIN_PATH configure.py
 bazel test \
       --config=rocm \
       -k \
-      --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
+      --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-multi_gpu,-v1only \
       --test_lang_filters=cc \
-      --jobs=${N_JOBS} \
-      --local_test_jobs=${TF_GPU_COUNT}\
+      --jobs=${N_BUILD_JOBS} \
+      --local_test_jobs=${N_TEST_JOBS} \
+      --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
+      --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
       --test_timeout 600,900,2400,7200 \
       --build_tests_only \
       --test_output=errors \
       --test_sharding_strategy=disabled \
-      --test_size_filters=small,medium \
+      --test_size_filters=small,medium,large \
       --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \
       -- \
       //tensorflow/... \
@@ -59,11 +68,14 @@ bazel test \
       --config=rocm \
       -k \
       --test_tag_filters=gpu \
-      --jobs=${N_JOBS} \
-      --local_test_jobs=1 \
+      --jobs=${N_BUILD_JOBS} \
+      --local_test_jobs=${N_TEST_JOBS} \
+      --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
+      --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
       --test_timeout 600,900,2400,7200 \
       --build_tests_only \
       --test_output=errors \
       --test_sharding_strategy=disabled \
+      --test_size_filters=small,medium,large \
       -- \
       //tensorflow/core/nccl:nccl_manager_test
diff --git a/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh b/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh
index 4962b2789b1..80c0686e647 100755
--- a/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh
+++ b/tensorflow/tools/ci_build/linux/rocm/run_csb_tests.sh
@@ -18,20 +18,27 @@
 set -e
 set -x
 
-N_JOBS=$(grep -c ^processor /proc/cpuinfo)
-N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
+TF_TESTS_PER_GPU=1
+N_TEST_JOBS=$(( TF_GPU_COUNT * TF_TESTS_PER_GPU ))
 
 echo ""
-echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
 echo ""
 
+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
+ROCM_INSTALL_DIR=/opt/rocm-3.7.0
+if [[ -n $1 ]]; then
+  ROCM_INSTALL_DIR=$1
+fi
+
 # Run configure.
 export PYTHON_BIN_PATH=`which python3`
 export CC_OPT_FLAGS='-mavx'
 
 export TF_NEED_ROCM=1
-export ROCM_PATH=/opt/rocm-3.3.0
-export TF_GPU_COUNT=${N_GPUS}
+export ROCM_PATH=$ROCM_INSTALL_DIR
 
 yes "" | $PYTHON_BIN_PATH configure.py
 
@@ -40,8 +47,10 @@ bazel test \
       --config=rocm \
       -k \
       --test_tag_filters=gpu,-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-      --jobs=${N_JOBS} \
-      --local_test_jobs=${TF_GPU_COUNT} \
+      --jobs=${N_BUILD_JOBS} \
+      --local_test_jobs=${N_TEST_JOBS} \
+      --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
+      --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
       --test_timeout 600,900,2400,7200 \
       --test_output=errors \
       --test_sharding_strategy=disabled \
@@ -60,8 +69,8 @@ bazel test \
       --test_tag_filters=gpu \
       --test_timeout 600,900,2400,7200 \
       --test_output=errors \
-      --jobs=${N_JOBS} \
-      --local_test_jobs=1 \
+      --jobs=${N_BUILD_JOBS} \
+      --local_test_jobs=${N_TEST_JOBS} \
       --test_sharding_strategy=disabled \
       -- \
       //tensorflow/core/nccl:nccl_manager_test
diff --git a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
index 7ea866f8e20..3a09081dd6a 100755
--- a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
+++ b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
@@ -18,20 +18,27 @@
 set -e
 set -x
 
-N_JOBS=$(grep -c ^processor /proc/cpuinfo)
-N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
+TF_TESTS_PER_GPU=1
+N_TEST_JOBS=$(( TF_GPU_COUNT * TF_TESTS_PER_GPU ))
 
 echo ""
-echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
 echo ""
 
+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
+ROCM_INSTALL_DIR=/opt/rocm-3.7.0
+if [[ -n $1 ]]; then
+  ROCM_INSTALL_DIR=$1
+fi
+
 # Run configure.
 export PYTHON_BIN_PATH=`which python3`
 export CC_OPT_FLAGS='-mavx'
 
 export TF_NEED_ROCM=1
-export ROCM_PATH=/opt/rocm-3.3.0
-export TF_GPU_COUNT=${N_GPUS}
+export ROCM_PATH=$ROCM_INSTALL_DIR
 
 yes "" | $PYTHON_BIN_PATH configure.py
 
@@ -41,8 +48,10 @@ bazel test \
       -k \
       --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
       --test_lang_filters=py \
-      --jobs=${N_JOBS} \
-      --local_test_jobs=${TF_GPU_COUNT} \
+      --jobs=${N_BUILD_JOBS} \
+      --local_test_jobs=${N_TEST_JOBS} \
+      --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
+      --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
       --test_timeout 600,900,2400,7200 \
       --build_tests_only \
       --test_output=errors \
diff --git a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
index 6ce1fad9cc7..d623b77d533 100755
--- a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
+++ b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
@@ -18,20 +18,27 @@
 set -e
 set -x
 
-N_JOBS=$(grep -c ^processor /proc/cpuinfo)
-N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
+TF_TESTS_PER_GPU=1
+N_TEST_JOBS=$(( TF_GPU_COUNT * TF_TESTS_PER_GPU ))
 
 echo ""
-echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
 echo ""
 
+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
+ROCM_INSTALL_DIR=/opt/rocm-3.7.0
+if [[ -n $1 ]]; then
+  ROCM_INSTALL_DIR=$1
+fi
+
 # Run configure.
 export PYTHON_BIN_PATH=`which python3`
 export CC_OPT_FLAGS='-mavx'
 
 export TF_NEED_ROCM=1
-export ROCM_PATH=/opt/rocm-3.3.0
-export TF_GPU_COUNT=${N_GPUS}
+export ROCM_PATH=$ROCM_INSTALL_DIR
 
 yes "" | $PYTHON_BIN_PATH configure.py
 echo "build --distinct_host_configuration=false" >> .tf_configure.bazelrc
@@ -41,9 +48,11 @@ bazel test \
       --config=rocm \
       --config=xla \
       -k \
-      --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-      --jobs=${N_JOBS} \
-      --local_test_jobs=${TF_GPU_COUNT} \
+      --test_tag_filters=-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
+      --jobs=${N_BUILD_JOBS} \
+      --local_test_jobs=${N_TEST_JOBS} \
+      --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
+      --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
       --test_timeout 600,900,2400,7200 \
       --build_tests_only \
       --test_output=errors \
@@ -65,9 +74,11 @@ bazel test \
       --config=rocm \
       --config=xla \
       -k \
-      --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-      --jobs=${N_JOBS} \
-      --local_test_jobs=${TF_GPU_COUNT} \
+      --test_tag_filters=-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
+      --jobs=${N_BUILD_JOBS} \
+      --local_test_jobs=${N_TEST_JOBS} \
+      --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
+      --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
       --test_timeout 600,900,2400,7200 \
       --build_tests_only \
       --test_output=errors \