From 262cbe78279772e1c95f609af7c289f22e4bfea6 Mon Sep 17 00:00:00 2001
From: Deven Desai
Date: Wed, 18 Dec 2019 20:23:16 +0000
Subject: [PATCH] sync (ROCm fork --> TF repo) the dockerfile+scripts used for ROCm CI

---
Review notes (text between the "---" separator and the diffstat is ignored
by `git am` and is not part of the commit message):

* Both Dockerfiles: `rocm-profiler` and `cxlactivitylogger` are dropped from
  the `apt-get install` package list; everything else in that RUN step is
  unchanged.
* run_cc_core.sh / run_py3_core.sh: the single `bazel test` invocation is
  rewritten with one flag per line. Functional changes visible in the hunks:
  `-rocm_multi_gpu` and `-v1only` are added to --test_tag_filters,
  `--config=opt` is no longer passed, and the `-//tensorflow/contrib/...`
  exclusion is removed.
* run_cc_core.sh additionally excludes `//tensorflow/lite/delegates/gpu/gl/...`
  and `.../cl/...`, and chains (`&&`) a second `bazel test` that runs only
  `//tensorflow/core/nccl:nccl_manager_test` with `--local_test_jobs=1`,
  without `--run_under` and without the `-rocm_multi_gpu` tag filter.
* xla/linux/rocm/run_py3.sh: exports `CC_OPT_FLAGS='-mavx'`, removes the
  `bazel clean` step, excludes nine targets from the main run and re-runs
  exactly those nine in a second, chained invocation with
  `--test_env=TF2_BEHAVIOR=0`. Because of the `&&`, the second invocation
  only starts when the first one exits successfully.
* Reviewer fix: in run_cc_core.sh a space was added before the line
  continuation backslash after `--local_test_jobs=${TF_GPU_COUNT}` so the
  line matches its siblings and does not rely on the next line's indentation
  to keep the two arguments apart.
* NOTE(review): leading indentation inside the hunks is normalised to four
  spaces; confirm against the target tree before applying, or apply with
  `git apply --ignore-whitespace`.

 .../ci_build/Dockerfile.rbe.rocm-ubuntu16.04  |  3 +-
 tensorflow/tools/ci_build/Dockerfile.rocm     |  3 +-
 .../tools/ci_build/linux/rocm/run_cc_core.sh  | 37 ++++++++++---
 .../tools/ci_build/linux/rocm/run_py3_core.sh | 21 +++++--
 .../tools/ci_build/xla/linux/rocm/run_py3.sh  | 55 ++++++++++++++++---
 5 files changed, 94 insertions(+), 25 deletions(-)

diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu16.04 b/tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu16.04
index 5bf7d05e0f9..7fb037f0dfa 100644
--- a/tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu16.04
+++ b/tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu16.04
@@ -16,8 +16,7 @@ RUN sh -c "echo deb [arch=amd64] $DEB_ROCM_REPO xenial main > /etc/apt/sources.
 RUN apt-get update --allow-insecure-repositories && \
     DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
     rocm-dev rocm-libs hipcub rocm-utils rocm-cmake \
-    rocfft miopen-hip miopengemm rocblas hipblas rocrand rccl \
-    rocm-profiler cxlactivitylogger && \
+    rocfft miopen-hip miopengemm rocblas hipblas rocrand rccl && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
diff --git a/tensorflow/tools/ci_build/Dockerfile.rocm b/tensorflow/tools/ci_build/Dockerfile.rocm
index a083bc6debd..70029d2a9a9 100644
--- a/tensorflow/tools/ci_build/Dockerfile.rocm
+++ b/tensorflow/tools/ci_build/Dockerfile.rocm
@@ -58,8 +58,7 @@ RUN apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteracti
 RUN apt-get update --allow-insecure-repositories && \
     DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
     rocm-dev rocm-libs hipcub rocm-utils rocm-cmake \
-    rocfft miopen-hip miopengemm rocblas hipblas rocrand rccl \
-    rocm-profiler cxlactivitylogger && \
+    rocfft miopen-hip miopengemm rocblas hipblas rocrand rccl && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
diff --git a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
index 0286d0aea4c..0eb7fec7d9e 100755
--- a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
+++ b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh
@@ -35,10 +35,33 @@ export TF_GPU_COUNT=${N_GPUS}
 yes "" | $PYTHON_BIN_PATH configure.py
 
 # Run bazel test command. Double test timeouts to avoid flakes.
-bazel test --config=rocm --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test -k \
-    --test_lang_filters=cc --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \
-    --build_tests_only --test_output=errors --local_test_jobs=${TF_GPU_COUNT} --config=opt \
-    --test_sharding_strategy=disabled \
-    --test_size_filters=small,medium \
-    --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute -- \
-    //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/...
+bazel test \
+    --config=rocm \
+    -k \
+    --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
+    --test_lang_filters=cc \
+    --jobs=${N_JOBS} \
+    --local_test_jobs=${TF_GPU_COUNT} \
+    --test_timeout 300,450,1200,3600 \
+    --build_tests_only \
+    --test_output=errors \
+    --test_sharding_strategy=disabled \
+    --test_size_filters=small,medium \
+    --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \
+    -- \
+    //tensorflow/... \
+    -//tensorflow/compiler/... \
+    -//tensorflow/lite/delegates/gpu/gl/... \
+    -//tensorflow/lite/delegates/gpu/cl/... \
+&& bazel test \
+    --config=rocm \
+    -k \
+    --test_tag_filters=-no_gpu,-no_rocm,-v1only \
+    --jobs=${N_JOBS} \
+    --local_test_jobs=1 \
+    --test_timeout 600,900,2400,7200 \
+    --build_tests_only \
+    --test_output=errors \
+    --test_sharding_strategy=disabled \
+    -- \
+    //tensorflow/core/nccl:nccl_manager_test
diff --git a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
index 424b3e6fa0a..64bfffad149 100755
--- a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
+++ b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
@@ -35,9 +35,18 @@ export TF_GPU_COUNT=${N_GPUS}
 yes "" | $PYTHON_BIN_PATH configure.py
 
 # Run bazel test command. Double test timeouts to avoid flakes.
-bazel test --config=rocm --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test -k \
-    --test_lang_filters=py --jobs=${N_JOBS} --test_timeout 600,900,2400,7200 \
-    --build_tests_only --test_output=errors --local_test_jobs=${TF_GPU_COUNT} --config=opt \
-    --test_sharding_strategy=disabled \
-    --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute -- \
-    //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/...
+bazel test \
+    --config=rocm \
+    -k \
+    --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
+    --test_lang_filters=py \
+    --jobs=${N_JOBS} \
+    --local_test_jobs=${TF_GPU_COUNT} \
+    --test_timeout 600,900,2400,7200 \
+    --build_tests_only \
+    --test_output=errors \
+    --test_sharding_strategy=disabled \
+    --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \
+    -- \
+    //tensorflow/... \
+    -//tensorflow/compiler/...
diff --git a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
index 72924fb1c44..9288b7b3582 100755
--- a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
+++ b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh
@@ -27,6 +27,7 @@ echo ""
 
 # Run configure.
 export PYTHON_BIN_PATH=`which python3`
+export CC_OPT_FLAGS='-mavx'
 
 export TF_NEED_ROCM=1
 export TF_GPU_COUNT=${N_GPUS}
@@ -34,12 +35,50 @@ export TF_GPU_COUNT=${N_GPUS}
 yes "" | $PYTHON_BIN_PATH configure.py
 echo "build --distinct_host_configuration=false" >> .tf_configure.bazelrc
 
-bazel clean
 # Run bazel test command. Double test timeouts to avoid flakes.
-bazel test --config=rocm --test_tag_filters=-no_gpu,-benchmark-test,-no_oss,-no_rocm -k \
-    --jobs=${N_JOBS} --test_timeout 600,900,2400,7200 \
-    --build_tests_only --test_output=errors --local_test_jobs=${TF_GPU_COUNT} \
-    --test_sharding_strategy=disabled \
-    --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \
-    --config=xla -- \
-    //tensorflow/compiler/...
+bazel test \
+    --config=rocm \
+    --config=xla \
+    -k \
+    --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
+    --jobs=${N_JOBS} \
+    --local_test_jobs=${TF_GPU_COUNT} \
+    --test_timeout 600,900,2400,7200 \
+    --build_tests_only \
+    --test_output=errors \
+    --test_sharding_strategy=disabled \
+    --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \
+    -- \
+    //tensorflow/compiler/... \
+    -//tensorflow/compiler/tests:dense_layer_test \
+    -//tensorflow/compiler/tests:dense_layer_test_gpu \
+    -//tensorflow/compiler/tests:jit_test \
+    -//tensorflow/compiler/tests:jit_test_gpu \
+    -//tensorflow/compiler/tests:matrix_triangular_solve_op_test \
+    -//tensorflow/compiler/tests:tensor_array_ops_test \
+    -//tensorflow/compiler/tests:xla_ops_test \
+    -//tensorflow/compiler/xla/client/lib:svd_test \
+    -//tensorflow/compiler/tests:lstm_test \
+&& bazel test \
+    --config=rocm \
+    --config=xla \
+    -k \
+    --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
+    --jobs=${N_JOBS} \
+    --local_test_jobs=${TF_GPU_COUNT} \
+    --test_timeout 600,900,2400,7200 \
+    --build_tests_only \
+    --test_output=errors \
+    --test_sharding_strategy=disabled \
+    --test_env=TF2_BEHAVIOR=0 \
+    --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \
+    -- \
+    //tensorflow/compiler/tests:dense_layer_test \
+    //tensorflow/compiler/tests:dense_layer_test_gpu \
+    //tensorflow/compiler/tests:jit_test \
+    //tensorflow/compiler/tests:jit_test_gpu \
+    //tensorflow/compiler/tests:matrix_triangular_solve_op_test \
+    //tensorflow/compiler/tests:tensor_array_ops_test \
+    //tensorflow/compiler/tests:xla_ops_test \
+    //tensorflow/compiler/xla/client/lib:svd_test \
+    //tensorflow/compiler/tests:lstm_test