commit 0764c90855

Merge pull request #42689 from ROCmSoftwarePlatform:google_upstream_switch_to_rocm37

PiperOrigin-RevId: 333874384
Change-Id: Ic5e8b2394120d907e049f71cf5c00029ec75ad9f
@@ -230,9 +230,9 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimitAndNoPriority) {
 TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
   {
 #if TENSORFLOW_USE_ROCM
-    // Priority outside the range (0, 2) for AMD GPUs
+    // Priority outside the range (-1, 1) for AMD GPUs
     SessionOptions opts =
-        MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 2}});
+        MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-2, 1}});
 #else
     // Priority outside the range (-2, 0) for NVidia GPUs
     SessionOptions opts =
@@ -245,7 +245,7 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
 #if TENSORFLOW_USE_ROCM
     ExpectErrorMessageSubstr(
         status,
-        "Priority -1 is outside the range of supported priorities [0,2] for"
+        "Priority -2 is outside the range of supported priorities [-1,1] for"
         " virtual device 0 on GPU# 0");
 #else
     ExpectErrorMessageSubstr(
@@ -254,8 +254,9 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
   }
   {
 #if TENSORFLOW_USE_ROCM
-    // Priority outside the range (0, 2) for AMD GPUs
-    SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 3}});
+    // Priority outside the range (-1, 1) for AMD GPUs
+    SessionOptions opts =
+        MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 2}});
 #else
     // Priority outside the range (-2, 0) for NVidia GPUs
     SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}});
@@ -267,7 +268,7 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
 #if TENSORFLOW_USE_ROCM
     ExpectErrorMessageSubstr(
         status,
-        "Priority 3 is outside the range of supported priorities [0,2] for"
+        "Priority 2 is outside the range of supported priorities [-1,1] for"
         " virtual device 0 on GPU# 0");
 #else
     ExpectErrorMessageSubstr(
@@ -288,26 +289,17 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimitAndPriority) {
 }

 TEST_F(GPUDeviceTest, MultipleVirtualDevices) {
-#if TENSORFLOW_USE_ROCM
-  // Valid range for priority values on AMD GPUs in (0,2)
-  SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}});
-#else
+  // Valid range for priority values on AMD GPUs in (-1,1)
   // Valid range for priority values on NVidia GPUs in (-2, 0)
   SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, -1}});
-#endif
   std::vector<std::unique_ptr<Device>> devices;
   TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
       opts, kDeviceNamePrefix, &devices));
   EXPECT_EQ(2, devices.size());
   EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
   EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit());
-#if TENSORFLOW_USE_ROCM
   EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
-  EXPECT_EQ(1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
-#else
-  EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
   EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
-#endif
   ASSERT_EQ(1, devices[0]->attributes().locality().links().link_size());
   ASSERT_EQ(1, devices[1]->attributes().locality().links().link_size());
   EXPECT_EQ(1, devices[0]->attributes().locality().links().link(0).device_id());
@@ -339,27 +331,18 @@ TEST_F(GPUDeviceTest, MultipleVirtualDevicesWithPriority) {
   }
   {
     // Multile virtual devices with matching priority.
-#if TENSORFLOW_USE_ROCM
-    // Valid range for priority values on AMD GPUs in (0,2)
-    SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{2, 1}});
-#else
+    // Valid range for priority values on AMD GPUs in (-1,1)
     // Valid range for priority values on NVidia GPUs in (-2, 0)
     SessionOptions opts =
         MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 0}});
-#endif
     std::vector<std::unique_ptr<Device>> devices;
     TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
         opts, kDeviceNamePrefix, &devices));
     EXPECT_EQ(2, devices.size());
     EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
     EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit());
-#if TENSORFLOW_USE_ROCM
-    EXPECT_EQ(2, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
-    EXPECT_EQ(1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
-#else
     EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
     EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
-#endif
   }
 }
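For context: the new [-1,1] bound for AMD GPUs in the tests above matches what the HIP runtime itself reports for stream priorities. A minimal standalone sketch (an illustration only, assuming a hipcc toolchain and one visible AMD GPU) that queries the range these tests validate against:

// priority_range.cc -- build with: hipcc priority_range.cc -o priority_range
#include <hip/hip_runtime.h>
#include <cstdio>

int main() {
  int least = 0, greatest = 0;
  // Returns the bounds of the stream priority range for the current device;
  // as with CUDA, numerically lower values mean higher priority.
  if (hipDeviceGetStreamPriorityRange(&least, &greatest) != hipSuccess) {
    std::fprintf(stderr, "failed to query stream priority range\n");
    return 1;
  }
  std::printf("stream priorities: least=%d greatest=%d\n", least, greatest);
  return 0;
}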
@@ -27,15 +27,8 @@ limitations under the License.
 #define EIGEN_USE_GPU
 #include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/kernels/cwise_ops.h"
+#include "tensorflow/core/platform/types.h"

-#include "tensorflow/core/platform/logging.h"
-
-#ifdef __HIP_DEVICE_COMPILE__
-// Provide ldexp float overload for HIP, it's missing in their headers.
-__device__ inline float ldexp(float x, int exp) { return ldexpf(x, exp); }
-#endif
-
-#include "tensorflow/core/platform/types.h"
 namespace tensorflow {
 namespace functor {
@@ -25,11 +25,6 @@ limitations under the License.
 #include "tensorflow/core/kernels/eigen_contraction_kernel.h"
 #endif

-#ifdef __HIP_DEVICE_COMPILE__
-// Provide ldexp float overload for HIP, it's missing in their headers.
-__device__ inline float ldexp(float x, int exp) { return ldexpf(x, exp); }
-#endif
-
 namespace tensorflow {
 class OpKernelContext;
 namespace functor {
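For context, the deleted workaround only mattered in HIP device code; a small standalone kernel (a sketch, assuming hipcc and one visible GPU, and assuming the ROCm 3.7 math headers now supply the float overload themselves) exercising the ldexpf the removed comment refers to:

// ldexp_check.cc -- build with: hipcc ldexp_check.cc -o ldexp_check
#include <hip/hip_runtime.h>
#include <cstdio>

__global__ void Scale(const float* in, float* out, int e) {
  // ldexpf(x, e) computes x * 2^e in device code.
  out[0] = ldexpf(in[0], e);
}

int main() {
  float h_in = 3.0f, h_out = 0.0f;
  float *d_in = nullptr, *d_out = nullptr;
  hipMalloc(reinterpret_cast<void**>(&d_in), sizeof(float));
  hipMalloc(reinterpret_cast<void**>(&d_out), sizeof(float));
  hipMemcpy(d_in, &h_in, sizeof(float), hipMemcpyHostToDevice);
  hipLaunchKernelGGL(Scale, dim3(1), dim3(1), 0, 0, d_in, d_out, 2);
  hipMemcpy(&h_out, d_out, sizeof(float), hipMemcpyDeviceToHost);
  std::printf("ldexpf(3.0f, 2) = %g\n", h_out);  // expect 12
  hipFree(d_in);
  hipFree(d_out);
  return 0;
}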
@@ -5414,6 +5414,7 @@ cuda_py_test(
    python_version = "PY3",
    shard_count = 10,
    tags = [
+        "no_rocm",
        "no_windows_gpu",
        "noasan",  # b/159332048
        "nomsan",  # b/148630708
@@ -157,6 +157,7 @@ cuda_py_test(
    size = "medium",
    srcs = ["adadelta_test.py"],
    shard_count = 4,
+    tags = ["no_rocm"],
    tfrt_enabled = True,
    deps = [
        ":optimizer_v2",
@@ -298,6 +299,7 @@ cuda_py_test(
    size = "medium",
    srcs = ["rmsprop_test.py"],
    shard_count = 2,
+    tags = ["no_rocm"],
    tfrt_enabled = True,
    deps = [
        ":optimizer_v2",
@@ -82,6 +82,11 @@ class MathTest(PForTestCase, parameterized.TestCase):
     self._test_unary_cwise_ops(complex_ops, True)

   def test_unary_cwise_real_ops_1(self):
+    if test.is_built_with_rocm():
+      # TODO(rocm):
+      # This fails on ROCm...see JIRA ticket 236756
+      self.skipTest("Fails on ROCM")
+
     real_ops = [
         lambda x: math_ops.acosh(1 + math_ops.square(x)),
         math_ops.abs,
@@ -139,6 +139,11 @@ class RaggedDispatchTest(test_util.TensorFlowTestCase, parameterized.TestCase):
      ]
  )  # pyformat: disable
  def testUnaryElementwiseOp(self, x, op=math_ops.abs, **extra_args):
+    if test_util.IsBuiltWithROCm():
+      # TODO(rocm):
+      # This fails on ROCm...see JIRA ticket 236756
+      self.skipTest('Fails on ROCM')
+
    result = op(x, **extra_args)

    # Run the wrapped op on the dense values, for comparison.
@@ -140,7 +140,7 @@ port::StatusOr<void*> GetHipsparseDsoHandle() {
   return GetDsoHandle("hipsparse", "");
 }

-port::StatusOr<void*> GetHipDsoHandle() { return GetDsoHandle("hip_hcc", ""); }
+port::StatusOr<void*> GetHipDsoHandle() { return GetDsoHandle("amdhip64", ""); }

 }  // namespace DsoLoader
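The rename tracks the HIP runtime shared library shipped by newer ROCm releases: libamdhip64.so replaces the old libhip_hcc.so. A quick illustrative probe (a sketch only, assuming a Linux host with the ROCm library directory on the loader path) for checking which name resolves:

// dso_probe.cc -- build with: g++ dso_probe.cc -ldl -o dso_probe
#include <dlfcn.h>
#include <cstdio>

// Tries to dlopen a library name, roughly the way the DSO loader would.
static bool Probe(const char* name) {
  void* handle = dlopen(name, RTLD_NOW | RTLD_LOCAL);
  std::printf("%-20s -> %s\n", name, handle ? "found" : dlerror());
  if (handle) dlclose(handle);
  return handle != nullptr;
}

int main() {
  Probe("libamdhip64.so");  // HIP runtime name used by newer ROCm releases
  Probe("libhip_hcc.so");   // older HCC-era runtime name, now retired
  return 0;
}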
@@ -113,9 +113,6 @@ string ToString(miopenConvFwdAlgorithm_t algorithm) {
     case miopenConvolutionFwdAlgoImplicitGEMM:
       s = "Implicit GEMM";
       break;
-    case miopenConvolutionFwdAlgoStaticCompiledGEMM:
-      s = "Static Compiled GEMM";
-      break;
   }
   return s;
 }
@@ -182,9 +179,6 @@ string ToString(miopenConvAlgorithm_t algorithm) {
     case miopenConvolutionAlgoImplicitGEMM:
       s = "Implicit GEMM";
       break;
-    case miopenConvolutionAlgoStaticCompiledGEMM:
-      s = "Static Compiled GEMM";
-      break;
   }
   return s;
 }
@@ -3,8 +3,10 @@
 FROM ubuntu:bionic
 MAINTAINER Jeff Poznanovic <jeffrey.poznanovic@amd.com>

-ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/3.3/
-ARG ROCM_PATH=/opt/rocm-3.3.0
+ARG ROCM_DEB_REPO=http://repo.radeon.com/rocm/apt/3.7/
+ARG ROCM_BUILD_NAME=xenial
+ARG ROCM_BUILD_NUM=main
+ARG ROCM_PATH=/opt/rocm-3.7.0

 ENV DEBIAN_FRONTEND noninteractive
 ENV TF_NEED_ROCM 1
@@ -13,8 +15,12 @@ RUN apt update && apt install -y wget software-properties-common

 # Add rocm repository
 RUN apt-get clean all
-RUN wget -qO - $DEB_ROCM_REPO/rocm.gpg.key | apt-key add -
-RUN sh -c "echo deb [arch=amd64] $DEB_ROCM_REPO xenial main > /etc/apt/sources.list.d/rocm.list"
+RUN bin/bash -c 'if [[ $ROCM_DEB_REPO == http://repo.radeon.com/rocm/* ]] ; then \
+      wget -qO - $ROCM_DEB_REPO/rocm.gpg.key | apt-key add -; \
+      echo "deb [arch=amd64] $ROCM_DEB_REPO $ROCM_BUILD_NAME $ROCM_BUILD_NUM" > /etc/apt/sources.list.d/rocm.list; \
+   else \
+      echo "deb [arch=amd64 trusted=yes] $ROCM_DEB_REPO $ROCM_BUILD_NAME $ROCM_BUILD_NUM" > /etc/apt/sources.list.d/rocm.list ; \
+   fi'

 # Install misc pkgs
 RUN apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y \
@@ -18,20 +18,27 @@
 set -e
 set -x

-N_JOBS=$(grep -c ^processor /proc/cpuinfo)
-N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
+TF_TESTS_PER_GPU=1
+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})

 echo ""
-echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
 echo ""

+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
+ROCM_INSTALL_DIR=/opt/rocm-3.7.0
+if [[ -n $1 ]]; then
+    ROCM_INSTALL_DIR=$1
+fi
+
 # Run configure.
 export PYTHON_BIN_PATH=`which python3`
 export CC_OPT_FLAGS='-mavx'

 export TF_NEED_ROCM=1
-export ROCM_PATH=/opt/rocm-3.3.0
-export TF_GPU_COUNT=${N_GPUS}
+export ROCM_PATH=$ROCM_INSTALL_DIR

 yes "" | $PYTHON_BIN_PATH configure.py
@@ -39,15 +46,17 @@ yes "" | $PYTHON_BIN_PATH configure.py
 bazel test \
     --config=rocm \
     -k \
-    --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
+    --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-multi_gpu,-v1only \
     --test_lang_filters=cc \
-    --jobs=${N_JOBS} \
-    --local_test_jobs=${TF_GPU_COUNT}\
+    --jobs=${N_BUILD_JOBS} \
+    --local_test_jobs=${N_TEST_JOBS} \
+    --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
+    --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
     --test_timeout 600,900,2400,7200 \
     --build_tests_only \
     --test_output=errors \
     --test_sharding_strategy=disabled \
-    --test_size_filters=small,medium \
+    --test_size_filters=small,medium,large \
     --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \
     -- \
     //tensorflow/... \
@@ -59,11 +68,14 @@ bazel test \
     --config=rocm \
     -k \
     --test_tag_filters=gpu \
-    --jobs=${N_JOBS} \
-    --local_test_jobs=1 \
+    --jobs=${N_BUILD_JOBS} \
+    --local_test_jobs=${N_TEST_JOBS} \
+    --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
+    --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
     --test_timeout 600,900,2400,7200 \
     --build_tests_only \
     --test_output=errors \
     --test_sharding_strategy=disabled \
     --test_size_filters=small,medium,large \
     -- \
     //tensorflow/core/nccl:nccl_manager_test
@@ -18,20 +18,27 @@
 set -e
 set -x

-N_JOBS=$(grep -c ^processor /proc/cpuinfo)
-N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
+TF_TESTS_PER_GPU=1
+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})

 echo ""
-echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
 echo ""

+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
+ROCM_INSTALL_DIR=/opt/rocm-3.7.0
+if [[ -n $1 ]]; then
+    ROCM_INSTALL_DIR=$1
+fi
+
 # Run configure.
 export PYTHON_BIN_PATH=`which python3`
 export CC_OPT_FLAGS='-mavx'

 export TF_NEED_ROCM=1
-export ROCM_PATH=/opt/rocm-3.3.0
-export TF_GPU_COUNT=${N_GPUS}
+export ROCM_PATH=$ROCM_INSTALL_DIR

 yes "" | $PYTHON_BIN_PATH configure.py
@@ -40,8 +47,10 @@ bazel test \
     --config=rocm \
     -k \
     --test_tag_filters=gpu,-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-    --jobs=${N_JOBS} \
-    --local_test_jobs=${TF_GPU_COUNT} \
+    --jobs=${N_BUILD_JOBS} \
+    --local_test_jobs=${N_TEST_JOBS} \
+    --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
+    --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
     --test_timeout 600,900,2400,7200 \
     --test_output=errors \
     --test_sharding_strategy=disabled \
@@ -60,8 +69,8 @@ bazel test \
     --test_tag_filters=gpu \
     --test_timeout 600,900,2400,7200 \
     --test_output=errors \
-    --jobs=${N_JOBS} \
-    --local_test_jobs=1 \
+    --jobs=${N_BUILD_JOBS} \
+    --local_test_jobs=${N_TEST_JOBS} \
     --test_sharding_strategy=disabled \
     -- \
     //tensorflow/core/nccl:nccl_manager_test
@@ -18,20 +18,27 @@
 set -e
 set -x

-N_JOBS=$(grep -c ^processor /proc/cpuinfo)
-N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
+TF_TESTS_PER_GPU=1
+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})

 echo ""
-echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
 echo ""

+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
+ROCM_INSTALL_DIR=/opt/rocm-3.7.0
+if [[ -n $1 ]]; then
+    ROCM_INSTALL_DIR=$1
+fi
+
 # Run configure.
 export PYTHON_BIN_PATH=`which python3`
 export CC_OPT_FLAGS='-mavx'

 export TF_NEED_ROCM=1
-export ROCM_PATH=/opt/rocm-3.3.0
-export TF_GPU_COUNT=${N_GPUS}
+export ROCM_PATH=$ROCM_INSTALL_DIR

 yes "" | $PYTHON_BIN_PATH configure.py
@@ -41,8 +48,10 @@ bazel test \
     -k \
     --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
     --test_lang_filters=py \
-    --jobs=${N_JOBS} \
-    --local_test_jobs=${TF_GPU_COUNT} \
+    --jobs=${N_BUILD_JOBS} \
+    --local_test_jobs=${N_TEST_JOBS} \
+    --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
+    --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
     --test_timeout 600,900,2400,7200 \
     --build_tests_only \
     --test_output=errors \
@@ -18,20 +18,27 @@
 set -e
 set -x

-N_JOBS=$(grep -c ^processor /proc/cpuinfo)
-N_GPUS=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
+TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
+TF_TESTS_PER_GPU=1
+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})

 echo ""
-echo "Bazel will use ${N_JOBS} concurrent build job(s) and ${N_GPUS} concurrent test job(s)."
+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
 echo ""

+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
+ROCM_INSTALL_DIR=/opt/rocm-3.7.0
+if [[ -n $1 ]]; then
+    ROCM_INSTALL_DIR=$1
+fi
+
 # Run configure.
 export PYTHON_BIN_PATH=`which python3`
 export CC_OPT_FLAGS='-mavx'

 export TF_NEED_ROCM=1
-export ROCM_PATH=/opt/rocm-3.3.0
-export TF_GPU_COUNT=${N_GPUS}
+export ROCM_PATH=$ROCM_INSTALL_DIR

 yes "" | $PYTHON_BIN_PATH configure.py
 echo "build --distinct_host_configuration=false" >> .tf_configure.bazelrc
@@ -41,9 +48,11 @@ bazel test \
     --config=rocm \
     --config=xla \
     -k \
-    --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-    --jobs=${N_JOBS} \
-    --local_test_jobs=${TF_GPU_COUNT} \
+    --test_tag_filters=-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
+    --jobs=${N_BUILD_JOBS} \
+    --local_test_jobs=${N_TEST_JOBS} \
+    --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
+    --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
     --test_timeout 600,900,2400,7200 \
     --build_tests_only \
     --test_output=errors \
@@ -65,9 +74,11 @@ bazel test \
     --config=rocm \
     --config=xla \
     -k \
-    --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
-    --jobs=${N_JOBS} \
-    --local_test_jobs=${TF_GPU_COUNT} \
+    --test_tag_filters=-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-v1only \
+    --jobs=${N_BUILD_JOBS} \
+    --local_test_jobs=${N_TEST_JOBS} \
+    --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
+    --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
     --test_timeout 600,900,2400,7200 \
     --build_tests_only \
     --test_output=errors \
@@ -34,8 +34,6 @@ HIPCC_ENV = '%{hipcc_env}'
 HIPCC_IS_HIPCLANG = '%{hipcc_is_hipclang}'=="True"
 HIP_RUNTIME_PATH = '%{hip_runtime_path}'
 HIP_RUNTIME_LIBRARY = '%{hip_runtime_library}'
-HCC_RUNTIME_PATH = '%{hcc_runtime_path}'
-HCC_RUNTIME_LIBRARY = '%{hcc_runtime_library}'
 ROCR_RUNTIME_PATH = '%{rocr_runtime_path}'
 ROCR_RUNTIME_LIBRARY = '%{rocr_runtime_library}'
 VERBOSE = '%{crosstool_verbose}'=='1'
@@ -267,11 +265,6 @@ def main():
   gpu_linker_flags.append('-L' + ROCR_RUNTIME_PATH)
   gpu_linker_flags.append('-Wl,-rpath=' + ROCR_RUNTIME_PATH)
   gpu_linker_flags.append('-l' + ROCR_RUNTIME_LIBRARY)
-  # do not link with HCC runtime library in case hip-clang toolchain is used
-  if not HIPCC_IS_HIPCLANG:
-    gpu_linker_flags.append('-L' + HCC_RUNTIME_PATH)
-    gpu_linker_flags.append('-Wl,-rpath=' + HCC_RUNTIME_PATH)
-    gpu_linker_flags.append('-l' + HCC_RUNTIME_LIBRARY)
   gpu_linker_flags.append('-L' + HIP_RUNTIME_PATH)
   gpu_linker_flags.append('-Wl,-rpath=' + HIP_RUNTIME_PATH)
   gpu_linker_flags.append('-l' + HIP_RUNTIME_LIBRARY)
third_party/gpus/rocm_configure.bzl
@@ -390,7 +390,7 @@ def _find_libs(repository_ctx, rocm_config, bash_bin):
    libs_paths = [
        (name, _rocm_lib_paths(repository_ctx, name, path))
        for name, path in [
-            ("hip_hcc", rocm_config.rocm_toolkit_path + "/hip"),
+            ("amdhip64", rocm_config.rocm_toolkit_path + "/hip"),
            ("rocblas", rocm_config.rocm_toolkit_path + "/rocblas"),
            ("rocfft", rocm_config.rocm_toolkit_path + "/rocfft"),
            ("hiprand", rocm_config.rocm_toolkit_path + "/hiprand"),
@@ -646,7 +646,7 @@ def _create_local_rocm_repository(repository_ctx):
        "rocm/BUILD",
        tpl_paths["rocm:BUILD"],
        {
-            "%{hip_lib}": rocm_libs["hip_hcc"].file_name,
+            "%{hip_lib}": rocm_libs["amdhip64"].file_name,
            "%{rocblas_lib}": rocm_libs["rocblas"].file_name,
            "%{rocfft_lib}": rocm_libs["rocfft"].file_name,
            "%{hiprand_lib}": rocm_libs["hiprand"].file_name,
@@ -733,9 +733,7 @@ def _create_local_rocm_repository(repository_ctx):
            "%{rocr_runtime_path}": rocm_config.rocm_toolkit_path + "/lib",
            "%{rocr_runtime_library}": "hsa-runtime64",
            "%{hip_runtime_path}": rocm_config.rocm_toolkit_path + "/hip/lib",
-            "%{hip_runtime_library}": "hip_hcc",
-            "%{hcc_runtime_path}": rocm_config.rocm_toolkit_path + "/hcc/lib",
-            "%{hcc_runtime_library}": "mcwamp",
+            "%{hip_runtime_library}": "amdhip64",
            "%{crosstool_verbose}": _crosstool_verbose(repository_ctx),
            "%{gcc_host_compiler_path}": str(cc),
        },
@@ -12,6 +12,6 @@ container_digests = {
    "cuda10.1-cudnn7-ubuntu16.04-manylinux2010-multipython": "sha256:3f890a951c81a201d60d0161a56ce628a90323be0c7f795550caa37f6f41a85c",
    "cuda10.1-cudnn7-ubuntu18.04-manylinux2010-multipython": "sha256:bd7666d1ef49b2b2e2a64981f1c9234deeccdb0d5198b30ff4289c3dfcffedbf",
    "cuda11.0-cudnn8-ubuntu18.04-manylinux2010-multipython": "sha256:b52edb4e35c780334ba417b008927722ae668847715a1624e9b2984e99c05338",
-    "rocm-ubuntu18.04-manylinux2010-multipython": "sha256:ac52a60d12d0c9f81e558782b5431127b93bb1a13dab7294b3a5b3de91173019",
+    "rocm-ubuntu18.04-manylinux2010-multipython": "sha256:8c6ba5a831c23906716cc9e9c201081f2b5632e3bf3cbc0207da0ddbef18d525",
    "windows-1803": "sha256:f109576c7c0c8a1783ff22b666e8923b52dbbe7933f69a1c7a7275202c304a12",
 }