Enabling unit tests for grappler on ROCm

Preventing grappler from fusing convolution with other ops on ROCm
(since ROCm does not support fused convolution at this time)
This commit is contained in:
Eugene Kuznetsov 2020-01-15 17:18:17 -08:00
parent 432ef2bee2
commit 6aa431f8c9
10 changed files with 231 additions and 195 deletions

View File

@ -40,7 +40,12 @@ class SingleMachineTest : public ::testing::Test {
// Provision a single machine with 3 cpu cores, and a short timeout of 5 // Provision a single machine with 3 cpu cores, and a short timeout of 5
// seconds: since there isn't much work to process a test graph that should // seconds: since there isn't much work to process a test graph that should
// be plenty. // be plenty.
#if TENSORFLOW_USE_ROCM
// ROCm takes longer to start up
int timeout_s = 10;
#else
int timeout_s = 5; int timeout_s = 5;
#endif
#ifdef THREAD_SANITIZER #ifdef THREAD_SANITIZER
timeout_s *= 5; timeout_s *= 5;
#endif #endif
@ -348,10 +353,11 @@ static void RunInfiniteTFLoop() {
} }
TEST_F(SingleMachineTest, InfiniteLoops) { TEST_F(SingleMachineTest, InfiniteLoops) {
#if !(TENSORFLOW_USE_ROCM) // fails with ROCm (investigate)
// The RunInfiniteTFLoop function creates its own cluster. // The RunInfiniteTFLoop function creates its own cluster.
TF_CHECK_OK(cluster_->Shutdown()); TF_CHECK_OK(cluster_->Shutdown());
EXPECT_EXIT(RunInfiniteTFLoop(), ::testing::ExitedWithCode(0), ".*"); EXPECT_EXIT(RunInfiniteTFLoop(), ::testing::ExitedWithCode(0), ".*");
#endif
} }
TEST_F(SingleMachineTest, InitializationMemory) { TEST_F(SingleMachineTest, InitializationMemory) {

View File

@ -19,7 +19,7 @@ limitations under the License.
#include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/byte_order.h"
#include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/cpu_info.h"
#if GOOGLE_CUDA #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#include "tensorflow/core/common_runtime/gpu/gpu_init.h" #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
#include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/stream_executor.h"
#endif // GOOGLE_CUDA #endif // GOOGLE_CUDA
@ -30,12 +30,22 @@ namespace grappler {
int GetNumAvailableGPUs( int GetNumAvailableGPUs(
const std::pair<int, int>& min_cuda_compute_capability) { const std::pair<int, int>& min_cuda_compute_capability) {
int num_eligible_gpus = 0; int num_eligible_gpus = 0;
#if GOOGLE_CUDA
#if TENSORFLOW_USE_ROCM
if(min_cuda_compute_capability.first!=0 ||
min_cuda_compute_capability.second!=0) {
LOG(ERROR) << "GetNumAvailableGPUs() should receive zero "
"min_cuda_compute_capability";
return 0;
}
#endif
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
if (ValidateGPUMachineManager().ok()) { if (ValidateGPUMachineManager().ok()) {
se::Platform* gpu_manager = GPUMachineManager(); se::Platform* gpu_manager = GPUMachineManager();
if (gpu_manager != nullptr) { if (gpu_manager != nullptr) {
int num_gpus = gpu_manager->VisibleDeviceCount(); int num_gpus = gpu_manager->VisibleDeviceCount();
for (int i = 0; i < num_gpus; i++) { for (int i = 0; i < num_gpus; i++) {
#if GOOGLE_CUDA
auto desc_status = gpu_manager->DescriptionForDevice(i); auto desc_status = gpu_manager->DescriptionForDevice(i);
if (desc_status.ok()) { if (desc_status.ok()) {
auto desc = desc_status.ConsumeValueOrDie(); auto desc = desc_status.ConsumeValueOrDie();
@ -49,25 +59,33 @@ int GetNumAvailableGPUs(
num_eligible_gpus++; num_eligible_gpus++;
} }
} }
#else
num_eligible_gpus++;
#endif
} }
} }
} }
#if GOOGLE_CUDA
LOG(INFO) LOG(INFO)
<< "Number of eligible GPUs (core count >= 8, compute capability >= " << "Number of eligible GPUs (core count >= 8, compute capability >= "
<< min_cuda_compute_capability.first << "." << min_cuda_compute_capability.first << "."
<< min_cuda_compute_capability.second << "): " << num_eligible_gpus; << min_cuda_compute_capability.second << "): " << num_eligible_gpus;
#else #else
LOG(INFO) << "Number of eligible GPUs: " << num_eligible_gpus;
#endif
#else // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
LOG(INFO) LOG(INFO)
<< "Number of eligible GPUs (core count >= 8, compute capability >= " << "Number of eligible GPUs (core count >= 8, compute capability >= "
<< min_cuda_compute_capability.first << "." << min_cuda_compute_capability.first << "."
<< min_cuda_compute_capability.second << "): " << num_eligible_gpus << min_cuda_compute_capability.second << "): " << num_eligible_gpus
<< " (Note: TensorFlow was not compiled with CUDA support)"; << " (Note: TensorFlow was not compiled with CUDA or ROCm support)";
#endif // GOOGLE_CUDA #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
return num_eligible_gpus; return num_eligible_gpus;
} }
int64 AvailableGPUMemory(int gpu_id) { int64 AvailableGPUMemory(int gpu_id) {
#if GOOGLE_CUDA #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// Look up the device, to see its attributes. // Look up the device, to see its attributes.
se::Platform* gpu_platform = GPUMachineManager(); se::Platform* gpu_platform = GPUMachineManager();
CHECK_LT(gpu_id, gpu_platform->VisibleDeviceCount()); CHECK_LT(gpu_id, gpu_platform->VisibleDeviceCount());

View File

@ -44,7 +44,11 @@ namespace tensorflow {
namespace grappler { namespace grappler {
namespace { namespace {
#if GOOGLE_CUDA
const std::pair<int, int> kMinGPUArch = {7, 0}; const std::pair<int, int> kMinGPUArch = {7, 0};
#else
const std::pair<int, int> kMinGPUArch = {0, 0};
#endif
const char kSuffix[] = "AutoMixedPrecision"; const char kSuffix[] = "AutoMixedPrecision";
const char kCastToFp16[] = "CastToFp16"; const char kCastToFp16[] = "CastToFp16";

View File

@ -17,7 +17,7 @@ limitations under the License.
// otherwise the optimizer will not turn clearlist nodes to float16. When // otherwise the optimizer will not turn clearlist nodes to float16. When
// looking at clearlist nodes, this optimizer checks if the nodes have a float16 // looking at clearlist nodes, this optimizer checks if the nodes have a float16
// GPU OpKernel, but without CUDA there are no GPU OpKernels at all. // GPU OpKernel, but without CUDA there are no GPU OpKernels at all.
#if GOOGLE_CUDA #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#include "tensorflow/core/grappler/optimizers/auto_mixed_precision.h" #include "tensorflow/core/grappler/optimizers/auto_mixed_precision.h"
@ -76,16 +76,20 @@ class AutoMixedPrecisionTest : public GrapplerTest {
void SetUp() override { void SetUp() override {
int num_gpus = GetNumAvailableGPUs(); int num_gpus = GetNumAvailableGPUs();
// If GPUs are available, require that they all satisfy the min arch. // If GPUs are available, require that they all satisfy the min arch.
gpu_available_ = gpu_available_ = (num_gpus > 0);
num_gpus > 0 && num_gpus == GetNumAvailableGPUs(kMinGPUArch); #if GOOGLE_CUDA
gpu_available_ = gpu_available_ &&
(num_gpus == GetNumAvailableGPUs(kMinGPUArch));
#endif
if (gpu_available_) { if (gpu_available_) {
virtual_cluster_.reset(new SingleMachine(/* timeout_s = */ 10, 1, 1)); virtual_cluster_.reset(new SingleMachine(/* timeout_s = */ 10, 1, 1));
} else { } else {
DeviceProperties device_properties; DeviceProperties device_properties;
device_properties.set_type("GPU"); device_properties.set_type("GPU");
#if GOOGLE_CUDA
device_properties.mutable_environment()->insert({"architecture", "7"}); device_properties.mutable_environment()->insert({"architecture", "7"});
device_properties.mutable_environment()->insert({"cuda", "9010"}); device_properties.mutable_environment()->insert({"cuda", "9010"});
#endif
virtual_cluster_.reset( virtual_cluster_.reset(
new VirtualCluster({{"/GPU:1", device_properties}})); new VirtualCluster({{"/GPU:1", device_properties}}));
} }
@ -1078,4 +1082,4 @@ TEST_F(AutoMixedPrecisionTest, TanhOp) {
} // namespace grappler } // namespace grappler
} // namespace tensorflow } // namespace tensorflow
#endif // GOOGLE_CUDA #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

View File

@ -179,9 +179,9 @@ void VerifyDataFormatAttributeMatch(const utils::NodeView* node,
} }
TEST_F(GenericLayoutOptimizerTest, OptimizeSimpleConv2DGraph) { TEST_F(GenericLayoutOptimizerTest, OptimizeSimpleConv2DGraph) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// A simple graph contains 1 "NHWC" Conv2D node, 2 input and 1 output nodes. // A simple graph contains 1 "NHWC" Conv2D node, 2 input and 1 output nodes.
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
@ -245,9 +245,9 @@ TEST_F(GenericLayoutOptimizerTest, PreserveFetch) {
} }
TEST_F(GenericLayoutOptimizerTest, EmptyDevice) { TEST_F(GenericLayoutOptimizerTest, EmptyDevice) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !GOOGLE_CUDA || TENSORFLOW_USE_ROCM
tensorflow::Scope s = tensorflow::Scope::NewRootScope(); tensorflow::Scope s = tensorflow::Scope::NewRootScope();
auto conv = SimpleConv2D(&s, 4, 2, "VALID", ""); auto conv = SimpleConv2D(&s, 4, 2, "VALID", "");
Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv}); Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
@ -267,9 +267,9 @@ TEST_F(GenericLayoutOptimizerTest, EmptyDevice) {
} }
TEST_F(GenericLayoutOptimizerTest, GPUDevice) { TEST_F(GenericLayoutOptimizerTest, GPUDevice) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
tensorflow::Scope s = tensorflow::Scope::NewRootScope(); tensorflow::Scope s = tensorflow::Scope::NewRootScope();
auto conv = auto conv =
SimpleConv2D(&s, 4, 2, "VALID", "/job:w/replica:0/task:0/device:GPU:0"); SimpleConv2D(&s, 4, 2, "VALID", "/job:w/replica:0/task:0/device:GPU:0");
@ -290,9 +290,9 @@ TEST_F(GenericLayoutOptimizerTest, GPUDevice) {
} }
TEST_F(GenericLayoutOptimizerTest, CPUDevice) { TEST_F(GenericLayoutOptimizerTest, CPUDevice) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
tensorflow::Scope s = tensorflow::Scope::NewRootScope(); tensorflow::Scope s = tensorflow::Scope::NewRootScope();
auto conv = SimpleConv2D(&s, 4, 2, "VALID", "/CPU:0"); auto conv = SimpleConv2D(&s, 4, 2, "VALID", "/CPU:0");
Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv}); Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
@ -312,9 +312,9 @@ TEST_F(GenericLayoutOptimizerTest, CPUDevice) {
} }
TEST_F(GenericLayoutOptimizerTest, Connectivity) { TEST_F(GenericLayoutOptimizerTest, Connectivity) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto conv = SimpleConv2D(&scope, 4, 2, "VALID", "/device:GPU:0"); auto conv = SimpleConv2D(&scope, 4, 2, "VALID", "/device:GPU:0");
auto i1 = ops::Identity(scope.WithOpName("i1"), conv); auto i1 = ops::Identity(scope.WithOpName("i1"), conv);
@ -349,9 +349,9 @@ TEST_F(GenericLayoutOptimizerTest, Connectivity) {
} }
TEST_F(GenericLayoutOptimizerTest, Conv2DBackpropInputNonConstInputSizes) { TEST_F(GenericLayoutOptimizerTest, Conv2DBackpropInputNonConstInputSizes) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
Scope s = Scope::NewRootScope(); Scope s = Scope::NewRootScope();
auto conv = SimpleConv2DBackpropInput(&s, 7, 2, "SAME", /*dilated=*/false); auto conv = SimpleConv2DBackpropInput(&s, 7, 2, "SAME", /*dilated=*/false);
Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv}); Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
@ -381,9 +381,9 @@ TEST_F(GenericLayoutOptimizerTest, Conv2DBackpropInputNonConstInputSizes) {
} }
TEST_F(GenericLayoutOptimizerTest, Conv2DDataFormatVecPermuteCollapse) { TEST_F(GenericLayoutOptimizerTest, Conv2DDataFormatVecPermuteCollapse) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
Scope scope = Scope::NewRootScope().WithDevice("/device:GPU:0"); Scope scope = Scope::NewRootScope().WithDevice("/device:GPU:0");
auto conv = SimpleConv2D(&scope, 4, 2, "VALID", "/device:GPU:0"); auto conv = SimpleConv2D(&scope, 4, 2, "VALID", "/device:GPU:0");
auto shape = ops::Shape(scope.WithOpName("shape"), conv); auto shape = ops::Shape(scope.WithOpName("shape"), conv);
@ -434,9 +434,9 @@ TEST_F(GenericLayoutOptimizerTest, Conv2DDataFormatVecPermuteCollapse) {
} }
TEST_F(GenericLayoutOptimizerTest, DoNotPruneNonAddedCancellableTransposes) { TEST_F(GenericLayoutOptimizerTest, DoNotPruneNonAddedCancellableTransposes) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
{ {
Scope scope = Scope::NewRootScope().WithDevice("/device:GPU:0"); Scope scope = Scope::NewRootScope().WithDevice("/device:GPU:0");

View File

@ -374,9 +374,9 @@ class TransposerTest : public ::testing::Test {
}; };
TEST_F(TransposerTest, CreateConstPermNode) { TEST_F(TransposerTest, CreateConstPermNode) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
TransposeContext context; TransposeContext context;
TF_ASSERT_OK(CreateSimpleConv2DGraph(&item.graph)); TF_ASSERT_OK(CreateSimpleConv2DGraph(&item.graph));
@ -418,9 +418,9 @@ TensorShapeProto MakeTensorShapeFromDimensions(absl::Span<const int> dims) {
} }
TEST_F(TransposerTest, CreateTransposeNode) { TEST_F(TransposerTest, CreateTransposeNode) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
TransposeContext context; TransposeContext context;
TF_ASSERT_OK(CreateSimpleConv2DGraph(&item.graph)); TF_ASSERT_OK(CreateSimpleConv2DGraph(&item.graph));
@ -458,9 +458,9 @@ TEST_F(TransposerTest, CreateTransposeNode) {
} }
TEST_F(TransposerTest, UpdateNode) { TEST_F(TransposerTest, UpdateNode) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
TransposeContext context; TransposeContext context;
TF_ASSERT_OK(CreateSimpleConv2DGraph(&item.graph)); TF_ASSERT_OK(CreateSimpleConv2DGraph(&item.graph));
@ -489,9 +489,9 @@ AttrValue_ListValue MakeAttrValueListValueFromVector(
} }
TEST_F(TransposerTest, UpdateStrides) { TEST_F(TransposerTest, UpdateStrides) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
TransposeContext context; TransposeContext context;
TF_ASSERT_OK(CreateSimpleConv2DGraph(&item.graph)); TF_ASSERT_OK(CreateSimpleConv2DGraph(&item.graph));
@ -527,9 +527,9 @@ TEST_F(TransposerTest, UpdateStrides) {
} }
TEST_F(TransposerTest, UpdateFaninEdgesTranspose) { TEST_F(TransposerTest, UpdateFaninEdgesTranspose) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
TransposeContext context; TransposeContext context;
TF_ASSERT_OK(CreateSimpleFusedBatchNormGrad(&item.graph, true)); TF_ASSERT_OK(CreateSimpleFusedBatchNormGrad(&item.graph, true));
@ -586,9 +586,9 @@ TEST_F(TransposerTest, UpdateFaninEdgesTranspose) {
} }
TEST_F(TransposerTest, UpdateFanoutEdgesTranspose) { TEST_F(TransposerTest, UpdateFanoutEdgesTranspose) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
TransposeContext context; TransposeContext context;
TF_ASSERT_OK(CreateSimpleConv2DGraph(&item.graph)); TF_ASSERT_OK(CreateSimpleConv2DGraph(&item.graph));
@ -639,9 +639,9 @@ TEST_F(TransposerTest, UpdateFanoutEdgesTranspose) {
} }
TEST_F(TransposerTest, DefaultLayoutSensitiveOpTransposerTestFusedBatchNorm) { TEST_F(TransposerTest, DefaultLayoutSensitiveOpTransposerTestFusedBatchNorm) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
// Use FusedBatchNorm for default transposer test // Use FusedBatchNorm for default transposer test
GrapplerItem item; GrapplerItem item;
TransposeContext context; TransposeContext context;
@ -695,9 +695,9 @@ TEST_F(TransposerTest, DefaultLayoutSensitiveOpTransposerTestFusedBatchNorm) {
} }
TEST_F(TransposerTest, DefaultLayoutSensitiveOpTransposerTestConv2D) { TEST_F(TransposerTest, DefaultLayoutSensitiveOpTransposerTestConv2D) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
// Use Conv2D for default transposer test // Use Conv2D for default transposer test
GrapplerItem item; GrapplerItem item;
TransposeContext context; TransposeContext context;
@ -746,9 +746,9 @@ TEST_F(TransposerTest, DefaultLayoutSensitiveOpTransposerTestConv2D) {
} }
TEST_F(TransposerTest, MaxPoolGradTransposerTest) { TEST_F(TransposerTest, MaxPoolGradTransposerTest) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
for (bool use_grad_grad : {false, true}) { for (bool use_grad_grad : {false, true}) {
GrapplerItem item; GrapplerItem item;
TransposeContext context; TransposeContext context;
@ -800,9 +800,9 @@ TEST_F(TransposerTest, MaxPoolGradTransposerTest) {
} }
TEST_F(TransposerTest, BiasAddGradTransposerTest) { TEST_F(TransposerTest, BiasAddGradTransposerTest) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
TransposeContext context; TransposeContext context;
TF_ASSERT_OK(CreateSimpleBiasAddGrad( TF_ASSERT_OK(CreateSimpleBiasAddGrad(
@ -872,9 +872,9 @@ TEST_F(TransposerTest, BiasAddGradTransposerIncorrectInputTest) {
} }
TEST_F(TransposerTest, Conv2DBackpropFilterTransposerTest) { TEST_F(TransposerTest, Conv2DBackpropFilterTransposerTest) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
TransposeContext context; TransposeContext context;
TF_ASSERT_OK(CreateSimpleConv2DBackpropFilter(&item.graph)); TF_ASSERT_OK(CreateSimpleConv2DBackpropFilter(&item.graph));
@ -925,9 +925,9 @@ TEST_F(TransposerTest, Conv2DBackpropFilterTransposerTest) {
} }
TEST_F(TransposerTest, NodeAttributes) { TEST_F(TransposerTest, NodeAttributes) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
TransposeContext context; TransposeContext context;
TF_ASSERT_OK( TF_ASSERT_OK(
@ -966,9 +966,9 @@ TEST_F(TransposerTest, NodeAttributes) {
} }
TEST_F(TransposerTest, Conv2DBackpropInputTransposerTest) { TEST_F(TransposerTest, Conv2DBackpropInputTransposerTest) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
TransposeContext context; TransposeContext context;
TF_ASSERT_OK(CreateSimpleConv2DBackpropInput(&item.graph)); TF_ASSERT_OK(CreateSimpleConv2DBackpropInput(&item.graph));
@ -1024,9 +1024,9 @@ TEST_F(TransposerTest, Conv2DBackpropInputTransposerTest) {
} }
TEST_F(TransposerTest, FusedBatchNormGradTransposerIsTrainingTest) { TEST_F(TransposerTest, FusedBatchNormGradTransposerIsTrainingTest) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
TransposeContext context; TransposeContext context;
TF_ASSERT_OK(CreateSimpleFusedBatchNormGrad(&item.graph, true)); TF_ASSERT_OK(CreateSimpleFusedBatchNormGrad(&item.graph, true));
@ -1159,9 +1159,9 @@ TEST_F(TransposerTest, FusedBatchNormGradTransposerNotTrainingTest) {
} }
TEST_F(TransposerTest, DefaultLayoutAgnosticOpTransposerIdentityTest) { TEST_F(TransposerTest, DefaultLayoutAgnosticOpTransposerIdentityTest) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto conv2d = SimpleConv2D(&scope); auto conv2d = SimpleConv2D(&scope);
@ -1210,9 +1210,9 @@ TEST_F(TransposerTest, DefaultLayoutAgnosticOpTransposerIdentityTest) {
} }
TEST_F(TransposerTest, DefaultLayoutAgnosticOpTransposerIdentityBadInputTest) { TEST_F(TransposerTest, DefaultLayoutAgnosticOpTransposerIdentityBadInputTest) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto conv2d = SimpleConv2D(&scope); auto conv2d = SimpleConv2D(&scope);
@ -1258,9 +1258,9 @@ TEST_F(TransposerTest, DefaultLayoutAgnosticOpTransposerIdentityBadInputTest) {
} }
TEST_F(TransposerTest, AddNTransposerTest) { TEST_F(TransposerTest, AddNTransposerTest) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
TF_ASSERT_OK(CreateSimpleAddN(&item.graph)); TF_ASSERT_OK(CreateSimpleAddN(&item.graph));
TransposeContext context; TransposeContext context;
@ -1373,9 +1373,9 @@ TEST_F(TransposerTest, AddNTransposerNotAfterTransformTest) {
} }
TEST_F(TransposerTest, IdentityNTransposerTest) { TEST_F(TransposerTest, IdentityNTransposerTest) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
TF_ASSERT_OK(CreateSimpleIdentityN(&item.graph)); TF_ASSERT_OK(CreateSimpleIdentityN(&item.graph));
TransposeContext context; TransposeContext context;
@ -1466,9 +1466,9 @@ TEST_F(TransposerTest, IdentityNTransposerTest) {
} }
TEST_F(TransposerTest, MergeTransposerTestMergeBothInputsConvertible) { TEST_F(TransposerTest, MergeTransposerTestMergeBothInputsConvertible) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto conv2d = SimpleConv2D(&scope); auto conv2d = SimpleConv2D(&scope);
@ -1525,9 +1525,9 @@ TEST_F(TransposerTest, MergeTransposerTestMergeBothInputsConvertible) {
} }
TEST_F(TransposerTest, MergeTransposerTestMergeOneInputNotConvertible) { TEST_F(TransposerTest, MergeTransposerTestMergeOneInputNotConvertible) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto conv2d = SimpleConv2D(&scope); auto conv2d = SimpleConv2D(&scope);
@ -1580,9 +1580,9 @@ TEST_F(TransposerTest, MergeTransposerTestMergeOneInputNotConvertible) {
} }
TEST_F(TransposerTest, PadTransposerTest) { TEST_F(TransposerTest, PadTransposerTest) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto conv2d = SimpleConv2D(&scope); auto conv2d = SimpleConv2D(&scope);
@ -1640,9 +1640,9 @@ TEST_F(TransposerTest, PadTransposerTest) {
} }
TEST_F(TransposerTest, SwitchTransposerTest) { TEST_F(TransposerTest, SwitchTransposerTest) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto conv2d = SimpleConv2D(&scope); auto conv2d = SimpleConv2D(&scope);
@ -1705,9 +1705,9 @@ TEST_F(TransposerTest, SwitchTransposerTest) {
} }
TEST_F(TransposerTest, TernaryOpTransposerTest) { TEST_F(TransposerTest, TernaryOpTransposerTest) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto conv2d = SimpleConv2D(&scope); auto conv2d = SimpleConv2D(&scope);
@ -1775,9 +1775,9 @@ TEST_F(TransposerTest, TernaryOpTransposerTest) {
} }
TEST_F(TransposerTest, UnaryGradTransposerTestTanhGrad) { TEST_F(TransposerTest, UnaryGradTransposerTestTanhGrad) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto conv2d = SimpleConv2D(&scope); auto conv2d = SimpleConv2D(&scope);
@ -1839,9 +1839,9 @@ TEST_F(TransposerTest, UnaryGradTransposerTestTanhGrad) {
} }
TEST_F(TransposerTest, UnaryGradTransposerTestRelu6Grad) { TEST_F(TransposerTest, UnaryGradTransposerTestRelu6Grad) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto conv2d = SimpleConv2D(&scope); auto conv2d = SimpleConv2D(&scope);
@ -1903,9 +1903,9 @@ TEST_F(TransposerTest, UnaryGradTransposerTestRelu6Grad) {
} }
TEST_F(TransposerTest, SqueezeTransposerTest) { TEST_F(TransposerTest, SqueezeTransposerTest) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto input = auto input =
@ -1958,9 +1958,9 @@ TEST_F(TransposerTest, SqueezeTransposerTest) {
} }
TEST_F(TransposerTest, SqueezeTransposerTestUnsupportedInputShape) { TEST_F(TransposerTest, SqueezeTransposerTestUnsupportedInputShape) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto input = auto input =
@ -1997,9 +1997,9 @@ TEST_F(TransposerTest, SqueezeTransposerTestUnsupportedInputShape) {
} }
TEST_F(TransposerTest, SqueezeTransposerTestInvalidHWAxis) { TEST_F(TransposerTest, SqueezeTransposerTestInvalidHWAxis) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto input = auto input =
@ -2037,9 +2037,9 @@ TEST_F(TransposerTest, SqueezeTransposerTestInvalidHWAxis) {
} }
TEST_F(TransposerTest, SqueezeTransposerTestInvalidNHWAxis) { TEST_F(TransposerTest, SqueezeTransposerTestInvalidNHWAxis) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto input = auto input =
@ -2077,9 +2077,9 @@ TEST_F(TransposerTest, SqueezeTransposerTestInvalidNHWAxis) {
} }
TEST_F(TransposerTest, SqueezeTransposerTestSqueezeDimsUpdated) { TEST_F(TransposerTest, SqueezeTransposerTestSqueezeDimsUpdated) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto input = auto input =
@ -2138,9 +2138,9 @@ TEST_F(TransposerTest, SqueezeTransposerTestSqueezeDimsUpdated) {
} }
TEST_F(TransposerTest, MaxPoolV2Transposer) { TEST_F(TransposerTest, MaxPoolV2Transposer) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto input = auto input =
@ -2196,9 +2196,9 @@ TEST_F(TransposerTest, MaxPoolV2Transposer) {
} }
TEST_F(TransposerTest, MaxPoolGradV2Transposer) { TEST_F(TransposerTest, MaxPoolGradV2Transposer) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
for (bool use_grad_grad : {false, true}) { for (bool use_grad_grad : {false, true}) {
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
@ -2280,9 +2280,9 @@ TEST_F(TransposerTest, MaxPoolGradV2Transposer) {
} }
TEST_F(TransposerTest, BinaryOpTransposerAdd) { TEST_F(TransposerTest, BinaryOpTransposerAdd) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto input = auto input =
@ -2352,9 +2352,9 @@ TEST_F(TransposerTest, BinaryOpTransposerAdd) {
} }
TEST_F(TransposerTest, BinaryOpTransposerMul) { TEST_F(TransposerTest, BinaryOpTransposerMul) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto input = auto input =
@ -2424,9 +2424,9 @@ TEST_F(TransposerTest, BinaryOpTransposerMul) {
} }
TEST_F(TransposerTest, BinaryOpTransposerPolygamma) { TEST_F(TransposerTest, BinaryOpTransposerPolygamma) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto input = auto input =
@ -2511,9 +2511,9 @@ bool CreateConcatV1Op(const Scope& scope, const InputList& tensors,
} }
TEST_F(TransposerTest, ConcatOpTransposerConcat) { TEST_F(TransposerTest, ConcatOpTransposerConcat) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
Output input_1 = ops::RandomUniform(scope.WithOpName("input_1"), Output input_1 = ops::RandomUniform(scope.WithOpName("input_1"),
@ -2589,9 +2589,9 @@ TEST_F(TransposerTest, ConcatOpTransposerConcat) {
} }
TEST_F(TransposerTest, ConcatOpTransposerConcatV2) { TEST_F(TransposerTest, ConcatOpTransposerConcatV2) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
Output input_1 = ops::RandomUniform(scope.WithOpName("input_1"), Output input_1 = ops::RandomUniform(scope.WithOpName("input_1"),
@ -2666,9 +2666,9 @@ TEST_F(TransposerTest, ConcatOpTransposerConcatV2) {
} }
TEST_F(TransposerTest, ReverseV2Transposer) { TEST_F(TransposerTest, ReverseV2Transposer) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
@ -2734,9 +2734,9 @@ TEST_F(TransposerTest, ReverseV2Transposer) {
} }
TEST_F(TransposerTest, TileTransposer) { TEST_F(TransposerTest, TileTransposer) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
@ -2801,9 +2801,9 @@ TEST_F(TransposerTest, TileTransposer) {
} }
TEST_F(TransposerTest, ShapeTransposer) { TEST_F(TransposerTest, ShapeTransposer) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto input = auto input =
@ -2858,9 +2858,9 @@ TEST_F(TransposerTest, ShapeTransposer) {
} }
TEST_F(TransposerTest, ShapeNTransposer) { TEST_F(TransposerTest, ShapeNTransposer) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto input = auto input =
@ -2961,9 +2961,9 @@ TEST_F(TransposerTest, ShapeNTransposer) {
} }
TEST_F(TransposerTest, FillOpTransposer) { TEST_F(TransposerTest, FillOpTransposer) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
auto input = auto input =
@ -3020,9 +3020,9 @@ TEST_F(TransposerTest, FillOpTransposer) {
} }
TEST_F(TransposerTest, SliceTransposer) { TEST_F(TransposerTest, SliceTransposer) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
@ -3097,9 +3097,9 @@ TEST_F(TransposerTest, SliceTransposer) {
} }
TEST_F(TransposerTest, SplitTransposer) { TEST_F(TransposerTest, SplitTransposer) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
@ -3183,9 +3183,9 @@ TEST_F(TransposerTest, SplitTransposer) {
} }
TEST_F(TransposerTest, SplitVTransposer) { TEST_F(TransposerTest, SplitVTransposer) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
@ -3273,9 +3273,9 @@ TEST_F(TransposerTest, SplitVTransposer) {
} }
TEST_F(TransposerTest, StridedSliceTransposer) { TEST_F(TransposerTest, StridedSliceTransposer) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
@ -3363,9 +3363,9 @@ TEST_F(TransposerTest, StridedSliceTransposer) {
} }
TEST_F(TransposerTest, StridedSliceTransposerEllipsisMaskPresent) { TEST_F(TransposerTest, StridedSliceTransposerEllipsisMaskPresent) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
@ -3425,9 +3425,9 @@ TEST_F(TransposerTest, StridedSliceTransposerEllipsisMaskPresent) {
} }
TEST_F(TransposerTest, StridedSliceTransposerConstFaninBadRank) { TEST_F(TransposerTest, StridedSliceTransposerConstFaninBadRank) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
@ -3510,9 +3510,9 @@ TEST_F(TransposerTest, StridedSliceTransposerConstFaninBadRank) {
} }
TEST_F(TransposerTest, ReduceTransposerKeepDims) { TEST_F(TransposerTest, ReduceTransposerKeepDims) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();
@ -3577,9 +3577,9 @@ TEST_F(TransposerTest, ReduceTransposerKeepDims) {
} }
TEST_F(TransposerTest, ReduceTransposerValidAxisNode) { TEST_F(TransposerTest, ReduceTransposerValidAxisNode) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item; GrapplerItem item;
Scope scope = Scope::NewRootScope(); Scope scope = Scope::NewRootScope();

View File

@ -284,7 +284,7 @@ TEST_F(MemoryOptimizerTest, SimpleSwapping) {
status = optimizer.Optimize(cluster.get(), item_copy, &output); status = optimizer.Optimize(cluster.get(), item_copy, &output);
TF_EXPECT_OK(status); TF_EXPECT_OK(status);
#if GOOGLE_CUDA #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
item.fetch = {"e"}; item.fetch = {"e"};
item.init_ops = {init.name()}; item.init_ops = {init.name()};
auto tensors_expected = EvaluateFetchNodes(item); auto tensors_expected = EvaluateFetchNodes(item);
@ -336,7 +336,7 @@ TEST_F(MemoryOptimizerTest, SwappingHeuristics) {
} }
} }
#if GOOGLE_CUDA #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
auto tensors_expected = EvaluateFetchNodes(item); auto tensors_expected = EvaluateFetchNodes(item);
GrapplerItem optimized = item.WithGraph(std::move(output)); GrapplerItem optimized = item.WithGraph(std::move(output));
auto tensors = EvaluateFetchNodes(optimized); auto tensors = EvaluateFetchNodes(optimized);
@ -385,7 +385,7 @@ TEST_F(MemoryOptimizerTest, UnswappableInputs) {
} }
} }
#if GOOGLE_CUDA #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
auto tensors_expected = EvaluateFetchNodes(item); auto tensors_expected = EvaluateFetchNodes(item);
GrapplerItem optimized = item.WithGraph(std::move(output)); GrapplerItem optimized = item.WithGraph(std::move(output));
auto tensors = EvaluateFetchNodes(optimized); auto tensors = EvaluateFetchNodes(optimized);
@ -502,7 +502,7 @@ TEST_F(RelaxAllocatorConstraintsTest, DifferentDevice) {
auto node = output.node(2); auto node = output.node(2);
EXPECT_EQ("assign", node.name()); EXPECT_EQ("assign", node.name());
EXPECT_EQ(0, node.attr().count("_grappler_relax_allocator_constraints")); EXPECT_EQ(0, node.attr().count("_grappler_relax_allocator_constraints"));
#if GOOGLE_CUDA #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
item.fetch = {"exp"}; item.fetch = {"exp"};
item.init_ops = {"variable"}; item.init_ops = {"variable"};
auto tensors_expected = EvaluateFetchNodes(item); auto tensors_expected = EvaluateFetchNodes(item);
@ -629,7 +629,7 @@ TEST_F(RelaxAllocatorConstraintsTest, AssignNodeInFanout) {
EXPECT_EQ(1, node.attr().count("_grappler_relax_allocator_constraints")); EXPECT_EQ(1, node.attr().count("_grappler_relax_allocator_constraints"));
EXPECT_EQ(true, node.attr().at("_grappler_relax_allocator_constraints").b()); EXPECT_EQ(true, node.attr().at("_grappler_relax_allocator_constraints").b());
#if GOOGLE_CUDA #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
item.init_ops = {"exp_cpu", "variable_gpu"}; item.init_ops = {"exp_cpu", "variable_gpu"};
auto tensors_expected = EvaluateFetchNodes(item); auto tensors_expected = EvaluateFetchNodes(item);
GrapplerItem optimized = item.WithGraph(std::move(output)); GrapplerItem optimized = item.WithGraph(std::move(output));

View File

@ -282,6 +282,10 @@ bool IsCpuCompatible(const RemapperContext& ctx, const Pattern& matched) {
// Checks if we can rewrite a pattern to the `_FusedConv2D` on GPU device. // Checks if we can rewrite a pattern to the `_FusedConv2D` on GPU device.
bool IsGpuCompatible(const RemapperContext& ctx, bool IsGpuCompatible(const RemapperContext& ctx,
const ContractionWithBiasAddAndActivation& matched) { const ContractionWithBiasAddAndActivation& matched) {
#if TENSORFLOW_USE_ROCM
// ROCm does not support _FusedConv2D
return false;
#endif
const GraphDef* graph = ctx.graph_view.graph(); const GraphDef* graph = ctx.graph_view.graph();
const NodeDef& contraction_node = graph->node(matched.contraction); const NodeDef& contraction_node = graph->node(matched.contraction);
if (!IsConv2D(contraction_node)) return false; if (!IsConv2D(contraction_node)) return false;

View File

@ -69,9 +69,9 @@ TEST_F(RemapperTest, FusedBatchNorm) {
} }
TEST_F(RemapperTest, FusedBatchNormNCHW) { TEST_F(RemapperTest, FusedBatchNormNCHW) {
#if !GOOGLE_CUDA #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "CUDA is not enabled"; GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !GOOGLE_CUDA #endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
tensorflow::Scope s = tensorflow::Scope::NewRootScope(); tensorflow::Scope s = tensorflow::Scope::NewRootScope();
Output dflt = Output dflt =
ops::Const(s.WithOpName("dflt"), {3.14f, 2.7f, 1.0f, 2.0f, 3.0f, 100.0f}, ops::Const(s.WithOpName("dflt"), {3.14f, 2.7f, 1.0f, 2.0f, 3.0f, 100.0f},

View File

@ -423,10 +423,10 @@ TEST(IsKernelRegisteredForNode, All) {
v.set_type(DataType::DT_FLOAT); v.set_type(DataType::DT_FLOAT);
(*node.mutable_attr())["T"] = v; (*node.mutable_attr())["T"] = v;
TF_EXPECT_OK(IsKernelRegisteredForNode(node)); TF_EXPECT_OK(IsKernelRegisteredForNode(node));
#ifdef GOOGLE_CUDA #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
node.set_device("/gpu:0"); node.set_device("/gpu:0");
TF_EXPECT_OK(IsKernelRegisteredForNode(node)); TF_EXPECT_OK(IsKernelRegisteredForNode(node));
#endif // GOOGLE_CUDA #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// Bad device name. // Bad device name.
node.set_device(""); node.set_device("");