From fe54e03d6a697e28e65f3ba11607a076ea23377a Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Fri, 22 Jan 2021 17:33:54 +0000 Subject: [PATCH] [ROCm] Fix for a bug in ROCm batchnorm implementation. Fixing the bug also makes the unit-test `//tensorflow/python/keras/layers:normalization_test_gpu` pass, so removing the `no_rocm` tag from it as well. --- tensorflow/python/keras/layers/BUILD | 1 - tensorflow/stream_executor/rocm/rocm_dnn.cc | 2 -- 2 files changed, 3 deletions(-) diff --git a/tensorflow/python/keras/layers/BUILD b/tensorflow/python/keras/layers/BUILD index 20d4feef2b7..e742be9b759 100644 --- a/tensorflow/python/keras/layers/BUILD +++ b/tensorflow/python/keras/layers/BUILD @@ -715,7 +715,6 @@ cuda_py_test( python_version = "PY3", shard_count = 4, tags = [ - "no_rocm", "notsan", ], xla_tags = [ diff --git a/tensorflow/stream_executor/rocm/rocm_dnn.cc b/tensorflow/stream_executor/rocm/rocm_dnn.cc index 4c5a740dfb0..8c1596331f3 100644 --- a/tensorflow/stream_executor/rocm/rocm_dnn.cc +++ b/tensorflow/stream_executor/rocm/rocm_dnn.cc @@ -3584,8 +3584,6 @@ bool MIOpenSupport::DoBatchNormalizationForwardImpl( auto status = miopenStatusInvalidValue; if (is_training) { - stream->ThenMemZero(batch_mean, batch_mean->size()); - stream->ThenMemZero(batch_var, batch_var->size()); status = wrap::miopenBatchNormalizationForwardTraining( miopen.handle(), mode, &one, &zero, x_descriptor.handle(), x.opaque(), x_descriptor.handle(), y->opaque(), scale_offset_descriptor.handle(),