From fabbf923dea96afe440014632a3a1cd0b490b071 Mon Sep 17 00:00:00 2001
From: mazharul <mazharul.islam@intel.com>
Date: Fri, 25 Sep 2020 11:46:04 -0700
Subject: [PATCH 1/8] [Intel MKL] Suppress auto_mixed_precision_test on
 non-AVX512F machines such as Broadwell

---
 tensorflow/python/BUILD                       |   2 +
 tensorflow/python/framework/test_util.py      |   7 +
 .../grappler/auto_mixed_precision_test.py     |  20 ++
 tensorflow/python/util/util_wrapper.cc        | 337 +++++++++---------
 4 files changed, 198 insertions(+), 168 deletions(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index d83d21907ce..fa18b6ece79 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -743,6 +743,8 @@ tf_python_pybind_extension(
         ":pybind11_lib",
         "//third_party/python_runtime:headers",
         "@pybind11",
+        "//tensorflow/core/platform:platform_port",
+
     ],
 )
 
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index bbe28991098..6ce226ac5ae 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -44,6 +44,7 @@ from google.protobuf import text_format
 from tensorflow.core.framework import graph_pb2
 from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python import _pywrap_stacktrace_handler
+from tensorflow.python import _pywrap_utils
 from tensorflow.python import _pywrap_util_port
 from tensorflow.python import tf2
 from tensorflow.python.client import device_lib
@@ -1802,6 +1803,12 @@ def _disable_test(execute_func):
   return disable_test_impl
 
 
+# The description is just for documentation purposes.
+def disable_nonAVX512f(description):  # pylint: disable=unused-argument
+  """Execute the test method only if the CPU supports AVX512F."""
+  execute_func = _pywrap_utils.IsBF16SupportedByOneDNNOnThisCPU()
+  return _disable_test(execute_func)
+
 # The description is just for documentation purposes.
 def disable_xla(description):  # pylint: disable=unused-argument
   """Execute the test method only if xla is not enabled."""
diff --git a/tensorflow/python/grappler/auto_mixed_precision_test.py b/tensorflow/python/grappler/auto_mixed_precision_test.py
index 0066fcb9712..5c5501f129a 100644
--- a/tensorflow/python/grappler/auto_mixed_precision_test.py
+++ b/tensorflow/python/grappler/auto_mixed_precision_test.py
@@ -428,6 +428,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_deprecated_v1
+  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_conv_bn(self, mode):
     """Test graph with convolution followed by batch norm."""
@@ -459,6 +460,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_deprecated_v1
+  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_conv3d_bn(self, mode):
     """Test graph with convolution followed by batch norm."""
@@ -484,6 +486,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_deprecated_v1
+  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_conv3d(self, mode):
     """Test grad ops with convolution3d graph."""
@@ -516,6 +519,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
   # MKL
   @parameterized.parameters(['cuda'])
   @test_util.run_deprecated_v1
+  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_conv_bn_dropout(self, mode):
     """Test dropout precision of convolution batch norm graph."""
@@ -575,6 +579,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
   # TODO(benbarsdell): This test has not been tried with MKL.
   @parameterized.parameters(['cuda'])
   @test_util.run_deprecated_v1
+  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_depthwise_conv2d(self, mode):
     """Test grad ops with depthwise convolution2d graph."""
@@ -610,6 +615,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('b/138749235')
+  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_simple_loop(self, mode):
     """Test graph with while loop."""
@@ -631,6 +637,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('b/138749235')
+  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_loop_with_vars_intertwined(self, mode):
     """Test graph with intertwined while loops."""
@@ -655,6 +662,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda'])
   @test_util.run_deprecated_v1
+  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_multi_paths(self, mode):
     """Test graph with multiple paths."""
@@ -684,6 +692,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_deprecated_v1
+  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_multi_paths_2(self, mode):
     """Test graph with multiple paths."""
@@ -717,6 +726,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda'])  # MKL doesn't support bf16 Sigmoid
   @test_util.run_v1_only('b/138749235')
+  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_recurrent_lstm(self, mode):
     """Test graph with recurrent lstm."""
@@ -744,54 +754,63 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('v1 loop test')
+  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_1(self, mode):
     self._run_simple_loop_test(mode, 'W', 'C', 'C')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('v1 loop test')
+  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_2(self, mode):
     self._run_simple_loop_test(mode, 'C', 'C', 'W')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('v1 loop test')
+  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_3(self, mode):
     self._run_simple_loop_test(mode, 'W', 'G', 'W')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('v1 loop test')
+  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_4(self, mode):
     self._run_simple_loop_test(mode, 'W', 'gbg', 'W')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('b/138749235')
+  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_5(self, mode):
     self._run_simple_loop_test(mode, 'b', 'gWC', 'c')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('b/138749235')
+  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_6(self, mode):
     self._run_simple_loop_test(mode, 'b', 'CWCG', 'C')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('b/138749235')
+  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_7(self, mode):
     self._run_simple_loop_test(mode, 'C', 'GWCG', 'C')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('b/138749235')
+  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_8(self, mode):
     self._run_simple_loop_test(mode, 'C', 'CgbgWC', 'g')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_deprecated_v1
+  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_noninlined_funcdef(self, mode):
     """Test graph with non-inlined function subgraph.
@@ -820,6 +839,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_deprecated_v1
+  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_ingraph_train_loop(self, mode):
     """Tests a graph containing a while loop around a training update.
diff --git a/tensorflow/python/util/util_wrapper.cc b/tensorflow/python/util/util_wrapper.cc
index 63c70d785cc..a310b5a7fb9 100644
--- a/tensorflow/python/util/util_wrapper.cc
+++ b/tensorflow/python/util/util_wrapper.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "pybind11/pybind11.h"
 #include "pybind11/pytypes.h"
+#include "tensorflow/core/platform/cpu_info.h"
 #include "tensorflow/python/lib/core/pybind11_lib.h"
 #include "tensorflow/python/util/util.h"
 
@@ -34,25 +35,23 @@ PYBIND11_MODULE(_pywrap_utils, m) {
     return tensorflow::PyoOrThrow(
         tensorflow::swig::RegisterPyObject(name.ptr(), type.ptr()));
   });
-  m.def(
-      "IsTensor",
-      [](const py::handle& o) {
-        bool result = tensorflow::swig::IsTensor(o.ptr());
-        if (PyErr_Occurred()) {
-          throw py::error_already_set();
-        }
-        return result;
-      },
-      R"pbdoc(
+  m.def("IsTensor",
+        [](const py::handle& o) {
+          bool result = tensorflow::swig::IsTensor(o.ptr());
+          if (PyErr_Occurred()) {
+            throw py::error_already_set();
+          }
+          return result;
+        },
+        R"pbdoc(
       Check if an object is a Tensor.
     )pbdoc");
-  m.def(
-      "IsSequence",
-      [](const py::handle& o) {
-        bool result = tensorflow::swig::IsSequence(o.ptr());
-        return result;
-      },
-      R"pbdoc(
+  m.def("IsSequence",
+        [](const py::handle& o) {
+          bool result = tensorflow::swig::IsSequence(o.ptr());
+          return result;
+        },
+        R"pbdoc(
       Returns true if its input is a collections.Sequence (except strings).
 
       Args:
@@ -62,16 +61,15 @@ PYBIND11_MODULE(_pywrap_utils, m) {
         True if the sequence is a not a string and is a collections.Sequence or a
         dict.
     )pbdoc");
-  m.def(
-      "IsSequenceOrComposite",
-      [](const py::handle& o) {
-        bool result = tensorflow::swig::IsSequenceOrComposite(o.ptr());
-        if (PyErr_Occurred()) {
-          throw py::error_already_set();
-        }
-        return result;
-      },
-      R"pbdoc(
+  m.def("IsSequenceOrComposite",
+        [](const py::handle& o) {
+          bool result = tensorflow::swig::IsSequenceOrComposite(o.ptr());
+          if (PyErr_Occurred()) {
+            throw py::error_already_set();
+          }
+          return result;
+        },
+        R"pbdoc(
       Returns true if its input is a sequence or a `CompositeTensor`.
 
       Args:
@@ -81,16 +79,15 @@ PYBIND11_MODULE(_pywrap_utils, m) {
         True if the sequence is a not a string and is a collections.Sequence or a
         dict or a CompositeTensor or a TypeSpec (except string and TensorSpec).
     )pbdoc");
-  m.def(
-      "IsCompositeTensor",
-      [](const py::handle& o) {
-        bool result = tensorflow::swig::IsCompositeTensor(o.ptr());
-        if (PyErr_Occurred()) {
-          throw py::error_already_set();
-        }
-        return result;
-      },
-      R"pbdoc(
+  m.def("IsCompositeTensor",
+        [](const py::handle& o) {
+          bool result = tensorflow::swig::IsCompositeTensor(o.ptr());
+          if (PyErr_Occurred()) {
+            throw py::error_already_set();
+          }
+          return result;
+        },
+        R"pbdoc(
       Returns true if its input is a `CompositeTensor`.
 
       Args:
@@ -99,16 +96,15 @@ PYBIND11_MODULE(_pywrap_utils, m) {
       Returns:
         True if the sequence is a CompositeTensor.
     )pbdoc");
-  m.def(
-      "IsTypeSpec",
-      [](const py::handle& o) {
-        bool result = tensorflow::swig::IsTypeSpec(o.ptr());
-        if (PyErr_Occurred()) {
-          throw py::error_already_set();
-        }
-        return result;
-      },
-      R"pbdoc(
+  m.def("IsTypeSpec",
+        [](const py::handle& o) {
+          bool result = tensorflow::swig::IsTypeSpec(o.ptr());
+          if (PyErr_Occurred()) {
+            throw py::error_already_set();
+          }
+          return result;
+        },
+        R"pbdoc(
       Returns true if its input is a `TypeSpec`, but is not a `TensorSpec`.
 
       Args:
@@ -117,25 +113,23 @@ PYBIND11_MODULE(_pywrap_utils, m) {
       Returns:
         True if the sequence is a `TypeSpec`, but is not a `TensorSpec`.
     )pbdoc");
-  m.def(
-      "IsNamedtuple",
-      [](const py::handle& o, bool strict) {
-        return tensorflow::PyoOrThrow(
-            tensorflow::swig::IsNamedtuple(o.ptr(), strict));
-      },
-      R"pbdoc(
+  m.def("IsNamedtuple",
+        [](const py::handle& o, bool strict) {
+          return tensorflow::PyoOrThrow(
+              tensorflow::swig::IsNamedtuple(o.ptr(), strict));
+        },
+        R"pbdoc(
       Check if an object is a NamedTuple.
     )pbdoc");
-  m.def(
-      "IsMapping",
-      [](const py::handle& o) {
-        bool result = tensorflow::swig::IsMapping(o.ptr());
-        if (PyErr_Occurred()) {
-          throw py::error_already_set();
-        }
-        return result;
-      },
-      R"pbdoc(
+  m.def("IsMapping",
+        [](const py::handle& o) {
+          bool result = tensorflow::swig::IsMapping(o.ptr());
+          if (PyErr_Occurred()) {
+            throw py::error_already_set();
+          }
+          return result;
+        },
+        R"pbdoc(
       Returns True if `instance` is a `collections.Mapping`.
 
       Args:
@@ -144,16 +138,15 @@ PYBIND11_MODULE(_pywrap_utils, m) {
       Returns:
         True if `instance` is a `collections.Mapping`.
     )pbdoc");
-  m.def(
-      "IsMutableMapping",
-      [](const py::handle& o) {
-        bool result = tensorflow::swig::IsMutableMapping(o.ptr());
-        if (PyErr_Occurred()) {
-          throw py::error_already_set();
-        }
-        return result;
-      },
-      R"pbdoc(
+  m.def("IsMutableMapping",
+        [](const py::handle& o) {
+          bool result = tensorflow::swig::IsMutableMapping(o.ptr());
+          if (PyErr_Occurred()) {
+            throw py::error_already_set();
+          }
+          return result;
+        },
+        R"pbdoc(
       Returns True if `instance` is a `collections.MutableMapping`.
 
       Args:
@@ -162,16 +155,15 @@ PYBIND11_MODULE(_pywrap_utils, m) {
       Returns:
         True if `instance` is a `collections.MutableMapping`.
     )pbdoc");
-  m.def(
-      "IsMappingView",
-      [](const py::handle& o) {
-        bool result = tensorflow::swig::IsMappingView(o.ptr());
-        if (PyErr_Occurred()) {
-          throw py::error_already_set();
-        }
-        return result;
-      },
-      R"pbdoc(
+  m.def("IsMappingView",
+        [](const py::handle& o) {
+          bool result = tensorflow::swig::IsMappingView(o.ptr());
+          if (PyErr_Occurred()) {
+            throw py::error_already_set();
+          }
+          return result;
+        },
+        R"pbdoc(
       Returns True if considered a mapping view for the purposes of Flatten()`.
 
       Args:
@@ -180,16 +172,15 @@ PYBIND11_MODULE(_pywrap_utils, m) {
       Returns:
         True if considered a mapping view for the purposes of Flatten().
     )pbdoc");
-  m.def(
-      "IsAttrs",
-      [](const py::handle& o) {
-        bool result = tensorflow::swig::IsAttrs(o.ptr());
-        if (PyErr_Occurred()) {
-          throw py::error_already_set();
-        }
-        return result;
-      },
-      R"pbdoc(
+  m.def("IsAttrs",
+        [](const py::handle& o) {
+          bool result = tensorflow::swig::IsAttrs(o.ptr());
+          if (PyErr_Occurred()) {
+            throw py::error_already_set();
+          }
+          return result;
+        },
+        R"pbdoc(
       Returns True if `instance` is an instance of an `attr.s` decorated class.
 
       Args:
@@ -198,36 +189,33 @@ PYBIND11_MODULE(_pywrap_utils, m) {
       Returns:
         True if `instance` is an instance of an `attr.s` decorated class.
     )pbdoc");
-  m.def(
-      "SameNamedtuples",
-      [](const py::handle& o1, const py::handle& o2) {
-        return tensorflow::PyoOrThrow(
-            tensorflow::swig::SameNamedtuples(o1.ptr(), o2.ptr()));
-      },
-      R"pbdoc(
+  m.def("SameNamedtuples",
+        [](const py::handle& o1, const py::handle& o2) {
+          return tensorflow::PyoOrThrow(
+              tensorflow::swig::SameNamedtuples(o1.ptr(), o2.ptr()));
+        },
+        R"pbdoc(
       Returns True if the two namedtuples have the same name and fields.
     )pbdoc");
-  m.def(
-      "AssertSameStructure",
-      [](const py::handle& o1, const py::handle& o2, bool check_types,
-         bool expand_composites) {
-        bool result = tensorflow::swig::AssertSameStructure(
-            o1.ptr(), o2.ptr(), check_types, expand_composites);
-        if (PyErr_Occurred()) {
-          throw py::error_already_set();
-        }
-        return result;
-      },
-      R"pbdoc(
+  m.def("AssertSameStructure",
+        [](const py::handle& o1, const py::handle& o2, bool check_types,
+           bool expand_composites) {
+          bool result = tensorflow::swig::AssertSameStructure(
+              o1.ptr(), o2.ptr(), check_types, expand_composites);
+          if (PyErr_Occurred()) {
+            throw py::error_already_set();
+          }
+          return result;
+        },
+        R"pbdoc(
       Returns True if the two structures are nested in the same way.
     )pbdoc");
-  m.def(
-      "Flatten",
-      [](const py::handle& o, bool expand_composites) {
-        return tensorflow::PyoOrThrow(
-            tensorflow::swig::Flatten(o.ptr(), expand_composites));
-      },
-      R"pbdoc(
+  m.def("Flatten",
+        [](const py::handle& o, bool expand_composites) {
+          return tensorflow::PyoOrThrow(
+              tensorflow::swig::Flatten(o.ptr(), expand_composites));
+        },
+        R"pbdoc(
       Returns a flat list from a given nested structure.
 
       If `nest` is not a sequence, tuple, or dict, then returns a single-element
@@ -257,16 +245,15 @@ PYBIND11_MODULE(_pywrap_utils, m) {
       Raises:
         TypeError: The nest is or contains a dict with non-sortable keys.
     )pbdoc");
-  m.def(
-      "IsSequenceForData",
-      [](const py::handle& o) {
-        bool result = tensorflow::swig::IsSequenceForData(o.ptr());
-        if (PyErr_Occurred()) {
-          throw py::error_already_set();
-        }
-        return result;
-      },
-      R"pbdoc(
+  m.def("IsSequenceForData",
+        [](const py::handle& o) {
+          bool result = tensorflow::swig::IsSequenceForData(o.ptr());
+          if (PyErr_Occurred()) {
+            throw py::error_already_set();
+          }
+          return result;
+        },
+        R"pbdoc(
       Returns a true if `seq` is a Sequence or dict (except strings/lists).
 
       NOTE(mrry): This differs from `tensorflow.python.util.nest.is_sequence()`,
@@ -281,13 +268,12 @@ PYBIND11_MODULE(_pywrap_utils, m) {
         True if the sequence is a not a string or list and is a
         collections.Sequence.
     )pbdoc");
-  m.def(
-      "FlattenForData",
-      [](const py::handle& o) {
-        return tensorflow::PyoOrThrow(
-            tensorflow::swig::FlattenForData(o.ptr()));
-      },
-      R"pbdoc(
+  m.def("FlattenForData",
+        [](const py::handle& o) {
+          return tensorflow::PyoOrThrow(
+              tensorflow::swig::FlattenForData(o.ptr()));
+        },
+        R"pbdoc(
       Returns a flat sequence from a given nested structure.
 
       If `nest` is not a sequence, this returns a single-element list: `[nest]`.
@@ -299,29 +285,27 @@ PYBIND11_MODULE(_pywrap_utils, m) {
       Returns:
         A Python list, the flattened version of the input.
     )pbdoc");
-  m.def(
-      "AssertSameStructureForData",
-      [](const py::handle& o1, const py::handle& o2, bool check_types) {
-        bool result = tensorflow::swig::AssertSameStructureForData(
-            o1.ptr(), o2.ptr(), check_types);
-        if (PyErr_Occurred()) {
-          throw py::error_already_set();
-        }
-        return result;
-      },
-      R"pbdoc(
+  m.def("AssertSameStructureForData",
+        [](const py::handle& o1, const py::handle& o2, bool check_types) {
+          bool result = tensorflow::swig::AssertSameStructureForData(
+              o1.ptr(), o2.ptr(), check_types);
+          if (PyErr_Occurred()) {
+            throw py::error_already_set();
+          }
+          return result;
+        },
+        R"pbdoc(
       Returns True if the two structures are nested in the same way in particular tf.data.
     )pbdoc");
-  m.def(
-      "IsResourceVariable",
-      [](const py::handle& o) {
-        bool result = tensorflow::swig::IsResourceVariable(o.ptr());
-        if (PyErr_Occurred()) {
-          throw py::error_already_set();
-        }
-        return result;
-      },
-      R"pbdoc(
+  m.def("IsResourceVariable",
+        [](const py::handle& o) {
+          bool result = tensorflow::swig::IsResourceVariable(o.ptr());
+          if (PyErr_Occurred()) {
+            throw py::error_already_set();
+          }
+          return result;
+        },
+        R"pbdoc(
       Returns 1 if `o` is a ResourceVariable.
 
       Args:
@@ -330,16 +314,15 @@ PYBIND11_MODULE(_pywrap_utils, m) {
       Returns:
         True if `instance` is a `ResourceVariable`.
     )pbdoc");
-  m.def(
-      "IsVariable",
-      [](const py::handle& o) {
-        bool result = tensorflow::swig::IsVariable(o.ptr());
-        if (PyErr_Occurred()) {
-          throw py::error_already_set();
-        }
-        return result;
-      },
-      R"pbdoc(
+  m.def("IsVariable",
+        [](const py::handle& o) {
+          bool result = tensorflow::swig::IsVariable(o.ptr());
+          if (PyErr_Occurred()) {
+            throw py::error_already_set();
+          }
+          return result;
+        },
+        R"pbdoc(
       Returns 1 if `o` is a Variable.
 
       Args:
@@ -348,4 +331,22 @@ PYBIND11_MODULE(_pywrap_utils, m) {
       Returns:
         True if `instance` is a `Variable`.
     )pbdoc");
+  m.def("IsBF16SupportedByOneDNNOnThisCPU",
+        []() {
+          bool result = tensorflow::port::TestCPUFeature(
+              tensorflow::port::CPUFeature::AVX512F);
+          if (PyErr_Occurred()) {
+            throw py::error_already_set();
+          }
+          return result;
+        },
+        R"pbdoc(
+      Returns True if the CPU supports the AVX512F feature.
+
+      Args:
+       None
+
+      Returns:
+        True if the CPU supports the AVX512F feature.
+    )pbdoc");
 }

From d0d3535077b0ad4caeaf72de145713d45ef6f9af Mon Sep 17 00:00:00 2001
From: Mazhar <58792511+noim210@users.noreply.github.com>
Date: Wed, 16 Dec 2020 11:11:29 -0800
Subject: [PATCH 2/8] Update auto_mixed_precision_test.py

---
 tensorflow/python/grappler/auto_mixed_precision_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/grappler/auto_mixed_precision_test.py b/tensorflow/python/grappler/auto_mixed_precision_test.py
index 5c5501f129a..6f14ad17add 100644
--- a/tensorflow/python/grappler/auto_mixed_precision_test.py
+++ b/tensorflow/python/grappler/auto_mixed_precision_test.py
@@ -428,7 +428,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_deprecated_v1
-  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
+  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_conv_bn(self, mode):
     """Test graph with convolution followed by batch norm."""

From cf23bfbfd80a4b20f94e72dd605d3c95ed1e216b Mon Sep 17 00:00:00 2001
From: Mazhar <58792511+noim210@users.noreply.github.com>
Date: Wed, 16 Dec 2020 11:21:39 -0800
Subject: [PATCH 3/8] Update auto_mixed_precision_test.py

---
 .../grappler/auto_mixed_precision_test.py     | 36 +++++++++----------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/tensorflow/python/grappler/auto_mixed_precision_test.py b/tensorflow/python/grappler/auto_mixed_precision_test.py
index 6f14ad17add..594e1391529 100644
--- a/tensorflow/python/grappler/auto_mixed_precision_test.py
+++ b/tensorflow/python/grappler/auto_mixed_precision_test.py
@@ -460,7 +460,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_deprecated_v1
-  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
+  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_conv3d_bn(self, mode):
     """Test graph with convolution followed by batch norm."""
@@ -486,7 +486,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_deprecated_v1
-  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
+  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_conv3d(self, mode):
     """Test grad ops with convolution3d graph."""
@@ -519,7 +519,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
   # MKL
   @parameterized.parameters(['cuda'])
   @test_util.run_deprecated_v1
-  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
+  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_conv_bn_dropout(self, mode):
     """Test dropout precision of convolution batch norm graph."""
@@ -579,7 +579,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
   # TODO(benbarsdell): This test has not been tried with MKL.
   @parameterized.parameters(['cuda'])
   @test_util.run_deprecated_v1
-  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
+  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_depthwise_conv2d(self, mode):
     """Test grad ops with depthwise convolution2d graph."""
@@ -615,7 +615,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('b/138749235')
-  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
+  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_simple_loop(self, mode):
     """Test graph with while loop."""
@@ -637,7 +637,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('b/138749235')
-  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
+  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_loop_with_vars_intertwined(self, mode):
     """Test graph with intertwined while loops."""
@@ -662,7 +662,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda'])
   @test_util.run_deprecated_v1
-  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
+  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_multi_paths(self, mode):
     """Test graph with multiple paths."""
@@ -692,7 +692,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_deprecated_v1
-  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
+  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_multi_paths_2(self, mode):
     """Test graph with multiple paths."""
@@ -726,7 +726,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda'])  # MKL doesn't support bf16 Sigmoid
   @test_util.run_v1_only('b/138749235')
-  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
+  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_recurrent_lstm(self, mode):
     """Test graph with recurrent lstm."""
@@ -754,63 +754,63 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('v1 loop test')
-  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
+  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_1(self, mode):
     self._run_simple_loop_test(mode, 'W', 'C', 'C')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('v1 loop test')
-  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
+  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_2(self, mode):
     self._run_simple_loop_test(mode, 'C', 'C', 'W')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('v1 loop test')
-  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
+  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_3(self, mode):
     self._run_simple_loop_test(mode, 'W', 'G', 'W')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('v1 loop test')
-  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
+  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_4(self, mode):
     self._run_simple_loop_test(mode, 'W', 'gbg', 'W')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('b/138749235')
-  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
+  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_5(self, mode):
     self._run_simple_loop_test(mode, 'b', 'gWC', 'c')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('b/138749235')
-  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
+  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_6(self, mode):
     self._run_simple_loop_test(mode, 'b', 'CWCG', 'C')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('b/138749235')
-  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
+  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_7(self, mode):
     self._run_simple_loop_test(mode, 'C', 'GWCG', 'C')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('b/138749235')
-  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
+  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_8(self, mode):
     self._run_simple_loop_test(mode, 'C', 'CgbgWC', 'g')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_deprecated_v1
-  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
+  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_noninlined_funcdef(self, mode):
     """Test graph with non-inlined function subgraph.

From b27f331527f47bb1d5157836d18127b777814eb1 Mon Sep 17 00:00:00 2001
From: Mazhar <58792511+noim210@users.noreply.github.com>
Date: Wed, 16 Dec 2020 11:22:40 -0800
Subject: [PATCH 4/8] Update auto_mixed_precision_test.py

---
 tensorflow/python/grappler/auto_mixed_precision_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/grappler/auto_mixed_precision_test.py b/tensorflow/python/grappler/auto_mixed_precision_test.py
index 594e1391529..2e29c1ee060 100644
--- a/tensorflow/python/grappler/auto_mixed_precision_test.py
+++ b/tensorflow/python/grappler/auto_mixed_precision_test.py
@@ -839,7 +839,7 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_deprecated_v1
-  @test_util.disable_nonAVX512f('test will fail with AVX512f e.g. brodwell')
+  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_ingraph_train_loop(self, mode):
     """Tests a graph containing a while loop around a training update.

From d47153ffe3bc91e4aece66de889c996a0c633ca6 Mon Sep 17 00:00:00 2001
From: mazharul <mazharul.islam@intel.com>
Date: Mon, 21 Dec 2020 19:51:07 -0800
Subject: [PATCH 5/8] Fix build conflict: move the _pywrap_utils pybind

---
 tensorflow/python/BUILD      | 33 ---------------------------------
 tensorflow/python/util/BUILD |  1 +
 2 files changed, 1 insertion(+), 33 deletions(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index e62c6b30275..d141220888e 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -463,35 +463,6 @@ alias(
     actual = "//tensorflow/python/util:_pywrap_utils",
 )
 
-tf_python_pybind_extension(
-    name = "_pywrap_utils",
-    srcs = ["util/util_wrapper.cc"],
-    hdrs = ["util/util.h"],
-    module_name = "_pywrap_utils",
-    deps = [
-        ":pybind11_lib",
-        "//third_party/python_runtime:headers",
-        "@pybind11",
-        "//tensorflow/core/platform:platform_port",
-
-    ],
-)
-
-tf_python_pybind_extension(
-    name = "_pywrap_kernel_registry",
-    srcs = ["util/kernel_registry_wrapper.cc"],
-    hdrs = ["util/kernel_registry.h"],
-    module_name = "_pywrap_kernel_registry",
-    deps = [
-        ":pybind11_lib",
-        "//tensorflow/core:framework_headers_lib",
-        "//tensorflow/core:lib_headers_for_pybind",
-        "//tensorflow/core:protos_all_cc",
-        "//third_party/python_runtime:headers",
-        "@pybind11",
-    ],
-)
-
 tf_python_pybind_extension(
     name = "_pywrap_quantize_training",
     srcs = [
@@ -1620,7 +1591,6 @@ py_library(
     deps = [
         ":platform",
         "//tensorflow/python/util",
-        # TODO(mdan): Remove this once the transitive dependency is fixed.
         "//tensorflow/python/util:tf_stack",
     ],
 )
@@ -2048,7 +2018,6 @@ cuda_py_test(
     python_version = "PY3",
     shard_count = 10,
     tags = [
-        "no_rocm",
         "noasan",
         "optonly",
     ],
@@ -2094,7 +2063,6 @@ tf_py_test(
     srcs = ["framework/importer_test.py"],
     main = "framework/importer_test.py",
     python_version = "PY3",
-    tags = ["no_rocm"],
     deps = [
         ":array_ops",
         ":client_testlib",
@@ -4915,7 +4883,6 @@ cuda_py_test(
     srcs = ["ops/nn_fused_batchnorm_test.py"],
     python_version = "PY3",
     shard_count = 24,
-    tags = ["no_rocm"],
     deps = [
         ":array_ops",
         ":client_testlib",
diff --git a/tensorflow/python/util/BUILD b/tensorflow/python/util/BUILD
index 32bb85a6a49..ec265c4ea46 100644
--- a/tensorflow/python/util/BUILD
+++ b/tensorflow/python/util/BUILD
@@ -76,6 +76,7 @@ tf_python_pybind_extension(
         "//tensorflow/python:pybind11_lib",
         "//third_party/python_runtime:headers",
         "@pybind11",
+        "//tensorflow/core/platform:platform_port",
     ],
 )
 

From 55975390697d667f6b4ab7bcc6a8a6bd5ba73204 Mon Sep 17 00:00:00 2001
From: mazharul <mazharul.islam@intel.com>
Date: Mon, 21 Dec 2020 19:52:06 -0800
Subject: [PATCH 6/8] Address review feedback: skip MKL tests at runtime in
 setUp instead of disabling them, so CUDA runs are unaffected

---
 tensorflow/python/framework/test_util.py      |  7 ------
 .../grappler/auto_mixed_precision_test.py     | 25 ++++---------------
 2 files changed, 5 insertions(+), 27 deletions(-)

diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 1e767d8d0ee..6b54f03e882 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -43,7 +43,6 @@ from google.protobuf import text_format
 
 from tensorflow.core.framework import graph_pb2
 from tensorflow.core.protobuf import rewriter_config_pb2
-from tensorflow.python import _pywrap_utils
 from tensorflow.python import tf2
 from tensorflow.python.client import device_lib
 from tensorflow.python.client import pywrap_tf_session
@@ -1824,12 +1823,6 @@ def _disable_test(execute_func):
   return disable_test_impl
 
 
-# The description is just for documentation purposes.
-def disable_nonAVX512f(description):  # pylint: disable=unused-argument
-  """Execute the test method only if avx512f is supported."""
-  execute_func = _pywrap_utils.IsBF16SupportedByOneDNNOnThisCPU()
-  return _disable_test(execute_func)
-
 # The description is just for documentation purposes.
 def disable_xla(description):  # pylint: disable=unused-argument
   """Execute the test method only if xla is not enabled."""
diff --git a/tensorflow/python/grappler/auto_mixed_precision_test.py b/tensorflow/python/grappler/auto_mixed_precision_test.py
index ec2eefc1872..c0300aaadb5 100644
--- a/tensorflow/python/grappler/auto_mixed_precision_test.py
+++ b/tensorflow/python/grappler/auto_mixed_precision_test.py
@@ -50,6 +50,7 @@ from tensorflow.python.platform import sysconfig
 from tensorflow.python.platform import test
 from tensorflow.python.training import adam
 from tensorflow.python.training import gradient_descent
+from tensorflow.python.util import _pywrap_utils
 
 
 def _input(shape):
@@ -371,6 +372,10 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
       self.skipTest('No GPU is available')
     if mode == 'mkl' and not test_util.IsMklEnabled():
       self.skipTest('MKL is not enabled')
+    # Skip MKL tests on CPUs where oneDNN lacks bfloat16 support (requires AVX512f; e.g., Broadwell)
+    isAVX512f = _pywrap_utils.IsBF16SupportedByOneDNNOnThisCPU()
+    if mode == 'mkl' and not isAVX512f:
+      self.skipTest('Skipping test due to non-AVX512f machine')
 
   def _run_simple_loop_test(self, mode, inp, body, out):
     """Runs a test of a simple loop.
@@ -428,7 +433,6 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_deprecated_v1
-  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_conv_bn(self, mode):
     """Test graph with convolution followed by batch norm."""
@@ -460,7 +464,6 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_deprecated_v1
-  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_conv3d_bn(self, mode):
     """Test graph with convolution followed by batch norm."""
@@ -486,7 +489,6 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_deprecated_v1
-  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_conv3d(self, mode):
     """Test grad ops with convolution3d graph."""
@@ -517,7 +519,6 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_deprecated_v1
-  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_conv_bn_dropout(self, mode):
     """Test dropout precision of convolution batch norm graph."""
@@ -578,7 +579,6 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
   # TODO(benbarsdell): This test has not been tried with MKL.
   @parameterized.parameters(['cuda'])
   @test_util.run_deprecated_v1
-  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_depthwise_conv2d(self, mode):
     """Test grad ops with depthwise convolution2d graph."""
@@ -614,7 +614,6 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('b/138749235')
-  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_simple_loop(self, mode):
     """Test graph with while loop."""
@@ -636,7 +635,6 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('b/138749235')
-  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_loop_with_vars_intertwined(self, mode):
     """Test graph with intertwined while loops."""
@@ -661,7 +659,6 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda'])
   @test_util.run_deprecated_v1
-  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_multi_paths(self, mode):
     """Test graph with multiple paths."""
@@ -691,7 +688,6 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_deprecated_v1
-  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_multi_paths_2(self, mode):
     """Test graph with multiple paths."""
@@ -725,7 +721,6 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda'])  # MKL doesn't support bf16 Sigmoid
   @test_util.run_v1_only('b/138749235')
-  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_recurrent_lstm(self, mode):
     """Test graph with recurrent lstm."""
@@ -753,63 +748,54 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('v1 loop test')
-  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_1(self, mode):
     self._run_simple_loop_test(mode, 'W', 'C', 'C')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('v1 loop test')
-  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_2(self, mode):
     self._run_simple_loop_test(mode, 'C', 'C', 'W')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('v1 loop test')
-  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_3(self, mode):
     self._run_simple_loop_test(mode, 'W', 'G', 'W')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('v1 loop test')
-  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_4(self, mode):
     self._run_simple_loop_test(mode, 'W', 'gbg', 'W')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('b/138749235')
-  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_5(self, mode):
     self._run_simple_loop_test(mode, 'b', 'gWC', 'c')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('b/138749235')
-  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_6(self, mode):
     self._run_simple_loop_test(mode, 'b', 'CWCG', 'C')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('b/138749235')
-  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_7(self, mode):
     self._run_simple_loop_test(mode, 'C', 'GWCG', 'C')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_v1_only('b/138749235')
-  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_propagation_through_simple_loop_8(self, mode):
     self._run_simple_loop_test(mode, 'C', 'CgbgWC', 'g')
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_deprecated_v1
-  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_noninlined_funcdef(self, mode):
     """Test graph with non-inlined function subgraph.
@@ -838,7 +824,6 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(['cuda', 'mkl'])
   @test_util.run_deprecated_v1
-  @test_util.disable_nonAVX512f('Test will fail on machines without AVX512f, e.g., Broadwell')
   @test_util.disable_xla('This test does not pass with XLA')
   def test_ingraph_train_loop(self, mode):
     """Tests a graph containing a while loop around a training update.

From 51659b29519a4cf6ec180a175baa0628d30004bb Mon Sep 17 00:00:00 2001
From: mazharul <mazharul.islam@intel.com>
Date: Tue, 22 Dec 2020 11:57:41 -0800
Subject: [PATCH 7/8] Restore python/BUILD changes dropped while rebasing on master

---
 tensorflow/python/BUILD | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index d141220888e..96e01db5069 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1591,6 +1591,7 @@ py_library(
     deps = [
         ":platform",
         "//tensorflow/python/util",
+        # TODO(mdan): Remove this once the transitive dependency is fixed.
         "//tensorflow/python/util:tf_stack",
     ],
 )
@@ -2018,6 +2019,7 @@ cuda_py_test(
     python_version = "PY3",
     shard_count = 10,
     tags = [
+        "no_rocm",
         "noasan",
         "optonly",
     ],
@@ -2063,6 +2065,7 @@ tf_py_test(
     srcs = ["framework/importer_test.py"],
     main = "framework/importer_test.py",
     python_version = "PY3",
+    tags = ["no_rocm"],
     deps = [
         ":array_ops",
         ":client_testlib",
@@ -4883,6 +4886,7 @@ cuda_py_test(
     srcs = ["ops/nn_fused_batchnorm_test.py"],
     python_version = "PY3",
     shard_count = 24,
+    tags = ["no_rocm"],
     deps = [
         ":array_ops",
         ":client_testlib",

From e901c15b61f9c8649b3605c080af7dc00e5caacf Mon Sep 17 00:00:00 2001
From: mazharul <mazharul.islam@intel.com>
Date: Tue, 22 Dec 2020 12:07:21 -0800
Subject: [PATCH 8/8] Reorder deps to keep the list lexicographically sorted

---
 tensorflow/python/util/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/util/BUILD b/tensorflow/python/util/BUILD
index 8ae6ca3d77d..80c87399710 100644
--- a/tensorflow/python/util/BUILD
+++ b/tensorflow/python/util/BUILD
@@ -73,10 +73,10 @@ tf_python_pybind_extension(
     hdrs = ["util.h"],
     module_name = "_pywrap_utils",
     deps = [
+        "//tensorflow/core/platform:platform_port",
         "//tensorflow/python:pybind11_lib",
         "//third_party/python_runtime:headers",
         "@pybind11",
-        "//tensorflow/core/platform:platform_port",
     ],
 )