From a068d4a458e3a301b873753d471e6fe62a8301f6 Mon Sep 17 00:00:00 2001
From: "Xiaoming (Jason) Cui" <xiaoming.cui@intel.com>
Date: Tue, 30 Apr 2019 01:10:08 -0700
Subject: [PATCH 1/2] [INTEL MKL] Adding support for quantized type gather nd
 op registration

---
 tensorflow/core/kernels/gather_nd_op.cc       |  1 +
 tensorflow/core/kernels/gather_nd_op.h        |  6 ++--
 .../core/kernels/gather_nd_op_cpu_impl.h      |  4 ++-
 tensorflow/core/kernels/gather_nd_op_test.cc  | 32 +++++++++++++++++--
 4 files changed, 36 insertions(+), 7 deletions(-)
diff --git a/tensorflow/core/kernels/gather_nd_op.cc b/tensorflow/core/kernels/gather_nd_op.cc
index b5b6f14bcda..0b82b72ccc3 100644
--- a/tensorflow/core/kernels/gather_nd_op.cc
+++ b/tensorflow/core/kernels/gather_nd_op.cc
@@ -71,6 +71,7 @@ class GatherNdOp : public OpKernel {
 //
 // Same for the GPU kernel.
 TF_CALL_ALL_TYPES(REGISTER_GATHER_ND_CPU);
+TF_CALL_QUANTIZED_TYPES(REGISTER_GATHER_ND_CPU);
 
 #undef REGISTER_GATHER_ND_CPU
 
diff --git a/tensorflow/core/kernels/gather_nd_op.h b/tensorflow/core/kernels/gather_nd_op.h
index 46414a38fb0..836a6aa5992 100644
--- a/tensorflow/core/kernels/gather_nd_op.h
+++ b/tensorflow/core/kernels/gather_nd_op.h
@@ -100,9 +100,9 @@ Status DoGatherNd(OpKernelContext* c, const Tensor& params,
   }
 
   if (slice_size_big > std::numeric_limits<Index>::max()) {
-    return errors::InvalidArgument(
-        "slice size is too large for indexing: ", slice_size_big, " > ",
-        std::numeric_limits<Index>::max());
+    return errors::InvalidArgument("slice size is too large for indexing: ",
+                                   slice_size_big, " > ",
+                                   std::numeric_limits<Index>::max());
   }
 
   const Index slice_size = static_cast<Index>(slice_size_big);
diff --git a/tensorflow/core/kernels/gather_nd_op_cpu_impl.h b/tensorflow/core/kernels/gather_nd_op_cpu_impl.h
index cf9817dc306..dd41153e5f9 100644
--- a/tensorflow/core/kernels/gather_nd_op_cpu_impl.h
+++ b/tensorflow/core/kernels/gather_nd_op_cpu_impl.h
@@ -32,6 +32,8 @@ limitations under the License.
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/util.h"
 
+#define TF_CALL_DATASET_TYPES(m) TF_CALL_ALL_TYPES(m) TF_CALL_QUANTIZED_TYPES(m)
+
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
@@ -151,7 +153,7 @@ struct GatherNdSlice<CPUDevice, T, Index, IXDIM> {
   REGISTER_GATHER_ND_FULL(type, int32); \
   REGISTER_GATHER_ND_FULL(type, int64)
 
-TF_CALL_ALL_TYPES(REGISTER_GATHER_ND_CPU);
+TF_CALL_DATASET_TYPES(REGISTER_GATHER_ND_CPU);
 
 }  // namespace functor
 
diff --git a/tensorflow/core/kernels/gather_nd_op_test.cc b/tensorflow/core/kernels/gather_nd_op_test.cc
index 9f8658ef0e8..b0b5c958b5a 100644
--- a/tensorflow/core/kernels/gather_nd_op_test.cc
+++ b/tensorflow/core/kernels/gather_nd_op_test.cc
@@ -57,9 +57,9 @@ namespace {
 
 class GatherNdOpTest : public OpsTestBase {
  protected:
-  void MakeOp(DataType index_type) {
+  void MakeOp(DataType param_type, DataType index_type) {
     TF_ASSERT_OK(NodeDefBuilder("myop", "GatherNd")
-                     .Input(FakeInput(DT_FLOAT))
+                     .Input(FakeInput(param_type))
                      .Input(FakeInput(index_type))
                      .Finalize(node_def()));
     TF_ASSERT_OK(InitOp());
@@ -67,7 +67,7 @@ class GatherNdOpTest : public OpsTestBase {
 };
 
 TEST_F(GatherNdOpTest, Simple) {
-  MakeOp(DT_INT32);
+  MakeOp(DT_FLOAT, DT_INT32);
 
   // Feed and run
   AddInputFromArray<float>(TensorShape({5}), {0, 1, 2, 8, 4});
@@ -80,6 +80,32 @@ TEST_F(GatherNdOpTest, Simple) {
   test::ExpectTensorEqual<float>(expected, *GetOutput(0));
 }
 
+TEST_F(GatherNdOpTest, Quantized_UINT8) {
+  MakeOp(DT_QUINT8, DT_INT32);
+
+  // Feed quint8 params {0, 1, 2, 8, 4} and int32 indices {3, 4}, then run.
+  AddInputFromArray<quint8>(TensorShape({5}), {0, 1, 2, 8, 4});
+  AddInputFromArray<int32>(TensorShape({2, 1}), {3, 4});
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Expect params[3] and params[4], i.e. {8, 4}, in a DT_QUINT8 tensor.
+  Tensor expected(allocator(), DT_QUINT8, TensorShape({2}));
+  test::FillValues<quint8>(&expected, {8, 4});
+  test::ExpectTensorEqual<quint8>(expected, *GetOutput(0));
+}
+
+TEST_F(GatherNdOpTest, Quantized_INT8) {
+  MakeOp(DT_QINT8, DT_INT32);
+  // Feed qint8 params {0, 1, 2, 8, 4} and int32 indices {3, 4}, then run.
+  AddInputFromArray<qint8>(TensorShape({5}), {0, 1, 2, 8, 4});
+  AddInputFromArray<int32>(TensorShape({2, 1}), {3, 4});
+  TF_ASSERT_OK(RunOpKernel());
+  // Expect params[3] and params[4], i.e. {8, 4}, in a DT_QINT8 tensor.
+  Tensor expected(allocator(), DT_QINT8, TensorShape({2}));
+  test::FillValues<qint8>(&expected, {8, 4});
+  test::ExpectTensorEqual<qint8>(expected, *GetOutput(0));
+}
+
 constexpr int kLookups = 2000;
 
 template <typename Index>

From 39170eb1a74b2888839aa080fadd499b0ee95849 Mon Sep 17 00:00:00 2001
From: "Xiaoming (Jason) Cui" <xiaoming.cui@intel.com>
Date: Tue, 30 Apr 2019 11:31:46 -0700
Subject: [PATCH 2/2] [INTEL MKL] Removed the macro TF_CALL_DATASET_TYPES as it
 is redundant

---
 tensorflow/core/kernels/gather_nd_op_cpu_impl.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/kernels/gather_nd_op_cpu_impl.h b/tensorflow/core/kernels/gather_nd_op_cpu_impl.h
index dd41153e5f9..c3d2f701398 100644
--- a/tensorflow/core/kernels/gather_nd_op_cpu_impl.h
+++ b/tensorflow/core/kernels/gather_nd_op_cpu_impl.h
@@ -32,8 +32,6 @@ limitations under the License.
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/util.h"
 
-#define TF_CALL_DATASET_TYPES(m) TF_CALL_ALL_TYPES(m) TF_CALL_QUANTIZED_TYPES(m)
-
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
@@ -153,7 +151,8 @@ struct GatherNdSlice<CPUDevice, T, Index, IXDIM> {
   REGISTER_GATHER_ND_FULL(type, int32); \
   REGISTER_GATHER_ND_FULL(type, int64)
 
-TF_CALL_DATASET_TYPES(REGISTER_GATHER_ND_CPU);
+TF_CALL_ALL_TYPES(REGISTER_GATHER_ND_CPU);
+TF_CALL_QUANTIZED_TYPES(REGISTER_GATHER_ND_CPU);
 
 }  // namespace functor