From 0de3c20c624c774d70b92f5ca38f05b351676f7d Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Wed, 22 Apr 2020 09:24:01 -0700
Subject: [PATCH] Rolling forward "[tf.unique] Optimize the hash table
 implementation in `UniqueOp::Compute()`."

The previous version was rolled back because it caused crashes with the previously valid input of `tf.unique([float('nan')])`. This was due to an assertion in `absl::flat_hash_map` that a constructed key is equal to itself. To work around this problem, this change retains the use of `std::unordered_map<>` for float and double arguments, and adds a test for this case to codify the previous behavior.

PiperOrigin-RevId: 307831760
Change-Id: I0c3f3f748a4876e19689e9031ce88d0815d9119b
---
 tensorflow/core/kernels/BUILD                 |  5 +-
 tensorflow/core/kernels/unique_op.cc          | 55 +++++++++++++++++--
 tensorflow/core/kernels/unique_op_test.cc     | 16 +++++-
 .../python/kernel_tests/unique_op_test.py     | 22 ++++++++
 4 files changed, 90 insertions(+), 8 deletions(-)
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 0510730cf07..201661235be 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -1371,7 +1371,9 @@ tf_kernel_library(
 tf_kernel_library(
     name = "unique_op",
     prefix = "unique_op",
-    deps = ARRAY_DEPS,
+    deps = ARRAY_DEPS + [
+        "@com_google_absl//absl/container:flat_hash_map",
+    ],
 )
 
 tf_kernel_library(
@@ -2335,6 +2337,7 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
+        "//tensorflow/core/kernels/data:single_threaded_executor",
     ],
 )
 
diff --git a/tensorflow/core/kernels/unique_op.cc b/tensorflow/core/kernels/unique_op.cc
index 8a9965fe16e..8316018294b 100644
--- a/tensorflow/core/kernels/unique_op.cc
+++ b/tensorflow/core/kernels/unique_op.cc
@@ -17,18 +17,62 @@ limitations under the License.
 #include <unordered_map>
 #include <utility>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/core/framework/bounds_check.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/lib/bfloat16/bfloat16.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/hash/hash.h"
 
 namespace tensorflow {
+namespace {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
+// `UniqueOpHashMap` defines the map type that is used when elements of type
+// `T` are to be uniquified. By default, we use `absl::flat_hash_map<T, TIndex>`
+// as the map type. Subsequent specializations are provided for
+// performance and/or correctness.
+template <typename T, typename TIndex>
+struct UniqueOpHashMap {
+  using map_type = absl::flat_hash_map<T, TIndex>;
+};
+
+// NOTE(mrry): For `tstring` elements, we use an `absl::string_view` key to
+// avoid copying the input strings into the map.
+template <typename TIndex>
+struct UniqueOpHashMap<tstring, TIndex> {
+  using map_type = absl::flat_hash_map<absl::string_view, TIndex>;
+};
+
+// NOTE(mrry): `absl::flat_hash_map<float, ...>` does not allow `NaN` as a key,
+// because `NaN != NaN`, so we fall back to `std::unordered_map<>` for
+// floating-point types.
+template <typename TIndex>
+struct UniqueOpHashMap<float, TIndex> {
+  using map_type = std::unordered_map<float, TIndex>;
+};
+template <typename TIndex>
+struct UniqueOpHashMap<double, TIndex> {
+  using map_type = std::unordered_map<double, TIndex>;
+};
+template <typename TIndex>
+struct UniqueOpHashMap<Eigen::half, TIndex> {
+  using map_type = std::unordered_map<Eigen::half, TIndex>;
+};
+template <typename TIndex>
+struct UniqueOpHashMap<bfloat16, TIndex> {
+  using map_type = std::unordered_map<bfloat16, TIndex>;
+};
+
+// `UniqueOp` computes the unique elements in the input tensor.
+//
+// * `T` is the element type.
+// * `TIndex` is the type used to represent indices in the output, either
+//   `int32` or `int64`.
 template <typename T, typename TIndex>
 class UniqueOp : public OpKernel {
  public:
@@ -106,10 +150,10 @@ class UniqueOp : public OpKernel {
       auto Tin = input.flat<T>();
       const int64 N = static_cast<int64>(Tin.size());
 
-      std::unordered_map<T, TIndex> uniq;
+      typename UniqueOpHashMap<T, TIndex>::map_type uniq;
       uniq.reserve(2 * N);
       for (Eigen::Index i = 0, j = 0; i < N; ++i) {
-        auto it = uniq.insert(std::make_pair(Tin(i), j));
+        auto it = uniq.emplace(Tin(i), j);
         idx_vec(i) = it.first->second;
         if (it.second) {
           ++j;
@@ -153,13 +197,14 @@ class UniqueOp : public OpKernel {
         return true;
       };
 
-      std::unordered_map<int64, int64, decltype(hash_fn), decltype(equal_to_fn)>
+      absl::flat_hash_map<int64, int64, decltype(hash_fn),
+                          decltype(equal_to_fn)>
           uniq(0, hash_fn, equal_to_fn);
 
       uniq.reserve(2 * Tin.dimension(1));
 
       for (int64 i = 0, j = 0; i < Tin.dimension(1); ++i) {
-        auto it = uniq.insert(std::make_pair(i, j));
+        auto it = uniq.emplace(i, j);
         idx_vec(i) = it.first->second;
         if (it.second) {
           ++j;
@@ -311,4 +356,6 @@ REGISTER_KERNEL_BUILDER(Name("Unique")
                             .HostMemory("idx"),
                         UniqueOp<int64, int64>);
 #endif  // TENSORFLOW_USE_SYCL
+
+}  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/unique_op_test.cc b/tensorflow/core/kernels/unique_op_test.cc
index 4861a45848c..a0249d9bc4c 100644
--- a/tensorflow/core/kernels/unique_op_test.cc
+++ b/tensorflow/core/kernels/unique_op_test.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/node_builder.h"
 #include "tensorflow/core/graph/testlib.h"
 #include "tensorflow/core/kernels/ops_testutil.h"
@@ -75,11 +76,14 @@ static void BM_Unique_INT32(int iters, int dim, int max_int) {
                   .Input(test::graph::Constant(g, input))
                   .Attr("T", DT_INT32)
                   .Finalize(g, &node));
+  FixupSourceAndSinkEdges(g);
 
   testing::BytesProcessed(static_cast<int64>(iters) * dim * sizeof(int32));
   testing::UseRealTime();
   testing::StartTiming();
-  test::Benchmark("cpu", g).Run(iters);
+  test::Benchmark("cpu", g, nullptr, nullptr, nullptr,
+                  "SINGLE_THREADED_EXECUTOR")
+      .Run(iters);
 }
 
 static void BM_Unique_INT32_Repeat(int iters, int dim, int max_int) {
@@ -95,12 +99,15 @@ static void BM_Unique_INT32_Repeat(int iters, int dim, int max_int) {
                   .Input(test::graph::Constant(g, input))
                   .Attr("T", DT_INT32)
                   .Finalize(g, &node));
+  FixupSourceAndSinkEdges(g);
 
   testing::BytesProcessed(static_cast<int64>(iters) * dim * 200 *
                           sizeof(int32));
   testing::UseRealTime();
   testing::StartTiming();
-  test::Benchmark("cpu", g).Run(iters);
+  test::Benchmark("cpu", g, nullptr, nullptr, nullptr,
+                  "SINGLE_THREADED_EXECUTOR")
+      .Run(iters);
 }
 
 TensorProto GetRandomStringsTensorProto(int dim, int max_str_len) {
@@ -132,11 +139,14 @@ static void BM_Unique_STRING(int iters, int dim) {
                   .Input(test::graph::Constant(g, input))
                   .Attr("T", DT_STRING)
                   .Finalize(g, &node));
+  FixupSourceAndSinkEdges(g);
 
   testing::BytesProcessed(static_cast<int64>(iters) * dim * sizeof(tstring));
   testing::UseRealTime();
   testing::StartTiming();
-  test::Benchmark("cpu", g).Run(iters);
+  test::Benchmark("cpu", g, nullptr, nullptr, nullptr,
+                  "SINGLE_THREADED_EXECUTOR")
+      .Run(iters);
 }
 
 BENCHMARK(BM_Unique_INT32)
diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py
index 3d08bf04217..7d9e875be2d 100644
--- a/tensorflow/python/kernel_tests/unique_op_test.py
+++ b/tensorflow/python/kernel_tests/unique_op_test.py
@@ -205,6 +205,28 @@ class UniqueWithCountsTest(test.TestCase):
     for value, count in zip(tf_y, tf_count):
       self.assertEqual(count, np.sum(x == value))
 
+  def testFloat(self):
+    # NOTE(mrry): The behavior when a key is NaN is inherited from
+    # `std::unordered_map<float, ...>`: each NaN becomes a unique key in the
+    # map.
+    x = [0.0, 1.0, np.nan, np.nan]
+    y, idx, count = gen_array_ops.unique_with_counts_v2(
+        x, axis=np.array([], np.int32))
+    tf_y, tf_idx, tf_count = self.evaluate([y, idx, count])
+
+    self.assertEqual(len(x), len(tf_idx))
+    self.assertEqual(len(tf_y), len(np.unique(x)))
+    for i in range(len(x)):
+      if np.isnan(x[i]):
+        self.assertTrue(np.isnan(tf_y[tf_idx[i]]))
+      else:
+        self.assertEqual(x[i], tf_y[tf_idx[i]])
+    for value, count in zip(tf_y, tf_count):
+      if np.isnan(value):
+        self.assertEqual(count, 1)
+      else:
+        self.assertEqual(count, np.sum(x == value))
+
 
 if __name__ == '__main__':
   test.main()