From 0de3c20c624c774d70b92f5ca38f05b351676f7d Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 22 Apr 2020 09:24:01 -0700 Subject: [PATCH] Rolling forward "[tf.unique] Optimize the hash table implementation in `UniqueOp::Compute()`." The previous version was rolled back because it caused crashes with the previously valid input of `tf.unique([float('nan')])`. This was due to an assertion in `absl::flat_hash_map` that a constructed key is equal to itself. To work around this problem, this change retains the use of `std::unordered_map<>` for float and double arguments, and adds a test for this case to codify the previous behavior. PiperOrigin-RevId: 307831760 Change-Id: I0c3f3f748a4876e19689e9031ce88d0815d9119b --- tensorflow/core/kernels/BUILD | 5 +- tensorflow/core/kernels/unique_op.cc | 55 +++++++++++++++++-- tensorflow/core/kernels/unique_op_test.cc | 16 +++++- .../python/kernel_tests/unique_op_test.py | 22 ++++++++ 4 files changed, 90 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 0510730cf07..201661235be 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -1371,7 +1371,9 @@ tf_kernel_library( tf_kernel_library( name = "unique_op", prefix = "unique_op", - deps = ARRAY_DEPS, + deps = ARRAY_DEPS + [ + "@com_google_absl//absl/container:flat_hash_map", + ], ) tf_kernel_library( @@ -2335,6 +2337,7 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", + "//tensorflow/core/kernels/data:single_threaded_executor", ], ) diff --git a/tensorflow/core/kernels/unique_op.cc b/tensorflow/core/kernels/unique_op.cc index 8a9965fe16e..8316018294b 100644 --- a/tensorflow/core/kernels/unique_op.cc +++ b/tensorflow/core/kernels/unique_op.cc @@ -17,18 +17,62 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" #include "tensorflow/core/framework/bounds_check.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/lib/bfloat16/bfloat16.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/hash/hash.h" namespace tensorflow { +namespace { typedef Eigen::ThreadPoolDevice CPUDevice; +// `UniqueOpHashMap` defines the map type that is used when elements of type +// `T` are to be uniquified. By default, we use `absl::flat_hash_map` +// as the map type. Subsequent specializations are provided for +// performance and/or correctness. +template +struct UniqueOpHashMap { + using map_type = absl::flat_hash_map; +}; + +// NOTE(mrry): For `tstring` elements, we use an `absl::string_view` key to +// avoid copying the input strings into the map. +template +struct UniqueOpHashMap { + using map_type = absl::flat_hash_map; +}; + +// NOTE(mrry): `absl::flat_hash_map` does not allow `NaN` as a key, +// because `NaN != NaN`, so we fall back to `std::unordered_map<>` for +// floating-point types. +template +struct UniqueOpHashMap { + using map_type = std::unordered_map; +}; +template +struct UniqueOpHashMap { + using map_type = std::unordered_map; +}; +template +struct UniqueOpHashMap { + using map_type = std::unordered_map; +}; +template +struct UniqueOpHashMap { + using map_type = std::unordered_map; +}; + +// `UniqueOp` computes the unique elements in the input tensor. +// +// * `T` is the element type. +// * `TIndex` is the type used to represent indices in the output, either +// `int32` or `int64`. template class UniqueOp : public OpKernel { public: @@ -106,10 +150,10 @@ class UniqueOp : public OpKernel { auto Tin = input.flat(); const int64 N = static_cast(Tin.size()); - std::unordered_map uniq; + typename UniqueOpHashMap::map_type uniq; uniq.reserve(2 * N); for (Eigen::Index i = 0, j = 0; i < N; ++i) { - auto it = uniq.insert(std::make_pair(Tin(i), j)); + auto it = uniq.emplace(Tin(i), j); idx_vec(i) = it.first->second; if (it.second) { ++j; @@ -153,13 +197,14 @@ class UniqueOp : public OpKernel { return true; }; - std::unordered_map + absl::flat_hash_map uniq(0, hash_fn, equal_to_fn); uniq.reserve(2 * Tin.dimension(1)); for (int64 i = 0, j = 0; i < Tin.dimension(1); ++i) { - auto it = uniq.insert(std::make_pair(i, j)); + auto it = uniq.emplace(i, j); idx_vec(i) = it.first->second; if (it.second) { ++j; @@ -311,4 +356,6 @@ REGISTER_KERNEL_BUILDER(Name("Unique") .HostMemory("idx"), UniqueOp); #endif // TENSORFLOW_USE_SYCL + +} // namespace } // namespace tensorflow diff --git a/tensorflow/core/kernels/unique_op_test.cc b/tensorflow/core/kernels/unique_op_test.cc index 4861a45848c..a0249d9bc4c 100644 --- a/tensorflow/core/kernels/unique_op_test.cc +++ b/tensorflow/core/kernels/unique_op_test.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/graph/algorithm.h" #include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/graph/testlib.h" #include "tensorflow/core/kernels/ops_testutil.h" @@ -75,11 +76,14 @@ static void BM_Unique_INT32(int iters, int dim, int max_int) { .Input(test::graph::Constant(g, input)) .Attr("T", DT_INT32) .Finalize(g, &node)); + FixupSourceAndSinkEdges(g); testing::BytesProcessed(static_cast(iters) * dim * sizeof(int32)); testing::UseRealTime(); testing::StartTiming(); - test::Benchmark("cpu", g).Run(iters); + test::Benchmark("cpu", g, nullptr, nullptr, nullptr, + "SINGLE_THREADED_EXECUTOR") + .Run(iters); } static void BM_Unique_INT32_Repeat(int iters, int dim, int max_int) { @@ -95,12 +99,15 @@ static void BM_Unique_INT32_Repeat(int iters, int dim, int max_int) { .Input(test::graph::Constant(g, input)) .Attr("T", DT_INT32) .Finalize(g, &node)); + FixupSourceAndSinkEdges(g); testing::BytesProcessed(static_cast(iters) * dim * 200 * sizeof(int32)); testing::UseRealTime(); testing::StartTiming(); - test::Benchmark("cpu", g).Run(iters); + test::Benchmark("cpu", g, nullptr, nullptr, nullptr, + "SINGLE_THREADED_EXECUTOR") + .Run(iters); } TensorProto GetRandomStringsTensorProto(int dim, int max_str_len) { @@ -132,11 +139,14 @@ static void BM_Unique_STRING(int iters, int dim) { .Input(test::graph::Constant(g, input)) .Attr("T", DT_STRING) .Finalize(g, &node)); + FixupSourceAndSinkEdges(g); testing::BytesProcessed(static_cast(iters) * dim * sizeof(tstring)); testing::UseRealTime(); testing::StartTiming(); - test::Benchmark("cpu", g).Run(iters); + test::Benchmark("cpu", g, nullptr, nullptr, nullptr, + "SINGLE_THREADED_EXECUTOR") + .Run(iters); } BENCHMARK(BM_Unique_INT32) diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py index 3d08bf04217..7d9e875be2d 100644 --- a/tensorflow/python/kernel_tests/unique_op_test.py +++ b/tensorflow/python/kernel_tests/unique_op_test.py @@ -205,6 +205,28 @@ class UniqueWithCountsTest(test.TestCase): for value, count in zip(tf_y, tf_count): self.assertEqual(count, np.sum(x == value)) + def testFloat(self): + # NOTE(mrry): The behavior when a key is NaN is inherited from + # `std::unordered_map`: each NaN becomes a unique key in the + # map. + x = [0.0, 1.0, np.nan, np.nan] + y, idx, count = gen_array_ops.unique_with_counts_v2( + x, axis=np.array([], np.int32)) + tf_y, tf_idx, tf_count = self.evaluate([y, idx, count]) + + self.assertEqual(len(x), len(tf_idx)) + self.assertEqual(len(tf_y), len(np.unique(x))) + for i in range(len(x)): + if np.isnan(x[i]): + self.assertTrue(np.isnan(tf_y[tf_idx[i]])) + else: + self.assertEqual(x[i], tf_y[tf_idx[i]]) + for value, count in zip(tf_y, tf_count): + if np.isnan(value): + self.assertEqual(count, 1) + else: + self.assertEqual(count, np.sum(x == value)) + if __name__ == '__main__': test.main()