Rolling forward "[tf.unique] Optimize the hash table implementation in UniqueOp::Compute()."

The previous version was rolled back because it crashed on the previously valid input `tf.unique([float('nan')])`. The crash was triggered by an assertion in `absl::flat_hash_map` that a constructed key must compare equal to itself, which `NaN` never does. To work around this problem, this change retains the use of `std::unordered_map<>` for floating-point arguments (`float` and `double`, and likewise `Eigen::half` and `bfloat16` in the specializations below), and adds a test for this case to codify the previous behavior.
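The underlying issue is that `NaN != NaN`. A minimal standalone sketch (plain C++, no TensorFlow dependencies; illustrative only) of why each `NaN` becomes its own key in `std::unordered_map`, whereas `absl::flat_hash_map` asserts on such a key:

```cpp
// Demonstrates the NaN failure mode: NaN != NaN, so every NaN insertion
// misses the existing entry and creates a new key in std::unordered_map.
// (absl::flat_hash_map instead asserts that a constructed key compares
// equal to itself, which is what caused the rollback.)
#include <cmath>
#include <iostream>
#include <unordered_map>

int main() {
  std::unordered_map<float, int> uniq;
  const float nan = std::nanf("");
  uniq.emplace(nan, 0);
  uniq.emplace(nan, 1);  // Not found (NaN != NaN), so a second key is added.
  std::cout << uniq.size() << std::endl;  // Prints 2.
  return 0;
}
```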

PiperOrigin-RevId: 307831760
Change-Id: I0c3f3f748a4876e19689e9031ce88d0815d9119b
Derek Murray authored on 2020-04-22 09:24:01 -07:00; committed by TensorFlower Gardener
parent e4c6f288d4
commit 0de3c20c62
4 changed files with 90 additions and 8 deletions

tensorflow/core/kernels/BUILD

@@ -1371,7 +1371,9 @@ tf_kernel_library(
 tf_kernel_library(
     name = "unique_op",
     prefix = "unique_op",
-    deps = ARRAY_DEPS,
+    deps = ARRAY_DEPS + [
+        "@com_google_absl//absl/container:flat_hash_map",
+    ],
 )
 
 tf_kernel_library(
@@ -2335,6 +2337,7 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
+        "//tensorflow/core/kernels/data:single_threaded_executor",
     ],
 )
 

tensorflow/core/kernels/unique_op.cc

@@ -17,18 +17,62 @@ limitations under the License.
 #include <unordered_map>
 #include <utility>
 
+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/core/framework/bounds_check.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/lib/bfloat16/bfloat16.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/hash/hash.h"
 
 namespace tensorflow {
+namespace {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
+// `UniqueOpHashMap` defines the map type that is used when elements of type
+// `T` are to be uniquified. By default, we use `absl::flat_hash_map<T, TIndex>`
+// as the map type. Subsequent specializations are provided for
+// performance and/or correctness.
+template <typename T, typename TIndex>
+struct UniqueOpHashMap {
+  using map_type = absl::flat_hash_map<T, TIndex>;
+};
+
+// NOTE(mrry): For `tstring` elements, we use an `absl::string_view` key to
+// avoid copying the input strings into the map.
+template <typename TIndex>
+struct UniqueOpHashMap<tstring, TIndex> {
+  using map_type = absl::flat_hash_map<absl::string_view, TIndex>;
+};
+
+// NOTE(mrry): `absl::flat_hash_map<float, ...>` does not allow `NaN` as a key,
+// because `NaN != NaN`, so we fall back to `std::unordered_map<>` for
+// floating-point types.
+template <typename TIndex>
+struct UniqueOpHashMap<float, TIndex> {
+  using map_type = std::unordered_map<float, TIndex>;
+};
+template <typename TIndex>
+struct UniqueOpHashMap<double, TIndex> {
+  using map_type = std::unordered_map<double, TIndex>;
+};
+template <typename TIndex>
+struct UniqueOpHashMap<Eigen::half, TIndex> {
+  using map_type = std::unordered_map<Eigen::half, TIndex>;
+};
+template <typename TIndex>
+struct UniqueOpHashMap<bfloat16, TIndex> {
+  using map_type = std::unordered_map<bfloat16, TIndex>;
+};
+
+// `UniqueOp` computes the unique elements in the input tensor.
+//
+// * `T` is the element type.
+// * `TIndex` is the type used to represent indices in the output, either
+//   `int32` or `int64`.
 template <typename T, typename TIndex>
 class UniqueOp : public OpKernel {
  public:
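For readers skimming the diff, the trait performs compile-time map selection. A minimal standalone sketch of the pattern (the struct names mirror the diff, but the reduction to two specializations and the `static_assert` driver are illustrative, and Abseil is assumed to be available):

```cpp
// Sketch: the primary template selects absl::flat_hash_map; a partial
// specialization overrides the choice for float, where NaN keys would
// violate flat_hash_map's self-equality assertion.
#include <type_traits>
#include <unordered_map>

#include "absl/container/flat_hash_map.h"

template <typename T, typename TIndex>
struct UniqueOpHashMap {
  using map_type = absl::flat_hash_map<T, TIndex>;
};

template <typename TIndex>
struct UniqueOpHashMap<float, TIndex> {
  using map_type = std::unordered_map<float, TIndex>;
};

int main() {
  // The alias resolves at compile time based on the element type.
  static_assert(std::is_same<UniqueOpHashMap<int, int>::map_type,
                             absl::flat_hash_map<int, int>>::value,
                "int uses the flat_hash_map default");
  static_assert(std::is_same<UniqueOpHashMap<float, int>::map_type,
                             std::unordered_map<float, int>>::value,
                "float falls back to std::unordered_map");
  return 0;
}
```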
@@ -106,10 +150,10 @@ class UniqueOp : public OpKernel {
       auto Tin = input.flat<T>();
       const int64 N = static_cast<int64>(Tin.size());
 
-      std::unordered_map<T, TIndex> uniq;
+      typename UniqueOpHashMap<T, TIndex>::map_type uniq;
       uniq.reserve(2 * N);
       for (Eigen::Index i = 0, j = 0; i < N; ++i) {
-        auto it = uniq.insert(std::make_pair(Tin(i), j));
+        auto it = uniq.emplace(Tin(i), j);
         idx_vec(i) = it.first->second;
         if (it.second) {
           ++j;
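Aside from the map type and the switch from `insert` to `emplace`, the loop is unchanged. For readers unfamiliar with the `pair<iterator, bool>` return value it relies on, a self-contained sketch of the same single-pass uniquification (toy `int` data, no TensorFlow types):

```cpp
// Sketch: one pass over the input assigns each element the index of its
// first occurrence; emplace is a no-op (second == false) for repeats.
#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <vector>

int main() {
  const std::vector<int> in = {1, 1, 2, 4, 4, 4, 7, 8, 8};
  std::unordered_map<int, int64_t> uniq;
  uniq.reserve(2 * in.size());
  std::vector<int64_t> idx(in.size());
  for (int64_t i = 0, j = 0; i < static_cast<int64_t>(in.size()); ++i) {
    auto it = uniq.emplace(in[i], j);
    idx[i] = it.first->second;  // index of the key's first occurrence
    if (it.second) ++j;         // key was new: advance the unique counter
  }
  for (int64_t v : idx) std::cout << v << ' ';  // prints: 0 0 1 2 2 2 3 4 4
  std::cout << '\n';
  return 0;
}
```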
@@ -153,13 +197,14 @@ class UniqueOp : public OpKernel {
         return true;
       };
 
-      std::unordered_map<int64, int64, decltype(hash_fn), decltype(equal_to_fn)>
+      absl::flat_hash_map<int64, int64, decltype(hash_fn),
+                          decltype(equal_to_fn)>
          uniq(0, hash_fn, equal_to_fn);
 
      uniq.reserve(2 * Tin.dimension(1));
 
      for (int64 i = 0, j = 0; i < Tin.dimension(1); ++i) {
-        auto it = uniq.insert(std::make_pair(i, j));
+        auto it = uniq.emplace(i, j);
         idx_vec(i) = it.first->second;
         if (it.second) {
           ++j;
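This axis-based path keys the map on column indices and routes hashing and equality through lambdas over the column contents, which is why the map type carries `decltype(hash_fn)` and `decltype(equal_to_fn)`. A minimal sketch of that pattern with `absl::flat_hash_map` (toy row-of-ints "columns"; the names and the hash combiner are illustrative):

```cpp
// Sketch: keys are indices into `cols`; the functors make two indices
// compare equal iff the columns they denote have equal contents.
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

#include "absl/container/flat_hash_map.h"

int main() {
  const std::vector<std::vector<int>> cols = {{1, 2}, {1, 2}, {3, 4}};
  auto hash_fn = [&cols](int64_t i) {
    std::size_t h = 0;
    for (int v : cols[i]) h = h * 31 + std::hash<int>()(v);  // toy combiner
    return h;
  };
  auto equal_to_fn = [&cols](int64_t a, int64_t b) {
    return cols[a] == cols[b];
  };
  absl::flat_hash_map<int64_t, int64_t, decltype(hash_fn),
                      decltype(equal_to_fn)>
      uniq(0, hash_fn, equal_to_fn);
  for (int64_t i = 0, j = 0; i < static_cast<int64_t>(cols.size()); ++i) {
    if (uniq.emplace(i, j).second) ++j;  // column 1 duplicates column 0
  }
  std::cout << uniq.size() << '\n';  // prints: 2
  return 0;
}
```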
@@ -311,4 +356,6 @@ REGISTER_KERNEL_BUILDER(Name("Unique")
                             .HostMemory("idx"),
                         UniqueOp<int64, int64>);
 #endif  // TENSORFLOW_USE_SYCL
+
+}  // namespace
 }  // namespace tensorflow

tensorflow/core/kernels/unique_op_test.cc

@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.pb.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/graph/node_builder.h"
 #include "tensorflow/core/graph/testlib.h"
 #include "tensorflow/core/kernels/ops_testutil.h"
@@ -75,11 +76,14 @@ static void BM_Unique_INT32(int iters, int dim, int max_int) {
                   .Input(test::graph::Constant(g, input))
                   .Attr("T", DT_INT32)
                   .Finalize(g, &node));
+  FixupSourceAndSinkEdges(g);
 
   testing::BytesProcessed(static_cast<int64>(iters) * dim * sizeof(int32));
   testing::UseRealTime();
   testing::StartTiming();
-  test::Benchmark("cpu", g).Run(iters);
+  test::Benchmark("cpu", g, nullptr, nullptr, nullptr,
+                  "SINGLE_THREADED_EXECUTOR")
+      .Run(iters);
 }
 
 static void BM_Unique_INT32_Repeat(int iters, int dim, int max_int) {
@@ -95,12 +99,15 @@ static void BM_Unique_INT32_Repeat(int iters, int dim, int max_int) {
                   .Input(test::graph::Constant(g, input))
                   .Attr("T", DT_INT32)
                   .Finalize(g, &node));
+  FixupSourceAndSinkEdges(g);
 
   testing::BytesProcessed(static_cast<int64>(iters) * dim * 200 *
                           sizeof(int32));
   testing::UseRealTime();
   testing::StartTiming();
-  test::Benchmark("cpu", g).Run(iters);
+  test::Benchmark("cpu", g, nullptr, nullptr, nullptr,
+                  "SINGLE_THREADED_EXECUTOR")
+      .Run(iters);
 }
 
 TensorProto GetRandomStringsTensorProto(int dim, int max_str_len) {
@@ -132,11 +139,14 @@ static void BM_Unique_STRING(int iters, int dim) {
                   .Input(test::graph::Constant(g, input))
                   .Attr("T", DT_STRING)
                   .Finalize(g, &node));
+  FixupSourceAndSinkEdges(g);
 
   testing::BytesProcessed(static_cast<int64>(iters) * dim * sizeof(tstring));
   testing::UseRealTime();
   testing::StartTiming();
-  test::Benchmark("cpu", g).Run(iters);
+  test::Benchmark("cpu", g, nullptr, nullptr, nullptr,
+                  "SINGLE_THREADED_EXECUTOR")
+      .Run(iters);
 }
 
 BENCHMARK(BM_Unique_INT32)

tensorflow/python/kernel_tests/unique_op_test.py

@@ -205,6 +205,28 @@ class UniqueWithCountsTest(test.TestCase):
     for value, count in zip(tf_y, tf_count):
       self.assertEqual(count, np.sum(x == value))
 
+  def testFloat(self):
+    # NOTE(mrry): The behavior when a key is NaN is inherited from
+    # `std::unordered_map<float, ...>`: each NaN becomes a unique key in the
+    # map.
+    x = [0.0, 1.0, np.nan, np.nan]
+    y, idx, count = gen_array_ops.unique_with_counts_v2(
+        x, axis=np.array([], np.int32))
+    tf_y, tf_idx, tf_count = self.evaluate([y, idx, count])
+
+    self.assertEqual(len(x), len(tf_idx))
+    self.assertEqual(len(tf_y), len(np.unique(x)))
+    for i in range(len(x)):
+      if np.isnan(x[i]):
+        self.assertTrue(np.isnan(tf_y[tf_idx[i]]))
+      else:
+        self.assertEqual(x[i], tf_y[tf_idx[i]])
+    for value, count in zip(tf_y, tf_count):
+      if np.isnan(value):
+        self.assertEqual(count, 1)
+      else:
+        self.assertEqual(count, np.sum(x == value))
+
 
 if __name__ == '__main__':
   test.main()