From 7f64e72c1b358b49ad0b21ea511597385e8da4f4 Mon Sep 17 00:00:00 2001 From: Fei Hu Date: Thu, 18 Apr 2019 16:25:46 -0700 Subject: [PATCH 1/6] Add resource_mgr to the op_kernel_ctx and iterator_ctx in DatasetOpsTestBase --- tensorflow/core/kernels/data/dataset_test_base.cc | 3 +++ tensorflow/core/kernels/data/dataset_test_base.h | 1 + 2 files changed, 4 insertions(+) diff --git a/tensorflow/core/kernels/data/dataset_test_base.cc b/tensorflow/core/kernels/data/dataset_test_base.cc index b03abed9359..54de6888508 100644 --- a/tensorflow/core/kernels/data/dataset_test_base.cc +++ b/tensorflow/core/kernels/data/dataset_test_base.cc @@ -190,6 +190,7 @@ Status DatasetOpsTestBase::CreateIteratorContext( OpKernelContext* const op_context, std::unique_ptr* iterator_context) { IteratorContext::Params params(op_context); + params.resource_mgr = op_context->resource_manager(); function_handle_cache_ = absl::make_unique(flr_); params.function_handle_cache = function_handle_cache_.get(); *iterator_context = absl::make_unique(params); @@ -228,6 +229,7 @@ Status DatasetOpsTestBase::InitFunctionLibraryRuntime( TF_RETURN_IF_ERROR(DeviceFactory::AddDevices( options, "/job:localhost/replica:0/task:0", &devices)); device_mgr_ = absl::make_unique(std::move(devices)); + resource_mgr_ = absl::make_unique("default_container"); FunctionDefLibrary proto; for (const auto& fdef : flib) *(proto.add_function()) = fdef; @@ -269,6 +271,7 @@ Status DatasetOpsTestBase::CreateOpKernelContext( step_container_ = absl::make_unique(0, [](const string&) {}); params_->step_container = step_container_.get(); + params_->resource_manager = resource_mgr_.get(); checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_wrapper; slice_reader_cache_ = absl::make_unique(); diff --git a/tensorflow/core/kernels/data/dataset_test_base.h b/tensorflow/core/kernels/data/dataset_test_base.h index f3a0d0ae12d..ca2be6b9258 100644 --- a/tensorflow/core/kernels/data/dataset_test_base.h +++ 
b/tensorflow/core/kernels/data/dataset_test_base.h @@ -206,6 +206,7 @@ class DatasetOpsTestBase : public ::testing::Test { std::function)> runner_; std::unique_ptr device_mgr_; std::unique_ptr lib_def_; + std::unique_ptr resource_mgr_; std::unique_ptr params_; std::unique_ptr slice_reader_cache_; From 1dfaeb0028846836533e3855eee23e9ebdb0924d Mon Sep 17 00:00:00 2001 From: Fei Hu Date: Thu, 18 Apr 2019 16:26:27 -0700 Subject: [PATCH 2/6] Tests for ShuffleDatasetOp --- tensorflow/core/kernels/data/BUILD | 18 + .../kernels/data/shuffle_dataset_op_test.cc | 665 ++++++++++++++++++ 2 files changed, 683 insertions(+) create mode 100644 tensorflow/core/kernels/data/shuffle_dataset_op_test.cc diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD index 4d81026648c..a860cd62e10 100644 --- a/tensorflow/core/kernels/data/BUILD +++ b/tensorflow/core/kernels/data/BUILD @@ -667,6 +667,24 @@ tf_kernel_library( ], ) +tf_cc_test( + name = "shuffle_dataset_op_test", + size = "small", + srcs = ["shuffle_dataset_op_test.cc"], + deps = [ + "shuffle_dataset_op", + ":dataset_test_base", + ":dataset_utils", + ":iterator_ops", + ":range_dataset_op", + "//tensorflow/core:framework", + "//tensorflow/core:ptr_util", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + tf_kernel_library( name = "sparse_tensor_slice_dataset_op", srcs = ["sparse_tensor_slice_dataset_op.cc"], diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op_test.cc b/tensorflow/core/kernels/data/shuffle_dataset_op_test.cc new file mode 100644 index 00000000000..dbf8c9a5a70 --- /dev/null +++ b/tensorflow/core/kernels/data/shuffle_dataset_op_test.cc @@ -0,0 +1,665 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/kernels/data/dataset_test_base.h" + +namespace tensorflow { +namespace data { +namespace { + +constexpr char kNodeName[] = "shuffle_dataset"; +constexpr char kOpName[] = "ShuffleDataset"; + +class ShuffleDatasetOpTest : public DatasetOpsTestBase { + protected: + // Creates a new `ShuffleDataset` op kernel + Status CreateShuffleDatasetOpKernel( + bool reshuffle_each_iteration, const DataTypeVector& output_types, + const std::vector& output_shapes, + std::unique_ptr* shuffle_dataset_kernel) { + NodeDef node_def = test::function::NDef( + kNodeName, kOpName, {"input_dataset", "buffer_size", "seed", "seed2"}, + {{"reshuffle_each_iteration", reshuffle_each_iteration}, + {"output_types", output_types}, + {"output_shapes", output_shapes}}); + TF_RETURN_IF_ERROR(CreateOpKernel(node_def, shuffle_dataset_kernel)); + return Status::OK(); + } + + // Creates a new `ShuffleDataset` op kernel context. 
+ Status CreateShuffleDatasetContext( + OpKernel* const op_kernel, + gtl::InlinedVector* const inputs, + std::unique_ptr* context) { + TF_RETURN_IF_ERROR(CheckOpKernelInput(*op_kernel, *inputs)); + TF_RETURN_IF_ERROR(CreateOpKernelContext(op_kernel, inputs, context)); + return Status::OK(); + } +}; + +struct RangeDatasetParam { + int64 start; + int64 end; + int64 step; +}; + +struct TestCase { + RangeDatasetParam range_data_param; + Tensor buffer_size; + Tensor seed; + Tensor seed2; + bool reshuffle_each_iteration; + std::vector expected_outputs; + DataTypeVector expected_output_dtypes; + std::vector expected_output_shapes; + int64 expected_cardinality; + std::vector breakpoints; +}; + +template +std::vector ConvertToTensorVec(std::vector values) { + std::vector tensors; + tensors.reserve(values.size()); + for (auto& value : values) { + tensors.emplace_back( + DatasetOpsTestBase::CreateTensor(TensorShape({}), {value})); + } + return tensors; +} + +// Test case 1: normal case with reshuffle_each_iteration = false +TestCase TestCase1() { + return { + /*range_data_param*/ {0, 10, 1}, + /*buffer_size*/ + DatasetOpsTestBase::CreateTensor(TensorShape({}), {3}), + /*seed*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), + /*seed2*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {2}), + /*reshuffle_each_iteration*/ false, + /*expected_outputs*/ + ConvertToTensorVec({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), + /*expected_output_dtypes*/ {DT_INT64}, + /*expected_output_shapes*/ {PartialTensorShape({})}, + /*expected_cardinality*/ 10, + /*breakpoints*/ {0, 1, 9}}; +} + +// Test case 2: normal case with reshuffle_each_iteration = true +TestCase TestCase2() { + return { + /*range_data_param*/ {0, 10, 1}, + /*buffer_size*/ + DatasetOpsTestBase::CreateTensor(TensorShape({}), {10}), + /*seed*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), + /*seed2*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {2}), + /*reshuffle_each_iteration*/ true, + /*expected_outputs*/ + 
ConvertToTensorVec({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), + /*expected_output_dtypes*/ {DT_INT64}, + /*expected_output_shapes*/ {PartialTensorShape({})}, + /*expected_cardinality*/ 10, + /*breakpoints*/ {0, 1, 9}}; +} + +// Test case 3: special case with buffer_size = 1 & +// reshuffle_each_iteration = true +TestCase TestCase3() { + return { + /*range_data_param*/ {0, 10, 1}, + /*buffer_size*/ + DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), + /*seed*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), + /*seed2*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {2}), + /*reshuffle_each_iteration*/ true, + /*expected_outputs*/ + ConvertToTensorVec({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), + /*expected_output_dtypes*/ {DT_INT64}, + /*expected_output_shapes*/ {PartialTensorShape({})}, + /*expected_cardinality*/ 10, + /*breakpoints*/ {0, 1, 9}}; +} + +TestCase InvalidBufferSizeTestCase() { + return { + /*range_data_param*/ {0, 10, 1}, + /*buffer_size*/ + DatasetOpsTestBase::CreateTensor(TensorShape({}), {-1}), + /*seed*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), + /*seed2*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {2}), + /*reshuffle_each_iteration*/ true, + /*expected_outputs*/ ConvertToTensorVec({}), + /*expected_output_dtypes*/ {DT_INT64}, + /*expected_output_shapes*/ {PartialTensorShape({})}, + /*expected_cardinality*/ 10, + /*breakpoints*/ {0, 1, 9}}; +} + +class ParameterizedShuffleDatasetOpTest + : public ShuffleDatasetOpTest, + public ::testing::WithParamInterface {}; + +TEST_P(ParameterizedShuffleDatasetOpTest, GetNext) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr shuffle_dataset_kernel; + TF_ASSERT_OK(CreateShuffleDatasetOpKernel( + test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, + test_case.expected_output_shapes, &shuffle_dataset_kernel)); + + DatasetBase* 
range_dataset; + TF_ASSERT_OK(CreateRangeDataset( + test_case.range_data_param.start, test_case.range_data_param.end, + test_case.range_data_param.step, "range", &range_dataset)); + Tensor range_dataset_tensor(DT_VARIANT, TensorShape({})); + TF_ASSERT_OK( + StoreDatasetInVariantTensor(range_dataset, &range_dataset_tensor)); + Tensor buffer_size = test_case.buffer_size; + Tensor seed = test_case.seed; + Tensor seed2 = test_case.seed2; + gtl::InlinedVector inputs( + {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + + std::unique_ptr shuffle_dataset_context; + TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), + &inputs, &shuffle_dataset_context)); + DatasetBase* shuffle_dataset; + TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), + shuffle_dataset_context.get(), &shuffle_dataset)); + core::ScopedUnref scoped_unref_shuffle_dataset(shuffle_dataset); + + std::unique_ptr iterator_ctx; + TF_ASSERT_OK( + CreateIteratorContext(shuffle_dataset_context.get(), &iterator_ctx)); + std::unique_ptr iterator; + TF_ASSERT_OK( + shuffle_dataset->MakeIterator(iterator_ctx.get(), "Iterator", &iterator)); + + bool end_of_sequence = false; + std::vector out_tensors; + while (!end_of_sequence) { + std::vector next; + TF_EXPECT_OK( + iterator->GetNext(iterator_ctx.get(), &next, &end_of_sequence)); + out_tensors.insert(out_tensors.end(), next.begin(), next.end()); + } + + // When `buffer_size = 1`, the output sequence of `ShuffleDataset` will be in + // order, so we need to consider the element sequence when evaluating the + // result for this case. 
+ bool expect_items_equal = test_case.buffer_size.flat()(0) > 1; + TF_EXPECT_OK(ExpectEqual(out_tensors, test_case.expected_outputs, + /*expect_items_equal*/ expect_items_equal)); +} + +TEST_F(ShuffleDatasetOpTest, DatasetNodeName) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = TestCase1(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr shuffle_dataset_kernel; + TF_ASSERT_OK(CreateShuffleDatasetOpKernel( + test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, + test_case.expected_output_shapes, &shuffle_dataset_kernel)); + + DatasetBase* range_dataset; + TF_ASSERT_OK(CreateRangeDataset( + test_case.range_data_param.start, test_case.range_data_param.end, + test_case.range_data_param.step, "range", &range_dataset)); + Tensor range_dataset_tensor(DT_VARIANT, TensorShape({})); + TF_ASSERT_OK( + StoreDatasetInVariantTensor(range_dataset, &range_dataset_tensor)); + Tensor buffer_size = test_case.buffer_size; + Tensor seed = test_case.seed; + Tensor seed2 = test_case.seed2; + gtl::InlinedVector inputs( + {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + + std::unique_ptr shuffle_dataset_context; + TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), + &inputs, &shuffle_dataset_context)); + DatasetBase* shuffle_dataset; + TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), + shuffle_dataset_context.get(), &shuffle_dataset)); + core::ScopedUnref scoped_unref_shuffle_dataset(shuffle_dataset); + + EXPECT_EQ(shuffle_dataset->node_name(), kNodeName); +} + +TEST_F(ShuffleDatasetOpTest, DatasetTypeString) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = TestCase1(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr shuffle_dataset_kernel; + TF_ASSERT_OK(CreateShuffleDatasetOpKernel( + test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, + 
test_case.expected_output_shapes, &shuffle_dataset_kernel)); + + DatasetBase* range_dataset; + TF_ASSERT_OK(CreateRangeDataset( + test_case.range_data_param.start, test_case.range_data_param.end, + test_case.range_data_param.step, "range", &range_dataset)); + Tensor range_dataset_tensor(DT_VARIANT, TensorShape({})); + TF_ASSERT_OK( + StoreDatasetInVariantTensor(range_dataset, &range_dataset_tensor)); + Tensor buffer_size = test_case.buffer_size; + Tensor seed = test_case.seed; + Tensor seed2 = test_case.seed2; + gtl::InlinedVector inputs( + {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + + std::unique_ptr shuffle_dataset_context; + TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), + &inputs, &shuffle_dataset_context)); + DatasetBase* shuffle_dataset; + TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), + shuffle_dataset_context.get(), &shuffle_dataset)); + core::ScopedUnref scoped_unref_shuffle_dataset(shuffle_dataset); + + EXPECT_EQ(shuffle_dataset->type_string(), kOpName); +} + +TEST_P(ParameterizedShuffleDatasetOpTest, DatasetOutputDtypes) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr shuffle_dataset_kernel; + TF_ASSERT_OK(CreateShuffleDatasetOpKernel( + test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, + test_case.expected_output_shapes, &shuffle_dataset_kernel)); + + DatasetBase* range_dataset; + TF_ASSERT_OK(CreateRangeDataset( + test_case.range_data_param.start, test_case.range_data_param.end, + test_case.range_data_param.step, "range", &range_dataset)); + Tensor range_dataset_tensor(DT_VARIANT, TensorShape({})); + TF_ASSERT_OK( + StoreDatasetInVariantTensor(range_dataset, &range_dataset_tensor)); + Tensor buffer_size = test_case.buffer_size; + Tensor seed = test_case.seed; + Tensor seed2 = test_case.seed2; + gtl::InlinedVector inputs( + 
{&range_dataset_tensor, &buffer_size, &seed, &seed2}); + + std::unique_ptr shuffle_dataset_context; + TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), + &inputs, &shuffle_dataset_context)); + DatasetBase* shuffle_dataset; + TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), + shuffle_dataset_context.get(), &shuffle_dataset)); + core::ScopedUnref scoped_unref_shuffle_dataset(shuffle_dataset); + + TF_EXPECT_OK(VerifyTypesMatch(shuffle_dataset->output_dtypes(), + test_case.expected_output_dtypes)); +} + +TEST_P(ParameterizedShuffleDatasetOpTest, DatasetOutputShapes) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr shuffle_dataset_kernel; + TF_ASSERT_OK(CreateShuffleDatasetOpKernel( + test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, + test_case.expected_output_shapes, &shuffle_dataset_kernel)); + + DatasetBase* range_dataset; + TF_ASSERT_OK(CreateRangeDataset( + test_case.range_data_param.start, test_case.range_data_param.end, + test_case.range_data_param.step, "range", &range_dataset)); + Tensor range_dataset_tensor(DT_VARIANT, TensorShape({})); + TF_ASSERT_OK( + StoreDatasetInVariantTensor(range_dataset, &range_dataset_tensor)); + Tensor buffer_size = test_case.buffer_size; + Tensor seed = test_case.seed; + Tensor seed2 = test_case.seed2; + gtl::InlinedVector inputs( + {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + + std::unique_ptr shuffle_dataset_context; + TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), + &inputs, &shuffle_dataset_context)); + DatasetBase* shuffle_dataset; + TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), + shuffle_dataset_context.get(), &shuffle_dataset)); + core::ScopedUnref scoped_unref_shuffle_dataset(shuffle_dataset); + + TF_EXPECT_OK(VerifyShapesCompatible(shuffle_dataset->output_shapes(), + 
test_case.expected_output_shapes)); +} + +TEST_P(ParameterizedShuffleDatasetOpTest, Cardinality) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr shuffle_dataset_kernel; + TF_ASSERT_OK(CreateShuffleDatasetOpKernel( + test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, + test_case.expected_output_shapes, &shuffle_dataset_kernel)); + + DatasetBase* range_dataset; + TF_ASSERT_OK(CreateRangeDataset( + test_case.range_data_param.start, test_case.range_data_param.end, + test_case.range_data_param.step, "range", &range_dataset)); + Tensor range_dataset_tensor(DT_VARIANT, TensorShape({})); + TF_ASSERT_OK( + StoreDatasetInVariantTensor(range_dataset, &range_dataset_tensor)); + Tensor buffer_size = test_case.buffer_size; + Tensor seed = test_case.seed; + Tensor seed2 = test_case.seed2; + gtl::InlinedVector inputs( + {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + + std::unique_ptr shuffle_dataset_context; + TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), + &inputs, &shuffle_dataset_context)); + DatasetBase* shuffle_dataset; + TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), + shuffle_dataset_context.get(), &shuffle_dataset)); + core::ScopedUnref scoped_unref_shuffle_dataset(shuffle_dataset); + + TF_EXPECT_OK(VerifyShapesCompatible(shuffle_dataset->output_shapes(), + test_case.expected_output_shapes)); +} + +TEST_P(ParameterizedShuffleDatasetOpTest, DatasetSave) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr shuffle_dataset_kernel; + TF_ASSERT_OK(CreateShuffleDatasetOpKernel( + test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, + test_case.expected_output_shapes, &shuffle_dataset_kernel)); + + DatasetBase* 
range_dataset; + TF_ASSERT_OK(CreateRangeDataset( + test_case.range_data_param.start, test_case.range_data_param.end, + test_case.range_data_param.step, "range", &range_dataset)); + Tensor range_dataset_tensor(DT_VARIANT, TensorShape({})); + TF_ASSERT_OK( + StoreDatasetInVariantTensor(range_dataset, &range_dataset_tensor)); + Tensor buffer_size = test_case.buffer_size; + Tensor seed = test_case.seed; + Tensor seed2 = test_case.seed2; + gtl::InlinedVector inputs( + {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + + std::unique_ptr shuffle_dataset_context; + TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), + &inputs, &shuffle_dataset_context)); + DatasetBase* shuffle_dataset; + TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), + shuffle_dataset_context.get(), &shuffle_dataset)); + core::ScopedUnref scoped_unref_shuffle_dataset(shuffle_dataset); + + std::unique_ptr serialization_context; + TF_ASSERT_OK(CreateSerializationContext(&serialization_context)); + VariantTensorData data; + VariantTensorDataWriter writer(&data); + TF_ASSERT_OK(shuffle_dataset->Save(serialization_context.get(), &writer)); + TF_ASSERT_OK(writer.Flush()); +} + +TEST_P(ParameterizedShuffleDatasetOpTest, IteratorOutputDtypes) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr shuffle_dataset_kernel; + TF_ASSERT_OK(CreateShuffleDatasetOpKernel( + test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, + test_case.expected_output_shapes, &shuffle_dataset_kernel)); + + DatasetBase* range_dataset; + TF_ASSERT_OK(CreateRangeDataset( + test_case.range_data_param.start, test_case.range_data_param.end, + test_case.range_data_param.step, "range", &range_dataset)); + Tensor range_dataset_tensor(DT_VARIANT, TensorShape({})); + TF_ASSERT_OK( + StoreDatasetInVariantTensor(range_dataset, &range_dataset_tensor)); + Tensor 
buffer_size = test_case.buffer_size; + Tensor seed = test_case.seed; + Tensor seed2 = test_case.seed2; + gtl::InlinedVector inputs( + {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + + std::unique_ptr shuffle_dataset_context; + TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), + &inputs, &shuffle_dataset_context)); + DatasetBase* shuffle_dataset; + TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), + shuffle_dataset_context.get(), &shuffle_dataset)); + core::ScopedUnref scoped_unref_shuffle_dataset(shuffle_dataset); + + std::unique_ptr iterator_ctx; + TF_ASSERT_OK( + CreateIteratorContext(shuffle_dataset_context.get(), &iterator_ctx)); + std::unique_ptr iterator; + TF_ASSERT_OK( + shuffle_dataset->MakeIterator(iterator_ctx.get(), "Iterator", &iterator)); + + TF_EXPECT_OK(VerifyTypesMatch(iterator->output_dtypes(), + test_case.expected_output_dtypes)); +} + +TEST_P(ParameterizedShuffleDatasetOpTest, IteratorOutputShapes) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr shuffle_dataset_kernel; + TF_ASSERT_OK(CreateShuffleDatasetOpKernel( + test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, + test_case.expected_output_shapes, &shuffle_dataset_kernel)); + + DatasetBase* range_dataset; + TF_ASSERT_OK(CreateRangeDataset( + test_case.range_data_param.start, test_case.range_data_param.end, + test_case.range_data_param.step, "range", &range_dataset)); + Tensor range_dataset_tensor(DT_VARIANT, TensorShape({})); + TF_ASSERT_OK( + StoreDatasetInVariantTensor(range_dataset, &range_dataset_tensor)); + Tensor buffer_size = test_case.buffer_size; + Tensor seed = test_case.seed; + Tensor seed2 = test_case.seed2; + gtl::InlinedVector inputs( + {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + + std::unique_ptr shuffle_dataset_context; + 
TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), + &inputs, &shuffle_dataset_context)); + DatasetBase* shuffle_dataset; + TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), + shuffle_dataset_context.get(), &shuffle_dataset)); + core::ScopedUnref scoped_unref_shuffle_dataset(shuffle_dataset); + + std::unique_ptr iterator_ctx; + TF_ASSERT_OK( + CreateIteratorContext(shuffle_dataset_context.get(), &iterator_ctx)); + std::unique_ptr iterator; + TF_ASSERT_OK( + shuffle_dataset->MakeIterator(iterator_ctx.get(), "Iterator", &iterator)); + + TF_EXPECT_OK(VerifyShapesCompatible(iterator->output_shapes(), + test_case.expected_output_shapes)); +} + +TEST_F(ShuffleDatasetOpTest, IteratorOutputPrefix) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = TestCase1(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr shuffle_dataset_kernel; + TF_ASSERT_OK(CreateShuffleDatasetOpKernel( + test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, + test_case.expected_output_shapes, &shuffle_dataset_kernel)); + + DatasetBase* range_dataset; + TF_ASSERT_OK(CreateRangeDataset( + test_case.range_data_param.start, test_case.range_data_param.end, + test_case.range_data_param.step, "range", &range_dataset)); + Tensor range_dataset_tensor(DT_VARIANT, TensorShape({})); + TF_ASSERT_OK( + StoreDatasetInVariantTensor(range_dataset, &range_dataset_tensor)); + Tensor buffer_size = test_case.buffer_size; + Tensor seed = test_case.seed; + Tensor seed2 = test_case.seed2; + gtl::InlinedVector inputs( + {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + + std::unique_ptr shuffle_dataset_context; + TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), + &inputs, &shuffle_dataset_context)); + DatasetBase* shuffle_dataset; + TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), + shuffle_dataset_context.get(), &shuffle_dataset)); + core::ScopedUnref 
scoped_unref_shuffle_dataset(shuffle_dataset); + + std::unique_ptr iterator_ctx; + TF_ASSERT_OK( + CreateIteratorContext(shuffle_dataset_context.get(), &iterator_ctx)); + std::unique_ptr iterator; + TF_ASSERT_OK( + shuffle_dataset->MakeIterator(iterator_ctx.get(), "Iterator", &iterator)); + + EXPECT_EQ(iterator->prefix(), "Iterator::Shuffle"); +} + +TEST_P(ParameterizedShuffleDatasetOpTest, Roundtrip) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = GetParam(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr shuffle_dataset_kernel; + TF_ASSERT_OK(CreateShuffleDatasetOpKernel( + test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, + test_case.expected_output_shapes, &shuffle_dataset_kernel)); + + DatasetBase* range_dataset; + TF_ASSERT_OK(CreateRangeDataset( + test_case.range_data_param.start, test_case.range_data_param.end, + test_case.range_data_param.step, "range", &range_dataset)); + Tensor range_dataset_tensor(DT_VARIANT, TensorShape({})); + TF_ASSERT_OK( + StoreDatasetInVariantTensor(range_dataset, &range_dataset_tensor)); + Tensor buffer_size = test_case.buffer_size; + Tensor seed = test_case.seed; + Tensor seed2 = test_case.seed2; + gtl::InlinedVector inputs( + {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + + std::unique_ptr shuffle_dataset_context; + TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), + &inputs, &shuffle_dataset_context)); + DatasetBase* shuffle_dataset; + TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), + shuffle_dataset_context.get(), &shuffle_dataset)); + core::ScopedUnref scoped_unref_shuffle_dataset(shuffle_dataset); + + std::unique_ptr iterator_ctx; + TF_ASSERT_OK( + CreateIteratorContext(shuffle_dataset_context.get(), &iterator_ctx)); + std::unique_ptr iterator; + TF_ASSERT_OK( + shuffle_dataset->MakeIterator(iterator_ctx.get(), "Iterator", &iterator)); + + std::unique_ptr serialization_ctx; + 
TF_ASSERT_OK(CreateSerializationContext(&serialization_ctx)); + + bool end_of_sequence = false; + std::vector out_tensors; + int cur_iteration = 0; + const std::vector& breakpoints = test_case.breakpoints; + for (int breakpoint : breakpoints) { + VariantTensorData data; + VariantTensorDataWriter writer(&data); + TF_EXPECT_OK(iterator->Save(serialization_ctx.get(), &writer)); + TF_EXPECT_OK(writer.Flush()); + VariantTensorDataReader reader(&data); + TF_EXPECT_OK(RestoreIterator(iterator_ctx.get(), &reader, "Iterator", + *shuffle_dataset, &iterator)); + + while (cur_iteration <= breakpoint) { + std::vector next; + TF_EXPECT_OK( + iterator->GetNext(iterator_ctx.get(), &next, &end_of_sequence)); + out_tensors.insert(out_tensors.end(), next.begin(), next.end()); + cur_iteration++; + } + } + + // When `buffer_size = 1`, the output sequence of `ShuffleDataset` will be in + // order, so we need to consider the element sequence when evaluating the + // result for this case. + bool expect_items_equal = test_case.buffer_size.flat()(0) > 1; + TF_EXPECT_OK(ExpectEqual(out_tensors, test_case.expected_outputs, + /*expect_items_equal*/ expect_items_equal)); +} + +INSTANTIATE_TEST_SUITE_P(ShuffleDatasetOpTest, + ParameterizedShuffleDatasetOpTest, + ::testing::ValuesIn(std::vector( + {TestCase1(), TestCase2(), TestCase3()}))); + +TEST_F(ShuffleDatasetOpTest, InvalidBufferSize) { + int thread_num = 2, cpu_num = 2; + TestCase test_case = InvalidBufferSizeTestCase(); + TF_ASSERT_OK(InitThreadPool(thread_num)); + TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); + + std::unique_ptr shuffle_dataset_kernel; + TF_ASSERT_OK(CreateShuffleDatasetOpKernel( + test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, + test_case.expected_output_shapes, &shuffle_dataset_kernel)); + + DatasetBase* range_dataset; + TF_ASSERT_OK(CreateRangeDataset( + test_case.range_data_param.start, test_case.range_data_param.end, + test_case.range_data_param.step, "range", &range_dataset)); + 
Tensor range_dataset_tensor(DT_VARIANT, TensorShape({})); + TF_ASSERT_OK( + StoreDatasetInVariantTensor(range_dataset, &range_dataset_tensor)); + Tensor buffer_size = test_case.buffer_size; + Tensor seed = test_case.seed; + Tensor seed2 = test_case.seed2; + gtl::InlinedVector inputs( + {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + + std::unique_ptr shuffle_dataset_context; + TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), + &inputs, &shuffle_dataset_context)); + DatasetBase* shuffle_dataset; + EXPECT_EQ(CreateDataset(shuffle_dataset_kernel.get(), + shuffle_dataset_context.get(), &shuffle_dataset) + .code(), + tensorflow::error::INVALID_ARGUMENT); +} + +} // namespace +} // namespace data +} // namespace tensorflow From 1fa2e34d1156f4edf351481e23da78d405e3db9b Mon Sep 17 00:00:00 2001 From: Fei Hu Date: Mon, 22 Apr 2019 13:57:59 -0700 Subject: [PATCH 3/6] Refactor ExpectEqual() function 1. Rename the argument from `expect_items_equal` to `compare_order` 2. Enable the function to return tensorflow::errors::Internal if the two input tensors (or tensor vectors) are not equal. --- .../core/kernels/data/dataset_test_base.cc | 41 +++++++++++++++---- .../core/kernels/data/dataset_test_base.h | 4 +- .../parallel_interleave_dataset_op_test.cc | 4 +- .../data/parallel_map_dataset_op_test.cc | 4 +- 4 files changed, 40 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/kernels/data/dataset_test_base.cc b/tensorflow/core/kernels/data/dataset_test_base.cc index 54de6888508..6765a5af74d 100644 --- a/tensorflow/core/kernels/data/dataset_test_base.cc +++ b/tensorflow/core/kernels/data/dataset_test_base.cc @@ -18,12 +18,39 @@ limitations under the License. namespace tensorflow { namespace data { +template +Status IsEqual(const Tensor& t1, const Tensor& t2) { + if (t1.dtype() != t2.dtype()) { + return tensorflow::errors::Internal( + "Two tensors have different dtypes: ", DataTypeString(t1.dtype()), + " vs. 
", DataTypeString(t2.dtype())); + } + if (!t1.IsSameSize(t2)) { + return tensorflow::errors::Internal( + "Two tensors have different shapes: ", t1.shape().DebugString(), + " vs. ", t2.shape().DebugString()); + } + + auto flat_t1 = t1.flat(); + auto flat_t2 = t2.flat(); + auto length = flat_t1.size(); + + for (int i = 0; i < length; ++i) { + if (flat_t1(i) != flat_t2(i)) { + return tensorflow::errors::Internal( + "Two tensors have different values " + "at [", + i, "]: ", flat_t1(i), " vs. ", flat_t2(i)); + } + } + return Status::OK(); +} + Status DatasetOpsTestBase::ExpectEqual(const Tensor& a, const Tensor& b) { - EXPECT_EQ(a.dtype(), b.dtype()); switch (a.dtype()) { -#define CASE(type) \ - case DataTypeToEnum::value: \ - test::ExpectTensorEqual(a, b); \ +#define CASE(DT) \ + case DataTypeToEnum
<DT>::value: \ + TF_RETURN_IF_ERROR(IsEqual<DT>
(a, b)); \ break; TF_CALL_NUMBER_TYPES(CASE); TF_CALL_string(CASE); @@ -36,7 +63,7 @@ Status DatasetOpsTestBase::ExpectEqual(const Tensor& a, const Tensor& b) { } template -bool compare(Tensor t1, Tensor t2) { +bool compare(const Tensor& t1, const Tensor& t2) { auto flat_t1 = t1.flat(); auto flat_t2 = t2.flat(); auto length = std::min(flat_t1.size(), flat_t2.size()); @@ -49,7 +76,7 @@ bool compare(Tensor t1, Tensor t2) { Status DatasetOpsTestBase::ExpectEqual(std::vector produced_tensors, std::vector expected_tensors, - bool expect_items_equal) { + bool compare_order) { if (produced_tensors.size() != expected_tensors.size()) { return Status(tensorflow::errors::Internal( "The two tensor vectors have different size (", produced_tensors.size(), @@ -64,7 +91,7 @@ Status DatasetOpsTestBase::ExpectEqual(std::vector produced_tensors, ")")); } - if (expect_items_equal) { + if (!compare_order) { const DataType& dtype = produced_tensors[0].dtype(); switch (dtype) { #define CASE(DT) \ diff --git a/tensorflow/core/kernels/data/dataset_test_base.h b/tensorflow/core/kernels/data/dataset_test_base.h index ca2be6b9258..803ae9055a1 100644 --- a/tensorflow/core/kernels/data/dataset_test_base.h +++ b/tensorflow/core/kernels/data/dataset_test_base.h @@ -52,11 +52,11 @@ class DatasetOpsTestBase : public ::testing::Test { static Status ExpectEqual(const Tensor& a, const Tensor& b); // The method validates whether the two tensor vectors have the same tensors. - // If `expect_items_equal` is true, the method will only evaluate the two + // If `compare_order` is false, the method will only evaluate the two // vectors have the same elements regardless of order. static Status ExpectEqual(std::vector produced_tensors, std::vector expected_tensors, - bool expect_items_equal); + bool compare_order); // Creates a tensor with the specified dtype, shape, and value. 
template diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op_test.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op_test.cc index 3c3d1dec2b0..6f30cce3fe1 100644 --- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op_test.cc +++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op_test.cc @@ -494,7 +494,7 @@ TEST_P(ParameterizedParallelInterleaveDatasetOpTest, GetNext) { } TF_EXPECT_OK(ExpectEqual(out_tensors, test_case.expected_outputs, - /*expect_items_equal*/ test_case.sloppy)); + /*compare_order*/ !test_case.sloppy)); } TEST_F(ParallelInterleaveDatasetOpTest, InvalidArguments) { @@ -949,7 +949,7 @@ TEST_P(ParameterizedParallelInterleaveDatasetOpTest, Roundtrip) { } TF_EXPECT_OK(ExpectEqual(out_tensors, test_case.expected_outputs, - /*expect_items_equal*/ test_case.sloppy)); + /*compare_order*/ !test_case.sloppy)); } INSTANTIATE_TEST_SUITE_P( diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op_test.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op_test.cc index dc1ff9f5094..abb6e81aff6 100644 --- a/tensorflow/core/kernels/data/parallel_map_dataset_op_test.cc +++ b/tensorflow/core/kernels/data/parallel_map_dataset_op_test.cc @@ -334,7 +334,7 @@ TEST_P(ParameterizedParallelMapDatasetOpTest, GetNext) { } TF_EXPECT_OK(ExpectEqual(out_tensors, test_case.expected_outputs, - /*expect_items_equal*/ test_case.sloppy)); + /*compare_order*/ !test_case.sloppy)); } TEST_F(ParallelMapDatasetOpTest, DatasetNodeName) { @@ -769,7 +769,7 @@ TEST_P(ParameterizedParallelMapDatasetOpTest, Roundtrip) { } TF_EXPECT_OK(ExpectEqual(out_tensors, test_case.expected_outputs, - /*expect_items_equal*/ test_case.sloppy)); + /*compare_order*/ !test_case.sloppy)); } TEST_F(ParallelMapDatasetOpTest, InvalidNumParallelCalls) { From 6702143f14d1fbf21a3d556ac50c29f96c3fd024 Mon Sep 17 00:00:00 2001 From: Fei Hu Date: Mon, 22 Apr 2019 19:42:13 -0700 Subject: [PATCH 4/6] Improve the Cardinality function and validate the 
input count --- tensorflow/core/kernels/data/shuffle_dataset_op.cc | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc index f426e3cc465..287a7c946c0 100644 --- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc +++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc @@ -63,7 +63,15 @@ class ShuffleDatasetOpBase : public UnaryDatasetOpKernel { return input_->output_shapes(); } - int64 Cardinality() const override { return input_->Cardinality(); } + int64 Cardinality() const override { + if (count_ == -1 || input_->Cardinality() == kInfiniteCardinality) { + return kInfiniteCardinality; + } else if (input_->Cardinality() == kUnknownCardinality) { + return kUnknownCardinality; + } else { + return input_->Cardinality() * count_; + } + } protected: template @@ -645,6 +653,10 @@ class ShuffleAndRepeatDatasetOp : public ShuffleDatasetOpBase { int64 count; OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "count", &count)); + OP_REQUIRES(ctx, count > 0 || count == -1, + errors::InvalidArgument( + "count must be greater than zero or equal to -1.")); + // By TensorFlow convention, if both seeds are 0, then shuffling should be // seeded non-deterministically. 
if (seed == 0 && seed2 == 0) { From d469a23e1e9950e4285fabd542f4348649eafe7e Mon Sep 17 00:00:00 2001 From: Fei Hu Date: Mon, 22 Apr 2019 19:43:24 -0700 Subject: [PATCH 5/6] Enhance ShuffleDataset tests and add tests for ShuffleAndRepeatDataset --- .../kernels/data/shuffle_dataset_op_test.cc | 690 ++++++++++++------ 1 file changed, 470 insertions(+), 220 deletions(-) diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op_test.cc b/tensorflow/core/kernels/data/shuffle_dataset_op_test.cc index dbf8c9a5a70..38b93f13808 100644 --- a/tensorflow/core/kernels/data/shuffle_dataset_op_test.cc +++ b/tensorflow/core/kernels/data/shuffle_dataset_op_test.cc @@ -16,30 +16,41 @@ namespace tensorflow { namespace data { namespace { -constexpr char kNodeName[] = "shuffle_dataset"; -constexpr char kOpName[] = "ShuffleDataset"; +constexpr char kShuffleNodeName[] = "shuffle_dataset"; +constexpr char kShuffleOpName[] = "ShuffleDataset"; +constexpr char kShuffleAndRepeatNodeName[] = "shuffle_and_repeat_dataset"; +constexpr char kShuffleAndRepeatOpName[] = "ShuffleAndRepeatDataset"; class ShuffleDatasetOpTest : public DatasetOpsTestBase { protected: - // Creates a new `ShuffleDataset` op kernel - Status CreateShuffleDatasetOpKernel( - bool reshuffle_each_iteration, const DataTypeVector& output_types, + // Creates a new `ShuffleDataset`/`ShuffleAndRepeatDataset` op kernel + Status CreateDatasetOpKernel( + int64 count, bool reshuffle_each_iteration, + const DataTypeVector& output_types, const std::vector& output_shapes, std::unique_ptr* shuffle_dataset_kernel) { - NodeDef node_def = test::function::NDef( - kNodeName, kOpName, {"input_dataset", "buffer_size", "seed", "seed2"}, - {{"reshuffle_each_iteration", reshuffle_each_iteration}, - {"output_types", output_types}, - {"output_shapes", output_shapes}}); + NodeDef node_def; + if (count == 1) { + node_def = test::function::NDef( + kShuffleNodeName, kShuffleOpName, + {"input_dataset", "buffer_size", "seed", "seed2"}, + 
{{"reshuffle_each_iteration", reshuffle_each_iteration}, + {"output_types", output_types}, + {"output_shapes", output_shapes}}); + } else { + node_def = test::function::NDef( + kShuffleAndRepeatNodeName, kShuffleAndRepeatOpName, + {"input_dataset", "buffer_size", "seed", "seed2", "count"}, + {{"output_types", output_types}, {"output_shapes", output_shapes}}); + } TF_RETURN_IF_ERROR(CreateOpKernel(node_def, shuffle_dataset_kernel)); return Status::OK(); } - // Creates a new `ShuffleDataset` op kernel context. - Status CreateShuffleDatasetContext( - OpKernel* const op_kernel, - gtl::InlinedVector* const inputs, - std::unique_ptr* context) { + // Creates a new `ShuffleDataset`/`ShuffleAndRepeatDataset` op kernel context. + Status CreateDatasetContext(OpKernel* const op_kernel, + gtl::InlinedVector* const inputs, + std::unique_ptr* context) { TF_RETURN_IF_ERROR(CheckOpKernelInput(*op_kernel, *inputs)); TF_RETURN_IF_ERROR(CreateOpKernelContext(op_kernel, inputs, context)); return Status::OK(); @@ -57,8 +68,10 @@ struct TestCase { Tensor buffer_size; Tensor seed; Tensor seed2; + Tensor count; bool reshuffle_each_iteration; - std::vector expected_outputs; + std::vector expected_shuffle_outputs; + std::vector expected_reshuffle_outputs; DataTypeVector expected_output_dtypes; std::vector expected_output_shapes; int64 expected_cardinality; @@ -76,7 +89,7 @@ std::vector ConvertToTensorVec(std::vector values) { return tensors; } -// Test case 1: normal case with reshuffle_each_iteration = false +// Test case 1: test shuffle_dataset with reshuffle_each_iteration = false. 
TestCase TestCase1() { return { /*range_data_param*/ {0, 10, 1}, @@ -84,16 +97,19 @@ TestCase TestCase1() { DatasetOpsTestBase::CreateTensor(TensorShape({}), {3}), /*seed*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), /*seed2*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {2}), + /*count*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), /*reshuffle_each_iteration*/ false, - /*expected_outputs*/ - ConvertToTensorVec({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), + /*expected_shuffle_outputs*/ + ConvertToTensorVec({2, 3, 0, 5, 6, 4, 7, 8, 9, 1}), + /*expected_reshuffle_outputs*/ + ConvertToTensorVec({2, 3, 0, 5, 6, 4, 7, 8, 9, 1}), /*expected_output_dtypes*/ {DT_INT64}, /*expected_output_shapes*/ {PartialTensorShape({})}, /*expected_cardinality*/ 10, /*breakpoints*/ {0, 1, 9}}; } -// Test case 2: normal case with reshuffle_each_iteration = true +// Test case 2: test shuffle_dataset with reshuffle_each_iteration = true. TestCase TestCase2() { return { /*range_data_param*/ {0, 10, 1}, @@ -101,26 +117,73 @@ TestCase TestCase2() { DatasetOpsTestBase::CreateTensor(TensorShape({}), {10}), /*seed*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), /*seed2*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {2}), + /*count*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), /*reshuffle_each_iteration*/ true, - /*expected_outputs*/ - ConvertToTensorVec({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), + /*expected_shuffle_outputs*/ + ConvertToTensorVec({2, 6, 1, 3, 9, 5, 0, 8, 7, 4}), + /*expected_reshuffle_outputs*/ + ConvertToTensorVec({1, 6, 0, 5, 2, 7, 4, 3, 9, 8}), /*expected_output_dtypes*/ {DT_INT64}, /*expected_output_shapes*/ {PartialTensorShape({})}, /*expected_cardinality*/ 10, /*breakpoints*/ {0, 1, 9}}; } -// Test case 3: special case with buffer_size = 1 & -// reshuffle_each_iteration = true +// Test case 3: similar with the test case 2 but a smaller buffer size than +// the input dataset. 
TestCase TestCase3() { + return { + /*range_data_param*/ {0, 10, 1}, + /*buffer_size*/ + DatasetOpsTestBase::CreateTensor(TensorShape({}), {2}), + /*seed*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), + /*seed2*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {2}), + /*count*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), + /*reshuffle_each_iteration*/ true, + /*expected_shuffle_outputs*/ + ConvertToTensorVec({0, 2, 1, 3, 5, 6, 4, 7, 8, 9}), + /*expected_reshuffle_outputs*/ + ConvertToTensorVec({1, 0, 2, 3, 4, 5, 6, 7, 9, 8}), + /*expected_output_dtypes*/ {DT_INT64}, + /*expected_output_shapes*/ {PartialTensorShape({})}, + /*expected_cardinality*/ 10, + /*breakpoints*/ {0, 1, 9}}; +} + +// Test case 4: similar with the test case 2 but has different seeds. +TestCase TestCase4() { + return { + /*range_data_param*/ {0, 10, 1}, + /*buffer_size*/ + DatasetOpsTestBase::CreateTensor(TensorShape({}), {10}), + /*seed*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {2}), + /*seed2*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {2}), + /*count*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), + /*reshuffle_each_iteration*/ true, + /*expected_shuffle_outputs*/ + ConvertToTensorVec({3, 0, 8, 1, 5, 4, 7, 2, 6, 9}), + /*expected_reshuffle_outputs*/ + ConvertToTensorVec({4, 6, 9, 0, 1, 8, 2, 7, 3, 5}), + /*expected_output_dtypes*/ {DT_INT64}, + /*expected_output_shapes*/ {PartialTensorShape({})}, + /*expected_cardinality*/ 10, + /*breakpoints*/ {0, 1, 9}}; +} + +// Test case 5: test shuffle_dataset with buffer_size = 1 & +// reshuffle_each_iteration = true. 
+TestCase TestCase5() { return { /*range_data_param*/ {0, 10, 1}, /*buffer_size*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), /*seed*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), /*seed2*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {2}), + /*count*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), /*reshuffle_each_iteration*/ true, - /*expected_outputs*/ + /*expected_shuffle_outputs*/ + ConvertToTensorVec({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), + /*expected_reshuffle_outputs*/ ConvertToTensorVec({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), /*expected_output_dtypes*/ {DT_INT64}, /*expected_output_shapes*/ {PartialTensorShape({})}, @@ -128,21 +191,125 @@ TestCase TestCase3() { /*breakpoints*/ {0, 1, 9}}; } -TestCase InvalidBufferSizeTestCase() { +// Test case 6: test shuffle_dataset with an empty input dataset. +TestCase TestCase6() { + return { + /*range_data_param*/ {0, 0, 1}, + /*buffer_size*/ + DatasetOpsTestBase::CreateTensor(TensorShape({}), {10}), + /*seed*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), + /*seed2*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {2}), + /*count*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), + /*reshuffle_each_iteration*/ true, + /*expected_shuffle_outputs*/ + ConvertToTensorVec({}), + /*expected_reshuffle_outputs*/ + ConvertToTensorVec({}), + /*expected_output_dtypes*/ {DT_INT64}, + /*expected_output_shapes*/ {PartialTensorShape({})}, + /*expected_cardinality*/ 0, + /*breakpoints*/ {0, 1, 9}}; +} + +// Test case 7: test shuffle_and_repeat_dataset with buffer_size = 10 & +// count = 2. 
+TestCase TestCase7() { + return { + /*range_data_param*/ {0, 10, 1}, + /*buffer_size*/ + DatasetOpsTestBase::CreateTensor(TensorShape({}), {10}), + /*seed*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), + /*seed2*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {2}), + /*count*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {2}), + /*reshuffle_each_iteration*/ false, + /*expected_shuffle_outputs*/ + ConvertToTensorVec( + {9, 0, 8, 6, 1, 3, 7, 2, 4, 5, 4, 3, 0, 5, 8, 2, 6, 9, 7, 1}), + /*expected_reshuffle_outputs*/ + ConvertToTensorVec( + {9, 0, 8, 6, 1, 3, 7, 2, 4, 5, 4, 3, 0, 5, 8, 2, 6, 9, 7, 1}), + /*expected_output_dtypes*/ {DT_INT64}, + /*expected_output_shapes*/ {PartialTensorShape({})}, + /*expected_cardinality*/ 20, + /*breakpoints*/ {0, 5, 22}}; +} + +// Test case 8: test shuffle_and_repeat_dataset with buffer_size = 10 & +// count = -1 +TestCase TestCase8() { + return { + /*range_data_param*/ {0, 3, 1}, + /*buffer_size*/ + DatasetOpsTestBase::CreateTensor(TensorShape({}), {10}), + /*seed*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), + /*seed2*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {2}), + /*count*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {-1}), + /*reshuffle_each_iteration*/ false, + /*expected_shuffle_outputs*/ + ConvertToTensorVec( + {2, 0, 1, 2, 0, 1, 1, 2, 0, 1, 0, 2, 2, 0, 1, 1, 0, 2, 2, 1, 0}), + /*expected_reshuffle_outputs*/ + ConvertToTensorVec( + {2, 0, 1, 2, 0, 1, 1, 2, 0, 1, 0, 2, 2, 0, 1, 1, 0, 2, 2, 1, 0}), + /*expected_output_dtypes*/ {DT_INT64}, + /*expected_output_shapes*/ {PartialTensorShape({})}, + /*expected_cardinality*/ kInfiniteCardinality, + /*breakpoints*/ {0, 5, 20}}; +} + +TestCase InvalidBufferSizeTestCaseForShuffleDataset() { return { /*range_data_param*/ {0, 10, 1}, /*buffer_size*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {-1}), /*seed*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), /*seed2*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {2}), + 
/*count*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), /*reshuffle_each_iteration*/ true, - /*expected_outputs*/ ConvertToTensorVec({}), + /*expected_shuffle_outputs*/ ConvertToTensorVec({}), + /*expected_reshuffle_outputs*/ ConvertToTensorVec({}), /*expected_output_dtypes*/ {DT_INT64}, /*expected_output_shapes*/ {PartialTensorShape({})}, - /*expected_cardinality*/ 10, + /*expected_cardinality*/ 0, /*breakpoints*/ {0, 1, 9}}; } +TestCase InvalidBufferSizeTestCaseForShuffleAndRepeatDataset() { + return { + /*range_data_param*/ {0, 10, 1}, + /*buffer_size*/ + DatasetOpsTestBase::CreateTensor(TensorShape({}), {-1}), + /*seed*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), + /*seed2*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {2}), + /*count*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {2}), + /*reshuffle_each_iteration*/ true, + /*expected_shuffle_outputs*/ ConvertToTensorVec({}), + /*expected_reshuffle_outputs*/ ConvertToTensorVec({}), + /*expected_output_dtypes*/ {DT_INT64}, + /*expected_output_shapes*/ {PartialTensorShape({})}, + /*expected_cardinality*/ 0, + /*breakpoints*/ {0, 1, 9}}; +} + +TestCase InvalidCountTestCaseForShuffleAndRepeatDataset() { + return { + /*range_data_param*/ {0, 3, 1}, + /*buffer_size*/ + DatasetOpsTestBase::CreateTensor(TensorShape({}), {10}), + /*seed*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {1}), + /*seed2*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {2}), + /*count*/ DatasetOpsTestBase::CreateTensor(TensorShape({}), {0}), + /*reshuffle_each_iteration*/ false, + /*expected_shuffle_outputs*/ + ConvertToTensorVec({}), + /*expected_reshuffle_outputs*/ + ConvertToTensorVec({}), + /*expected_output_dtypes*/ {DT_INT64}, + /*expected_output_shapes*/ {PartialTensorShape({})}, + /*expected_cardinality*/ 0, + /*breakpoints*/ {0, 5, 20}}; +} + class ParameterizedShuffleDatasetOpTest : public ShuffleDatasetOpTest, public ::testing::WithParamInterface {}; @@ -153,10 +320,13 @@ 
TEST_P(ParameterizedShuffleDatasetOpTest, GetNext) { TF_ASSERT_OK(InitThreadPool(thread_num)); TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); - std::unique_ptr shuffle_dataset_kernel; - TF_ASSERT_OK(CreateShuffleDatasetOpKernel( - test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, - test_case.expected_output_shapes, &shuffle_dataset_kernel)); + Tensor count = test_case.count; + int64 count_value = count.flat()(0); + std::unique_ptr dataset_kernel; + TF_ASSERT_OK( + CreateDatasetOpKernel(count_value, test_case.reshuffle_each_iteration, + test_case.expected_output_dtypes, + test_case.expected_output_shapes, &dataset_kernel)); DatasetBase* range_dataset; TF_ASSERT_OK(CreateRangeDataset( @@ -170,49 +340,78 @@ TEST_P(ParameterizedShuffleDatasetOpTest, GetNext) { Tensor seed2 = test_case.seed2; gtl::InlinedVector inputs( {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + if (count_value != 1) inputs.push_back(&count); - std::unique_ptr shuffle_dataset_context; - TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), - &inputs, &shuffle_dataset_context)); - DatasetBase* shuffle_dataset; - TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), - shuffle_dataset_context.get(), &shuffle_dataset)); - core::ScopedUnref scoped_unref_shuffle_dataset(shuffle_dataset); + std::unique_ptr dataset_context; + TF_ASSERT_OK( + CreateDatasetContext(dataset_kernel.get(), &inputs, &dataset_context)); + DatasetBase* dataset; + TF_ASSERT_OK( + CreateDataset(dataset_kernel.get(), dataset_context.get(), &dataset)); + core::ScopedUnref scoped_unref_dataset(dataset); std::unique_ptr iterator_ctx; - TF_ASSERT_OK( - CreateIteratorContext(shuffle_dataset_context.get(), &iterator_ctx)); + TF_ASSERT_OK(CreateIteratorContext(dataset_context.get(), &iterator_ctx)); std::unique_ptr iterator; TF_ASSERT_OK( - shuffle_dataset->MakeIterator(iterator_ctx.get(), "Iterator", &iterator)); + dataset->MakeIterator(iterator_ctx.get(), "Iterator", &iterator)); bool 
end_of_sequence = false; - std::vector out_tensors; + std::vector shuffled_out_tensors; while (!end_of_sequence) { std::vector next; TF_EXPECT_OK( iterator->GetNext(iterator_ctx.get(), &next, &end_of_sequence)); - out_tensors.insert(out_tensors.end(), next.begin(), next.end()); + shuffled_out_tensors.insert(shuffled_out_tensors.end(), next.begin(), + next.end()); + // For the forever-repeat case, we test only a finite number of steps of + // the infinite sequence. + if (count_value == -1 && shuffled_out_tensors.size() == + test_case.expected_shuffle_outputs.size()) { + break; + } } - // When `buffer_size = 1`, the output sequence of `ShuffleDataset` will be in - // order, so we need to consider the element sequence when evaluating the - // result for this case. - bool expect_items_equal = test_case.buffer_size.flat()(0) > 1; - TF_EXPECT_OK(ExpectEqual(out_tensors, test_case.expected_outputs, - /*expect_items_equal*/ expect_items_equal)); + // Reshuffle the dataset. + end_of_sequence = false; + TF_ASSERT_OK( + dataset->MakeIterator(iterator_ctx.get(), "Iterator", &iterator)); + std::vector reshuffled_out_tensors; + while (!end_of_sequence) { + std::vector next; + TF_EXPECT_OK( + iterator->GetNext(iterator_ctx.get(), &next, &end_of_sequence)); + reshuffled_out_tensors.insert(reshuffled_out_tensors.end(), next.begin(), + next.end()); + // For the forever-repeat case, we test only a finite number of steps of + // the infinite sequence. 
+ if (count_value == -1 && reshuffled_out_tensors.size() == + test_case.expected_shuffle_outputs.size()) { + break; + } + } + + TF_EXPECT_OK(ExpectEqual(shuffled_out_tensors, + test_case.expected_shuffle_outputs, + /*compare_order*/ true)); + TF_EXPECT_OK(ExpectEqual(reshuffled_out_tensors, + test_case.expected_reshuffle_outputs, + /*compare_order*/ true)); } -TEST_F(ShuffleDatasetOpTest, DatasetNodeName) { +TEST_P(ParameterizedShuffleDatasetOpTest, DatasetNodeName) { int thread_num = 2, cpu_num = 2; - TestCase test_case = TestCase1(); + TestCase test_case = GetParam(); TF_ASSERT_OK(InitThreadPool(thread_num)); TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); - std::unique_ptr shuffle_dataset_kernel; - TF_ASSERT_OK(CreateShuffleDatasetOpKernel( - test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, - test_case.expected_output_shapes, &shuffle_dataset_kernel)); + Tensor count = test_case.count; + int64 count_value = count.flat()(0); + std::unique_ptr dataset_kernel; + TF_ASSERT_OK( + CreateDatasetOpKernel(count_value, test_case.reshuffle_each_iteration, + test_case.expected_output_dtypes, + test_case.expected_output_shapes, &dataset_kernel)); DatasetBase* range_dataset; TF_ASSERT_OK(CreateRangeDataset( @@ -226,28 +425,36 @@ TEST_F(ShuffleDatasetOpTest, DatasetNodeName) { Tensor seed2 = test_case.seed2; gtl::InlinedVector inputs( {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + if (count_value != 1) inputs.push_back(&count); - std::unique_ptr shuffle_dataset_context; - TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), - &inputs, &shuffle_dataset_context)); - DatasetBase* shuffle_dataset; - TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), - shuffle_dataset_context.get(), &shuffle_dataset)); - core::ScopedUnref scoped_unref_shuffle_dataset(shuffle_dataset); + std::unique_ptr dataset_context; + TF_ASSERT_OK( + CreateDatasetContext(dataset_kernel.get(), &inputs, &dataset_context)); + DatasetBase* dataset; + 
TF_ASSERT_OK( + CreateDataset(dataset_kernel.get(), dataset_context.get(), &dataset)); + core::ScopedUnref scoped_unref_dataset(dataset); - EXPECT_EQ(shuffle_dataset->node_name(), kNodeName); + if (count_value == 1) { + EXPECT_EQ(dataset->node_name(), kShuffleNodeName); + } else { + EXPECT_EQ(dataset->node_name(), kShuffleAndRepeatNodeName); + } } -TEST_F(ShuffleDatasetOpTest, DatasetTypeString) { +TEST_P(ParameterizedShuffleDatasetOpTest, DatasetTypeString) { int thread_num = 2, cpu_num = 2; - TestCase test_case = TestCase1(); + TestCase test_case = GetParam(); TF_ASSERT_OK(InitThreadPool(thread_num)); TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); - std::unique_ptr shuffle_dataset_kernel; - TF_ASSERT_OK(CreateShuffleDatasetOpKernel( - test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, - test_case.expected_output_shapes, &shuffle_dataset_kernel)); + Tensor count = test_case.count; + int64 count_value = count.flat()(0); + std::unique_ptr dataset_kernel; + TF_ASSERT_OK( + CreateDatasetOpKernel(count_value, test_case.reshuffle_each_iteration, + test_case.expected_output_dtypes, + test_case.expected_output_shapes, &dataset_kernel)); DatasetBase* range_dataset; TF_ASSERT_OK(CreateRangeDataset( @@ -261,16 +468,21 @@ TEST_F(ShuffleDatasetOpTest, DatasetTypeString) { Tensor seed2 = test_case.seed2; gtl::InlinedVector inputs( {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + if (count_value != 1) inputs.push_back(&count); - std::unique_ptr shuffle_dataset_context; - TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), - &inputs, &shuffle_dataset_context)); - DatasetBase* shuffle_dataset; - TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), - shuffle_dataset_context.get(), &shuffle_dataset)); - core::ScopedUnref scoped_unref_shuffle_dataset(shuffle_dataset); + std::unique_ptr dataset_context; + TF_ASSERT_OK( + CreateDatasetContext(dataset_kernel.get(), &inputs, &dataset_context)); + DatasetBase* dataset; + 
TF_ASSERT_OK( + CreateDataset(dataset_kernel.get(), dataset_context.get(), &dataset)); + core::ScopedUnref scoped_unref_dataset(dataset); - EXPECT_EQ(shuffle_dataset->type_string(), kOpName); + if (count_value == 1) { + EXPECT_EQ(dataset->type_string(), kShuffleOpName); + } else { + EXPECT_EQ(dataset->type_string(), kShuffleAndRepeatOpName); + } } TEST_P(ParameterizedShuffleDatasetOpTest, DatasetOutputDtypes) { @@ -279,10 +491,13 @@ TEST_P(ParameterizedShuffleDatasetOpTest, DatasetOutputDtypes) { TF_ASSERT_OK(InitThreadPool(thread_num)); TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); - std::unique_ptr shuffle_dataset_kernel; - TF_ASSERT_OK(CreateShuffleDatasetOpKernel( - test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, - test_case.expected_output_shapes, &shuffle_dataset_kernel)); + Tensor count = test_case.count; + int64 count_value = count.flat()(0); + std::unique_ptr dataset_kernel; + TF_ASSERT_OK( + CreateDatasetOpKernel(count_value, test_case.reshuffle_each_iteration, + test_case.expected_output_dtypes, + test_case.expected_output_shapes, &dataset_kernel)); DatasetBase* range_dataset; TF_ASSERT_OK(CreateRangeDataset( @@ -296,16 +511,17 @@ TEST_P(ParameterizedShuffleDatasetOpTest, DatasetOutputDtypes) { Tensor seed2 = test_case.seed2; gtl::InlinedVector inputs( {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + if (count_value != 1) inputs.push_back(&count); - std::unique_ptr shuffle_dataset_context; - TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), - &inputs, &shuffle_dataset_context)); - DatasetBase* shuffle_dataset; - TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), - shuffle_dataset_context.get(), &shuffle_dataset)); - core::ScopedUnref scoped_unref_shuffle_dataset(shuffle_dataset); + std::unique_ptr dataset_context; + TF_ASSERT_OK( + CreateDatasetContext(dataset_kernel.get(), &inputs, &dataset_context)); + DatasetBase* dataset; + TF_ASSERT_OK( + CreateDataset(dataset_kernel.get(), 
dataset_context.get(), &dataset)); + core::ScopedUnref scoped_unref_dataset(dataset); - TF_EXPECT_OK(VerifyTypesMatch(shuffle_dataset->output_dtypes(), + TF_EXPECT_OK(VerifyTypesMatch(dataset->output_dtypes(), test_case.expected_output_dtypes)); } @@ -315,10 +531,13 @@ TEST_P(ParameterizedShuffleDatasetOpTest, DatasetOutputShapes) { TF_ASSERT_OK(InitThreadPool(thread_num)); TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); - std::unique_ptr shuffle_dataset_kernel; - TF_ASSERT_OK(CreateShuffleDatasetOpKernel( - test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, - test_case.expected_output_shapes, &shuffle_dataset_kernel)); + Tensor count = test_case.count; + int64 count_value = count.flat()(0); + std::unique_ptr dataset_kernel; + TF_ASSERT_OK( + CreateDatasetOpKernel(count_value, test_case.reshuffle_each_iteration, + test_case.expected_output_dtypes, + test_case.expected_output_shapes, &dataset_kernel)); DatasetBase* range_dataset; TF_ASSERT_OK(CreateRangeDataset( @@ -332,16 +551,17 @@ TEST_P(ParameterizedShuffleDatasetOpTest, DatasetOutputShapes) { Tensor seed2 = test_case.seed2; gtl::InlinedVector inputs( {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + if (count_value != 1) inputs.push_back(&count); - std::unique_ptr shuffle_dataset_context; - TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), - &inputs, &shuffle_dataset_context)); - DatasetBase* shuffle_dataset; - TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), - shuffle_dataset_context.get(), &shuffle_dataset)); - core::ScopedUnref scoped_unref_shuffle_dataset(shuffle_dataset); + std::unique_ptr dataset_context; + TF_ASSERT_OK( + CreateDatasetContext(dataset_kernel.get(), &inputs, &dataset_context)); + DatasetBase* dataset; + TF_ASSERT_OK( + CreateDataset(dataset_kernel.get(), dataset_context.get(), &dataset)); + core::ScopedUnref scoped_unref_dataset(dataset); - TF_EXPECT_OK(VerifyShapesCompatible(shuffle_dataset->output_shapes(), + 
TF_EXPECT_OK(VerifyShapesCompatible(dataset->output_shapes(), test_case.expected_output_shapes)); } @@ -351,10 +571,13 @@ TEST_P(ParameterizedShuffleDatasetOpTest, Cardinality) { TF_ASSERT_OK(InitThreadPool(thread_num)); TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); - std::unique_ptr shuffle_dataset_kernel; - TF_ASSERT_OK(CreateShuffleDatasetOpKernel( - test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, - test_case.expected_output_shapes, &shuffle_dataset_kernel)); + Tensor count = test_case.count; + int64 count_value = count.flat()(0); + std::unique_ptr dataset_kernel; + TF_ASSERT_OK( + CreateDatasetOpKernel(count_value, test_case.reshuffle_each_iteration, + test_case.expected_output_dtypes, + test_case.expected_output_shapes, &dataset_kernel)); DatasetBase* range_dataset; TF_ASSERT_OK(CreateRangeDataset( @@ -368,17 +591,17 @@ TEST_P(ParameterizedShuffleDatasetOpTest, Cardinality) { Tensor seed2 = test_case.seed2; gtl::InlinedVector inputs( {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + if (count_value != 1) inputs.push_back(&count); - std::unique_ptr shuffle_dataset_context; - TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), - &inputs, &shuffle_dataset_context)); - DatasetBase* shuffle_dataset; - TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), - shuffle_dataset_context.get(), &shuffle_dataset)); - core::ScopedUnref scoped_unref_shuffle_dataset(shuffle_dataset); + std::unique_ptr dataset_context; + TF_ASSERT_OK( + CreateDatasetContext(dataset_kernel.get(), &inputs, &dataset_context)); + DatasetBase* dataset; + TF_ASSERT_OK( + CreateDataset(dataset_kernel.get(), dataset_context.get(), &dataset)); + core::ScopedUnref scoped_unref_dataset(dataset); - TF_EXPECT_OK(VerifyShapesCompatible(shuffle_dataset->output_shapes(), - test_case.expected_output_shapes)); + EXPECT_EQ(dataset->Cardinality(), test_case.expected_cardinality); } TEST_P(ParameterizedShuffleDatasetOpTest, DatasetSave) { @@ -387,10 
+610,13 @@ TEST_P(ParameterizedShuffleDatasetOpTest, DatasetSave) { TF_ASSERT_OK(InitThreadPool(thread_num)); TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); - std::unique_ptr shuffle_dataset_kernel; - TF_ASSERT_OK(CreateShuffleDatasetOpKernel( - test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, - test_case.expected_output_shapes, &shuffle_dataset_kernel)); + Tensor count = test_case.count; + int64 count_value = count.flat()(0); + std::unique_ptr dataset_kernel; + TF_ASSERT_OK( + CreateDatasetOpKernel(count_value, test_case.reshuffle_each_iteration, + test_case.expected_output_dtypes, + test_case.expected_output_shapes, &dataset_kernel)); DatasetBase* range_dataset; TF_ASSERT_OK(CreateRangeDataset( @@ -404,20 +630,21 @@ TEST_P(ParameterizedShuffleDatasetOpTest, DatasetSave) { Tensor seed2 = test_case.seed2; gtl::InlinedVector inputs( {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + if (count_value != 1) inputs.push_back(&count); - std::unique_ptr shuffle_dataset_context; - TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), - &inputs, &shuffle_dataset_context)); - DatasetBase* shuffle_dataset; - TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), - shuffle_dataset_context.get(), &shuffle_dataset)); - core::ScopedUnref scoped_unref_shuffle_dataset(shuffle_dataset); + std::unique_ptr dataset_context; + TF_ASSERT_OK( + CreateDatasetContext(dataset_kernel.get(), &inputs, &dataset_context)); + DatasetBase* dataset; + TF_ASSERT_OK( + CreateDataset(dataset_kernel.get(), dataset_context.get(), &dataset)); + core::ScopedUnref scoped_unref_dataset(dataset); std::unique_ptr serialization_context; TF_ASSERT_OK(CreateSerializationContext(&serialization_context)); VariantTensorData data; VariantTensorDataWriter writer(&data); - TF_ASSERT_OK(shuffle_dataset->Save(serialization_context.get(), &writer)); + TF_ASSERT_OK(dataset->Save(serialization_context.get(), &writer)); TF_ASSERT_OK(writer.Flush()); } @@ -427,10 +654,13 
@@ TEST_P(ParameterizedShuffleDatasetOpTest, IteratorOutputDtypes) { TF_ASSERT_OK(InitThreadPool(thread_num)); TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); - std::unique_ptr shuffle_dataset_kernel; - TF_ASSERT_OK(CreateShuffleDatasetOpKernel( - test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, - test_case.expected_output_shapes, &shuffle_dataset_kernel)); + Tensor count = test_case.count; + int64 count_value = count.flat()(0); + std::unique_ptr dataset_kernel; + TF_ASSERT_OK( + CreateDatasetOpKernel(count_value, test_case.reshuffle_each_iteration, + test_case.expected_output_dtypes, + test_case.expected_output_shapes, &dataset_kernel)); DatasetBase* range_dataset; TF_ASSERT_OK(CreateRangeDataset( @@ -444,21 +674,21 @@ TEST_P(ParameterizedShuffleDatasetOpTest, IteratorOutputDtypes) { Tensor seed2 = test_case.seed2; gtl::InlinedVector inputs( {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + if (count_value != 1) inputs.push_back(&count); - std::unique_ptr shuffle_dataset_context; - TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), - &inputs, &shuffle_dataset_context)); - DatasetBase* shuffle_dataset; - TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), - shuffle_dataset_context.get(), &shuffle_dataset)); - core::ScopedUnref scoped_unref_shuffle_dataset(shuffle_dataset); + std::unique_ptr dataset_context; + TF_ASSERT_OK( + CreateDatasetContext(dataset_kernel.get(), &inputs, &dataset_context)); + DatasetBase* dataset; + TF_ASSERT_OK( + CreateDataset(dataset_kernel.get(), dataset_context.get(), &dataset)); + core::ScopedUnref scoped_unref_dataset(dataset); std::unique_ptr iterator_ctx; - TF_ASSERT_OK( - CreateIteratorContext(shuffle_dataset_context.get(), &iterator_ctx)); + TF_ASSERT_OK(CreateIteratorContext(dataset_context.get(), &iterator_ctx)); std::unique_ptr iterator; TF_ASSERT_OK( - shuffle_dataset->MakeIterator(iterator_ctx.get(), "Iterator", &iterator)); + dataset->MakeIterator(iterator_ctx.get(), 
"Iterator", &iterator)); TF_EXPECT_OK(VerifyTypesMatch(iterator->output_dtypes(), test_case.expected_output_dtypes)); @@ -470,10 +700,13 @@ TEST_P(ParameterizedShuffleDatasetOpTest, IteratorOutputShapes) { TF_ASSERT_OK(InitThreadPool(thread_num)); TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); - std::unique_ptr shuffle_dataset_kernel; - TF_ASSERT_OK(CreateShuffleDatasetOpKernel( - test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, - test_case.expected_output_shapes, &shuffle_dataset_kernel)); + Tensor count = test_case.count; + int64 count_value = count.flat()(0); + std::unique_ptr dataset_kernel; + TF_ASSERT_OK( + CreateDatasetOpKernel(count_value, test_case.reshuffle_each_iteration, + test_case.expected_output_dtypes, + test_case.expected_output_shapes, &dataset_kernel)); DatasetBase* range_dataset; TF_ASSERT_OK(CreateRangeDataset( @@ -487,36 +720,39 @@ TEST_P(ParameterizedShuffleDatasetOpTest, IteratorOutputShapes) { Tensor seed2 = test_case.seed2; gtl::InlinedVector inputs( {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + if (count_value != 1) inputs.push_back(&count); - std::unique_ptr shuffle_dataset_context; - TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), - &inputs, &shuffle_dataset_context)); - DatasetBase* shuffle_dataset; - TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), - shuffle_dataset_context.get(), &shuffle_dataset)); - core::ScopedUnref scoped_unref_shuffle_dataset(shuffle_dataset); + std::unique_ptr dataset_context; + TF_ASSERT_OK( + CreateDatasetContext(dataset_kernel.get(), &inputs, &dataset_context)); + DatasetBase* dataset; + TF_ASSERT_OK( + CreateDataset(dataset_kernel.get(), dataset_context.get(), &dataset)); + core::ScopedUnref scoped_unref_dataset(dataset); std::unique_ptr iterator_ctx; - TF_ASSERT_OK( - CreateIteratorContext(shuffle_dataset_context.get(), &iterator_ctx)); + TF_ASSERT_OK(CreateIteratorContext(dataset_context.get(), &iterator_ctx)); std::unique_ptr 
iterator; TF_ASSERT_OK( - shuffle_dataset->MakeIterator(iterator_ctx.get(), "Iterator", &iterator)); + dataset->MakeIterator(iterator_ctx.get(), "Iterator", &iterator)); TF_EXPECT_OK(VerifyShapesCompatible(iterator->output_shapes(), test_case.expected_output_shapes)); } -TEST_F(ShuffleDatasetOpTest, IteratorOutputPrefix) { +TEST_P(ParameterizedShuffleDatasetOpTest, IteratorOutputPrefix) { int thread_num = 2, cpu_num = 2; - TestCase test_case = TestCase1(); + TestCase test_case = GetParam(); TF_ASSERT_OK(InitThreadPool(thread_num)); TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); - std::unique_ptr shuffle_dataset_kernel; - TF_ASSERT_OK(CreateShuffleDatasetOpKernel( - test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, - test_case.expected_output_shapes, &shuffle_dataset_kernel)); + Tensor count = test_case.count; + int64 count_value = count.flat()(0); + std::unique_ptr dataset_kernel; + TF_ASSERT_OK( + CreateDatasetOpKernel(count_value, test_case.reshuffle_each_iteration, + test_case.expected_output_dtypes, + test_case.expected_output_shapes, &dataset_kernel)); DatasetBase* range_dataset; TF_ASSERT_OK(CreateRangeDataset( @@ -530,23 +766,27 @@ TEST_F(ShuffleDatasetOpTest, IteratorOutputPrefix) { Tensor seed2 = test_case.seed2; gtl::InlinedVector inputs( {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + if (count_value != 1) inputs.push_back(&count); - std::unique_ptr shuffle_dataset_context; - TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), - &inputs, &shuffle_dataset_context)); - DatasetBase* shuffle_dataset; - TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), - shuffle_dataset_context.get(), &shuffle_dataset)); - core::ScopedUnref scoped_unref_shuffle_dataset(shuffle_dataset); + std::unique_ptr dataset_context; + TF_ASSERT_OK( + CreateDatasetContext(dataset_kernel.get(), &inputs, &dataset_context)); + DatasetBase* dataset; + TF_ASSERT_OK( + CreateDataset(dataset_kernel.get(), dataset_context.get(), 
&dataset)); + core::ScopedUnref scoped_unref_dataset(dataset); std::unique_ptr iterator_ctx; - TF_ASSERT_OK( - CreateIteratorContext(shuffle_dataset_context.get(), &iterator_ctx)); + TF_ASSERT_OK(CreateIteratorContext(dataset_context.get(), &iterator_ctx)); std::unique_ptr iterator; TF_ASSERT_OK( - shuffle_dataset->MakeIterator(iterator_ctx.get(), "Iterator", &iterator)); + dataset->MakeIterator(iterator_ctx.get(), "Iterator", &iterator)); - EXPECT_EQ(iterator->prefix(), "Iterator::Shuffle"); + if (count_value == 1) { + EXPECT_EQ(iterator->prefix(), "Iterator::Shuffle"); + } else { + EXPECT_EQ(iterator->prefix(), "Iterator::ShuffleAndRepeat"); + } } TEST_P(ParameterizedShuffleDatasetOpTest, Roundtrip) { @@ -555,10 +795,13 @@ TEST_P(ParameterizedShuffleDatasetOpTest, Roundtrip) { TF_ASSERT_OK(InitThreadPool(thread_num)); TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); - std::unique_ptr shuffle_dataset_kernel; - TF_ASSERT_OK(CreateShuffleDatasetOpKernel( - test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, - test_case.expected_output_shapes, &shuffle_dataset_kernel)); + Tensor count = test_case.count; + int64 count_value = count.flat()(0); + std::unique_ptr dataset_kernel; + TF_ASSERT_OK( + CreateDatasetOpKernel(count_value, test_case.reshuffle_each_iteration, + test_case.expected_output_dtypes, + test_case.expected_output_shapes, &dataset_kernel)); DatasetBase* range_dataset; TF_ASSERT_OK(CreateRangeDataset( @@ -572,21 +815,21 @@ TEST_P(ParameterizedShuffleDatasetOpTest, Roundtrip) { Tensor seed2 = test_case.seed2; gtl::InlinedVector inputs( {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + if (count_value != 1) inputs.push_back(&count); - std::unique_ptr shuffle_dataset_context; - TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), - &inputs, &shuffle_dataset_context)); - DatasetBase* shuffle_dataset; - TF_ASSERT_OK(CreateDataset(shuffle_dataset_kernel.get(), - shuffle_dataset_context.get(), &shuffle_dataset)); - 
core::ScopedUnref scoped_unref_shuffle_dataset(shuffle_dataset); + std::unique_ptr dataset_context; + TF_ASSERT_OK( + CreateDatasetContext(dataset_kernel.get(), &inputs, &dataset_context)); + DatasetBase* dataset; + TF_ASSERT_OK( + CreateDataset(dataset_kernel.get(), dataset_context.get(), &dataset)); + core::ScopedUnref scoped_unref_dataset(dataset); std::unique_ptr iterator_ctx; - TF_ASSERT_OK( - CreateIteratorContext(shuffle_dataset_context.get(), &iterator_ctx)); + TF_ASSERT_OK(CreateIteratorContext(dataset_context.get(), &iterator_ctx)); std::unique_ptr iterator; TF_ASSERT_OK( - shuffle_dataset->MakeIterator(iterator_ctx.get(), "Iterator", &iterator)); + dataset->MakeIterator(iterator_ctx.get(), "Iterator", &iterator)); std::unique_ptr serialization_ctx; TF_ASSERT_OK(CreateSerializationContext(&serialization_ctx)); @@ -602,7 +845,7 @@ TEST_P(ParameterizedShuffleDatasetOpTest, Roundtrip) { TF_EXPECT_OK(writer.Flush()); VariantTensorDataReader reader(&data); TF_EXPECT_OK(RestoreIterator(iterator_ctx.get(), &reader, "Iterator", - *shuffle_dataset, &iterator)); + *dataset, &iterator)); while (cur_iteration <= breakpoint) { std::vector next; @@ -613,51 +856,58 @@ TEST_P(ParameterizedShuffleDatasetOpTest, Roundtrip) { } } - // When `buffer_size = 1`, the output sequence of `ShuffleDataset` will be in - // order, so we need to consider the element sequence when evaluating the - // result for this case. 
- bool expect_items_equal = test_case.buffer_size.flat()(0) > 1; - TF_EXPECT_OK(ExpectEqual(out_tensors, test_case.expected_outputs, - /*expect_items_equal*/ expect_items_equal)); + TF_EXPECT_OK(ExpectEqual(out_tensors, test_case.expected_shuffle_outputs, + /*compare_order*/ true)); } INSTANTIATE_TEST_SUITE_P(ShuffleDatasetOpTest, ParameterizedShuffleDatasetOpTest, ::testing::ValuesIn(std::vector( - {TestCase1(), TestCase2(), TestCase3()}))); + {TestCase1(), TestCase2(), TestCase3(), + TestCase4(), TestCase5(), TestCase6(), + TestCase7(), TestCase8()}))); -TEST_F(ShuffleDatasetOpTest, InvalidBufferSize) { +TEST_F(ShuffleDatasetOpTest, InvalidArguments) { int thread_num = 2, cpu_num = 2; - TestCase test_case = InvalidBufferSizeTestCase(); + std::vector test_cases = { + InvalidBufferSizeTestCaseForShuffleDataset(), + InvalidBufferSizeTestCaseForShuffleAndRepeatDataset(), + InvalidCountTestCaseForShuffleAndRepeatDataset()}; TF_ASSERT_OK(InitThreadPool(thread_num)); TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num)); - std::unique_ptr shuffle_dataset_kernel; - TF_ASSERT_OK(CreateShuffleDatasetOpKernel( - test_case.reshuffle_each_iteration, test_case.expected_output_dtypes, - test_case.expected_output_shapes, &shuffle_dataset_kernel)); + for (const auto& test_case : test_cases) { + Tensor count = test_case.count; + int64 count_value = count.flat()(0); + std::unique_ptr dataset_kernel; + TF_ASSERT_OK(CreateDatasetOpKernel( + count_value, test_case.reshuffle_each_iteration, + test_case.expected_output_dtypes, test_case.expected_output_shapes, + &dataset_kernel)); - DatasetBase* range_dataset; - TF_ASSERT_OK(CreateRangeDataset( - test_case.range_data_param.start, test_case.range_data_param.end, - test_case.range_data_param.step, "range", &range_dataset)); - Tensor range_dataset_tensor(DT_VARIANT, TensorShape({})); - TF_ASSERT_OK( - StoreDatasetInVariantTensor(range_dataset, &range_dataset_tensor)); - Tensor buffer_size = test_case.buffer_size; - Tensor seed = 
test_case.seed; - Tensor seed2 = test_case.seed2; - gtl::InlinedVector inputs( - {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + DatasetBase* range_dataset; + TF_ASSERT_OK(CreateRangeDataset( + test_case.range_data_param.start, test_case.range_data_param.end, + test_case.range_data_param.step, "range", &range_dataset)); + Tensor range_dataset_tensor(DT_VARIANT, TensorShape({})); + TF_ASSERT_OK( + StoreDatasetInVariantTensor(range_dataset, &range_dataset_tensor)); + Tensor buffer_size = test_case.buffer_size; + Tensor seed = test_case.seed; + Tensor seed2 = test_case.seed2; + gtl::InlinedVector inputs( + {&range_dataset_tensor, &buffer_size, &seed, &seed2}); + if (count_value != 1) inputs.push_back(&count); - std::unique_ptr shuffle_dataset_context; - TF_ASSERT_OK(CreateShuffleDatasetContext(shuffle_dataset_kernel.get(), - &inputs, &shuffle_dataset_context)); - DatasetBase* shuffle_dataset; - EXPECT_EQ(CreateDataset(shuffle_dataset_kernel.get(), - shuffle_dataset_context.get(), &shuffle_dataset) - .code(), - tensorflow::error::INVALID_ARGUMENT); + std::unique_ptr dataset_context; + TF_ASSERT_OK( + CreateDatasetContext(dataset_kernel.get(), &inputs, &dataset_context)); + DatasetBase* shuffle_dataset; + EXPECT_EQ(CreateDataset(dataset_kernel.get(), dataset_context.get(), + &shuffle_dataset) + .code(), + tensorflow::error::INVALID_ARGUMENT); + } } } // namespace From 9ea146a83b5ea9d2d1dceee54ff2994e36b1335b Mon Sep 17 00:00:00 2001 From: Fei Hu Date: Tue, 23 Apr 2019 11:31:07 -0700 Subject: [PATCH 6/6] Revise the docstring for ExpectEqual() --- tensorflow/core/kernels/data/dataset_test_base.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/data/dataset_test_base.h b/tensorflow/core/kernels/data/dataset_test_base.h index 803ae9055a1..d82a0c38583 100644 --- a/tensorflow/core/kernels/data/dataset_test_base.h +++ b/tensorflow/core/kernels/data/dataset_test_base.h @@ -52,7 +52,7 @@ class DatasetOpsTestBase : public 
::testing::Test { static Status ExpectEqual(const Tensor& a, const Tensor& b); // The method validates whether the two tensor vectors have the same tensors. - // If `compare_order` is false, the method will only evaluate the two + // If `compare_order` is false, the method will only evaluate whether the two // vectors have the same elements regardless of order. static Status ExpectEqual(std::vector produced_tensors, std::vector expected_tensors,