From b3d45cd17cda6d0df210c3d99ec86fbe6bf5301b Mon Sep 17 00:00:00 2001
From: Jay Shi
Date: Mon, 9 Nov 2020 15:38:16 -0800
Subject: [PATCH] [tf.data] Apply gradient descent method as default algorithm for autotuning optimization.

PiperOrigin-RevId: 341499875
Change-Id: Ie2eab5ed5e85e0c9afac1fb5b612057e51bd0e12
---
 .../core/kernels/data/optimize_dataset_op.cc |  4 +-
 .../kernel_tests/optimize_dataset_test.py    | 38 +++++++++++++++++--
 .../experimental/ops/optimization_options.py |  4 +-
 3 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/tensorflow/core/kernels/data/optimize_dataset_op.cc b/tensorflow/core/kernels/data/optimize_dataset_op.cc
index 15a035e808a..b3df18a53c7 100644
--- a/tensorflow/core/kernels/data/optimize_dataset_op.cc
+++ b/tensorflow/core/kernels/data/optimize_dataset_op.cc
@@ -84,6 +84,7 @@ void OptimizeDatasetOp::MakeDataset(OpKernelContext* ctx, DatasetBase* input,
   // of the Borg jobs, the experiments will be randomly turned on.
   // clang-format off
   absl::flat_hash_map<string, uint64> live_experiments = {
+      {"enable_gradient_descent", 100},
       {"map_parallelization", 20}
   };
   // clang-format on
@@ -110,9 +111,6 @@ void OptimizeDatasetOp::MakeDataset(OpKernelContext* ctx, DatasetBase* input,
 
   // The vector stores the graduated experiment names which will be turned on
   // for all input pipelines.
-  //
-  // Note some of the graduated experiments may be hard coded, so not listed
-  // below.
   // clang-format off
   std::vector<string> graduated_experiments = {"disable_intra_op_parallelism"};
   // clang-format on
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py
index f731a714cab..ef22a5ba6f5 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py
@@ -245,6 +245,38 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     self.assertDatasetProduces(dataset, expected_output=expected_output)
 
+  @combinations.generate(
+      combinations.times(
+          test_base.default_test_combinations(),
+          combinations.combine(autotune=False, autotune_buffers=False) +
+          combinations.combine(autotune=True, autotune_buffers=False) +
+          combinations.combine(autotune=True, autotune_buffers=True),
+          combinations.combine(set_env=[False, True])))
+  def testOptimizationEnableGradientDescent(self, autotune, autotune_buffers,
+                                            set_env):
+    if set_env:
+      os.environ["TF_DATA_EXPERIMENT_OPT_IN"] = "enable_gradient_descent"
+      os.environ["TF_JOB_NAME"] = "test_job"
+
+    dataset = dataset_ops.Dataset.range(5)
+    dataset = dataset.prefetch(buffer_size=-1)
+    dataset = dataset.map(lambda x: x + 1, num_parallel_calls=2)
+    dataset = dataset.map(lambda x: x + 1, num_parallel_calls=-1)
+    dataset = dataset.prefetch(buffer_size=3)
+    dataset = dataset.map(lambda x: x + 1, num_parallel_calls=-1)
+    dataset = dataset.prefetch(buffer_size=1)
+
+    options = dataset_ops.Options()
+    options.experimental_optimization.autotune = autotune
+    options.experimental_optimization.autotune_buffers = autotune_buffers
+    dataset = dataset.with_options(options)
+
+    self.assertDatasetProduces(dataset, expected_output=list(range(3, 8)))
+
+    if set_env:
+      del os.environ["TF_DATA_EXPERIMENT_OPT_IN"]
+      del os.environ["TF_JOB_NAME"]
+
   @combinations.generate(
       combinations.times(
           test_base.default_test_combinations(),
@@ -543,16 +575,14 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
     if autotune_buffers is True:  # pylint: disable=g-bool-id-comparison
       self.assertIn("autotune_buffer_sizes", graph_rewrites.enabled)
       self.assertIn("disable_prefetch_legacy_autotune", graph_rewrites.enabled)
+      self.assertEqual(algorithm,
+                       optimization_options._AutotuneAlgorithm.GRADIENT_DESCENT)
     else:
       self.assertNotIn("autotune_buffer_sizes", graph_rewrites.enabled)
       self.assertNotIn("disable_prefetch_legacy_autotune",
                        graph_rewrites.enabled)
-    if autotune_buffers is False:  # pylint: disable=g-bool-id-comparison
       self.assertEqual(algorithm,
                        optimization_options._AutotuneAlgorithm.HILL_CLIMB)
-    else:
-      self.assertEqual(algorithm,
-                       optimization_options._AutotuneAlgorithm.GRADIENT_DESCENT)
 
   @combinations.generate(
       combinations.times(
diff --git a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py
index a2d6c77cfb7..5c69855e15f 100644
--- a/tensorflow/python/data/experimental/ops/optimization_options.py
+++ b/tensorflow/python/data/experimental/ops/optimization_options.py
@@ -228,8 +228,8 @@ class OptimizationOptions(options.OptionsBase):
     # If autotune_buffers is enabled, we use the GRADIENT_DESCENT algorithm by
     # default, which is more performant for tuning heterogeneous parameters.
     algorithm = (
-        _AutotuneAlgorithm.HILL_CLIMB if self.autotune_buffers is False  # pylint: disable=g-bool-id-comparison
-        else _AutotuneAlgorithm.GRADIENT_DESCENT)
+        _AutotuneAlgorithm.GRADIENT_DESCENT
+        if self._autotune_buffers() else _AutotuneAlgorithm.HILL_CLIMB)
     cpu_budget = 0  # Indicates that all CPU cores should be used by default.
     ram_budget = 0  # Indicates that default value of RAM budget should be used.
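
For reference only (not part of the patch): a minimal sketch of how a user-facing pipeline opts into the autotuning behavior this change affects. The pipeline shape, element counts, and buffer sizes below are illustrative assumptions; the option names come from the patch. With autotune_buffers enabled, the patch makes GRADIENT_DESCENT the default autotuning algorithm; otherwise HILL_CLIMB is used.

import tensorflow as tf

# Illustrative pipeline with autotunable map parallelism and prefetch buffers.
dataset = tf.data.Dataset.range(1000)
dataset = dataset.map(lambda x: x * 2,
                      num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

# Opt into buffer autotuning; per this patch, the autotuning algorithm then
# defaults to GRADIENT_DESCENT (HILL_CLIMB when autotune_buffers is off).
options = tf.data.Options()
options.experimental_optimization.autotune = True
options.experimental_optimization.autotune_buffers = True
dataset = dataset.with_options(options)

for element in dataset.take(3):
  print(element.numpy())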