From b3d45cd17cda6d0df210c3d99ec86fbe6bf5301b Mon Sep 17 00:00:00 2001
From: Jay Shi
Date: Mon, 9 Nov 2020 15:38:16 -0800
Subject: [PATCH] [tf.data] Apply gradient descent method as default algorithm for autotuning optimization.

PiperOrigin-RevId: 341499875
Change-Id: Ie2eab5ed5e85e0c9afac1fb5b612057e51bd0e12
---
 .../core/kernels/data/optimize_dataset_op.cc |  4 +-
 .../kernel_tests/optimize_dataset_test.py    | 38 +++++++++++++++++--
 .../experimental/ops/optimization_options.py |  4 +-
 3 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/tensorflow/core/kernels/data/optimize_dataset_op.cc b/tensorflow/core/kernels/data/optimize_dataset_op.cc
index 15a035e808a..b3df18a53c7 100644
--- a/tensorflow/core/kernels/data/optimize_dataset_op.cc
+++ b/tensorflow/core/kernels/data/optimize_dataset_op.cc
@@ -84,6 +84,7 @@ void OptimizeDatasetOp::MakeDataset(OpKernelContext* ctx, DatasetBase* input,
   // of the Borg jobs, the experiments will be randomly turned on.
   // clang-format off
   absl::flat_hash_map<string, uint64> live_experiments = {
+      {"enable_gradient_descent", 100},
       {"map_parallelization", 20}
   };
   // clang-format on
@@ -110,9 +111,6 @@ void OptimizeDatasetOp::MakeDataset(OpKernelContext* ctx, DatasetBase* input,
 
   // The vector stores the graduated experiment names which will be turned on
   // for all input pipelines.
-  //
-  // Note some of the graduated experiments may be hard coded, so not listed
-  // below.
   // clang-format off
   std::vector<string> graduated_experiments = {"disable_intra_op_parallelism"};
   // clang-format on
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py
index f731a714cab..ef22a5ba6f5 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py
@@ -245,6 +245,38 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     self.assertDatasetProduces(dataset, expected_output=expected_output)
 
+  @combinations.generate(
+      combinations.times(
+          test_base.default_test_combinations(),
+          combinations.combine(autotune=False, autotune_buffers=False) +
+          combinations.combine(autotune=True, autotune_buffers=False) +
+          combinations.combine(autotune=True, autotune_buffers=True),
+          combinations.combine(set_env=[False, True])))
+  def testOptimizationEnableGradientDescent(self, autotune, autotune_buffers,
+                                            set_env):
+    if set_env:
+      os.environ["TF_DATA_EXPERIMENT_OPT_IN"] = "enable_gradient_descent"
+      os.environ["TF_JOB_NAME"] = "test_job"
+
+    dataset = dataset_ops.Dataset.range(5)
+    dataset = dataset.prefetch(buffer_size=-1)
+    dataset = dataset.map(lambda x: x + 1, num_parallel_calls=2)
+    dataset = dataset.map(lambda x: x + 1, num_parallel_calls=-1)
+    dataset = dataset.prefetch(buffer_size=3)
+    dataset = dataset.map(lambda x: x + 1, num_parallel_calls=-1)
+    dataset = dataset.prefetch(buffer_size=1)
+
+    options = dataset_ops.Options()
+    options.experimental_optimization.autotune = autotune
+    options.experimental_optimization.autotune_buffers = autotune_buffers
+    dataset = dataset.with_options(options)
+
+    self.assertDatasetProduces(dataset, expected_output=list(range(3, 8)))
+
+    if set_env:
+      del os.environ["TF_DATA_EXPERIMENT_OPT_IN"]
+      del os.environ["TF_JOB_NAME"]
+
   @combinations.generate(
       combinations.times(
           test_base.default_test_combinations(),
@@ -543,16 +575,14 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
     if autotune_buffers is True:  # pylint: disable=g-bool-id-comparison
       self.assertIn("autotune_buffer_sizes", graph_rewrites.enabled)
       self.assertIn("disable_prefetch_legacy_autotune", graph_rewrites.enabled)
+      self.assertEqual(algorithm,
+                       optimization_options._AutotuneAlgorithm.GRADIENT_DESCENT)
     else:
       self.assertNotIn("autotune_buffer_sizes", graph_rewrites.enabled)
       self.assertNotIn("disable_prefetch_legacy_autotune",
                        graph_rewrites.enabled)
-    if autotune_buffers is False:  # pylint: disable=g-bool-id-comparison
       self.assertEqual(algorithm,
                        optimization_options._AutotuneAlgorithm.HILL_CLIMB)
-    else:
-      self.assertEqual(algorithm,
-                       optimization_options._AutotuneAlgorithm.GRADIENT_DESCENT)
 
   @combinations.generate(
       combinations.times(
diff --git a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py
index a2d6c77cfb7..5c69855e15f 100644
--- a/tensorflow/python/data/experimental/ops/optimization_options.py
+++ b/tensorflow/python/data/experimental/ops/optimization_options.py
@@ -228,8 +228,8 @@ class OptimizationOptions(options.OptionsBase):
     # If autotune_buffers is enabled, we use the GRADIENT_DESCENT algorithm by
     # default, which is more performant for tuning heterogeneous parameters.
     algorithm = (
-        _AutotuneAlgorithm.HILL_CLIMB if self.autotune_buffers is False  # pylint: disable=g-bool-id-comparison
-        else _AutotuneAlgorithm.GRADIENT_DESCENT)
+        _AutotuneAlgorithm.GRADIENT_DESCENT
+        if self._autotune_buffers() else _AutotuneAlgorithm.HILL_CLIMB)
     cpu_budget = 0  # Indicates that all CPU cores should be used by default.
     ram_budget = 0  # Indicates that default value of RAM budget should be used.
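
For reference only (not part of the patch): a minimal sketch of how a user-facing pipeline opts into the autotuning behavior this change affects. The pipeline shape, element counts, and buffer sizes below are illustrative assumptions; the option names come from the patch. With autotune_buffers enabled, the patch makes GRADIENT_DESCENT the default autotuning algorithm; otherwise HILL_CLIMB is used.

import tensorflow as tf

# Illustrative pipeline with autotunable map parallelism and prefetch buffers.
dataset = tf.data.Dataset.range(1000)
dataset = dataset.map(lambda x: x * 2,
                      num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

# Opt into buffer autotuning; per this patch, the autotuning algorithm then
# defaults to GRADIENT_DESCENT (HILL_CLIMB when autotune_buffers is off).
options = tf.data.Options()
options.experimental_optimization.autotune = True
options.experimental_optimization.autotune_buffers = True
dataset = dataset.with_options(options)

for element in dataset.take(3):
  print(element.numpy())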