[tf.data] Adding deprecation warnings to experimental APIs for which a non-experimental alternative exists, and removing the tf.data.experimental.filter_for_shard API altogether in favor of the tf.data.Dataset.shard API.

PiperOrigin-RevId: 238030932
Jiri Simsa 2019-03-12 09:27:00 -07:00 committed by TensorFlower Gardener
parent a4c589d5c7
commit f1d30ce1be
11 changed files with 19 additions and 134 deletions
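For reference, a minimal sketch of the migration this change implies, using the non-experimental sharding API; the file name and worker values below are illustrative placeholders, not part of the diff:

```python
import tensorflow as tf

# Illustrative values; in practice these come from the training setup.
num_workers, worker_index = 4, 0

d = tf.data.TFRecordDataset("input.tfrecord")

# Before (removed in this commit):
# d = d.apply(tf.data.experimental.filter_for_shard(num_workers, worker_index))

# After: the core sharding API selects every num_workers-th element,
# starting at worker_index.
d = d.shard(num_workers, worker_index)
```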


@@ -48,7 +48,6 @@ See [Importing Data](https://tensorflow.org/guide/datasets) for an overview.
@@copy_to_device
@@dense_to_sparse_batch
@@enumerate_dataset
@@filter_for_shard
@@get_next_as_optional
@@get_single_element
@@group_by_reducer
@@ -92,7 +91,6 @@ from tensorflow.python.data.experimental.ops.cardinality import UNKNOWN as UNKNO
from tensorflow.python.data.experimental.ops.counter import Counter
from tensorflow.python.data.experimental.ops.enumerate_ops import enumerate_dataset
from tensorflow.python.data.experimental.ops.error_ops import ignore_errors
from tensorflow.python.data.experimental.ops.filter_for_shard_ops import filter_for_shard
from tensorflow.python.data.experimental.ops.get_single_element import get_single_element
from tensorflow.python.data.experimental.ops.grouping import bucket_by_sequence_length
from tensorflow.python.data.experimental.ops.grouping import group_by_reducer


@@ -162,18 +162,6 @@ py_library(
],
)
py_library(
name = "filter_for_shard_ops",
srcs = ["filter_for_shard_ops.py"],
srcs_version = "PY2AND3",
deps = [
"//tensorflow/python:dtypes",
"//tensorflow/python:math_ops",
"//tensorflow/python:ops",
"//tensorflow/python:tensor_util",
],
)
py_library(
name = "error_ops",
srcs = ["error_ops.py"],
@@ -466,7 +454,6 @@ py_library(
":distribute",
":enumerate_ops",
":error_ops",
":filter_for_shard_ops",
":get_single_element",
":grouping",
":indexed_dataset_ops",


@@ -665,6 +665,11 @@ def map_and_batch_with_legacy_function(map_func,
return _apply_fn
@deprecation.deprecated(
None,
"Use `tf.data.Dataset.map(map_func, num_parallel_calls)` followed by "
"`tf.data.Dataset.batch(batch_size, drop_remainder)`. Static tf.data "
"optimizations will take care of using the fused implementation.")
@tf_export("data.experimental.map_and_batch")
def map_and_batch(map_func,
batch_size,

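A hedged sketch of the replacement named in this deprecation message; the map function, batch size, and parallelism values are placeholders:

```python
import tensorflow as tf

ds = tf.data.Dataset.range(1000)

# Deprecated fused form:
# ds = ds.apply(tf.data.experimental.map_and_batch(
#     lambda x: x * 2, batch_size=32, num_parallel_calls=4, drop_remainder=True))

# Replacement: map followed by batch; static tf.data optimizations
# take care of fusing the two when beneficial.
ds = ds.map(lambda x: x * 2, num_parallel_calls=4)
ds = ds.batch(32, drop_remainder=True)
```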

@@ -1,106 +0,0 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Naive shard dataset transformation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import math_ops
from tensorflow.python.util.tf_export import tf_export
@tf_export("data.experimental.filter_for_shard")
def filter_for_shard(num_shards, shard_index):
"""Creates a `Dataset` that includes only 1/`num_shards` of this dataset.
This dataset operator is very useful when running distributed training, as
it allows each worker to read a unique subset.
When reading a single input file, you can skip elements as follows:
```python
d = tf.data.TFRecordDataset(FLAGS.input_file)
d = d.apply(tf.data.experimental.filter_for_shard(FLAGS.num_workers,
                                                  FLAGS.worker_index))
d = d.repeat(FLAGS.num_epochs)
d = d.shuffle(FLAGS.shuffle_buffer_size)
d = d.map(parser_fn, num_parallel_calls=FLAGS.num_map_threads)
```
Important caveats:
- Be sure to shard before you use any randomizing operator (such as
shuffle).
- Generally it is best if the shard operator is used early in the dataset
pipeline. For example, when reading from a set of TFRecord files, shard
before converting the dataset to input samples. This avoids reading every
file on every worker. The following is an example of an efficient
sharding strategy within a complete pipeline:
```python
d = Dataset.list_files(FLAGS.pattern)
d = d.apply(tf.data.experimental.filter_for_shard(FLAGS.num_workers,
                                                  FLAGS.worker_index))
d = d.repeat(FLAGS.num_epochs)
d = d.shuffle(FLAGS.shuffle_buffer_size)
d = d.interleave(tf.data.TFRecordDataset,
cycle_length=FLAGS.num_readers, block_length=1)
d = d.map(parser_fn, num_parallel_calls=FLAGS.num_map_threads)
```
Args:
num_shards: A `tf.int64` scalar `tf.Tensor`, representing the number of
shards operating in parallel.
shard_index: A `tf.int64` scalar `tf.Tensor`, representing the worker index.
Returns:
A `Dataset` transformation function, which can be passed to
`tf.data.Dataset.apply`.
Raises:
ValueError: if `num_shards` or `shard_index` are illegal values. Note: error
checking is done on a best-effort basis, and errors aren't guaranteed to
be caught upon dataset creation. (e.g. providing a placeholder tensor
bypasses the early checking, and will instead result in an error during
a session.run call.)
"""
num_shards = ops.convert_to_tensor(
num_shards, name="num_shards", dtype=dtypes.int64)
num_shards_static = tensor_util.constant_value(num_shards)
shard_index = ops.convert_to_tensor(shard_index, name="shard_index",
dtype=dtypes.int64)
shard_index_static = tensor_util.constant_value(shard_index)
if num_shards_static is not None and num_shards_static < 1:
raise ValueError("num_shards must be >= 1; got: %s" % num_shards_static)
if shard_index_static is not None and shard_index_static < 0:
raise ValueError("shard_index must be >= 0; got: %s" % shard_index_static)
if (shard_index_static is not None and num_shards_static is not None and
shard_index_static >= num_shards_static):
raise ValueError("shard_index must be < num_shards; %s is not < %s" %
(shard_index_static, num_shards_static))
def filter_fn(elem_index, _):
mod_result = math_ops.mod(elem_index, num_shards)
return math_ops.equal(mod_result, shard_index)
def _apply_fn(dataset):
# pylint: disable=protected-access
return dataset._enumerate().filter(filter_fn).map(lambda _, elem: elem)
return _apply_fn


@@ -28,6 +28,7 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops
from tensorflow.python.ops import gen_stateless_random_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.util import deprecation
from tensorflow.python.util.tf_export import tf_export
@@ -82,6 +83,11 @@ class _ParallelInterleaveDataset(dataset_ops.UnaryDataset):
return "tf.data.experimental.parallel_interleave()"
@deprecation.deprecated(
None,
"Use `tf.data.Dataset.interleave(map_func, cycle_length, block_length, "
"num_parallel_calls=tf.data.experimental.AUTOTUNE)` instead. If sloppy "
"execution is desired, use `tf.data.Options.experimental_determinstic`.")
@tf_export("data.experimental.parallel_interleave")
def parallel_interleave(map_func,
cycle_length,

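A hedged sketch of the replacement named in this deprecation message; the file names and cycle length are placeholders, and the determinism setting assumes a TF version that exposes `tf.data.Options.experimental_deterministic`:

```python
import tensorflow as tf

# Placeholder shard files; only read when the dataset is iterated.
filenames = tf.data.Dataset.from_tensor_slices(
    ["shard-0.tfrecord", "shard-1.tfrecord"])

# Deprecated:
# ds = filenames.apply(tf.data.experimental.parallel_interleave(
#     tf.data.TFRecordDataset, cycle_length=2, sloppy=True))

# Replacement:
ds = filenames.interleave(
    tf.data.TFRecordDataset,
    cycle_length=2,
    block_length=1,
    num_parallel_calls=tf.data.experimental.AUTOTUNE)

# Analogue of sloppy=True: allow non-deterministic element order.
options = tf.data.Options()
options.experimental_deterministic = False
ds = ds.with_options(options)
```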

@@ -23,6 +23,7 @@ from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import gen_dataset_ops
from tensorflow.python.util import deprecation
from tensorflow.python.util.tf_export import tf_export
@@ -50,6 +51,11 @@ class _ShuffleAndRepeatDataset(dataset_ops.UnaryUnchangedStructureDataset):
variant_tensor)
@deprecation.deprecated(
None,
"Use `tf.data.Dataset.shuffle(buffer_size, seed)` followed by "
"`tf.data.Dataset.repeat(count)`. Static tf.data optimizations will take "
"care of using the fused implementation.")
@tf_export("data.experimental.shuffle_and_repeat")
def shuffle_and_repeat(buffer_size, count=None, seed=None):
"""Shuffles and repeats a Dataset returning a new permutation for each epoch.

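A hedged sketch of the replacement named in this deprecation message; buffer size, repeat count, and seed are placeholder values:

```python
import tensorflow as tf

ds = tf.data.Dataset.range(100)

# Deprecated fused form:
# ds = ds.apply(tf.data.experimental.shuffle_and_repeat(
#     buffer_size=100, count=10, seed=42))

# Replacement: shuffle followed by repeat; static tf.data optimizations
# take care of fusing the two when beneficial.
ds = ds.shuffle(buffer_size=100, seed=42)
ds = ds.repeat(10)
```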

@@ -26,7 +26,6 @@ py_library(
"//tensorflow/python:tensor_shape",
"//tensorflow/python:tensor_util",
"//tensorflow/python:util",
"//tensorflow/python/data/experimental/ops:filter_for_shard_ops",
"//tensorflow/python/data/experimental/ops:optimization_options",
"//tensorflow/python/data/experimental/ops:stats_options",
"//tensorflow/python/data/experimental/ops:threading_options",


@@ -368,7 +368,6 @@ py_library(
srcs = ["input_ops.py"],
deps = [
"//tensorflow/python:framework_ops",
"//tensorflow/python/data/experimental/ops:filter_for_shard_ops",
"//tensorflow/python/data/util:nest",
],
)


@@ -40,10 +40,9 @@ def auto_shard_dataset(dataset, num_shards, index):
dataset: A `tf.data.Dataset` instance, typically the result of a bunch of
dataset transformations.
num_shards: A `tf.int64` scalar `tf.Tensor`, representing the number of
shards operating in parallel. Same usage as in
`tf.data.experimental.filter_for_shard`.
shards operating in parallel. Same usage as in `tf.data.Dataset.shard`.
index: A `tf.int64` scalar `tf.Tensor`, representing the worker index.
Same usage as in `Dataset.shard`.
Same usage as in `tf.data.Dataset.shard`.
Returns:
A modified `Dataset` obtained by updating the pipeline sharded by the


@@ -112,10 +112,6 @@ tf_module {
name: "enumerate_dataset"
argspec: "args=[\'start\'], varargs=None, keywords=None, defaults=[\'0\'], "
}
member_method {
name: "filter_for_shard"
argspec: "args=[\'num_shards\', \'shard_index\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "get_next_as_optional"
argspec: "args=[\'iterator\'], varargs=None, keywords=None, defaults=None"


@@ -112,10 +112,6 @@ tf_module {
name: "enumerate_dataset"
argspec: "args=[\'start\'], varargs=None, keywords=None, defaults=[\'0\'], "
}
member_method {
name: "filter_for_shard"
argspec: "args=[\'num_shards\', \'shard_index\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "get_next_as_optional"
argspec: "args=[\'iterator\'], varargs=None, keywords=None, defaults=None"