Delete tf.contrib.training.python_input.

It has been replaced by tf.contrib.data.Dataset.from_generator.

PiperOrigin-RevId: 167004190
2017-08-30 09:27:00 -07:00 · 2017-08-30 09:27:00 -07:00 · 48e3b62541
commit 48e3b62541
parent f9c5e921dd
6 changed files with 20 additions and 389 deletions
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -9,6 +9,9 @@
  for LSTMs and stacked LSTMs.  This bug fix follows recommendations from
  published literature, but is a behavioral change.  State dropout behavior
  may be customized via the new `dropout_state_filter_visitor` argument.
+* Removed `tf.contrib.training.python_input`.  The same behavior, in a more
+  flexible and reproducible package, is available via the new
+  `tf.contrib.data.Dataset.from_generator` method!

 # Release 1.3.0

--- a/tensorflow/contrib/data/python/ops/dataset_ops.py
+++ b/tensorflow/contrib/data/python/ops/dataset_ops.py
@@ -595,6 +595,23 @@ class Dataset(object):
    The elements generated by `generator` must be compatible with the given
    `output_types` and (optional) `output_shapes` arguments.

+    For example:
+
+    ```python
+    import itertools
+
+    def gen():
+      for i in itertools.count(1):
+        yield (i, [1] * i)
+
+    ds = Dataset.from_generator(
+        gen, (tf.int64, tf.int64), (tf.TensorShape([]), tf.TensorShape([None])))
+    value = ds.make_one_shot_iterator().get_next()
+
+    sess.run(value)  # (1, array([1]))
+    sess.run(value)  # (2, array([1, 1]))
+    ```
+
    Args:
      generator: A callable object that takes no arguments and returns an
        object that supports the `iter()` protocol.
--- a/tensorflow/contrib/training/BUILD
+++ b/tensorflow/contrib/training/BUILD
@@ -23,7 +23,6 @@ py_library(
        "python/training/evaluation.py",
        "python/training/feeding_queue_runner.py",
        "python/training/hparam.py",
-        "python/training/python_input.py",
        "python/training/resample.py",
        "python/training/sampling_ops.py",
        "python/training/sequence_queueing_state_saver.py",
@@ -226,23 +225,6 @@ py_test(
    ],
 )

-py_test(
-    name = "python_input_test",
-    size = "medium",
-    srcs = ["python/training/python_input_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["manual"],
-    deps = [
-        ":training_py",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:training",
-        "//third_party/py/numpy",
-    ],
-)
-
 py_test(
    name = "evaluation_test",
    size = "small",
--- a/tensorflow/contrib/training/__init__.py
+++ b/tensorflow/contrib/training/__init__.py
@@ -36,7 +36,6 @@ See @{$python/contrib.training} guide.
 @@HParams
 @@HParamDef
 @@parse_values
-@@python_input
 """

 from __future__ import absolute_import
@@ -55,7 +54,6 @@ from tensorflow.contrib.training.python.training.evaluation import SummaryAtEndH
 from tensorflow.contrib.training.python.training.evaluation import wait_for_new_checkpoint
 from tensorflow.contrib.training.python.training.feeding_queue_runner import FeedingQueueRunner
 from tensorflow.contrib.training.python.training.hparam import *
-from tensorflow.contrib.training.python.training.python_input import python_input
 from tensorflow.contrib.training.python.training.resample import *
 from tensorflow.contrib.training.python.training.sampling_ops import *
 from tensorflow.contrib.training.python.training.sequence_queueing_state_saver import *
--- a/tensorflow/contrib/training/python/training/python_input.py
+++ b/tensorflow/contrib/training/python/training/python_input.py
@@ -1,178 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Operations for asynchronously reading data from python into queues.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import threading
-
-import numpy as np
-
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import parsing_ops
-from tensorflow.python.ops import script_ops
-
-
-def _process_yielded_dict(feature_values, keys, features, dtypes, shapes):
-  """Read feature_values from the generator and emit a proper output dict."""
-  if not isinstance(feature_values, dict):
-    raise TypeError("generator must return dict, saw: %s" % feature_values)
-
-  processed_values = {}
-  for pk in keys:
-    if feature_values.get(pk, None) is not None:
-      processed_values[pk] = np.asarray(
-          feature_values[pk], dtype=dtypes[pk].as_numpy_dtype)
-      check_shape = tensor_shape.TensorShape(processed_values[pk].shape)
-      if not shapes[pk].is_compatible_with(check_shape):
-        raise ValueError(
-            "Feature '%s' has shape %s that is incompatible with declared "
-            "shape: %s" % (pk, shapes[pk], check_shape))
-      continue
-    if isinstance(features[pk], parsing_ops.FixedLenFeature):
-      if features[pk].default_value is not None:
-        processed_values[pk] = np.asarray(
-            features[pk].default_value, dtype=dtypes[pk].as_numpy_dtype)
-    elif isinstance(features[pk], parsing_ops.FixedLenSequenceFeature):
-      processed_values[pk] = np.empty(
-          [0] + features[pk].shape.aslist(), dtype=dtypes[pk].as_numpy_dtype)
-    else:
-      raise ValueError(
-          "Expected generator to return key '%s' with non-empty value" % pk)
-
-  return processed_values
-
-
-def python_input(generator, features, name=None):
-  """Easily feed data from a python generator into TensorFlow queues.
-
-  Example usage:
-
-  ```python
-  def generator():
-    for i in range(3):
-      yield {"value": i}
-
-  features = {
-    "value": tf.FixedLenFeature(shape=[], dtype=dtypes.int32)
-  }
-
-  tensor_dict = tf.contrib.training.python_input(generator, features)
-  batched_dict = tf.train.batch(
-    tensor_dict, batch_size=2, allow_smaller_final_batch=True)
-
-  s = tf.Session()
-  tf.train.start_queue_runners()
-
-  batch1 = s.run(batched_dict)  # returns {"value": np.array([0, 1])}
-  batch2 = s.run(batched_dict)  # returns {"value": np.array([2])}
-  s.run(batched_dict)  # error: Queue is closed (generator finished at i==3)
-  ```
-
-  Args:
-    generator: A python generator that takes no arguments, and yields dicts
-      containing a single minibatch entry one at a time.
-    features: A python `dict` mapping keys expected from the generator to
-      instances of `tf.FixedLenFeature`, or `tf.FixedLenSequenceFeature`.
-    name: (Optional) A name for the operations.
-
-  Returns:
-    A dict mapping keys of the `features` dict to `Tensor` objects.
-    These `Tensor` objects are outputs of a queue that is fed by `generator`.
-
-  Raises:
-    TypeError: If generator is not callable or features is not a dict.
-    TypeError: If any of features' values are not a Feature object.
-    NotImplementedError: If any of features' values are instances of
-      `SparseFeature` or `VarLenFeature`  (these are not currently supported).
-    ValueError: If any FixedLenSequenceFeatures contain a default value
-      (this field is not supported).
-    ValueError: if any FixedLenSequenceFeatures have allow_missing=False
-      (this field is not supported).
-  """
-  if not callable(generator):
-    raise TypeError("generator must be callable, saw: %s" % generator)
-  if not isinstance(features, dict):
-    raise TypeError("features must be a dict, saw: %s"
-                    % type(features).__name__)
-
-  with ops.name_scope(name, "python_input"):
-    shapes = {}
-    dtypes = {}
-    for k, v in features.items():
-      if isinstance(v, parsing_ops.FixedLenFeature):
-        if v.default_value is not None:
-          value = ops.convert_to_tensor(v.default_value, dtype=v.dtype, name=k)
-          shapes[k] = value.shape
-          dtypes[k] = value.dtype
-        else:
-          tensor_shape.TensorShape(v.shape).assert_is_fully_defined()
-          shapes[k] = tensor_shape.TensorShape(v.shape)
-          dtypes[k] = v.dtype
-      elif isinstance(v, parsing_ops.VarLenFeature):
-        raise NotImplementedError("VarLenFeature not supported")
-      elif isinstance(v, parsing_ops.SparseFeature):
-        raise NotImplementedError("SparseFeature not supported")
-      elif isinstance(v, parsing_ops.FixedLenSequenceFeature):
-        if v.default_value is not None:
-          raise ValueError("FixedLenSequenceFeature with default value not "
-                           "supported")
-        if not v.allow_missing:
-          raise ValueError("FixedLenSequenceFeature with allow_missing=False "
-                           "not supported")
-        tensor_shape.TensorShape(v.shape).assert_is_fully_defined()
-        shapes[k] = tensor_shape.TensorShape([None]).concatenate(v.shape)
-        dtypes[k] = v.dtype
-      else:
-        raise TypeError(
-            "Expected value for features key '%s' to be one of "
-            "FixedLenFeature, VarLenFeature, SparseFeature, or "
-            "FixedLenSequenceFeature.  Got: %s" % (k, v))
-
-    keys = list(shapes.keys())
-    dtypes_list = [dtypes[pk] for pk in keys]
-
-    counter = [0]
-    lock = threading.Lock()
-    iterator = iter(generator())
-
-    def generator_iter():
-      """Iterate through generator output and return np.arrays to py_func."""
-      with lock:
-        try:
-          feature_values = next(iterator)
-          counter[0] += 1
-        except StopIteration as e:
-          raise StopIteration("Iteration finished.  Processed %d entries (%s)"
-                              % (counter[0], e))
-
-      processed_dict = _process_yielded_dict(
-          feature_values, keys, features, dtypes, shapes)
-      return [processed_dict[pk] for pk in keys]
-
-    generator_pyfunc_values = script_ops.py_func(
-        generator_iter, inp=[], Tout=dtypes_list, stateful=True)
-
-    pyfunc_input = {k: v for (k, v) in zip(keys, generator_pyfunc_values)}
-    for k, v in shapes.items():
-      pyfunc_input[k].set_shape(v)
-
-  return pyfunc_input
-
-
-__all__ = ["python_input"]
--- a/tensorflow/contrib/training/python/training/python_input_test.py
+++ b/tensorflow/contrib/training/python/training/python_input_test.py
@@ -1,191 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for tf.contrib.training.python_input."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-from tensorflow.contrib.training.python.training import bucket_ops
-from tensorflow.contrib.training.python.training import python_input
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.ops import parsing_ops
-from tensorflow.python.platform import test
-from tensorflow.python.training import coordinator
-from tensorflow.python.training import input as core_input
-from tensorflow.python.training import queue_runner_impl
-
-
-class PythonInputTest(test.TestCase):
-
-  def testGenerator(self):
-    def simple_generator():
-      for i in range(2):
-        yield {"value": i, "ignored": 3}
-
-    simple_features = {
-        "value": parsing_ops.FixedLenFeature(shape=[], dtype=dtypes.int32)
-    }
-    tensors = python_input.python_input(simple_generator, simple_features)
-    self.assertEqual(["value"], tensors.keys())
-    self.assertEqual(dtypes.int32, tensors["value"].dtype)
-    self.assertEqual((), tensors["value"].shape)
-
-    with self.test_session() as sess:
-      self.assertEqual({"value": 0}, sess.run(tensors))
-      self.assertEqual({"value": 1}, sess.run(tensors))
-      with self.assertRaisesOpError("Iteration finished"):
-        sess.run(tensors)
-
-  def testInvalidGenerator(self):
-    generator1 = lambda: iter([{"value": "a"}])
-    int_features = {
-        "value": parsing_ops.FixedLenFeature(shape=[], dtype=dtypes.int32)
-    }
-    tensors1 = python_input.python_input(generator1, int_features)
-
-    with self.test_session() as sess:
-      with self.assertRaisesOpError("invalid literal"):
-        # Can't convert a string to an integer
-        sess.run(tensors1)
-
-    generator2 = lambda: iter([None])
-    tensors2 = python_input.python_input(generator2, int_features)
-
-    with self.test_session() as sess:
-      with self.assertRaisesOpError("generator must return dict"):
-        sess.run(tensors2)
-
-    generator3 = lambda: iter([{"value": [1, 2]}])
-    tensors3 = python_input.python_input(generator3, int_features)
-
-    with self.test_session() as sess:
-      with self.assertRaisesOpError("incompatible with declared shape"):
-        sess.run(tensors3)
-
-  def testGeneratorWorksWithBatching(self):
-    def simple_generator():
-      for i in range(5):
-        yield {"value": i, "ignored": 3}
-
-    simple_features = {
-        "value": parsing_ops.FixedLenFeature(shape=[], dtype=dtypes.int32)
-    }
-    tensors = python_input.python_input(simple_generator, simple_features)
-
-    # Request batches of size 4 at a time, the final batch may be smaller.
-    batched_tensors = core_input.batch(tensors, batch_size=4,
-                                       allow_smaller_final_batch=True)
-
-    self.assertEqual(["value"], batched_tensors.keys())
-    self.assertEqual(dtypes.int32, batched_tensors["value"].dtype)
-    self.assertEqual([None], batched_tensors["value"].shape.as_list())
-
-    with self.test_session() as sess:
-      # The generator emits 5 items total.  The first 4 are returned in
-      # the first session run; the final one is returned in the
-      # second.  This works because allow_smaller_final_batch=True.
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
-      r1 = sess.run(batched_tensors)
-      r2 = sess.run(batched_tensors)
-      self.assertAllEqual([0, 1, 2, 3], r1["value"])
-      self.assertEqual([4], r2["value"])
-      with self.assertRaisesOpError("Iteration finished"):
-        sess.run(tensors)
-      coord.request_stop()
-      for thread in threads:
-        thread.join()
-
-  def testGeneratorWorksWithManyBatchingThreads(self):
-    def simple_generator():
-      for i in range(5000):
-        yield {"value": i, "ignored": 3}
-
-    simple_features = {
-        "value": parsing_ops.FixedLenFeature(shape=[], dtype=dtypes.int32)
-    }
-    tensors = python_input.python_input(simple_generator, simple_features)
-
-    # Request batches of size 20 at a time, the final batch may be smaller.
-    _, batched_tensors = bucket_ops.bucket(
-        tensors, which_bucket=tensors["value"] % 5,
-        batch_size=20, num_buckets=5, num_threads=7, capacity=17,
-        allow_smaller_final_batch=True)
-
-    self.assertEqual(["value"], batched_tensors.keys())
-    self.assertEqual(dtypes.int32, batched_tensors["value"].dtype)
-    self.assertEqual([None], batched_tensors["value"].shape.as_list())
-
-    with self.test_session() as sess:
-      # The generator emits 5 items total.  The first 4 are returned in
-      # the first session run; the final one is returned in the
-      # second.  This works because allow_smaller_final_batch=True.
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
-      results = []
-      while True:
-        try:
-          r = sess.run(batched_tensors)
-          results.extend(r["value"].tolist())
-        except errors.OutOfRangeError:
-          break
-      coord.request_stop()
-      for thread in threads:
-        thread.join()
-    self.assertEqual(sorted(results),
-                     list(range(5000)))
-
-  def testVaryingFieldsInGenerator(self):
-    def simple_generator():
-      for i in range(2):
-        yield {"value": i,
-               "seqlen_value": np.ones((i, 1))}
-
-    simple_features = {
-        "value": parsing_ops.FixedLenFeature(shape=[], dtype=dtypes.int32),
-        "seqlen_value": parsing_ops.FixedLenSequenceFeature(
-            shape=[1], dtype=dtypes.float32, allow_missing=True),
-        "empty_value": parsing_ops.FixedLenFeature(
-            default_value=[-1, -2], dtype=dtypes.int32, shape=[2])
-    }
-    tensors = python_input.python_input(simple_generator, simple_features)
-    self.assertEqual(
-        set(["value", "seqlen_value", "empty_value"]), set(tensors.keys()))
-    self.assertEqual(dtypes.int32, tensors["value"].dtype)
-    self.assertEqual((), tensors["value"].shape)
-    self.assertEqual(dtypes.float32, tensors["seqlen_value"].dtype)
-    self.assertEqual([None, 1], tensors["seqlen_value"].shape.as_list())
-    self.assertEqual(dtypes.int32, tensors["empty_value"].dtype)
-    self.assertEqual([2], tensors["empty_value"].shape)
-
-    with self.test_session() as sess:
-      r1 = sess.run(tensors)
-      self.assertAllEqual(0, r1["value"])
-      self.assertAllEqual(np.ones((0, 1)), r1["seqlen_value"])
-      self.assertAllEqual([-1, -2], r1["empty_value"])
-
-      r2 = sess.run(tensors)
-      self.assertAllEqual(1, r2["value"])
-      self.assertAllEqual([[1]], r2["seqlen_value"])
-      self.assertAllEqual([-1, -2], r2["empty_value"])
-
-      with self.assertRaisesOpError("Iteration finished"):
-        sess.run(tensors)
-
-
-if __name__ == "__main__":
-  test.main()