Add doctests to lookup layers.

PiperOrigin-RevId: 312571334 Change-Id: I3792e5165194ea01369544ecdc6f158fcf44bcbf
2020-05-20 15:52:22 -07:00 · 2020-05-20 15:52:22 -07:00 · 786ee6565f
commit 786ee6565f
parent 4be466a87e
2 changed files with 156 additions and 0 deletions
--- a/tensorflow/python/keras/layers/preprocessing/integer_lookup.py
+++ b/tensorflow/python/keras/layers/preprocessing/integer_lookup.py
@@ -59,6 +59,84 @@ class IntegerLookup(index_lookup.IndexLookup):
      error will be thrown.
    invert: If true, this layer will map indices to vocabulary items instead
      of mapping vocabulary items to indices.
+
+  Examples:
+
+  Creating a lookup layer with a known vocabulary
+
+  This example creates a lookup layer with a pre-existing vocabulary.
+
+  >>> vocab = [12, 36, 1138, 42]
+  >>> data = tf.constant([[12, 1138, 42], [42, 1000, 36]])
+  >>> layer = IntegerLookup(vocabulary=vocab)
+  >>> layer(data)
+  <tf.Tensor: shape=(2, 3), dtype=int64, numpy=
+  array([[2, 4, 5],
+         [5, 1, 3]])>
+
+
+  Creating a lookup layer with an adapted vocabulary
+
+  This example creates a lookup layer and generates the vocabulary by analyzing
+  the dataset.
+
+  >>> data = tf.constant([[12, 1138, 42], [42, 1000, 36]])
+  >>> layer = IntegerLookup()
+  >>> layer.adapt(data)
+  >>> layer.get_vocabulary()
+  [0, -1, 42, 1138, 1000, 36, 12]
+
+  Note how the mask value 0 and the OOV value -1 have been added to the
+  vocabulary. The remaining tokens are sorted by frequency (42, which has
+  2 occurrences, is first) then by inverse sort order.
+
+  >>> data = tf.constant([[12, 1138, 42], [42, 1000, 36]])
+  >>> layer = IntegerLookup()
+  >>> layer.adapt(data)
+  >>> layer(data)
+  <tf.Tensor: shape=(2, 3), dtype=int64, numpy=
+  array([[6, 3, 2],
+         [2, 4, 5]])>
+
+
+  Inverse lookup
+
+  This example demonstrates how to map indices to values using this layer. (You
+  can also use adapt() with invert=True, but for simplicity we'll pass the
+  vocab in this example.)
+
+  >>> vocab = [12, 36, 1138, 42]
+  >>> data = tf.constant([[1, 3, 4], [4, 5, 2]])
+  >>> layer = IntegerLookup(vocabulary=vocab, invert=True)
+  >>> layer(data)
+  <tf.Tensor: shape=(2, 3), dtype=int64, numpy=
+  array([[  12, 1138,   42],
+         [  42,   -1,   36]])>
+
+  Note that the integer 5, which is out of the vocabulary space, returns an OOV
+  token.
+
+
+  Forward and inverse lookup pairs
+
+  This example demonstrates how to use the vocabulary of a standard lookup
+  layer to create an inverse lookup layer.
+
+  >>> vocab = [12, 36, 1138, 42]
+  >>> data = tf.constant([[12, 1138, 42], [42, 1000, 36]])
+  >>> layer = IntegerLookup(vocabulary=vocab)
+  >>> i_layer = IntegerLookup(vocabulary=layer.get_vocabulary(), invert=True)
+  >>> int_data = layer(data)
+  >>> i_layer(int_data)
+  <tf.Tensor: shape=(2, 3), dtype=int64, numpy=
+  array([[  12, 1138,   42],
+         [  42,   -1,   36]])>
+
+  In this example, the input value 1000 resulted in an output of -1, since
+  1000 was not in the vocabulary - it got represented as an OOV, and all OOV
+  values are returned as -1 in the inverse layer. Also, note that for the
+  inverse to work, you must have already set the forward layer vocabulary
+  either directly or via adapt() before calling get_vocabulary().
  """

  def __init__(self,
--- a/tensorflow/python/keras/layers/preprocessing/string_lookup.py
+++ b/tensorflow/python/keras/layers/preprocessing/string_lookup.py
@@ -60,6 +60,84 @@ class StringLookup(index_lookup.IndexLookup):
    encoding: The Python string encoding to use. Defaults to `'utf-8'`.
    invert: If true, this layer will map indices to vocabulary items instead
      of mapping vocabulary items to indices.
+
+  Examples:
+
+  Creating a lookup layer with a known vocabulary
+
+  This example creates a lookup layer with a pre-existing vocabulary.
+
+  >>> vocab = ["a", "b", "c", "d"]
+  >>> data = tf.constant([["a", "c", "d"], ["d", "z", "b"]])
+  >>> layer = StringLookup(vocabulary=vocab)
+  >>> layer(data)
+  <tf.Tensor: shape=(2, 3), dtype=int64, numpy=
+  array([[2, 4, 5],
+         [5, 1, 3]])>
+
+
+  Creating a lookup layer with an adapted vocabulary
+
+  This example creates a lookup layer and generates the vocabulary by analyzing
+  the dataset.
+
+  >>> data = tf.constant([["a", "c", "d"], ["d", "z", "b"]])
+  >>> layer = StringLookup()
+  >>> layer.adapt(data)
+  >>> layer.get_vocabulary()
+  ['', '[OOV]', 'd', 'z', 'c', 'b', 'a']
+
+  Note how the mask token '' and the OOV token [OOV] have been added to the
+  vocabulary. The remaining tokens are sorted by frequency ('d', which has
+  2 occurrences, is first) then by inverse sort order.
+
+  >>> data = tf.constant([["a", "c", "d"], ["d", "z", "b"]])
+  >>> layer = StringLookup()
+  >>> layer.adapt(data)
+  >>> layer(data)
+  <tf.Tensor: shape=(2, 3), dtype=int64, numpy=
+  array([[6, 4, 2],
+         [2, 3, 5]])>
+
+
+  Inverse lookup
+
+  This example demonstrates how to map indices to strings using this layer. (You
+  can also use adapt() with invert=True, but for simplicity we'll pass the
+  vocab in this example.)
+
+  >>> vocab = ["a", "b", "c", "d"]
+  >>> data = tf.constant([[1, 3, 4], [4, 5, 2]])
+  >>> layer = StringLookup(vocabulary=vocab, invert=True)
+  >>> layer(data)
+  <tf.Tensor: shape=(2, 3), dtype=string, numpy=
+  array([[b'a', b'c', b'd'],
+         [b'd', b'[OOV]', b'b']], dtype=object)>
+
+  Note that the integer 5, which is out of the vocabulary space, returns an OOV
+  token.
+
+
+  Forward and inverse lookup pairs
+
+  This example demonstrates how to use the vocabulary of a standard lookup
+  layer to create an inverse lookup layer.
+
+  >>> vocab = ["a", "b", "c", "d"]
+  >>> data = tf.constant([["a", "c", "d"], ["d", "z", "b"]])
+  >>> layer = StringLookup(vocabulary=vocab)
+  >>> i_layer = StringLookup(vocabulary=layer.get_vocabulary(), invert=True)
+  >>> int_data = layer(data)
+  >>> i_layer(int_data)
+  <tf.Tensor: shape=(2, 3), dtype=string, numpy=
+  array([[b'a', b'c', b'd'],
+         [b'd', b'[OOV]', b'b']], dtype=object)>
+
+  In this example, the input value 'z' resulted in an output of '[OOV]', since
+  'z' was not in the vocabulary - it got represented as an OOV, and all OOV
+  values are returned as '[OOV]' in the inverse layer. Also, note that for the
+  inverse to work, you must have already set the forward layer vocabulary
+  either directly or via adapt() before calling get_vocabulary().
  """

  def __init__(self,