Add doctests to lookup layers.

PiperOrigin-RevId: 312571334
Change-Id: I3792e5165194ea01369544ecdc6f158fcf44bcbf
This commit is contained in:
A. Unique TensorFlower 2020-05-20 15:52:22 -07:00 committed by TensorFlower Gardener
parent 4be466a87e
commit 786ee6565f
2 changed files with 156 additions and 0 deletions

View File

@ -59,6 +59,84 @@ class IntegerLookup(index_lookup.IndexLookup):
error will be thrown.
invert: If true, this layer will map indices to vocabulary items instead
of mapping vocabulary items to indices.
Examples:
Creating a lookup layer with a known vocabulary
This example creates a lookup layer with a pre-existing vocabulary.
>>> vocab = [12, 36, 1138, 42]
>>> data = tf.constant([[12, 1138, 42], [42, 1000, 36]])
>>> layer = IntegerLookup(vocabulary=vocab)
>>> layer(data)
<tf.Tensor: shape=(2, 3), dtype=int64, numpy=
array([[2, 4, 5],
[5, 1, 3]])>
Creating a lookup layer with an adapted vocabulary
This example creates a lookup layer and generates the vocabulary by analyzing
the dataset.
>>> data = tf.constant([[12, 1138, 42], [42, 1000, 36]])
>>> layer = IntegerLookup()
>>> layer.adapt(data)
>>> layer.get_vocabulary()
[0, -1, 42, 1138, 1000, 36, 12]
Note how the mask value 0 and the OOV value -1 have been added to the
vocabulary. The remaining tokens are sorted by frequency (42, which has
2 occurrences, is first) then by inverse sort order.
>>> data = tf.constant([[12, 1138, 42], [42, 1000, 36]])
>>> layer = IntegerLookup()
>>> layer.adapt(data)
>>> layer(data)
<tf.Tensor: shape=(2, 3), dtype=int64, numpy=
array([[6, 3, 2],
[2, 4, 5]])>
Inverse lookup
This example demonstrates how to map indices to values using this layer. (You
can also use adapt() with invert=True, but for simplicity we'll pass the
vocab in this example.)
>>> vocab = [12, 36, 1138, 42]
>>> data = tf.constant([[1, 3, 4], [4, 5, 2]])
>>> layer = IntegerLookup(vocabulary=vocab, invert=True)
>>> layer(data)
<tf.Tensor: shape=(2, 3), dtype=int64, numpy=
array([[ 12, 1138, 42],
[ 42, -1, 36]])>
Note that the integer 5, which is out of the vocabulary space, returns an OOV
token.
Forward and inverse lookup pairs
This example demonstrates how to use the vocabulary of a standard lookup
layer to create an inverse lookup layer.
>>> vocab = [12, 36, 1138, 42]
>>> data = tf.constant([[12, 1138, 42], [42, 1000, 36]])
>>> layer = IntegerLookup(vocabulary=vocab)
>>> i_layer = IntegerLookup(vocabulary=layer.get_vocabulary(), invert=True)
>>> int_data = layer(data)
>>> i_layer(int_data)
<tf.Tensor: shape=(2, 3), dtype=int64, numpy=
array([[ 12, 1138, 42],
[ 42, -1, 36]])>
In this example, the input value 1000 resulted in an output of -1, since
1000 was not in the vocabulary - it got represented as an OOV, and all OOV
values are returned as -1 in the inverse layer. Also, note that for the
inverse to work, you must have already set the forward layer vocabulary
either directly or via adapt() before calling get_vocabulary().
"""
def __init__(self,

View File

@ -60,6 +60,84 @@ class StringLookup(index_lookup.IndexLookup):
encoding: The Python string encoding to use. Defaults to `'utf-8'`.
invert: If true, this layer will map indices to vocabulary items instead
of mapping vocabulary items to indices.
Examples:
Creating a lookup layer with a known vocabulary
This example creates a lookup layer with a pre-existing vocabulary.
>>> vocab = ["a", "b", "c", "d"]
>>> data = tf.constant([["a", "c", "d"], ["d", "z", "b"]])
>>> layer = StringLookup(vocabulary=vocab)
>>> layer(data)
<tf.Tensor: shape=(2, 3), dtype=int64, numpy=
array([[2, 4, 5],
[5, 1, 3]])>
Creating a lookup layer with an adapted vocabulary
This example creates a lookup layer and generates the vocabulary by analyzing
the dataset.
>>> data = tf.constant([["a", "c", "d"], ["d", "z", "b"]])
>>> layer = StringLookup()
>>> layer.adapt(data)
>>> layer.get_vocabulary()
['', '[OOV]', 'd', 'z', 'c', 'b', 'a']
Note how the mask token '' and the OOV token [OOV] have been added to the
vocabulary. The remaining tokens are sorted by frequency ('d', which has
2 occurrences, is first) then by inverse sort order.
>>> data = tf.constant([["a", "c", "d"], ["d", "z", "b"]])
>>> layer = StringLookup()
>>> layer.adapt(data)
>>> layer(data)
<tf.Tensor: shape=(2, 3), dtype=int64, numpy=
array([[6, 4, 2],
[2, 3, 5]])>
Inverse lookup
This example demonstrates how to map indices to strings using this layer. (You
can also use adapt() with invert=True, but for simplicity we'll pass the
vocab in this example.)
>>> vocab = ["a", "b", "c", "d"]
>>> data = tf.constant([[1, 3, 4], [4, 5, 2]])
>>> layer = StringLookup(vocabulary=vocab, invert=True)
>>> layer(data)
<tf.Tensor: shape=(2, 3), dtype=string, numpy=
array([[b'a', b'c', b'd'],
[b'd', b'[OOV]', b'b']], dtype=object)>
Note that the integer 5, which is out of the vocabulary space, returns an OOV
token.
Forward and inverse lookup pairs
This example demonstrates how to use the vocabulary of a standard lookup
layer to create an inverse lookup layer.
>>> vocab = ["a", "b", "c", "d"]
>>> data = tf.constant([["a", "c", "d"], ["d", "z", "b"]])
>>> layer = StringLookup(vocabulary=vocab)
>>> i_layer = StringLookup(vocabulary=layer.get_vocabulary(), invert=True)
>>> int_data = layer(data)
>>> i_layer(int_data)
<tf.Tensor: shape=(2, 3), dtype=string, numpy=
array([[b'a', b'c', b'd'],
[b'd', b'[OOV]', b'b']], dtype=object)>
In this example, the input value 'z' resulted in an output of '[OOV]', since
'z' was not in the vocabulary - it got represented as an OOV, and all OOV
values are returned as '[OOV]' in the inverse layer. Also, note that for the
inverse to work, you must have already set the forward layer vocabulary
either directly or via adapt() before calling get_vocabulary().
"""
def __init__(self,