Improves documentation for keras.preprocessing.pad_sequence

PiperOrigin-RevId: 297247197 Change-Id: I4c85e8ba6d4ae43d4c249442ef9c47bb6a5805c6
2020-02-25 17:50:37 -08:00 · 2020-02-25 17:50:37 -08:00 · 82bcbf0560
commit 82bcbf0560
parent 42e6af1ac7
1 changed files with 69 additions and 2 deletions
--- a/tensorflow/python/keras/preprocessing/sequence.py
+++ b/tensorflow/python/keras/preprocessing/sequence.py
@ -24,7 +24,6 @@ from keras_preprocessing import sequence
 from tensorflow.python.keras.utils import data_utils
 from tensorflow.python.util.tf_export import keras_export

-pad_sequences = sequence.pad_sequences
 make_sampling_table = sequence.make_sampling_table
 skipgrams = sequence.skipgrams
 # TODO(fchollet): consider making `_remove_long_seq` public.
@ -34,6 +33,7 @@ _remove_long_seq = sequence._remove_long_seq  # pylint: disable=protected-access
@keras_export('keras.preprocessing.sequence.TimeseriesGenerator')
 class TimeseriesGenerator(sequence.TimeseriesGenerator, data_utils.Sequence):
  """Utility class for generating batches of temporal data.
+
  This class takes in a sequence of data-points gathered at
  equal intervals, along with time series parameters such as
  stride, length of history, etc., to produce batches for
@ -89,7 +89,74 @@ class TimeseriesGenerator(sequence.TimeseriesGenerator, data_utils.Sequence):
  pass


-keras_export('keras.preprocessing.sequence.pad_sequences')(pad_sequences)
+@keras_export('keras.preprocessing.sequence.pad_sequences')
+def pad_sequences(sequences, maxlen=None, dtype='int32',
+                  padding='pre', truncating='pre', value=0.):
+  """Pads sequences to the same length.
+
+  This function transforms a list (of length `num_samples`)
+  of sequences (lists of integers)
+  into a 2D Numpy array of shape `(num_samples, num_timesteps)`.
+  `num_timesteps` is either the `maxlen` argument if provided,
+  or the length of the longest sequence in the list.
+
+  Sequences that are shorter than `num_timesteps`
+  are padded with `value` until they are `num_timesteps` long.
+
+  Sequences longer than `num_timesteps` are truncated
+  so that they fit the desired length.
+
+  The position where padding or truncation happens is determined by
+  the arguments `padding` and `truncating`, respectively.
+  Pre-padding or removing values from the beginning of the sequence is the
+  default.
+
+  >>> sequence = [[1], [2, 3], [4, 5, 6]]
+  >>> tf.keras.preprocessing.sequence.pad_sequences(sequence)
+  array([[0, 0, 1],
+         [0, 2, 3],
+         [4, 5, 6]], dtype=int32)
+
+  >>> tf.keras.preprocessing.sequence.pad_sequences(sequence, value=-1)
+  array([[-1, -1,  1],
+         [-1,  2,  3],
+         [ 4,  5,  6]], dtype=int32)
+
+  >>> tf.keras.preprocessing.sequence.pad_sequences(sequence, padding='post')
+  array([[1, 0, 0],
+         [2, 3, 0],
+         [4, 5, 6]], dtype=int32)
+
+  >>> tf.keras.preprocessing.sequence.pad_sequences(sequence, maxlen=2)
+  array([[0, 1],
+         [2, 3],
+         [5, 6]], dtype=int32)
+
+  Arguments:
+      sequences: List of sequences (each sequence is a list of integers).
+      maxlen: Optional Int, maximum length of all sequences. If not provided,
+          sequences will be padded to the length of the longest individual
+          sequence.
+      dtype: (Optional, defaults to int32). Type of the output sequences.
+          To pad sequences with variable length strings, you can use `object`.
+      padding: String, 'pre' or 'post' (optional, defaults to 'pre'):
+          pad either before or after each sequence.
+      truncating: String, 'pre' or 'post' (optional, defaults to 'pre'):
+          remove values from sequences larger than
+          `maxlen`, either at the beginning or at the end of the sequences.
+      value: Float or String, padding value. (Optional, defaults to 0.)
+
+  Returns:
+      Numpy array with shape `(len(sequences), maxlen)`
+
+  Raises:
+      ValueError: In case of invalid values for `truncating` or `padding`,
+          or in case of invalid shape for a `sequences` entry.
+  """
+  return sequence.pad_sequences(
+      sequences, maxlen=maxlen, dtype=dtype,
+      padding=padding, truncating=truncating, value=value)
+
 keras_export(
    'keras.preprocessing.sequence.make_sampling_table')(make_sampling_table)
 keras_export('keras.preprocessing.sequence.skipgrams')(skipgrams)