From 128add4e084bd537e27ad854895bbadd8c8df0f7 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Mon, 18 Mar 2019 11:02:04 -0700 Subject: [PATCH] [tf.data] Improve `TFRecordDataset` documentation. PiperOrigin-RevId: 239019801 --- tensorflow/python/data/ops/readers.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/data/ops/readers.py b/tensorflow/python/data/ops/readers.py index 477ff94a8d3..d276bb697de 100644 --- a/tensorflow/python/data/ops/readers.py +++ b/tensorflow/python/data/ops/readers.py @@ -178,19 +178,21 @@ class TFRecordDatasetV2(dataset_ops.DatasetV2): num_parallel_reads=None): """Creates a `TFRecordDataset` to read one or more TFRecord files. - NOTE: The `num_parallel_reads` argument can be used to improve performance - when reading from a remote filesystem. - Args: filenames: A `tf.string` tensor or `tf.data.Dataset` containing one or more filenames. compression_type: (Optional.) A `tf.string` scalar evaluating to one of `""` (no compression), `"ZLIB"`, or `"GZIP"`. buffer_size: (Optional.) A `tf.int64` scalar representing the number of - bytes in the read buffer. 0 means no buffering. + bytes in the read buffer. If your input pipeline is I/O bottlenecked, + consider setting this parameter to a value of 1-100 MBs. If `None`, a + sensible default for both local and remote file systems is used. num_parallel_reads: (Optional.) A `tf.int64` scalar representing the - number of files to read in parallel. Defaults to reading files - sequentially. + number of files to read in parallel. If greater than one, the records of + files read in parallel are output in an interleaved order. If your + input pipeline is I/O bottlenecked, consider setting this parameter to a + value greater than one to parallelize the I/O. If `None`, files will be + read sequentially. Raises: TypeError: If any argument does not have the expected type.