From 52b0b571dcf57dc17ec3a6fd1cff0f38161b5a87 Mon Sep 17 00:00:00 2001 From: Dominic Jack Date: Wed, 9 Dec 2020 08:26:14 +1000 Subject: [PATCH 1/3] elaborated docs on determinism in dataset ops --- tensorflow/python/data/ops/dataset_ops.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 0448dcc30c9..3c30dc98092 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -1774,6 +1774,11 @@ name=None)) in general precludes the possibility of executing user-defined transformations in parallel (because of Python GIL). + The order of elements yielded by this transformation is + deterministic, as long as `map_func` is a pure function and + `deterministic=True`. If `map_func` contains any stateful operations, the + order in which that state is accessed is undefined. + Performance can often be improved by setting `num_parallel_calls` so that `map` will use multiple threads to process elements. If deterministic order isn't required, it can also improve performance to set @@ -1792,11 +1797,10 @@ name=None)) `tf.data.AUTOTUNE` is used, then the number of parallel calls is set dynamically based on available CPU. deterministic: (Optional.) A boolean controlling whether determinism - should be traded for performance by allowing elements to be produced out + should be traded for performance by allowing elements to be yielded out of order. If `deterministic` is `None`, the `tf.data.Options.experimental_deterministic` dataset option (`True` by - default) is used to decide whether to produce elements - deterministically. + default) is used to decide whether to run deterministically. Returns: Dataset: A `Dataset`. @@ -1925,8 +1929,7 @@ name=None)) should be traded for performance by allowing elements to be produced out of order. 
If `deterministic` is `None`, the `tf.data.Options.experimental_deterministic` dataset option (`True` by - default) is used to decide whether to produce elements - deterministically. + default) is used to decide whether to run deterministically. Returns: Dataset: A `Dataset`. From 80d74fd7859e8ff8b873398dc603682a51990abf Mon Sep 17 00:00:00 2001 From: Dominic Jack Date: Wed, 9 Dec 2020 18:29:41 +1000 Subject: [PATCH 2/3] moved determinism desc after num_parallel_calls, clarified --- tensorflow/python/data/ops/dataset_ops.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 3c30dc98092..6698e2ecc50 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -1774,11 +1774,6 @@ name=None)) in general precludes the possibility of executing user-defined transformations in parallel (because of Python GIL). - The order of elements yielded by this transformation is - deterministic, as long as `map_func` is a pure function and - `deterministic=True`. If `map_func` contains any stateful operations, the - order in which that state is accessed is undefined. - Performance can often be improved by setting `num_parallel_calls` so that `map` will use multiple threads to process elements. If deterministic order isn't required, it can also improve performance to set @@ -1789,6 +1784,12 @@ name=None)) ... num_parallel_calls=tf.data.AUTOTUNE, ... deterministic=False) + If `num_parallel_calls > 1`, the order of elements yielded by this + transformation is deterministic if `deterministic=True`. If `map_func` + contains stateful operations and `num_parallel_calls > 1`, the order in + which that state is accessed is undefined, so the values of output elements + may not be deterministic regardless of the `deterministic` flag value. + Args: map_func: A function mapping a dataset element to another dataset element. 
num_parallel_calls: (Optional.) A `tf.int32` scalar `tf.Tensor`, From 50579d8901ea48cd4019336ffde4b7626321d697 Mon Sep 17 00:00:00 2001 From: Dominic Jack Date: Thu, 10 Dec 2020 07:13:03 +1000 Subject: [PATCH 3/3] clarified deterministic order independent of num_parallel_calls --- tensorflow/python/data/ops/dataset_ops.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 6698e2ecc50..46af724339a 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -1784,11 +1784,11 @@ name=None)) ... num_parallel_calls=tf.data.AUTOTUNE, ... deterministic=False) - If `num_parallel_calls > 1`, the order of elements yielded by this - transformation is deterministic if `deterministic=True`. If `map_func` - contains stateful operations and `num_parallel_calls > 1`, the order in - which that state is accessed is undefined, so the values of output elements - may not be deterministic regardless of the `deterministic` flag value. + The order of elements yielded by this transformation is deterministic if + `deterministic=True`. If `map_func` contains stateful operations and + `num_parallel_calls > 1`, the order in which that state is accessed is + undefined, so the values of output elements may not be deterministic + regardless of the `deterministic` flag value. Args: map_func: A function mapping a dataset element to another dataset element.