118 lines
4.6 KiB
Python
118 lines
4.6 KiB
Python
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ==============================================================================
|
|
"""Cardinality analysis of `Dataset` objects."""
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
from tensorflow.python.data.ops import dataset_ops
|
|
from tensorflow.python.framework import dtypes
|
|
from tensorflow.python.framework import ops
|
|
from tensorflow.python.ops import gen_dataset_ops
|
|
from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops
|
|
from tensorflow.python.util.tf_export import tf_export
|
|
|
|
|
|
INFINITE = -1
|
|
UNKNOWN = -2
|
|
tf_export("data.experimental.INFINITE_CARDINALITY").export_constant(
|
|
__name__, "INFINITE")
|
|
tf_export("data.experimental.UNKNOWN_CARDINALITY").export_constant(
|
|
__name__, "UNKNOWN")
|
|
|
|
|
|
# TODO(b/157691652): Deprecate this method after migrating users to the new API.
|
|
@tf_export("data.experimental.cardinality")
|
|
def cardinality(dataset):
|
|
"""Returns the cardinality of `dataset`, if known.
|
|
|
|
The operation returns the cardinality of `dataset`. The operation may return
|
|
`tf.data.experimental.INFINITE_CARDINALITY` if `dataset` contains an infinite
|
|
number of elements or `tf.data.experimental.UNKNOWN_CARDINALITY` if the
|
|
analysis fails to determine the number of elements in `dataset` (e.g. when the
|
|
dataset source is a file).
|
|
|
|
>>> dataset = tf.data.Dataset.range(42)
|
|
>>> print(tf.data.experimental.cardinality(dataset).numpy())
|
|
42
|
|
>>> dataset = dataset.repeat()
|
|
>>> cardinality = tf.data.experimental.cardinality(dataset)
|
|
>>> print((cardinality == tf.data.experimental.INFINITE_CARDINALITY).numpy())
|
|
True
|
|
>>> dataset = dataset.filter(lambda x: True)
|
|
>>> cardinality = tf.data.experimental.cardinality(dataset)
|
|
>>> print((cardinality == tf.data.experimental.UNKNOWN_CARDINALITY).numpy())
|
|
True
|
|
|
|
Args:
|
|
dataset: A `tf.data.Dataset` for which to determine cardinality.
|
|
|
|
Returns:
|
|
A scalar `tf.int64` `Tensor` representing the cardinality of `dataset`. If
|
|
the cardinality is infinite or unknown, the operation returns the named
|
|
constant `INFINITE_CARDINALITY` and `UNKNOWN_CARDINALITY` respectively.
|
|
"""
|
|
|
|
return gen_dataset_ops.dataset_cardinality(dataset._variant_tensor) # pylint: disable=protected-access
|
|
|
|
|
|
@tf_export("data.experimental.assert_cardinality")
|
|
def assert_cardinality(expected_cardinality):
|
|
"""Asserts the cardinality of the input dataset.
|
|
|
|
NOTE: The following assumes that "examples.tfrecord" contains 42 records.
|
|
|
|
>>> dataset = tf.data.TFRecordDataset("examples.tfrecord")
|
|
>>> cardinality = tf.data.experimental.cardinality(dataset)
|
|
>>> print((cardinality == tf.data.experimental.UNKNOWN_CARDINALITY).numpy())
|
|
True
|
|
>>> dataset = dataset.apply(tf.data.experimental.assert_cardinality(42))
|
|
>>> print(tf.data.experimental.cardinality(dataset).numpy())
|
|
42
|
|
|
|
Args:
|
|
expected_cardinality: The expected cardinality of the input dataset.
|
|
|
|
Returns:
|
|
A `Dataset` transformation function, which can be passed to
|
|
`tf.data.Dataset.apply`.
|
|
|
|
Raises:
|
|
FailedPreconditionError: The assertion is checked at runtime (when iterating
|
|
the dataset) and an error is raised if the actual and expected cardinality
|
|
differ.
|
|
"""
|
|
def _apply_fn(dataset):
|
|
return _AssertCardinalityDataset(dataset, expected_cardinality)
|
|
|
|
return _apply_fn
|
|
|
|
|
|
class _AssertCardinalityDataset(dataset_ops.UnaryUnchangedStructureDataset):
|
|
"""A `Dataset` that assert the cardinality of its input."""
|
|
|
|
def __init__(self, input_dataset, expected_cardinality):
|
|
self._input_dataset = input_dataset
|
|
self._expected_cardinality = ops.convert_to_tensor(
|
|
expected_cardinality, dtype=dtypes.int64, name="expected_cardinality")
|
|
|
|
# pylint: enable=protected-access
|
|
variant_tensor = ged_ops.assert_cardinality_dataset(
|
|
self._input_dataset._variant_tensor, # pylint: disable=protected-access
|
|
self._expected_cardinality,
|
|
**self._flat_structure)
|
|
super(_AssertCardinalityDataset, self).__init__(input_dataset,
|
|
variant_tensor)
|