# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow.ops.parsing_ops."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
import itertools
import numpy as np
from google.protobuf import json_format
from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2
from tensorflow.python.eager import context
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors
from tensorflow.python.framework import errors_impl
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import parsing_ops
from tensorflow.python.ops.ragged import ragged_concat_ops
from tensorflow.python.ops.ragged import ragged_factory_ops
from tensorflow.python.ops.ragged import ragged_tensor
from tensorflow.python.platform import test
from tensorflow.python.platform import tf_logging
# Helpers for creating Example objects
example = example_pb2.Example
feature = feature_pb2.Feature
features = lambda d: feature_pb2.Features(feature=d)
bytes_feature = lambda v: feature(bytes_list=feature_pb2.BytesList(value=v))
int64_feature = lambda v: feature(int64_list=feature_pb2.Int64List(value=v))
float_feature = lambda v: feature(float_list=feature_pb2.FloatList(value=v))
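# A minimal sketch (illustrative only; not used by the tests below) of how
# these helpers compose into a serialized tf.train.Example. The name
# `_demo_example` and the feature keys are hypothetical.
_demo_example = example(
    features=features({
        "age": int64_feature([42]),
        "name": bytes_feature([b"alice"]),
        "score": float_feature([0.5, 1.5]),
    }))
_demo_serialized = _demo_example.SerializeToString()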
# Helpers for creating SequenceExample objects
feature_list = lambda l: feature_pb2.FeatureList(feature=l)
feature_lists = lambda d: feature_pb2.FeatureLists(feature_list=d)
sequence_example = example_pb2.SequenceExample
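# Likewise for SequenceExample (illustrative only; keys are hypothetical):
_demo_sequence_serialized = sequence_example(
    context=features({"id": int64_feature([7])}),
    feature_lists=feature_lists({
        "frames":
            feature_list([bytes_feature([b"f0"]),
                          bytes_feature([b"f1"])]),
    })).SerializeToString()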
def flatten(list_of_lists):
"""Flatten one level of nesting."""
return itertools.chain.from_iterable(list_of_lists)
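# For example, list(flatten([[1, 2], [3]])) == [1, 2, 3].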
def _compare_output_to_expected(tester, actual, expected):
tester.assertEqual(set(actual.keys()), set(expected.keys()))
for k, v in actual.items():
expected_v = expected[k]
tf_logging.info("Comparing key: %s", k)
if isinstance(v, sparse_tensor.SparseTensor):
tester.assertIsInstance(expected_v, tuple)
tester.assertLen(expected_v, 3)
tester.assertAllEqual(v.indices, expected_v[0])
tester.assertAllEqual(v.values, expected_v[1])
tester.assertAllEqual(v.dense_shape, expected_v[2])
else:
tester.assertAllEqual(v, expected_v)
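# Note: expected values for SparseTensor outputs are written as
# (indices, values, dense_shape) triples; e.g. a length-3 sparse vector
# holding 3.0 at position 1 would be written as
# (np.array([[1]]), np.array([3.0]), np.array([3])).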
@test_util.run_all_in_graph_and_eager_modes
class ParseExampleTest(test.TestCase):
def _test(self, kwargs, expected_values=None, expected_err=None):
if expected_err:
if not context.executing_eagerly():
with self.assertRaisesWithPredicateMatch(expected_err[0],
expected_err[1]):
self.evaluate(parsing_ops.parse_example(**kwargs))
else:
with self.assertRaises(Exception):
parsing_ops.parse_example(**kwargs)
return
else:
out = parsing_ops.parse_example(**kwargs)
_compare_output_to_expected(self, out, expected_values)
# Check shapes; if serialized is a Tensor we need its size to check
# the shapes properly.
serialized = kwargs["serialized"]
batch_size = (
self.evaluate(serialized).size
if isinstance(serialized, ops.Tensor) else np.asarray(serialized).size)
for k, f in kwargs["features"].items():
if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
self.assertEqual(tuple(out[k].shape.as_list()), (batch_size,) + f.shape)
elif isinstance(f, parsing_ops.VarLenFeature):
if context.executing_eagerly():
out[k].indices.shape.assert_is_compatible_with([None, 2])
out[k].values.shape.assert_is_compatible_with([None])
out[k].dense_shape.shape.assert_is_compatible_with([2])
else:
self.assertEqual(out[k].indices.shape.as_list(), [None, 2])
self.assertEqual(out[k].values.shape.as_list(), [None])
self.assertEqual(out[k].dense_shape.shape.as_list(), [2])
def testEmptySerializedWithAllDefaults(self):
sparse_name = "st_a"
a_name = "a"
b_name = "b"
c_name = "c:has_a_tricky_name"
a_default = [0, 42, 0]
b_default = np.random.rand(3, 3).astype(bytes)
c_default = np.random.rand(2).astype(np.float32)
expected_st_a = ( # indices, values, shape
np.empty((0, 2), dtype=np.int64), # indices
np.empty((0,), dtype=np.int64), # st_a is DT_INT64
np.array([2, 0], dtype=np.int64)) # batch == 2, max_elems = 0
expected_output = {
sparse_name: expected_st_a,
a_name: np.array(2 * [[a_default]]),
b_name: np.array(2 * [b_default]),
c_name: np.array(2 * [c_default]),
}
self._test(
{
"example_names": np.empty((0,), dtype=bytes),
"serialized": ops.convert_to_tensor(["", ""]),
"features": {
sparse_name:
parsing_ops.VarLenFeature(dtypes.int64),
a_name:
parsing_ops.FixedLenFeature(
(1, 3), dtypes.int64, default_value=a_default),
b_name:
parsing_ops.FixedLenFeature(
(3, 3), dtypes.string, default_value=b_default),
c_name:
parsing_ops.FixedLenFeature(
(2,), dtypes.float32, default_value=c_default),
}
}, expected_output)
def testEmptySerializedWithoutDefaultsShouldFail(self):
input_features = {
"st_a":
parsing_ops.VarLenFeature(dtypes.int64),
"a":
parsing_ops.FixedLenFeature((1, 3),
dtypes.int64,
default_value=[0, 42, 0]),
"b":
parsing_ops.FixedLenFeature(
(3, 3),
dtypes.string,
default_value=np.random.rand(3, 3).astype(bytes)),
# Feature "c" is missing a default, this gap will cause failure.
"c":
parsing_ops.FixedLenFeature((2,), dtype=dtypes.float32),
}
# Edge case where the key is there but the feature value is empty
original = example(features=features({"c": feature()}))
self._test(
{
"example_names": ["in1"],
"serialized": [original.SerializeToString()],
"features": input_features,
},
expected_err=(
errors_impl.OpError,
"Name: in1, Feature: c \\(data type: float\\) is required"))
# Standard case of missing key and value.
self._test(
{
"example_names": ["in1", "in2"],
"serialized": ["", ""],
"features": input_features,
},
expected_err=(
errors_impl.OpError,
"Name: in1, Feature: c \\(data type: float\\) is required"))
def testDenseNotMatchingShapeShouldFail(self):
original = [
example(features=features({
"a": float_feature([1, 1, 3]),
})),
example(features=features({
"a": float_feature([-1, -1]),
}))
]
names = ["passing", "failing"]
serialized = [m.SerializeToString() for m in original]
self._test(
{
"example_names": names,
"serialized": ops.convert_to_tensor(serialized),
"features": {
"a": parsing_ops.FixedLenFeature((1, 3), dtypes.float32)
}
},
expected_err=(errors_impl.OpError,
"Name: failing, Key: a, Index: 1. Number of float val"))
def testDenseDefaultNoShapeShouldFail(self):
original = [
example(features=features({
"a": float_feature([1, 1, 3]),
})),
]
serialized = [m.SerializeToString() for m in original]
self._test(
{
"example_names": ["failing"],
"serialized": ops.convert_to_tensor(serialized),
"features": {
"a": parsing_ops.FixedLenFeature(None, dtypes.float32)
}
},
expected_err=(ValueError, "Missing shape for feature a"))
def testSerializedContainingSparse(self):
original = [
example(features=features({"st_c": float_feature([3, 4])})),
example(
features=features({
"st_c": float_feature([]), # empty float list
})),
example(
features=features({
"st_d": feature(), # feature with nothing in it
})),
example(
features=features({
"st_c": float_feature([1, 2, -1]),
"st_d": bytes_feature([b"hi"])
}))
]
serialized = [m.SerializeToString() for m in original]
expected_st_c = ( # indices, values, shape
np.array([[0, 0], [0, 1], [3, 0], [3, 1], [3, 2]], dtype=np.int64),
np.array([3.0, 4.0, 1.0, 2.0, -1.0], dtype=np.float32),
np.array([4, 3], dtype=np.int64)) # batch == 4, max_elems = 3
expected_st_d = ( # indices, values, shape
np.array([[3, 0]], dtype=np.int64), np.array(["hi"], dtype=bytes),
np.array([4, 1], dtype=np.int64)) # batch == 4, max_elems = 1
expected_output = {
"st_c": expected_st_c,
"st_d": expected_st_d,
}
self._test(
{
"serialized": ops.convert_to_tensor(serialized),
"features": {
"st_c": parsing_ops.VarLenFeature(dtypes.float32),
"st_d": parsing_ops.VarLenFeature(dtypes.string)
}
}, expected_output)
def testSerializedContainingSparseFeature(self):
original = [
example(
features=features({
"val": float_feature([3, 4]),
"idx": int64_feature([5, 10])
})),
example(
features=features({
"val": float_feature([]), # empty float list
"idx": int64_feature([])
})),
example(
features=features({
"val": feature(), # feature with nothing in it
# missing idx feature
})),
example(
features=features({
"val": float_feature([1, 2, -1]),
"idx":
int64_feature([0, 9, 3]) # unsorted
}))
]
serialized = [m.SerializeToString() for m in original]
expected_sp = ( # indices, values, shape
np.array([[0, 5], [0, 10], [3, 0], [3, 3], [3, 9]], dtype=np.int64),
np.array([3.0, 4.0, 1.0, -1.0, 2.0], dtype=np.float32),
np.array([4, 13], dtype=np.int64)) # batch == 4, max_elems = 13
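# To see how SparseFeature combines "idx" and "val": example 0 has
# idx=[5, 10] and val=[3.0, 4.0], yielding entries (0, 5) -> 3.0 and
# (0, 10) -> 4.0; example 3's unsorted indices are sorted during
# parsing, yielding (3, 0) -> 1.0, (3, 3) -> -1.0 and (3, 9) -> 2.0.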
expected_output = {
"sp": expected_sp,
}
self._test(
{
"serialized": ops.convert_to_tensor(serialized),
"features": {
"sp":
parsing_ops.SparseFeature(["idx"], "val", dtypes.float32,
[13])
}
}, expected_output)
def testSerializedContainingSparseFeatureReuse(self):
original = [
example(
features=features({
"val1": float_feature([3, 4]),
"val2": float_feature([5, 6]),
"idx": int64_feature([5, 10])
})),
example(
features=features({
"val1": float_feature([]), # empty float list
"idx": int64_feature([])
})),
]
serialized = [m.SerializeToString() for m in original]
expected_sp1 = ( # indices, values, shape
np.array([[0, 5], [0, 10]],
dtype=np.int64), np.array([3.0, 4.0], dtype=np.float32),
np.array([2, 13], dtype=np.int64)) # batch == 2, max_elems = 13
expected_sp2 = ( # indices, values, shape
np.array([[0, 5], [0, 10]],
dtype=np.int64), np.array([5.0, 6.0], dtype=np.float32),
np.array([2, 7], dtype=np.int64)) # batch == 2, max_elems = 7
expected_output = {
"sp1": expected_sp1,
"sp2": expected_sp2,
}
self._test(
{
"serialized": ops.convert_to_tensor(serialized),
"features": {
"sp1":
parsing_ops.SparseFeature("idx", "val1", dtypes.float32,
13),
"sp2":
parsing_ops.SparseFeature(
"idx",
"val2",
dtypes.float32,
size=7,
already_sorted=True)
}
}, expected_output)
def testSerializedContaining3DSparseFeature(self):
original = [
example(
features=features({
"val": float_feature([3, 4]),
"idx0": int64_feature([5, 10]),
"idx1": int64_feature([0, 2]),
})),
example(
features=features({
"val": float_feature([]), # empty float list
"idx0": int64_feature([]),
"idx1": int64_feature([]),
})),
example(
features=features({
"val": feature(), # feature with nothing in it
# missing idx feature
})),
example(
features=features({
"val": float_feature([1, 2, -1]),
"idx0": int64_feature([0, 9, 3]), # unsorted
"idx1": int64_feature([1, 0, 2]),
}))
]
serialized = [m.SerializeToString() for m in original]
expected_sp = (
# indices
np.array([[0, 5, 0], [0, 10, 2], [3, 0, 1], [3, 3, 2], [3, 9, 0]],
dtype=np.int64),
# values
np.array([3.0, 4.0, 1.0, -1.0, 2.0], dtype=np.float32),
# shape batch == 4, max_elems = 13
np.array([4, 13, 3], dtype=np.int64))
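# With two index keys, each sparse entry's index is
# [batch, idx0[i], idx1[i]]; e.g. example 0 pairs idx0=[5, 10] with
# idx1=[0, 2] to produce the indices (0, 5, 0) and (0, 10, 2).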
expected_output = {
"sp": expected_sp,
}
self._test(
{
"serialized": ops.convert_to_tensor(serialized),
"features": {
"sp":
parsing_ops.SparseFeature(["idx0", "idx1"], "val",
dtypes.float32, [13, 3])
}
}, expected_output)
def testSerializedContainingDense(self):
aname = "a"
bname = "b*has+a:tricky_name"
original = [
example(
features=features({
aname: float_feature([1, 1]),
bname: bytes_feature([b"b0_str"]),
})),
example(
features=features({
aname: float_feature([-1, -1]),
bname: bytes_feature([b""]),
}))
]
serialized = [m.SerializeToString() for m in original]
expected_output = {
aname:
np.array([[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
bname:
np.array(["b0_str", ""], dtype=bytes).reshape(2, 1, 1, 1, 1),
}
# No defaults, values required
self._test(
{
"serialized": ops.convert_to_tensor(serialized),
"features": {
aname:
parsing_ops.FixedLenFeature(
(1, 2, 1), dtype=dtypes.float32),
bname:
parsing_ops.FixedLenFeature(
(1, 1, 1, 1), dtype=dtypes.string),
}
}, expected_output)
# This test is identical to the previous one except for the creation
# of 'serialized'.
def testSerializedContainingDenseWithConcat(self):
aname = "a"
bname = "b*has+a:tricky_name"
# TODO(lew): Feature appearing twice should be an error in the future.
original = [
(example(features=features({
aname: float_feature([10, 10]),
})),
example(
features=features({
aname: float_feature([1, 1]),
bname: bytes_feature([b"b0_str"]),
}))),
(
example(features=features({
bname: bytes_feature([b"b100"]),
})),
example(
features=features({
aname: float_feature([-1, -1]),
bname: bytes_feature([b"b1"]),
})),
),
]
serialized = [
m.SerializeToString() + n.SerializeToString() for (m, n) in original
]
expected_output = {
aname:
np.array([[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
bname:
np.array(["b0_str", "b1"], dtype=bytes).reshape(2, 1, 1, 1, 1),
}
# No defaults, values required
self._test(
{
"serialized": ops.convert_to_tensor(serialized),
"features": {
aname:
parsing_ops.FixedLenFeature(
(1, 2, 1), dtype=dtypes.float32),
bname:
parsing_ops.FixedLenFeature(
(1, 1, 1, 1), dtype=dtypes.string),
}
}, expected_output)
def testSerializedContainingDenseScalar(self):
original = [
example(features=features({
"a": float_feature([1]),
})),
example(features=features({}))
]
serialized = [m.SerializeToString() for m in original]
expected_output = {
"a":
np.array([[1], [-1]], dtype=np.float32) # 2x1 (column vector)
}
self._test(
{
"serialized": ops.convert_to_tensor(serialized),
"features": {
"a":
parsing_ops.FixedLenFeature(
(1,), dtype=dtypes.float32, default_value=-1),
}
}, expected_output)
def testSerializedContainingDenseWithDefaults(self):
original = [
example(features=features({
"a": float_feature([1, 1]),
})),
example(features=features({
"b": bytes_feature([b"b1"]),
})),
example(features=features({"b": feature()})),
]
serialized = [m.SerializeToString() for m in original]
expected_output = {
"a":
np.array([[1, 1], [3, -3], [3, -3]],
dtype=np.float32).reshape(3, 1, 2, 1),
"b":
np.array(["tmp_str", "b1", "tmp_str"],
dtype=bytes).reshape(3, 1, 1, 1, 1),
}
self._test(
{
"serialized": ops.convert_to_tensor(serialized),
"features": {
"a":
parsing_ops.FixedLenFeature((1, 2, 1),
dtype=dtypes.float32,
default_value=[3.0, -3.0]),
"b":
parsing_ops.FixedLenFeature((1, 1, 1, 1),
dtype=dtypes.string,
default_value="tmp_str"),
}
}, expected_output)
def testSerializedContainingSparseAndSparseFeatureAndDenseWithNoDefault(self):
expected_st_a = ( # indices, values, shape
np.empty((0, 2), dtype=np.int64), # indices
np.empty((0,), dtype=np.int64), # st_a is DT_INT64
np.array([2, 0], dtype=np.int64)) # batch == 2, max_elems = 0
expected_sp = ( # indices, values, shape
np.array([[0, 0], [0, 3], [1, 7]],
dtype=np.int64), np.array(["a", "b", "c"], dtype="|S"),
np.array([2, 13], dtype=np.int64)) # batch == 2, max_elems = 13
original = [
example(
features=features({
"c": float_feature([3, 4]),
"val": bytes_feature([b"a", b"b"]),
"idx": int64_feature([0, 3])
})),
example(
features=features({
"c": float_feature([1, 2]),
"val": bytes_feature([b"c"]),
"idx": int64_feature([7])
}))
]
names = ["in1", "in2"]
serialized = [m.SerializeToString() for m in original]
a_default = [1, 2, 3]
b_default = np.random.rand(3, 3).astype(bytes)
expected_output = {
"st_a": expected_st_a,
"sp": expected_sp,
"a": np.array(2 * [[a_default]]),
"b": np.array(2 * [b_default]),
"c": np.array([[3, 4], [1, 2]], dtype=np.float32),
}
self._test(
{
"example_names": names,
"serialized": ops.convert_to_tensor(serialized),
"features": {
"st_a":
parsing_ops.VarLenFeature(dtypes.int64),
"sp":
parsing_ops.SparseFeature("idx", "val", dtypes.string, 13),
"a":
parsing_ops.FixedLenFeature(
(1, 3), dtypes.int64, default_value=a_default),
"b":
parsing_ops.FixedLenFeature(
(3, 3), dtypes.string, default_value=b_default),
# Feature "c" must be provided, since it has no default_value.
"c":
parsing_ops.FixedLenFeature((2,), dtypes.float32),
}
},
expected_output)
def testSerializedContainingSparseAndSparseFeatureWithReuse(self):
expected_idx = ( # indices, values, shape
np.array([[0, 0], [0, 1], [1, 0], [1, 1]],
dtype=np.int64), np.array([0, 3, 7, 1]),
np.array([2, 2], dtype=np.int64)) # batch == 2, max_elems = 2
expected_sp = ( # indices, values, shape
np.array([[0, 0], [0, 3], [1, 1], [1, 7]],
dtype=np.int64), np.array(["a", "b", "d", "c"], dtype="|S"),
np.array([2, 13], dtype=np.int64)) # batch == 2, max_elems = 13
original = [
example(
features=features({
"val": bytes_feature([b"a", b"b"]),
"idx": int64_feature([0, 3])
})),
example(
features=features({
"val": bytes_feature([b"c", b"d"]),
"idx": int64_feature([7, 1])
}))
]
names = ["in1", "in2"]
serialized = [m.SerializeToString() for m in original]
expected_output = {
"idx": expected_idx,
"sp": expected_sp,
}
self._test(
{
"example_names": names,
"serialized": ops.convert_to_tensor(serialized),
"features": {
"idx":
parsing_ops.VarLenFeature(dtypes.int64),
"sp":
parsing_ops.SparseFeature(["idx"], "val", dtypes.string,
[13]),
}
}, expected_output)
def _testSerializedContainingVarLenDenseLargerBatch(self, batch_size):
# During parsing, data read from the serialized proto is stored in buffers.
# For small batch sizes, a buffer will contain one minibatch entry.
# For larger batch sizes, a buffer may contain several minibatch
# entries. This test identified a bug where the code that copied
# data out of the buffers and into the output tensors assumed each
# buffer only contained one minibatch entry. The bug has since been fixed.
truth_int = [i for i in range(batch_size)]
truth_str = [[("foo%d" % i).encode(), ("bar%d" % i).encode()]
for i in range(batch_size)]
expected_str = copy.deepcopy(truth_str)
# Delete some intermediate entries. (Skip the first entry, to ensure that
# we have at least one entry with length 2, to get the expected padding.)
for i in range(1, batch_size):
col = 1
if np.random.rand() < 0.25:
# w.p. 25%, drop out the second entry
expected_str[i][col] = b"default"
col -= 1
truth_str[i].pop()
if np.random.rand() < 0.25:
# w.p. 25%, also drop the (now) last remaining entry
expected_str[i][col] = b"default"
truth_str[i].pop()
expected_output = {
# Batch size batch_size, 1 time step.
"a": np.array(truth_int, dtype=np.int64).reshape(batch_size, 1),
# Batch size batch_size, 2 time steps.
"b": np.array(expected_str, dtype="|S").reshape(batch_size, 2),
}
original = [
example(
features=features({
"a": int64_feature([truth_int[i]]),
"b": bytes_feature(truth_str[i])
})) for i in range(batch_size)
]
serialized = [m.SerializeToString() for m in original]
self._test(
{
"serialized":
ops.convert_to_tensor(serialized, dtype=dtypes.string),
"features": {
"a":
parsing_ops.FixedLenSequenceFeature(
shape=(),
dtype=dtypes.int64,
allow_missing=True,
default_value=-1),
"b":
parsing_ops.FixedLenSequenceFeature(
shape=[],
dtype=dtypes.string,
allow_missing=True,
default_value="default"),
}
}, expected_output)
def testSerializedContainingVarLenDenseLargerBatch(self):
np.random.seed(3456)
for batch_size in (1, 10, 20, 100, 256):
self._testSerializedContainingVarLenDenseLargerBatch(batch_size)
def testSerializedContainingVarLenDense(self):
aname = "a"
bname = "b"
cname = "c"
dname = "d"
example_names = ["in1", "in2", "in3", "in4"]
original = [
example(features=features({
cname: int64_feature([2]),
})),
example(
features=features({
aname: float_feature([1, 1]),
bname: bytes_feature([b"b0_str", b"b1_str"]),
})),
example(
features=features({
aname: float_feature([-1, -1, 2, 2]),
bname: bytes_feature([b"b1"]),
})),
example(
features=features({
aname: float_feature([]),
cname: int64_feature([3]),
})),
]
serialized = [m.SerializeToString() for m in original]
expected_output = {
aname:
np.array([
[0, 0, 0, 0],
[1, 1, 0, 0],
[-1, -1, 2, 2],
[0, 0, 0, 0],
],
dtype=np.float32).reshape(4, 2, 2, 1),
bname:
np.array([["", ""], ["b0_str", "b1_str"], ["b1", ""], ["", ""]],
dtype=bytes).reshape(4, 2, 1, 1, 1),
cname:
np.array([2, 0, 0, 3], dtype=np.int64).reshape(4, 1),
dname:
np.empty(shape=(4, 0), dtype=bytes),
}
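# How the expected shapes arise: with allow_missing=True each feature is
# padded along the time axis to its longest occurrence in the batch.
# Feature "a" has at most 2 time steps of shape (2, 1), so every example
# is padded to (2, 2, 1); missing values are filled with the default
# (0 for numbers, "" for strings, unless default_value is overridden).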
self._test(
{
"example_names": example_names,
"serialized": ops.convert_to_tensor(serialized),
"features": {
aname:
parsing_ops.FixedLenSequenceFeature(
(2, 1), dtype=dtypes.float32, allow_missing=True),
bname:
parsing_ops.FixedLenSequenceFeature(
(1, 1, 1), dtype=dtypes.string, allow_missing=True),
cname:
parsing_ops.FixedLenSequenceFeature(
shape=[], dtype=dtypes.int64, allow_missing=True),
dname:
parsing_ops.FixedLenSequenceFeature(
shape=[], dtype=dtypes.string, allow_missing=True),
}
}, expected_output)
# Test with padding values.
expected_output_custom_padding = dict(expected_output)
expected_output_custom_padding[aname] = np.array([
[-2, -2, -2, -2],
[1, 1, -2, -2],
[-1, -1, 2, 2],
[-2, -2, -2, -2],
],
dtype=np.float32).reshape(
4, 2, 2, 1)
self._test(
{
"example_names": example_names,
"serialized": ops.convert_to_tensor(serialized),
"features": {
aname:
parsing_ops.FixedLenSequenceFeature((2, 1),
dtype=dtypes.float32,
allow_missing=True,
default_value=-2.0),
bname:
parsing_ops.FixedLenSequenceFeature(
(1, 1, 1), dtype=dtypes.string, allow_missing=True),
cname:
parsing_ops.FixedLenSequenceFeature(
shape=[], dtype=dtypes.int64, allow_missing=True),
dname:
parsing_ops.FixedLenSequenceFeature(
shape=[], dtype=dtypes.string, allow_missing=True),
}
}, expected_output_custom_padding)
# Change number of required values so the inputs are not a
# multiple of this size.
self._test(
{
"example_names": example_names,
"serialized": ops.convert_to_tensor(serialized),
"features": {
aname:
parsing_ops.FixedLenSequenceFeature(
(2, 1), dtype=dtypes.float32, allow_missing=True),
bname:
parsing_ops.FixedLenSequenceFeature(
(2, 1, 1), dtype=dtypes.string, allow_missing=True),
}
},
expected_err=(
errors_impl.OpError, "Name: in3, Key: b, Index: 2. "
"Number of bytes values is not a multiple of stride length."))
self._test(
{
"example_names": example_names,
"serialized": ops.convert_to_tensor(serialized),
"features": {
aname:
parsing_ops.FixedLenSequenceFeature((2, 1),
dtype=dtypes.float32,
allow_missing=True,
default_value=[]),
bname:
parsing_ops.FixedLenSequenceFeature(
(2, 1, 1), dtype=dtypes.string, allow_missing=True),
}
},
expected_err=(ValueError,
"Cannot reshape a tensor with 0 elements to shape"))
self._test(
{
"example_names": example_names,
"serialized": ops.convert_to_tensor(serialized),
"features": {
aname:
parsing_ops.FixedLenFeature(
(None, 2, 1), dtype=dtypes.float32),
bname:
parsing_ops.FixedLenSequenceFeature(
(2, 1, 1), dtype=dtypes.string, allow_missing=True),
}
},
expected_err=(ValueError,
"First dimension of shape for feature a unknown. "
"Consider using FixedLenSequenceFeature."))
self._test(
{
"example_names": example_names,
"serialized": ops.convert_to_tensor(serialized),
"features": {
cname:
parsing_ops.FixedLenFeature(
(1, None), dtype=dtypes.int64, default_value=[[1]]),
}
},
expected_err=(ValueError,
"All dimensions of shape for feature c need to be known "
r"but received \(1, None\)."))
self._test(
{
"example_names": example_names,
"serialized": ops.convert_to_tensor(serialized),
"features": {
aname:
parsing_ops.FixedLenSequenceFeature(
(2, 1), dtype=dtypes.float32, allow_missing=True),
bname:
parsing_ops.FixedLenSequenceFeature(
(1, 1, 1), dtype=dtypes.string, allow_missing=True),
cname:
parsing_ops.FixedLenSequenceFeature(
shape=[], dtype=dtypes.int64, allow_missing=False),
dname:
parsing_ops.FixedLenSequenceFeature(
shape=[], dtype=dtypes.string, allow_missing=True),
}
},
expected_err=(ValueError,
"Unsupported: FixedLenSequenceFeature requires "
"allow_missing to be True."))
def testSerializedContainingRaggedFeatureWithNoPartitions(self):
original = [
example(features=features({"rt_c": float_feature([3, 4])})),
example(
features=features({
"rt_c": float_feature([]), # empty float list
})),
example(
features=features({
"rt_d": feature(), # feature with nothing in it
})),
example(
features=features({
"rt_c": float_feature([1, 2, -1]),
"rt_d": bytes_feature([b"hi"])
}))
]
serialized = [m.SerializeToString() for m in original]
test_features = {
"rt_c":
parsing_ops.RaggedFeature(dtype=dtypes.float32),
"rt_d":
parsing_ops.RaggedFeature(
dtype=dtypes.string, row_splits_dtype=dtypes.int64)
}
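# With no partitions, each example's flat value list becomes one ragged
# row, so the four examples above parse to
# [[3.0, 4.0], [], [], [1.0, 2.0, -1.0]] for "rt_c".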
expected_rt_c = ragged_factory_ops.constant(
[[3.0, 4.0], [], [], [1.0, 2.0, -1.0]],
dtype=dtypes.float32,
row_splits_dtype=dtypes.int32)
expected_rt_d = ragged_factory_ops.constant([[], [], [], [b"hi"]])
expected_output = {
"rt_c": expected_rt_c,
"rt_d": expected_rt_d,
}
self._test(
{
"serialized": ops.convert_to_tensor(serialized),
"features": test_features
}, expected_output)
# Test with a large enough batch to ensure that the minibatch size is >1.
batch_serialized = serialized * 64
self.assertEqual(expected_rt_c.row_splits.dtype, np.int32)
batch_expected_out = {
"rt_c": ragged_concat_ops.concat([expected_rt_c] * 64, axis=0),
"rt_d": ragged_concat_ops.concat([expected_rt_d] * 64, axis=0)
}
self.assertEqual(batch_expected_out["rt_c"].row_splits.dtype, dtypes.int32)
self._test(
{
"serialized": ops.convert_to_tensor(batch_serialized),
"features": test_features
}, batch_expected_out)
def testSerializedContainingRaggedFeature(self):
original = [
example(
features=features({
# rt = [[3], [4, 5, 6]]
"rt_values": float_feature([3, 4, 5, 6]),
"rt_splits": int64_feature([0, 1, 4]),
"rt_lengths": int64_feature([1, 3]),
"rt_starts": int64_feature([0, 1]),
"rt_limits": int64_feature([1, 4]),
"rt_rowids": int64_feature([0, 1, 1, 1]),
})),
example(
features=features({
# rt = []
"rt_values": float_feature([]),
"rt_splits": int64_feature([0]),
"rt_lengths": int64_feature([]),
"rt_starts": int64_feature([]),
"rt_limits": int64_feature([]),
"rt_rowids": int64_feature([]),
})),
example(
features=features({
# rt = []
"rt_values": feature(), # feature with nothing in it
"rt_splits": int64_feature([0]),
"rt_lengths": feature(),
"rt_starts": feature(),
"rt_limits": feature(),
"rt_rowids": feature(),
})),
example(
features=features({
# rt = [[1.0, 2.0, -1.0], [], [8.0, 9.0], [5.0]]
"rt_values": float_feature([1, 2, -1, 8, 9, 5]),
"rt_splits": int64_feature([0, 3, 3, 5, 6]),
"rt_lengths": int64_feature([3, 0, 2, 1]),
"rt_starts": int64_feature([0, 3, 3, 5]),
"rt_limits": int64_feature([3, 3, 5, 6]),
"rt_rowids": int64_feature([0, 0, 0, 2, 2, 3]),
}))
]
serialized = ops.convert_to_tensor(
[m.SerializeToString() for m in original])
test_features = {
"rt1":
parsing_ops.RaggedFeature(
value_key="rt_values",
partitions=[parsing_ops.RaggedFeature.RowSplits("rt_splits")],
dtype=dtypes.float32),
"rt2":
parsing_ops.RaggedFeature(
value_key="rt_values",
partitions=[parsing_ops.RaggedFeature.RowLengths("rt_lengths")],
dtype=dtypes.float32),
"rt3":
parsing_ops.RaggedFeature(
value_key="rt_values",
partitions=[parsing_ops.RaggedFeature.RowStarts("rt_starts")],
dtype=dtypes.float32),
"rt4":
parsing_ops.RaggedFeature(
value_key="rt_values",
partitions=[parsing_ops.RaggedFeature.RowLimits("rt_limits")],
dtype=dtypes.float32),
"rt5":
parsing_ops.RaggedFeature(
value_key="rt_values",
partitions=[parsing_ops.RaggedFeature.ValueRowIds("rt_rowids")],
dtype=dtypes.float32),
"uniform1":
parsing_ops.RaggedFeature(
value_key="rt_values",
partitions=[parsing_ops.RaggedFeature.UniformRowLength(2)],
dtype=dtypes.float32),
"uniform2":
parsing_ops.RaggedFeature(
value_key="rt_values",
partitions=[
parsing_ops.RaggedFeature.UniformRowLength(2),
parsing_ops.RaggedFeature.RowSplits("rt_splits")
],
dtype=dtypes.float32),
}
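# All five row-partition encodings above describe the same ragged rows.
# For example 0 (values [3, 4, 5, 6], rt = [[3], [4, 5, 6]]):
#   splits  [0, 1, 4]    -> rows are values[0:1] and values[1:4]
#   lengths [1, 3]       -> row sizes are 1 and 3
#   starts  [0, 1]       -> each row begins at that offset
#   limits  [1, 4]       -> each row ends at that offset
#   rowids  [0, 1, 1, 1] -> value i belongs to row rowids[i]
# UniformRowLength(2) instead chops the values into fixed rows of 2, and
# "uniform2" stacks that on top of the rows produced by "rt_splits".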
expected_rt = ragged_factory_ops.constant(
[[[3], [4, 5, 6]], [], [], [[1, 2, -1], [], [8, 9], [5]]],
dtype=dtypes.float32,
row_splits_dtype=dtypes.int32)
expected_uniform1 = ragged_factory_ops.constant(
[[[3, 4], [5, 6]], [], [], [[1, 2], [-1, 8], [9, 5]]],
ragged_rank=1,
dtype=dtypes.float32,
row_splits_dtype=dtypes.int32)
expected_uniform2 = ragged_factory_ops.constant(
[[[[3], [4, 5, 6]]], [], [], [[[1, 2, -1], []], [[8, 9], [5]]]],
dtype=dtypes.float32,
row_splits_dtype=dtypes.int32)
expected_output = {
"rt1": expected_rt,
"rt2": expected_rt,
"rt3": expected_rt,
"rt4": expected_rt,
"rt5": expected_rt,
"uniform1": expected_uniform1,
"uniform2": expected_uniform2,
}
self._test({
"serialized": serialized,
"features": test_features
}, expected_output)
def testSerializedContainingNestedRaggedFeature(self):
"""Test RaggedFeature with 3 partitions."""
original = [
# rt shape: [(batch), 2, None, None]
example(
features=features({
# rt = [[[[1]], [[2, 3], [4]]], [[], [[5, 6, 7]]]]
"rt_values": float_feature([1, 2, 3, 4, 5, 6, 7]),
"lengths_axis2": int64_feature([1, 2, 0, 1]),
"lengths_axis3": int64_feature([1, 2, 1, 3]),
"splits_axis3": int64_feature([0, 1, 3, 4, 7]),
})),
example(
features=features({
# rt = [[[[1, 2, 3], [4]], [[5], [6], [7, 8]]]]
"rt_values": float_feature([1, 2, 3, 4, 5, 6, 7, 8]),
"lengths_axis2": int64_feature([2, 3]),
"lengths_axis3": int64_feature([3, 1, 1, 1, 2]),
"splits_axis3": int64_feature([0, 3, 4, 5, 6, 8]),
}))
]
serialized = ops.convert_to_tensor(
[m.SerializeToString() for m in original])
test_features = {
"rt1":
parsing_ops.RaggedFeature(
value_key="rt_values",
partitions=[
parsing_ops.RaggedFeature.UniformRowLength(2),
parsing_ops.RaggedFeature.RowLengths("lengths_axis2"),
parsing_ops.RaggedFeature.RowSplits("splits_axis3"),
],
dtype=dtypes.float32,
row_splits_dtype=dtypes.int64,
),
}
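# The partitions list is ordered outermost-first: splits_axis3 builds the
# innermost rows, lengths_axis2 groups those rows, and UniformRowLength(2)
# pairs the result. For example 0:
#   splits_axis3 [0, 1, 3, 4, 7] -> [[1], [2, 3], [4], [5, 6, 7]]
#   lengths_axis2 [1, 2, 0, 1]   -> [[[1]], [[2, 3], [4]], [], [[5, 6, 7]]]
#   UniformRowLength(2)          -> [[[[1]], [[2, 3], [4]]], [[], [[5, 6, 7]]]]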
expected_rt = ragged_factory_ops.constant(
[[[[[1]], [[2, 3], [4]]], [[], [[5, 6, 7]]]],
[[[[1, 2, 3], [4]], [[5], [6], [7, 8]]]]],
dtype=dtypes.float32,
row_splits_dtype=dtypes.int64)
expected_output = {
"rt1": expected_rt,
}
self._test({
"serialized": serialized,
"features": test_features
}, expected_output)
@test_util.run_all_in_graph_and_eager_modes
class ParseSingleExampleTest(test.TestCase):
def _test(self, kwargs, expected_values=None, expected_err=None):
if expected_err:
with self.assertRaisesWithPredicateMatch(expected_err[0],
expected_err[1]):
self.evaluate(parsing_ops.parse_single_example(**kwargs))
else:
out = parsing_ops.parse_single_example(**kwargs)
_compare_output_to_expected(self, out, expected_values)
# Check shapes.
for k, f in kwargs["features"].items():
if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
self.assertEqual(
tuple(out[k].get_shape()), tensor_shape.as_shape(f.shape))
elif isinstance(f, parsing_ops.VarLenFeature):
if context.executing_eagerly():
self.assertEqual(tuple(out[k].indices.shape.as_list()), (2, 1))
self.assertEqual(tuple(out[k].values.shape.as_list()), (2,))
self.assertEqual(tuple(out[k].dense_shape.shape.as_list()), (1,))
else:
self.assertEqual(tuple(out[k].indices.shape.as_list()), (None, 1))
self.assertEqual(tuple(out[k].values.shape.as_list()), (None,))
self.assertEqual(tuple(out[k].dense_shape.shape.as_list()), (1,))
def testSingleExampleWithSparseAndSparseFeatureAndDense(self):
original = example(
features=features({
"c": float_feature([3, 4]),
"d": float_feature([0.0, 1.0]),
"val": bytes_feature([b"a", b"b"]),
"idx": int64_feature([0, 3]),
"st_a": float_feature([3.0, 4.0])
}))
serialized = original.SerializeToString()
a_default = [1, 2, 3]
b_default = np.random.rand(3, 3).astype(bytes)
test_features = {
"st_a":
parsing_ops.VarLenFeature(dtypes.float32),
"sp":
parsing_ops.SparseFeature(["idx"], "val", dtypes.string, [13]),
"a":
parsing_ops.FixedLenFeature((1, 3),
dtypes.int64,
default_value=a_default),
"b":
parsing_ops.FixedLenFeature((3, 3),
dtypes.string,
default_value=b_default),
# Feature "c" must be provided, since it has no default_value.
"c":
parsing_ops.FixedLenFeature(2, dtypes.float32),
"d":
parsing_ops.FixedLenSequenceFeature([],
dtypes.float32,
allow_missing=True)
}
expected_st_a = (
np.array([[0], [1]], dtype=np.int64), # indices
np.array([3.0, 4.0], dtype=np.float32), # values
np.array([2], dtype=np.int64)) # shape: max_values = 2
expected_sp = ( # indices, values, shape
np.array([[0], [3]], dtype=np.int64), np.array(["a", "b"], dtype="|S"),
np.array([13], dtype=np.int64)) # max_values = 13
expected_output = {
"st_a": expected_st_a,
"sp": expected_sp,
"a": [a_default],
"b": b_default,
"c": np.array([3, 4], dtype=np.float32),
"d": np.array([0.0, 1.0], dtype=np.float32),
}
self._test(
{
"example_names": ops.convert_to_tensor("in1"),
"serialized": ops.convert_to_tensor(serialized),
"features": test_features,
}, expected_output)
# Note: if example_names is None, then a different code-path gets used.
self._test(
{
"serialized": ops.convert_to_tensor(serialized),
"features": test_features,
}, expected_output)
def testSingleExampleWithAllFeatureTypes(self):
original = example(
features=features({
# FixLen features
"c": float_feature([3, 4]),
"d": float_feature([0.0, 1.0]),
# Sparse features
"val": bytes_feature([b"a", b"b"]), # for sp
"idx": int64_feature([0, 3]), # for sp
"st_a": float_feature([3.0, 4.0]),
# Ragged features
"rt_1d": float_feature([3.0, 4.0]),
"rt_values": float_feature([5, 6, 7]), # for rt_2d
"rt_splits": int64_feature([0, 1, 1, 3]), # for rt_2d
"rt_lengths": int64_feature([1, 0, 2]), # for rt_2d
"rt_starts": int64_feature([0, 1, 1]), # for rt_2d
"rt_limits": int64_feature([1, 1, 3]), # for rt_2d
"rt_rowids": int64_feature([0, 2, 2]), # for rt_2d
"rt_splits2": int64_feature([0, 2, 3]), # for rt_3d
}))
serialized = original.SerializeToString()
a_default = [1, 2, 3]
b_default = np.random.rand(3, 3).astype(bytes)
test_features = {
"st_a":
parsing_ops.VarLenFeature(dtypes.float32),
"sp":
parsing_ops.SparseFeature(["idx"], "val", dtypes.string, [13]),
"a":
parsing_ops.FixedLenFeature((1, 3),
dtypes.int64,
default_value=a_default),
"b":
parsing_ops.FixedLenFeature((3, 3),
dtypes.string,
default_value=b_default),
# Feature "c" must be provided, since it has no default_value.
"c":
parsing_ops.FixedLenFeature(2, dtypes.float32),
"d":
parsing_ops.FixedLenSequenceFeature([],
dtypes.float32,
allow_missing=True),
"rt_1d":
parsing_ops.RaggedFeature(dtypes.float32),
"rt_2d_with_splits":
parsing_ops.RaggedFeature(
value_key="rt_values",
partitions=[parsing_ops.RaggedFeature.RowSplits("rt_splits")],
dtype=dtypes.float32),
"rt_2d_with_lengths":
parsing_ops.RaggedFeature(
value_key="rt_values",
partitions=[parsing_ops.RaggedFeature.RowLengths("rt_lengths")],
dtype=dtypes.float32),
"rt_2d_with_starts":
parsing_ops.RaggedFeature(
value_key="rt_values",
partitions=[parsing_ops.RaggedFeature.RowStarts("rt_starts")],
dtype=dtypes.float32),
"rt_2d_with_limits":
parsing_ops.RaggedFeature(
value_key="rt_values",
partitions=[parsing_ops.RaggedFeature.RowLimits("rt_limits")],
dtype=dtypes.float32),
"rt_2d_with_rowids":
parsing_ops.RaggedFeature(
value_key="rt_values",
partitions=[parsing_ops.RaggedFeature.ValueRowIds("rt_rowids")],
dtype=dtypes.float32),
"rt_2d_with_uniform_row_length":
parsing_ops.RaggedFeature(
value_key="rt_values",
partitions=[parsing_ops.RaggedFeature.UniformRowLength(1)],
dtype=dtypes.float32),
"rt_3d":
parsing_ops.RaggedFeature(
value_key="rt_values",
partitions=[
parsing_ops.RaggedFeature.RowSplits("rt_splits2"),
parsing_ops.RaggedFeature.RowSplits("rt_splits")
],
dtype=dtypes.float32),
"rt_3d_with_uniform_row_length":
parsing_ops.RaggedFeature(
value_key="rt_values",
partitions=[
parsing_ops.RaggedFeature.UniformRowLength(1),
parsing_ops.RaggedFeature.RowSplits("rt_splits")
],
dtype=dtypes.float32),
}
expected_st_a = (
np.array([[0], [1]], dtype=np.int64), # indices
np.array([3.0, 4.0], dtype=np.float32), # values
np.array([2], dtype=np.int64)) # shape: max_values = 2
expected_sp = ( # indices, values, shape
np.array([[0], [3]], dtype=np.int64), np.array(["a", "b"], dtype="|S"),
np.array([13], dtype=np.int64)) # max_values = 13
expected_rt_1d = constant_op.constant([3, 4], dtypes.float32)
expected_rt_2d = ragged_factory_ops.constant([[5], [], [6, 7]],
dtype=dtypes.float32)
expected_rt_2d_uniform = constant_op.constant([[5], [6], [7]],
dtype=dtypes.float32)
expected_rt_3d = ragged_factory_ops.constant([[[5], []], [[6, 7]]],
dtype=dtypes.float32)
expected_rt_3d_with_uniform = (
ragged_tensor.RaggedTensor.from_uniform_row_length(
expected_rt_2d, uniform_row_length=1))
expected_output = {
"st_a": expected_st_a,
"sp": expected_sp,
"a": [a_default],
"b": b_default,
"c": np.array([3, 4], dtype=np.float32),
"d": np.array([0.0, 1.0], dtype=np.float32),
"rt_1d": expected_rt_1d,
"rt_2d_with_splits": expected_rt_2d,
"rt_2d_with_lengths": expected_rt_2d,
"rt_2d_with_starts": expected_rt_2d,
"rt_2d_with_limits": expected_rt_2d,
"rt_2d_with_rowids": expected_rt_2d,
"rt_2d_with_uniform_row_length": expected_rt_2d_uniform,
"rt_3d": expected_rt_3d,
"rt_3d_with_uniform_row_length": expected_rt_3d_with_uniform,
}
self._test(
{
"example_names": ops.convert_to_tensor("in1"),
"serialized": ops.convert_to_tensor(serialized),
"features": test_features,
}, expected_output)
@test_util.run_all_in_graph_and_eager_modes
class ParseSequenceExampleTest(test.TestCase):
def testCreateSequenceExample(self):
value = sequence_example(
context=features({
"global_feature": float_feature([1, 2, 3]),
}),
feature_lists=feature_lists({
"repeated_feature_2_frames":
feature_list([
bytes_feature([b"a", b"b", b"c"]),
bytes_feature([b"a", b"d", b"e"])
]),
"repeated_feature_3_frames":
feature_list([
int64_feature([3, 4, 5, 6, 7]),
int64_feature([-1, 0, 0, 0, 0]),
int64_feature([1, 2, 3, 4, 5])
])
}))
value.SerializeToString() # Smoke test
def _test(self,
kwargs,
expected_context_values=None,
expected_feat_list_values=None,
expected_length_values=None,
expected_err=None,
batch=False):
expected_context_values = expected_context_values or {}
expected_feat_list_values = expected_feat_list_values or {}
expected_length_values = expected_length_values or {}
if expected_err:
with self.assertRaisesWithPredicateMatch(expected_err[0],
expected_err[1]):
if batch:
self.evaluate(parsing_ops.parse_sequence_example(**kwargs))
else:
self.evaluate(parsing_ops.parse_single_sequence_example(**kwargs))
else:
if batch:
(context_out, feat_list_out,
lengths_out) = parsing_ops.parse_sequence_example(**kwargs)
else:
(context_out,
feat_list_out) = parsing_ops.parse_single_sequence_example(**kwargs)
lengths_out = {}
# Check values.
_compare_output_to_expected(self, context_out, expected_context_values)
_compare_output_to_expected(self, feat_list_out,
expected_feat_list_values)
_compare_output_to_expected(self, lengths_out, expected_length_values)
# Check shapes; if serialized is a Tensor we need its size to check
# the shapes properly.
if "context_features" in kwargs:
for k, f in kwargs["context_features"].items():
if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
if batch:
self.assertEqual(tuple(context_out[k].shape.as_list()[1:]), f.shape)
else:
self.assertEqual(tuple(context_out[k].shape.as_list()), f.shape)
elif isinstance(f, parsing_ops.VarLenFeature) and batch:
if context.executing_eagerly():
context_out[k].indices.shape.assert_is_compatible_with([None, 2])
context_out[k].values.shape.assert_is_compatible_with([None])
context_out[k].dense_shape.shape.assert_is_compatible_with([2])
else:
self.assertEqual(context_out[k].indices.shape.as_list(), [None, 2])
self.assertEqual(context_out[k].values.shape.as_list(), [None])
self.assertEqual(context_out[k].dense_shape.shape.as_list(), [2])
elif isinstance(f, parsing_ops.VarLenFeature) and not batch:
if context.executing_eagerly():
context_out[k].indices.shape.assert_is_compatible_with([None, 1])
context_out[k].values.shape.assert_is_compatible_with([None])
context_out[k].dense_shape.shape.assert_is_compatible_with([1])
else:
self.assertEqual(context_out[k].indices.shape.as_list(), [None, 1])
self.assertEqual(context_out[k].values.shape.as_list(), [None])
self.assertEqual(context_out[k].dense_shape.shape.as_list(), [1])
def _testBoth(self,
kwargs,
expected_context_values=None,
expected_feat_list_values=None,
expected_err=None):
# Test using tf.io.parse_single_sequence_example
self._test(
kwargs,
expected_context_values=expected_context_values,
expected_feat_list_values=expected_feat_list_values,
expected_err=expected_err,
batch=False)
# Convert the input to a batch of size 1, and test using
# tf.parse_sequence_example.
# Some replacements are needed for the batch version.
kwargs["serialized"] = [kwargs.pop("serialized")]
kwargs["example_names"] = [kwargs.pop("example_name")
] if "example_name" in kwargs else None
# Add a batch dimension to expected output
if expected_context_values:
new_values = {}
for k in expected_context_values:
v = expected_context_values[k]
if isinstance(kwargs["context_features"][k],
(parsing_ops.FixedLenFeature, parsing_ops.RaggedFeature)):
new_values[k] = np.expand_dims(v, axis=0)
else:
# Sparse tensor.
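# Prepend a batch coordinate of 0 to every index row and a leading
# 1 to the dense shape; e.g. ([[0], [3]], vals, [13]) becomes
# ([[0, 0], [0, 3]], vals, [1, 13]).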
new_values[k] = (np.insert(v[0], 0, 0,
axis=1), v[1], np.insert(v[2], 0, 1))
expected_context_values = new_values
expected_length_values = {}
if expected_feat_list_values:
new_values = {}
for k in expected_feat_list_values:
v = expected_feat_list_values[k]
if isinstance(kwargs["sequence_features"][k],
parsing_ops.FixedLenSequenceFeature):
expected_length_values[k] = [np.shape(v)[0]]
new_values[k] = np.expand_dims(v, axis=0)
elif isinstance(kwargs["sequence_features"][k],
parsing_ops.RaggedFeature):
new_values[k] = np.expand_dims(v, axis=0)
else:
# Sparse tensor.
new_values[k] = (np.insert(v[0], 0, 0,
axis=1), v[1], np.insert(v[2], 0, 1))
expected_feat_list_values = new_values
self._test(
kwargs,
expected_context_values=expected_context_values,
expected_feat_list_values=expected_feat_list_values,
expected_length_values=expected_length_values,
expected_err=expected_err,
batch=True)
def testSequenceExampleWithSparseAndDenseContext(self):
original = sequence_example(
context=features({
"c": float_feature([3, 4]),
"st_a": float_feature([3.0, 4.0])
}))
serialized = original.SerializeToString()
expected_st_a = (
np.array([[0], [1]], dtype=np.int64), # indices
np.array([3.0, 4.0], dtype=np.float32), # values
np.array([2], dtype=np.int64)) # shape: num_features = 2
a_default = [[1, 2, 3]]
b_default = np.random.rand(3, 3).astype(bytes)
expected_context_output = {
"st_a": expected_st_a,
"a": a_default,
"b": b_default,
"c": np.array([3, 4], dtype=np.float32),
}
self._testBoth(
{
"example_name": "in1",
"serialized": ops.convert_to_tensor(serialized),
"context_features": {
"st_a":
parsing_ops.VarLenFeature(dtypes.float32),
"a":
parsing_ops.FixedLenFeature(
(1, 3), dtypes.int64, default_value=a_default),
"b":
parsing_ops.FixedLenFeature(
(3, 3), dtypes.string, default_value=b_default),
# Feature "c" must be provided, since it has no default_value.
"c":
parsing_ops.FixedLenFeature((2,), dtypes.float32),
}
},
expected_context_values=expected_context_output)
def testSequenceExampleWithMultipleSizeFeatureLists(self):
original = sequence_example(
feature_lists=feature_lists({
"a":
feature_list([
int64_feature([-1, 0, 1]),
int64_feature([2, 3, 4]),
int64_feature([5, 6, 7]),
int64_feature([8, 9, 10]),
]),
"b":
feature_list([bytes_feature([b"r00", b"r01", b"r10", b"r11"])]),
"c":
feature_list([float_feature([3, 4]),
float_feature([-1, 2])]),
}))
serialized = original.SerializeToString()
expected_feature_list_output = {
"a":
np.array(
[ # outer dimension is time.
[[-1, 0, 1]], # inside are 1x3 matrices
[[2, 3, 4]],
[[5, 6, 7]],
[[8, 9, 10]]
],
dtype=np.int64),
"b":
np.array(
[ # outer dimension is time, inside are 2x2 matrices
[[b"r00", b"r01"], [b"r10", b"r11"]]
],
dtype=bytes),
"c":
np.array(
[ # outer dimension is time, inside are 2-vectors
[3, 4], [-1, 2]
],
dtype=np.float32),
"d":
np.empty(shape=(0, 5), dtype=np.float32), # empty_allowed_missing
}
self._testBoth(
{
"example_name": "in1",
"serialized": ops.convert_to_tensor(serialized),
"sequence_features": {
"a":
parsing_ops.FixedLenSequenceFeature((1, 3), dtypes.int64),
"b":
parsing_ops.FixedLenSequenceFeature((2, 2), dtypes.string),
"c":
parsing_ops.FixedLenSequenceFeature(2, dtypes.float32),
"d":
parsing_ops.FixedLenSequenceFeature(
(5,), dtypes.float32, allow_missing=True),
}
},
expected_feat_list_values=expected_feature_list_output)
def testSequenceExampleWithoutDebugName(self):
original = sequence_example(
feature_lists=feature_lists({
"a":
feature_list([int64_feature([3, 4]),
int64_feature([1, 0])]),
"st_a":
feature_list([
float_feature([3.0, 4.0]),
float_feature([5.0]),
float_feature([])
]),
"st_b":
feature_list([
bytes_feature([b"a"]),
bytes_feature([]),
bytes_feature([]),
bytes_feature([b"b", b"c"])
])
}))
serialized = original.SerializeToString()
expected_st_a = (
np.array([[0, 0], [0, 1], [1, 0]], dtype=np.int64), # indices
np.array([3.0, 4.0, 5.0], dtype=np.float32), # values
np.array([3, 2], dtype=np.int64)) # shape: num_time = 3, max_feat = 2
expected_st_b = (
np.array([[0, 0], [3, 0], [3, 1]], dtype=np.int64), # indices
np.array(["a", "b", "c"], dtype="|S"), # values
np.array([4, 2], dtype=np.int64)) # shape: num_time = 4, max_feat = 2
expected_st_c = (
np.empty((0, 2), dtype=np.int64), # indices
np.empty((0,), dtype=np.int64), # values
np.array([0, 0], dtype=np.int64)) # shape: num_time = 0, max_feat = 0
expected_feature_list_output = {
"a": np.array([[3, 4], [1, 0]], dtype=np.int64),
"st_a": expected_st_a,
"st_b": expected_st_b,
"st_c": expected_st_c,
}
self._testBoth(
{
"serialized": ops.convert_to_tensor(serialized),
"sequence_features": {
"st_a": parsing_ops.VarLenFeature(dtypes.float32),
"st_b": parsing_ops.VarLenFeature(dtypes.string),
"st_c": parsing_ops.VarLenFeature(dtypes.int64),
"a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64),
}
},
expected_feat_list_values=expected_feature_list_output)
def testSequenceExampleWithSparseAndDenseFeatureLists(self):
original = sequence_example(
feature_lists=feature_lists({
"a":
feature_list([int64_feature([3, 4]),
int64_feature([1, 0])]),
"st_a":
feature_list([
float_feature([3.0, 4.0]),
float_feature([5.0]),
float_feature([])
]),
"st_b":
feature_list([
bytes_feature([b"a"]),
bytes_feature([]),
bytes_feature([]),
bytes_feature([b"b", b"c"])
])
}))
serialized = original.SerializeToString()
expected_st_a = (
np.array([[0, 0], [0, 1], [1, 0]], dtype=np.int64), # indices
np.array([3.0, 4.0, 5.0], dtype=np.float32), # values
np.array([3, 2], dtype=np.int64)) # shape: num_time = 3, max_feat = 2
expected_st_b = (
np.array([[0, 0], [3, 0], [3, 1]], dtype=np.int64), # indices
np.array(["a", "b", "c"], dtype="|S"), # values
np.array([4, 2], dtype=np.int64)) # shape: num_time = 4, max_feat = 2
expected_st_c = (
np.empty((0, 2), dtype=np.int64), # indices
np.empty((0,), dtype=np.int64), # values
np.array([0, 0], dtype=np.int64)) # shape: num_time = 0, max_feat = 0
expected_feature_list_output = {
"a": np.array([[3, 4], [1, 0]], dtype=np.int64),
"st_a": expected_st_a,
"st_b": expected_st_b,
"st_c": expected_st_c,
}
self._testBoth(
{
"example_name": "in1",
"serialized": ops.convert_to_tensor(serialized),
"sequence_features": {
"st_a": parsing_ops.VarLenFeature(dtypes.float32),
"st_b": parsing_ops.VarLenFeature(dtypes.string),
"st_c": parsing_ops.VarLenFeature(dtypes.int64),
"a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64),
}
},
expected_feat_list_values=expected_feature_list_output)
def testSequenceExampleWithEmptyFeatureInFeatureLists(self):
original = sequence_example(
feature_lists=feature_lists({
"st_a":
feature_list([
float_feature([3.0, 4.0]),
feature(),
float_feature([5.0]),
]),
}))
serialized = original.SerializeToString()
expected_st_a = (
np.array([[0, 0], [0, 1], [2, 0]], dtype=np.int64), # indices
np.array([3.0, 4.0, 5.0], dtype=np.float32), # values
np.array([3, 2], dtype=np.int64)) # shape: num_time = 3, max_feat = 2
expected_feature_list_output = {
"st_a": expected_st_a,
}
self._testBoth(
{
"example_name": "in1",
"serialized": ops.convert_to_tensor(serialized),
"sequence_features": {
"st_a": parsing_ops.VarLenFeature(dtypes.float32),
}
},
expected_feat_list_values=expected_feature_list_output)
def testSequenceExampleListWithInconsistentDataFails(self):
original = sequence_example(
feature_lists=feature_lists({
"a": feature_list([int64_feature([-1, 0]),
float_feature([2, 3])])
}))
serialized = original.SerializeToString()
self._testBoth(
{
"example_name": "in1",
"serialized": ops.convert_to_tensor(serialized),
"sequence_features": {
"a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
}
},
expected_err=(errors_impl.OpError, "Feature list: a, Index: 1."
" Data types don't match. Expected type: int64"))
def testSequenceExampleListWithWrongDataTypeFails(self):
original = sequence_example(
feature_lists=feature_lists(
{"a": feature_list([float_feature([2, 3])])}))
serialized = original.SerializeToString()
self._testBoth(
{
"example_name": "in1",
"serialized": ops.convert_to_tensor(serialized),
"sequence_features": {
"a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
}
},
expected_err=(errors_impl.OpError,
"Feature list: a, Index: 0. Data types don't match."
" Expected type: int64"))
def testSequenceExampleListWithWrongSparseDataTypeFails(self):
original = sequence_example(
feature_lists=feature_lists({
"a":
feature_list([
int64_feature([3, 4]),
int64_feature([1, 2]),
float_feature([2.0, 3.0])
])
}))
serialized = original.SerializeToString()
self._testBoth(
{
"example_name": "in1",
"serialized": ops.convert_to_tensor(serialized),
"sequence_features": {
"a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
}
},
expected_err=(errors_impl.OpError,
"Name: in1, Feature list: a, Index: 2."
" Data types don't match. Expected type: int64"))
def testSequenceExampleListWithWrongShapeFails(self):
original = sequence_example(
feature_lists=feature_lists({
"a":
feature_list([int64_feature([2, 3]),
int64_feature([2, 3, 4])]),
}))
serialized = original.SerializeToString()
self._testBoth(
{
"example_name": "in1",
"serialized": ops.convert_to_tensor(serialized),
"sequence_features": {
"a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
}
},
expected_err=(
errors_impl.OpError,
# message from ParseSingleExample.
r"Name: in1, Key: a, Index: 1."
r" Number of int64 values != expected."
r" values size: 3 but output shape: \[2\]"
# or message from FastParseSequenceExample
r"|Feature list 'a' has an unexpected number of values. "
r"Total values size: 5 is not consistent with output "
r"shape: \[\?,2\]"))
def testSequenceExampleListWithWrongShapeFails2(self):
# This exercises a different code path for FastParseSequenceExample than
# testSequenceExampleListWithWrongShapeFails (in that test, we can tell that
# the shape is bad based on the total number of values; in this test, we
# can't tell the shape is bad until we look at individual rows.)
original = sequence_example(
feature_lists=feature_lists({
"a": feature_list([int64_feature([2]),
int64_feature([2, 3, 4])]),
}))
serialized = original.SerializeToString()
self._testBoth(
{
"example_name": "in1",
"serialized": ops.convert_to_tensor(serialized),
"sequence_features": {
"a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
}
},
expected_err=(errors_impl.OpError, r"Name: in1, Key: a, Index: 0."
r" Number of (int64 )?values != expected."
r" values size: 1 but output shape: \[2\]"))
def testSequenceExampleWithMissingFeatureListFails(self):
original = sequence_example(feature_lists=feature_lists({}))
# Test fails because we didn't add:
# feature_list_dense_defaults = {"a": None}
self._testBoth(
{
"example_name": "in1",
"serialized": ops.convert_to_tensor(original.SerializeToString()),
"sequence_features": {
"a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
}
},
expected_err=(
errors_impl.OpError,
"Name: in1, Feature list 'a' is required but could not be found."
" Did you mean to include it in"
" feature_list_dense_missing_assumed_empty or"
" feature_list_dense_defaults?"))
def testSequenceExampleBatch(self):
first = sequence_example(
feature_lists=feature_lists({
"a":
feature_list([
int64_feature([-1, 0, 1]),
int64_feature([2, 3, 4]),
int64_feature([5, 6, 7]),
int64_feature([8, 9, 10]),
])
}))
second = sequence_example(
context=features({"c": float_feature([7])}),
feature_lists=feature_lists({
"a": feature_list([
int64_feature([21, 2, 11]),
]),
"b": feature_list([
int64_feature([5]),
]),
}))
serialized = [first.SerializeToString(), second.SerializeToString()]
expected_context_output = {
"c": np.array([-1, 7], dtype=np.float32),
}
expected_feature_list_output = {
"a":
np.array(
[ # outermost dimension is example id
[ # middle dimension is time.
[[-1, 0, 1]], # inside are 1x3 matrices
[[2, 3, 4]],
[[5, 6, 7]],
[[8, 9, 10]]
],
[ # middle dimension is time.
[[21, 2, 11]], # inside are 1x3 matrices
[[0, 0, 0]], # additional entries are padded with 0
[[0, 0, 0]],
[[0, 0, 0]]
]
],
dtype=np.int64),
"b":
np.array([[0], [5]], dtype=np.int64),
"d":
np.empty(shape=(2, 0, 5), dtype=np.float32), # allowed_missing
}
self._test(
{
"example_names": ops.convert_to_tensor(["in1", "in2"]),
"serialized": ops.convert_to_tensor(serialized),
"context_features": {
"c":
parsing_ops.FixedLenFeature(
(), dtypes.float32, default_value=-1),
},
"sequence_features": {
"a":
parsing_ops.FixedLenSequenceFeature((1, 3), dtypes.int64),
"b":
parsing_ops.FixedLenSequenceFeature(
(), dtypes.int64, allow_missing=True),
"d":
parsing_ops.FixedLenSequenceFeature(
(5,), dtypes.float32, allow_missing=True),
}
},
expected_context_values=expected_context_output,
expected_feat_list_values=expected_feature_list_output,
expected_length_values={
"a": [4, 1],
"b": [0, 1],
"d": [0, 0]
},
batch=True)
def testSerializedContainingRaggedFeatureWithNoPartitions(self):
original = [
sequence_example(
context=features({"a": float_feature([3, 4])}),
feature_lists=feature_lists({
"b": feature_list([float_feature([5]),
float_feature([3])]),
"c": feature_list([int64_feature([6, 7, 8, 9])])
})),
sequence_example(
context=features({"a": float_feature([9])}),
feature_lists=feature_lists({
"b": feature_list([]),
"c": feature_list([int64_feature([]),
int64_feature([1, 2, 3])])
})),
sequence_example(
feature_lists=feature_lists({
"b":
feature_list([
float_feature([1]),
float_feature([1, 2]),
float_feature([1, 2, 3])
])
})),
sequence_example(
context=features({"a": feature()}),
feature_lists=feature_lists({
"b": feature_list([feature()]),
"c": feature_list([int64_feature([3, 3, 3])])
}))
]
serialized = [m.SerializeToString() for m in original]
context_features = {"a": parsing_ops.RaggedFeature(dtype=dtypes.float32)}
sequence_features = {
"b":
parsing_ops.RaggedFeature(dtype=dtypes.float32),
"c":
parsing_ops.RaggedFeature(
dtype=dtypes.int64, row_splits_dtype=dtypes.int64)
}
expected_a = ragged_factory_ops.constant([[3, 4], [9], [], []],
dtype=dtypes.float32,
row_splits_dtype=dtypes.int32)
expected_b = ragged_factory_ops.constant(
[[[5], [3]], [], [[1], [1, 2], [1, 2, 3]], [[]]],
dtype=dtypes.float32,
row_splits_dtype=dtypes.int32)
expected_c = ragged_factory_ops.constant(
[[[6, 7, 8, 9]], [[], [1, 2, 3]], [], [[3, 3, 3]]],
dtype=dtypes.int64,
row_splits_dtype=dtypes.int64)
expected_context_output = dict(a=expected_a)
expected_feature_list_output = dict(b=expected_b, c=expected_c)
self._test(
{
"serialized": ops.convert_to_tensor(serialized),
"context_features": context_features,
"sequence_features": sequence_features,
},
expected_context_output,
expected_feature_list_output,
batch=True)
self._test(
{
"serialized": ops.convert_to_tensor(serialized)[0],
"context_features": context_features,
"sequence_features": sequence_features,
},
expected_context_values={"a": [3, 4]},
expected_feat_list_values={
"b": [[5], [3]],
"c": [[6, 7, 8, 9]]
},
batch=False)
# Test with a larger batch of examples.
batch_serialized = serialized * 64
batch_context_expected_out = {
"a": ragged_concat_ops.concat([expected_a] * 64, axis=0)
}
batch_feature_list_expected_out = {
"b": ragged_concat_ops.concat([expected_b] * 64, axis=0),
"c": ragged_concat_ops.concat([expected_c] * 64, axis=0)
}
self._test(
{
"serialized": ops.convert_to_tensor(batch_serialized),
"context_features": context_features,
"sequence_features": sequence_features,
},
batch_context_expected_out,
batch_feature_list_expected_out,
batch=True)
def testSerializedContainingNestedRaggedFeature(self):
"""Test RaggedFeatures with nested partitions."""
original = [
# rt shape: [(batch), 2, None, None]
sequence_example(
context=features({
# a[0] = [[[[1]], [[2, 3], [4]]], [[], [[5, 6, 7]]]]
"a_values": float_feature([1, 2, 3, 4, 5, 6, 7]),
"a_lengths_axis2": int64_feature([1, 2, 0, 1]),
"a_lengths_axis3": int64_feature([1, 2, 1, 3]),
"a_splits_axis3": int64_feature([0, 1, 3, 4, 7])
}),
feature_lists=feature_lists({
# b[0] = [[[1], [2, 3, 4]], [[2, 4], [6]]]
"b_values":
feature_list(
[float_feature([1, 2, 3, 4]),
float_feature([2, 4, 6])]),
"b_splits":
feature_list(
[int64_feature([0, 1, 4]),
int64_feature([0, 2, 3])]),
})),
sequence_example(
# a[1] = []
# b[1] = []
),
sequence_example(
context=features({
# a[2] = [[[[1, 2, 3], [4]], [[5], [6], [7, 8]]]]
"a_values": float_feature([1, 2, 3, 4, 5, 6, 7, 8]),
"a_lengths_axis2": int64_feature([2, 3]),
"a_lengths_axis3": int64_feature([3, 1, 1, 1, 2]),
"a_splits_axis3": int64_feature([0, 3, 4, 5, 6, 8])
}),
feature_lists=feature_lists({
# b[2] = [[[9], [8, 7, 6], [5]], [[4, 3, 2, 1]], [[0]]]
"b_values":
feature_list([
float_feature([9, 8, 7, 6, 5]),
float_feature([4, 3, 2, 1]),
float_feature([0])
]),
"b_splits":
feature_list([
int64_feature([0, 1, 4, 5]),
int64_feature([0, 4]),
int64_feature([0, 1])
])
}))
]
serialized = [m.SerializeToString() for m in original]
context_features = {
"a":
parsing_ops.RaggedFeature(
value_key="a_values",
partitions=[
parsing_ops.RaggedFeature.UniformRowLength(2),
parsing_ops.RaggedFeature.RowLengths("a_lengths_axis2"),
parsing_ops.RaggedFeature.RowSplits("a_splits_axis3"),
],
dtype=dtypes.float32,
row_splits_dtype=dtypes.int64,
)
}
sequence_features = {
"b":
parsing_ops.RaggedFeature(
value_key="b_values",
dtype=dtypes.float32,
partitions=[parsing_ops.RaggedFeature.RowSplits("b_splits")]),
"c":
parsing_ops.RaggedFeature(
value_key="b_values",
dtype=dtypes.float32,
partitions=[parsing_ops.RaggedFeature.UniformRowLength(1)]),
}
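    # "c" re-partitions the same flat "b_values" using UniformRowLength(1),
    # so every scalar is wrapped in its own length-1 row (ragged_rank=2 below).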
expected_context = {
"a":
ragged_factory_ops.constant(
[[[[[1]], [[2, 3], [4]]], [[], [[5, 6, 7]]]], [],
[[[[1, 2, 3], [4]], [[5], [6], [7, 8]]]]],
dtype=dtypes.float32,
row_splits_dtype=dtypes.int64)
}
expected_feature_list = {
"b":
ragged_factory_ops.constant(
[[[[1], [2, 3, 4]], [[2, 4], [6]]], [],
[[[9], [8, 7, 6], [5]], [[4, 3, 2, 1]], [[0]]]],
dtype=dtypes.float32,
row_splits_dtype=dtypes.int32),
"c":
ragged_factory_ops.constant(
[[[[1], [2], [3], [4]], [[2], [4], [6]]], [],
[[[9], [8], [7], [6], [5]], [[4], [3], [2], [1]], [[0]]]],
ragged_rank=2,
dtype=dtypes.float32,
row_splits_dtype=dtypes.int32),
}
self._test(
dict(
serialized=ops.convert_to_tensor(serialized),
context_features=context_features,
sequence_features=sequence_features),
expected_context,
expected_feature_list,
batch=True)
self._test(
dict(
serialized=ops.convert_to_tensor(serialized)[0],
context_features=context_features,
sequence_features=sequence_features),
{"a": expected_context["a"][0]}, {
"b": expected_feature_list["b"][0],
"c": expected_feature_list["c"][0]
},
batch=False)

  def testSerializedContainingMisalignedNestedRaggedFeature(self):
"""FeatureList with 2 value tensors but only one splits tensor."""
original = sequence_example(
feature_lists=feature_lists({
"b_values":
feature_list(
[float_feature([1, 2, 3, 4]),
float_feature([2, 4, 6])]),
"b_splits":
feature_list([int64_feature([0, 1, 4])]),
}))
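    # "b_values" has two feature-list entries but "b_splits" has only one, so
    # the values cannot be partitioned consistently and parsing must fail.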
sequence_features = {
"b":
parsing_ops.RaggedFeature(
value_key="b_values",
dtype=dtypes.float32,
partitions=[parsing_ops.RaggedFeature.RowSplits("b_splits")],
validate=True)
}
self._testBoth(
dict(
serialized=ops.convert_to_tensor(original.SerializeToString()),
sequence_features=sequence_features),
expected_err=(
(errors_impl.InvalidArgumentError, ValueError),
# Message for batch=true:
"Feature b: values and partitions are not aligned"
# Message for batch=false in graph mode:
"|.* do not form a valid RaggedTensor"
# Message for batch=false in eager mode:
"|Incompatible shapes"))


@test_util.run_all_in_graph_and_eager_modes
class DecodeRawTest(test.TestCase):

  def _decode_v1(self, words):
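    """Decodes `words` to uint8 bytes using the v1 decode_raw op."""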
with self.cached_session():
examples = np.array(words)
example_tensor = constant_op.constant(
examples, shape=examples.shape, dtype=dtypes.string)
byte_tensor = parsing_ops.decode_raw_v1(example_tensor, dtypes.uint8)
return self.evaluate(byte_tensor)

  def _decode_v2(self, words, fixed_length=None):
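    """Decodes `words` with decode_raw v2, optionally forcing `fixed_length`."""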
with self.cached_session():
examples = np.array(words)
byte_tensor = parsing_ops.decode_raw(
examples, dtypes.uint8, fixed_length=fixed_length)
return self.evaluate(byte_tensor)

  def _ordinalize(self, words, fixed_length=None):
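    """Builds the expected byte matrix: ord() per char, zero-padded.

    When `fixed_length` is None, every word is assumed to share the length
    of the first one. E.g. _ordinalize(["ab"], 3) -> [[97, 98, 0]].
    """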
outputs = []
if fixed_length is None:
fixed_length = len(words[0])
for word in words:
output = []
for i in range(fixed_length):
if i < len(word):
output.append(ord(word[i]))
else:
output.append(0)
outputs.append(output)
return np.array(outputs)

  def testDecodeRawV1EqualLength(self):
words = ["string1", "string2"]
observed = self._decode_v1(words)
expected = self._ordinalize(words)
self.assertAllEqual(expected.shape, observed.shape)
self.assertAllEqual(expected, observed)

  def testDecodeRawV2FallbackEqualLength(self):
words = ["string1", "string2"]
observed = self._decode_v2(words)
expected = self._ordinalize(words)
self.assertAllEqual(expected.shape, observed.shape)
self.assertAllEqual(expected, observed)

  def testDecodeRawV1VariableLength(self):
words = ["string", "longer_string"]
with self.assertRaises(errors_impl.InvalidArgumentError):
self._decode_v1(words)

  def testDecodeRawV2FallbackVariableLength(self):
words = ["string", "longer_string"]
with self.assertRaises(errors_impl.InvalidArgumentError):
self._decode_v2(words)

  def testDecodeRawV2VariableLength(self):
words = ["string", "longer_string"]
observed = self._decode_v2(words, fixed_length=8)
expected = self._ordinalize(words, fixed_length=8)
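    # With fixed_length=8, "string" decodes to its six byte values plus two
    # zeros of padding, while "longer_string" is truncated to eight bytes.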
self.assertAllEqual(expected.shape, observed.shape)
self.assertAllEqual(expected, observed)


@test_util.run_all_in_graph_and_eager_modes
class DecodeJSONExampleTest(test.TestCase):

  def _testRoundTrip(self, examples):
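    """Encodes examples as JSON, decodes them back, and compares the protos."""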
    examples = np.array(examples, dtype=object)  # np.object is deprecated.
json_tensor = constant_op.constant(
[json_format.MessageToJson(m) for m in examples.flatten()],
shape=examples.shape,
dtype=dtypes.string)
binary_tensor = parsing_ops.decode_json_example(json_tensor)
binary_val = self.evaluate(binary_tensor)
if examples.shape:
self.assertShapeEqual(binary_val, json_tensor)
for input_example, output_binary in zip(
np.array(examples).flatten(), binary_val.flatten()):
output_example = example_pb2.Example()
output_example.ParseFromString(output_binary)
self.assertProtoEquals(input_example, output_example)
else:
output_example = example_pb2.Example()
output_example.ParseFromString(binary_val)
self.assertProtoEquals(examples.item(), output_example)

  def testEmptyTensor(self):
self._testRoundTrip([])
self._testRoundTrip([[], [], []])

  def testEmptyExamples(self):
self._testRoundTrip([example(), example(), example()])

  def testDenseFeaturesScalar(self):
self._testRoundTrip(
example(features=features({"a": float_feature([1, 1, 3])})))

  def testDenseFeaturesVector(self):
self._testRoundTrip([
example(features=features({"a": float_feature([1, 1, 3])})),
example(features=features({"a": float_feature([-1, -1, 2])})),
])

  def testDenseFeaturesMatrix(self):
self._testRoundTrip([
[example(features=features({"a": float_feature([1, 1, 3])}))],
[example(features=features({"a": float_feature([-1, -1, 2])}))],
])

  def testSparseFeatures(self):
self._testRoundTrip([
example(features=features({"st_c": float_feature([3, 4])})),
example(features=features({"st_c": float_feature([])})),
example(features=features({"st_d": feature()})),
example(
features=features({
"st_c": float_feature([1, 2, -1]),
"st_d": bytes_feature([b"hi"])
})),
])

  def testSerializedContainingBytes(self):
aname = "a"
bname = "b*has+a:tricky_name"
self._testRoundTrip([
example(
features=features({
aname: float_feature([1, 1]),
bname: bytes_feature([b"b0_str"])
})),
example(
features=features({
aname: float_feature([-1, -1]),
bname: bytes_feature([b"b1"])
})),
])

  def testInvalidSyntax(self):
json_tensor = constant_op.constant(["{]"])
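    # "{]" is malformed JSON; eager mode raises as soon as the op runs, while
    # graph mode defers the error until the output tensor is evaluated.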
if context.executing_eagerly():
      with self.assertRaisesRegex(errors.InvalidArgumentError,
                                  "Error while parsing JSON"):
parsing_ops.decode_json_example(json_tensor)
else:
binary_tensor = parsing_ops.decode_json_example(json_tensor)
with self.assertRaisesOpError("Error while parsing JSON"):
self.evaluate(binary_tensor)


class ParseTensorOpTest(test.TestCase):

  @test_util.run_deprecated_v1
def testToFloat32(self):
with self.cached_session():
expected = np.random.rand(3, 4, 5).astype(np.float32)
tensor_proto = tensor_util.make_tensor_proto(expected)
serialized = array_ops.placeholder(dtypes.string)
tensor = parsing_ops.parse_tensor(serialized, dtypes.float32)
result = tensor.eval(
feed_dict={serialized: tensor_proto.SerializeToString()})
self.assertAllEqual(expected, result)

  @test_util.run_deprecated_v1
def testToUint8(self):
with self.cached_session():
      # rand() yields floats in [0, 1), which truncate to all zeros as uint8;
      # randint gives the round trip real byte values to check.
      expected = np.random.randint(0, 256, size=(3, 4, 5)).astype(np.uint8)
tensor_proto = tensor_util.make_tensor_proto(expected)
serialized = array_ops.placeholder(dtypes.string)
tensor = parsing_ops.parse_tensor(serialized, dtypes.uint8)
result = tensor.eval(
feed_dict={serialized: tensor_proto.SerializeToString()})
self.assertAllEqual(expected, result)

  @test_util.run_deprecated_v1
def testTypeMismatch(self):
with self.cached_session():
      expected = np.random.randint(0, 256, size=(3, 4, 5)).astype(np.uint8)
tensor_proto = tensor_util.make_tensor_proto(expected)
serialized = array_ops.placeholder(dtypes.string)
tensor = parsing_ops.parse_tensor(serialized, dtypes.uint16)
with self.assertRaisesOpError(
r"Type mismatch between parsed tensor \(uint8\) and dtype "
r"\(uint16\)"):
tensor.eval(feed_dict={serialized: tensor_proto.SerializeToString()})

  @test_util.run_deprecated_v1
def testInvalidInput(self):
with self.cached_session():
serialized = array_ops.placeholder(dtypes.string)
tensor = parsing_ops.parse_tensor(serialized, dtypes.uint16)
with self.assertRaisesOpError(
"Could not parse `serialized` as TensorProto: 'bogus'"):
tensor.eval(feed_dict={serialized: "bogus"})
with self.assertRaisesOpError(
r"Expected `serialized` to be a scalar, got shape: \[1\]"):
tensor.eval(feed_dict={serialized: ["bogus"]})


if __name__ == "__main__":
test.main()