2510 lines
89 KiB
Python
2510 lines
89 KiB
Python
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ==============================================================================
|
|
"""Tests for tensorflow.ops.parsing_ops."""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import copy
|
|
import itertools
|
|
|
|
import numpy as np
|
|
|
|
from google.protobuf import json_format
|
|
|
|
from tensorflow.core.example import example_pb2
|
|
from tensorflow.core.example import feature_pb2
|
|
from tensorflow.python.eager import context
|
|
from tensorflow.python.framework import constant_op
|
|
from tensorflow.python.framework import dtypes
|
|
from tensorflow.python.framework import errors
|
|
from tensorflow.python.framework import errors_impl
|
|
from tensorflow.python.framework import ops
|
|
from tensorflow.python.framework import sparse_tensor
|
|
from tensorflow.python.framework import tensor_shape
|
|
from tensorflow.python.framework import tensor_util
|
|
from tensorflow.python.framework import test_util
|
|
from tensorflow.python.ops import array_ops
|
|
from tensorflow.python.ops import parsing_ops
|
|
from tensorflow.python.ops.ragged import ragged_concat_ops
|
|
from tensorflow.python.ops.ragged import ragged_factory_ops
|
|
from tensorflow.python.ops.ragged import ragged_tensor
|
|
from tensorflow.python.platform import test
|
|
from tensorflow.python.platform import tf_logging
|
|
|
|
# Helpers for creating Example objects.
example = example_pb2.Example
feature = feature_pb2.Feature


# PEP 8 (E731) prefers named defs over lambda assignments: they carry a real
# __name__ and can hold docstrings. Call signatures are unchanged.
def features(d):
  """Wraps a dict of feature name -> Feature into a Features proto."""
  return feature_pb2.Features(feature=d)


def bytes_feature(v):
  """Builds a Feature holding the list of byte strings `v`."""
  return feature(bytes_list=feature_pb2.BytesList(value=v))


def int64_feature(v):
  """Builds a Feature holding the list of ints `v`."""
  return feature(int64_list=feature_pb2.Int64List(value=v))


def float_feature(v):
  """Builds a Feature holding the list of floats `v`."""
  return feature(float_list=feature_pb2.FloatList(value=v))


# Helpers for creating SequenceExample objects.
def feature_list(l):
  """Wraps a list of Features into a FeatureList proto."""
  return feature_pb2.FeatureList(feature=l)


def feature_lists(d):
  """Wraps a dict of name -> FeatureList into a FeatureLists proto."""
  return feature_pb2.FeatureLists(feature_list=d)


sequence_example = example_pb2.SequenceExample
|
|
|
|
|
|
def flatten(list_of_lists):
  """Yield the items of each sub-iterable in turn (one level of flattening)."""
  for sublist in list_of_lists:
    for item in sublist:
      yield item
|
|
|
|
|
|
def _compare_output_to_expected(tester, actual, expected):
  """Asserts that parsed output `actual` matches `expected`, key by key.

  Args:
    tester: The test case instance supplying the assert methods.
    actual: Dict mapping feature key -> parsed result (dense tensor or
      `SparseTensor`).
    expected: Dict mapping feature key -> expected value. A sparse result is
      expected as an (indices, values, dense_shape) triple.
  """
  tester.assertEqual(set(actual.keys()), set(expected.keys()))
  for k, v in actual.items():
    expected_v = expected[k]
    tf_logging.info("Comparing key: %s", k)
    if isinstance(v, sparse_tensor.SparseTensor):
      # Sparse results are compared component-wise against a 3-tuple.
      tester.assertTrue(isinstance(expected_v, tuple))
      tester.assertLen(expected_v, 3)
      tester.assertAllEqual(v.indices, expected_v[0])
      tester.assertAllEqual(v.values, expected_v[1])
      tester.assertAllEqual(v.dense_shape, expected_v[2])
    else:
      tester.assertAllEqual(v, expected_v)
|
|
|
|
|
|
@test_util.run_all_in_graph_and_eager_modes
|
|
class ParseExampleTest(test.TestCase):
|
|
|
|
  def _test(self, kwargs, expected_values=None, expected_err=None):
    """Runs `parse_example(**kwargs)` and checks values or an expected error.

    Args:
      kwargs: Keyword arguments forwarded to `parsing_ops.parse_example`.
      expected_values: Dict of feature key -> expected output; sparse outputs
        are given as (indices, values, dense_shape) tuples.
      expected_err: Optional (exception_class, regex) pair. When set, parsing
        must fail with a matching error and no values are checked.
    """
    if expected_err:
      if not context.executing_eagerly():
        with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                 expected_err[1]):
          self.evaluate(parsing_ops.parse_example(**kwargs))
      else:
        # Eager mode may surface a different exception type/message, so only
        # require that some exception is raised.
        with self.assertRaises(Exception):
          parsing_ops.parse_example(**kwargs)
      return
    else:
      out = parsing_ops.parse_example(**kwargs)
      _compare_output_to_expected(self, out, expected_values)

    # Check shapes; if serialized is a Tensor we need its size to
    # properly check.
    serialized = kwargs["serialized"]
    batch_size = (
        self.evaluate(serialized).size
        if isinstance(serialized, ops.Tensor) else np.asarray(serialized).size)
    for k, f in kwargs["features"].items():
      if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
        # Dense outputs have fully-known static shape: batch + feature shape.
        self.assertEqual(tuple(out[k].shape.as_list()), (batch_size,) + f.shape)
      elif isinstance(f, parsing_ops.VarLenFeature):
        if context.executing_eagerly():
          out[k].indices.shape.assert_is_compatible_with([None, 2])
          out[k].values.shape.assert_is_compatible_with([None])
          out[k].dense_shape.shape.assert_is_compatible_with([2])
        else:
          # In graph mode only the partially-known static shapes are checked.
          self.assertEqual(out[k].indices.shape.as_list(), [None, 2])
          self.assertEqual(out[k].values.shape.as_list(), [None])
          self.assertEqual(out[k].dense_shape.shape.as_list(), [2])
|
|
|
|
  def testEmptySerializedWithAllDefaults(self):
    """Empty serialized protos parse when every feature has a default."""
    sparse_name = "st_a"
    a_name = "a"
    b_name = "b"
    c_name = "c:has_a_tricky_name"
    a_default = [0, 42, 0]
    b_default = np.random.rand(3, 3).astype(bytes)
    c_default = np.random.rand(2).astype(np.float32)

    # A missing VarLenFeature parses to an empty sparse tensor.
    expected_st_a = (  # indices, values, shape
        np.empty((0, 2), dtype=np.int64),  # indices
        np.empty((0,), dtype=np.int64),  # sp_a is DT_INT64
        np.array([2, 0], dtype=np.int64))  # batch == 2, max_elems = 0

    expected_output = {
        sparse_name: expected_st_a,
        a_name: np.array(2 * [[a_default]]),
        b_name: np.array(2 * [b_default]),
        c_name: np.array(2 * [c_default]),
    }

    self._test(
        {
            "example_names": np.empty((0,), dtype=bytes),
            "serialized": ops.convert_to_tensor(["", ""]),
            "features": {
                sparse_name:
                    parsing_ops.VarLenFeature(dtypes.int64),
                a_name:
                    parsing_ops.FixedLenFeature(
                        (1, 3), dtypes.int64, default_value=a_default),
                b_name:
                    parsing_ops.FixedLenFeature(
                        (3, 3), dtypes.string, default_value=b_default),
                c_name:
                    parsing_ops.FixedLenFeature(
                        (2,), dtypes.float32, default_value=c_default),
            }
        }, expected_output)
|
|
|
|
  def testEmptySerializedWithoutDefaultsShouldFail(self):
    """Parsing fails when a required FixedLenFeature has no default."""
    input_features = {
        "st_a":
            parsing_ops.VarLenFeature(dtypes.int64),
        "a":
            parsing_ops.FixedLenFeature((1, 3),
                                        dtypes.int64,
                                        default_value=[0, 42, 0]),
        "b":
            parsing_ops.FixedLenFeature(
                (3, 3),
                dtypes.string,
                default_value=np.random.rand(3, 3).astype(bytes)),
        # Feature "c" is missing a default, this gap will cause failure.
        "c":
            parsing_ops.FixedLenFeature((2,), dtype=dtypes.float32),
    }

    # Edge case where the key is there but the feature value is empty.
    original = example(features=features({"c": feature()}))
    self._test(
        {
            "example_names": ["in1"],
            "serialized": [original.SerializeToString()],
            "features": input_features,
        },
        expected_err=(
            errors_impl.OpError,
            "Name: in1, Feature: c \\(data type: float\\) is required"))

    # Standard case of missing key and value.
    self._test(
        {
            "example_names": ["in1", "in2"],
            "serialized": ["", ""],
            "features": input_features,
        },
        expected_err=(
            errors_impl.OpError,
            "Name: in1, Feature: c \\(data type: float\\) is required"))
|
|
|
|
  def testDenseNotMatchingShapeShouldFail(self):
    """A dense feature whose value count mismatches its shape must fail."""
    original = [
        example(features=features({
            "a": float_feature([1, 1, 3]),
        })),
        example(features=features({
            # Only 2 values but shape (1, 3) requires 3 -> parse error.
            "a": float_feature([-1, -1]),
        }))
    ]

    names = ["passing", "failing"]
    serialized = [m.SerializeToString() for m in original]

    self._test(
        {
            "example_names": names,
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                "a": parsing_ops.FixedLenFeature((1, 3), dtypes.float32)
            }
        },
        expected_err=(errors_impl.OpError,
                      "Name: failing, Key: a, Index: 1. Number of float val"))
|
|
|
|
  def testDenseDefaultNoShapeShouldFail(self):
    """A FixedLenFeature with shape=None is rejected at graph-build time."""
    original = [
        example(features=features({
            "a": float_feature([1, 1, 3]),
        })),
    ]

    serialized = [m.SerializeToString() for m in original]

    self._test(
        {
            "example_names": ["failing"],
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                # shape=None is invalid for FixedLenFeature.
                "a": parsing_ops.FixedLenFeature(None, dtypes.float32)
            }
        },
        expected_err=(ValueError, "Missing shape for feature a"))
|
|
|
|
  def testSerializedContainingSparse(self):
    """VarLenFeatures are parsed into (indices, values, shape) triples."""
    original = [
        example(features=features({"st_c": float_feature([3, 4])})),
        example(
            features=features({
                "st_c": float_feature([]),  # empty float list
            })),
        example(
            features=features({
                "st_d": feature(),  # feature with nothing in it
            })),
        example(
            features=features({
                "st_c": float_feature([1, 2, -1]),
                "st_d": bytes_feature([b"hi"])
            }))
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_st_c = (  # indices, values, shape
        np.array([[0, 0], [0, 1], [3, 0], [3, 1], [3, 2]], dtype=np.int64),
        np.array([3.0, 4.0, 1.0, 2.0, -1.0], dtype=np.float32),
        np.array([4, 3], dtype=np.int64))  # batch == 4, max_elems = 3

    expected_st_d = (  # indices, values, shape
        np.array([[3, 0]], dtype=np.int64), np.array(["hi"], dtype=bytes),
        np.array([4, 1], dtype=np.int64))  # batch == 4, max_elems = 1

    expected_output = {
        "st_c": expected_st_c,
        "st_d": expected_st_d,
    }

    self._test(
        {
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                "st_c": parsing_ops.VarLenFeature(dtypes.float32),
                "st_d": parsing_ops.VarLenFeature(dtypes.string)
            }
        }, expected_output)
|
|
|
|
  def testSerializedContainingSparseFeature(self):
    """SparseFeature combines separate index and value feature keys."""
    original = [
        example(
            features=features({
                "val": float_feature([3, 4]),
                "idx": int64_feature([5, 10])
            })),
        example(
            features=features({
                "val": float_feature([]),  # empty float list
                "idx": int64_feature([])
            })),
        example(
            features=features({
                "val": feature(),  # feature with nothing in it
                # missing idx feature
            })),
        example(
            features=features({
                "val": float_feature([1, 2, -1]),
                "idx":
                    int64_feature([0, 9, 3])  # unsorted
            }))
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_sp = (  # indices, values, shape
        np.array([[0, 5], [0, 10], [3, 0], [3, 3], [3, 9]], dtype=np.int64),
        # Values are reordered to match the sorted index order above.
        np.array([3.0, 4.0, 1.0, -1.0, 2.0], dtype=np.float32),
        np.array([4, 13], dtype=np.int64))  # batch == 4, max_elems = 13

    expected_output = {
        "sp": expected_sp,
    }

    self._test(
        {
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                "sp":
                    parsing_ops.SparseFeature(["idx"], "val", dtypes.float32,
                                              [13])
            }
        }, expected_output)
|
|
|
|
  def testSerializedContainingSparseFeatureReuse(self):
    """Two SparseFeatures may reuse the same index key."""
    original = [
        example(
            features=features({
                "val1": float_feature([3, 4]),
                "val2": float_feature([5, 6]),
                "idx": int64_feature([5, 10])
            })),
        example(
            features=features({
                "val1": float_feature([]),  # empty float list
                "idx": int64_feature([])
            })),
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_sp1 = (  # indices, values, shape
        np.array([[0, 5], [0, 10]],
                 dtype=np.int64), np.array([3.0, 4.0], dtype=np.float32),
        np.array([2, 13], dtype=np.int64))  # batch == 2, max_elems = 13

    expected_sp2 = (  # indices, values, shape
        np.array([[0, 5], [0, 10]],
                 dtype=np.int64), np.array([5.0, 6.0], dtype=np.float32),
        np.array([2, 7], dtype=np.int64))  # batch == 2, max_elems = 7

    expected_output = {
        "sp1": expected_sp1,
        "sp2": expected_sp2,
    }

    self._test(
        {
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                "sp1":
                    parsing_ops.SparseFeature("idx", "val1", dtypes.float32,
                                              13),
                "sp2":
                    parsing_ops.SparseFeature(
                        "idx",
                        "val2",
                        dtypes.float32,
                        size=7,
                        already_sorted=True)
            }
        }, expected_output)
|
|
|
|
  def testSerializedContaining3DSparseFeature(self):
    """SparseFeature with two index keys yields a rank-3 SparseTensor."""
    original = [
        example(
            features=features({
                "val": float_feature([3, 4]),
                "idx0": int64_feature([5, 10]),
                "idx1": int64_feature([0, 2]),
            })),
        example(
            features=features({
                "val": float_feature([]),  # empty float list
                "idx0": int64_feature([]),
                "idx1": int64_feature([]),
            })),
        example(
            features=features({
                "val": feature(),  # feature with nothing in it
                # missing idx feature
            })),
        example(
            features=features({
                "val": float_feature([1, 2, -1]),
                "idx0": int64_feature([0, 9, 3]),  # unsorted
                "idx1": int64_feature([1, 0, 2]),
            }))
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_sp = (
        # indices
        np.array([[0, 5, 0], [0, 10, 2], [3, 0, 1], [3, 3, 2], [3, 9, 0]],
                 dtype=np.int64),
        # values
        np.array([3.0, 4.0, 1.0, -1.0, 2.0], dtype=np.float32),
        # shape batch == 4, max_elems = 13
        np.array([4, 13, 3], dtype=np.int64))

    expected_output = {
        "sp": expected_sp,
    }

    self._test(
        {
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                "sp":
                    parsing_ops.SparseFeature(["idx0", "idx1"], "val",
                                              dtypes.float32, [13, 3])
            }
        }, expected_output)
|
|
|
|
  def testSerializedContainingDense(self):
    """FixedLenFeatures with multi-dim shapes parse into dense tensors."""
    aname = "a"
    bname = "b*has+a:tricky_name"
    original = [
        example(
            features=features({
                aname: float_feature([1, 1]),
                bname: bytes_feature([b"b0_str"]),
            })),
        example(
            features=features({
                aname: float_feature([-1, -1]),
                bname: bytes_feature([b""]),
            }))
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_output = {
        aname:
            np.array([[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
        bname:
            np.array(["b0_str", ""], dtype=bytes).reshape(2, 1, 1, 1, 1),
    }

    # No defaults, values required
    self._test(
        {
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                aname:
                    parsing_ops.FixedLenFeature(
                        (1, 2, 1), dtype=dtypes.float32),
                bname:
                    parsing_ops.FixedLenFeature(
                        (1, 1, 1, 1), dtype=dtypes.string),
            }
        }, expected_output)
|
|
|
|
  # This test is identical to the previous one except
  # for the creation of 'serialized'.
  def testSerializedContainingDenseWithConcat(self):
    """Concatenated protos merge; a later occurrence of a feature wins."""
    aname = "a"
    bname = "b*has+a:tricky_name"
    # TODO(lew): Feature appearing twice should be an error in future.
    original = [
        (example(features=features({
            aname: float_feature([10, 10]),
        })),
         example(
             features=features({
                 aname: float_feature([1, 1]),
                 bname: bytes_feature([b"b0_str"]),
             }))),
        (
            example(features=features({
                bname: bytes_feature([b"b100"]),
            })),
            example(
                features=features({
                    aname: float_feature([-1, -1]),
                    bname: bytes_feature([b"b1"]),
                })),
        ),
    ]

    # Each serialized entry is two protos concatenated back-to-back.
    serialized = [
        m.SerializeToString() + n.SerializeToString() for (m, n) in original
    ]

    expected_output = {
        aname:
            np.array([[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
        bname:
            np.array(["b0_str", "b1"], dtype=bytes).reshape(2, 1, 1, 1, 1),
    }

    # No defaults, values required
    self._test(
        {
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                aname:
                    parsing_ops.FixedLenFeature(
                        (1, 2, 1), dtype=dtypes.float32),
                bname:
                    parsing_ops.FixedLenFeature(
                        (1, 1, 1, 1), dtype=dtypes.string),
            }
        }, expected_output)
|
|
|
|
  def testSerializedContainingDenseScalar(self):
    """A scalar dense feature falls back to its default when missing."""
    original = [
        example(features=features({
            "a": float_feature([1]),
        })),
        example(features=features({}))  # empty Example; default is used
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_output = {
        "a":
            np.array([[1], [-1]], dtype=np.float32)  # 2x1 (column vector)
    }

    self._test(
        {
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                "a":
                    parsing_ops.FixedLenFeature(
                        (1,), dtype=dtypes.float32, default_value=-1),
            }
        }, expected_output)
|
|
|
|
  def testSerializedContainingDenseWithDefaults(self):
    """Missing or empty dense features are filled with default_value."""
    original = [
        example(features=features({
            "a": float_feature([1, 1]),
        })),
        example(features=features({
            "b": bytes_feature([b"b1"]),
        })),
        example(features=features({"b": feature()})),  # empty feature value
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_output = {
        "a":
            np.array([[1, 1], [3, -3], [3, -3]],
                     dtype=np.float32).reshape(3, 1, 2, 1),
        "b":
            np.array(["tmp_str", "b1", "tmp_str"],
                     dtype=bytes).reshape(3, 1, 1, 1, 1),
    }

    self._test(
        {
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                "a":
                    parsing_ops.FixedLenFeature((1, 2, 1),
                                                dtype=dtypes.float32,
                                                default_value=[3.0, -3.0]),
                "b":
                    parsing_ops.FixedLenFeature((1, 1, 1, 1),
                                                dtype=dtypes.string,
                                                default_value="tmp_str"),
            }
        }, expected_output)
|
|
|
|
  def testSerializedContainingSparseAndSparseFeatureAndDenseWithNoDefault(self):
    """Mixes VarLen, SparseFeature, and dense features in a single parse."""
    expected_st_a = (  # indices, values, shape
        np.empty((0, 2), dtype=np.int64),  # indices
        np.empty((0,), dtype=np.int64),  # sp_a is DT_INT64
        np.array([2, 0], dtype=np.int64))  # batch == 2, max_elems = 0
    expected_sp = (  # indices, values, shape
        np.array([[0, 0], [0, 3], [1, 7]],
                 dtype=np.int64), np.array(["a", "b", "c"], dtype="|S"),
        np.array([2, 13], dtype=np.int64))  # batch == 2, max_elems = 13

    original = [
        example(
            features=features({
                "c": float_feature([3, 4]),
                "val": bytes_feature([b"a", b"b"]),
                "idx": int64_feature([0, 3])
            })),
        example(
            features=features({
                "c": float_feature([1, 2]),
                "val": bytes_feature([b"c"]),
                "idx": int64_feature([7])
            }))
    ]

    names = ["in1", "in2"]
    serialized = [m.SerializeToString() for m in original]

    a_default = [1, 2, 3]
    b_default = np.random.rand(3, 3).astype(bytes)
    expected_output = {
        "st_a": expected_st_a,
        "sp": expected_sp,
        "a": np.array(2 * [[a_default]]),
        "b": np.array(2 * [b_default]),
        "c": np.array([[3, 4], [1, 2]], dtype=np.float32),
    }

    self._test(
        {
            "example_names": names,
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                "st_a":
                    parsing_ops.VarLenFeature(dtypes.int64),
                "sp":
                    parsing_ops.SparseFeature("idx", "val", dtypes.string, 13),
                "a":
                    parsing_ops.FixedLenFeature(
                        (1, 3), dtypes.int64, default_value=a_default),
                "b":
                    parsing_ops.FixedLenFeature(
                        (3, 3), dtypes.string, default_value=b_default),
                # Feature "c" must be provided, since it has no default_value.
                "c":
                    parsing_ops.FixedLenFeature((2,), dtypes.float32),
            }
        },
        expected_output)
|
|
|
|
  def testSerializedContainingSparseAndSparseFeatureWithReuse(self):
    """The SparseFeature's index key may also be requested as a VarLen."""
    expected_idx = (  # indices, values, shape
        np.array([[0, 0], [0, 1], [1, 0], [1, 1]],
                 dtype=np.int64), np.array([0, 3, 7, 1]),
        np.array([2, 2], dtype=np.int64))  # batch == 2, max_elems = 2

    expected_sp = (  # indices, values, shape
        np.array([[0, 0], [0, 3], [1, 1], [1, 7]],
                 dtype=np.int64), np.array(["a", "b", "d", "c"], dtype="|S"),
        np.array([2, 13], dtype=np.int64))  # batch == 2, max_elems = 13

    original = [
        example(
            features=features({
                "val": bytes_feature([b"a", b"b"]),
                "idx": int64_feature([0, 3])
            })),
        example(
            features=features({
                "val": bytes_feature([b"c", b"d"]),
                "idx": int64_feature([7, 1])
            }))
    ]

    names = ["in1", "in2"]
    serialized = [m.SerializeToString() for m in original]

    expected_output = {
        "idx": expected_idx,
        "sp": expected_sp,
    }

    self._test(
        {
            "example_names": names,
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                "idx":
                    parsing_ops.VarLenFeature(dtypes.int64),
                "sp":
                    parsing_ops.SparseFeature(["idx"], "val", dtypes.string,
                                              [13]),
            }
        }, expected_output)
|
|
|
|
  def _testSerializedContainingVarLenDenseLargerBatch(self, batch_size):
    """Exercises FixedLenSequenceFeature padding at a given batch size.

    During parsing, data read from the serialized proto is stored in buffers.
    For small batch sizes, a buffer will contain one minibatch entry.
    For larger batch sizes, a buffer may contain several minibatch
    entries. This test identified a bug where the code that copied
    data out of the buffers and into the output tensors assumed each
    buffer only contained one minibatch entry. The bug has since been fixed.

    Args:
      batch_size: Number of Example protos to parse in one batch.
    """
    truth_int = [i for i in range(batch_size)]
    truth_str = [[("foo%d" % i).encode(), ("bar%d" % i).encode()]
                 for i in range(batch_size)]

    expected_str = copy.deepcopy(truth_str)

    # Delete some intermediate entries. (Skip the first entry, to ensure that
    # we have at least one entry with length 2, to get the expected padding.)
    for i in range(1, batch_size):
      col = 1
      if np.random.rand() < 0.25:
        # w.p. 25%, drop out the second entry
        expected_str[i][col] = b"default"
        col -= 1
        truth_str[i].pop()
      if np.random.rand() < 0.25:
        # w.p. 25%, drop out the second entry (possibly again)
        expected_str[i][col] = b"default"
        truth_str[i].pop()

    expected_output = {
        # Batch size batch_size, 1 time step.
        "a": np.array(truth_int, dtype=np.int64).reshape(batch_size, 1),
        # Batch size batch_size, 2 time steps.
        "b": np.array(expected_str, dtype="|S").reshape(batch_size, 2),
    }

    original = [
        example(
            features=features({
                "a": int64_feature([truth_int[i]]),
                "b": bytes_feature(truth_str[i])
            })) for i in range(batch_size)
    ]

    serialized = [m.SerializeToString() for m in original]

    self._test(
        {
            "serialized":
                ops.convert_to_tensor(serialized, dtype=dtypes.string),
            "features": {
                "a":
                    parsing_ops.FixedLenSequenceFeature(
                        shape=(),
                        dtype=dtypes.int64,
                        allow_missing=True,
                        default_value=-1),
                "b":
                    parsing_ops.FixedLenSequenceFeature(
                        shape=[],
                        dtype=dtypes.string,
                        allow_missing=True,
                        default_value="default"),
            }
        }, expected_output)
|
|
|
|
def testSerializedContainingVarLenDenseLargerBatch(self):
|
|
np.random.seed(3456)
|
|
for batch_size in (1, 10, 20, 100, 256):
|
|
self._testSerializedContainingVarLenDenseLargerBatch(batch_size)
|
|
|
|
  def testSerializedContainingVarLenDense(self):
    """FixedLenSequenceFeature: padding, custom defaults, and error cases."""
    aname = "a"
    bname = "b"
    cname = "c"
    dname = "d"
    example_names = ["in1", "in2", "in3", "in4"]
    original = [
        example(features=features({
            cname: int64_feature([2]),
        })),
        example(
            features=features({
                aname: float_feature([1, 1]),
                bname: bytes_feature([b"b0_str", b"b1_str"]),
            })),
        example(
            features=features({
                aname: float_feature([-1, -1, 2, 2]),
                bname: bytes_feature([b"b1"]),
            })),
        example(
            features=features({
                aname: float_feature([]),
                cname: int64_feature([3]),
            })),
    ]

    serialized = [m.SerializeToString() for m in original]

    # Shorter rows are padded with the type's zero value by default.
    expected_output = {
        aname:
            np.array([
                [0, 0, 0, 0],
                [1, 1, 0, 0],
                [-1, -1, 2, 2],
                [0, 0, 0, 0],
            ],
                     dtype=np.float32).reshape(4, 2, 2, 1),
        bname:
            np.array([["", ""], ["b0_str", "b1_str"], ["b1", ""], ["", ""]],
                     dtype=bytes).reshape(4, 2, 1, 1, 1),
        cname:
            np.array([2, 0, 0, 3], dtype=np.int64).reshape(4, 1),
        dname:
            np.empty(shape=(4, 0), dtype=bytes),
    }

    self._test(
        {
            "example_names": example_names,
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                aname:
                    parsing_ops.FixedLenSequenceFeature(
                        (2, 1), dtype=dtypes.float32, allow_missing=True),
                bname:
                    parsing_ops.FixedLenSequenceFeature(
                        (1, 1, 1), dtype=dtypes.string, allow_missing=True),
                cname:
                    parsing_ops.FixedLenSequenceFeature(
                        shape=[], dtype=dtypes.int64, allow_missing=True),
                dname:
                    parsing_ops.FixedLenSequenceFeature(
                        shape=[], dtype=dtypes.string, allow_missing=True),
            }
        }, expected_output)

    # Test with padding values.
    expected_output_custom_padding = dict(expected_output)
    expected_output_custom_padding[aname] = np.array([
        [-2, -2, -2, -2],
        [1, 1, -2, -2],
        [-1, -1, 2, 2],
        [-2, -2, -2, -2],
    ],
                                                     dtype=np.float32).reshape(
                                                         4, 2, 2, 1)

    self._test(
        {
            "example_names": example_names,
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                aname:
                    parsing_ops.FixedLenSequenceFeature((2, 1),
                                                        dtype=dtypes.float32,
                                                        allow_missing=True,
                                                        default_value=-2.0),
                bname:
                    parsing_ops.FixedLenSequenceFeature(
                        (1, 1, 1), dtype=dtypes.string, allow_missing=True),
                cname:
                    parsing_ops.FixedLenSequenceFeature(
                        shape=[], dtype=dtypes.int64, allow_missing=True),
                dname:
                    parsing_ops.FixedLenSequenceFeature(
                        shape=[], dtype=dtypes.string, allow_missing=True),
            }
        }, expected_output_custom_padding)

    # Change number of required values so the inputs are not a
    # multiple of this size.
    self._test(
        {
            "example_names": example_names,
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                aname:
                    parsing_ops.FixedLenSequenceFeature(
                        (2, 1), dtype=dtypes.float32, allow_missing=True),
                bname:
                    parsing_ops.FixedLenSequenceFeature(
                        (2, 1, 1), dtype=dtypes.string, allow_missing=True),
            }
        },
        expected_err=(
            errors_impl.OpError, "Name: in3, Key: b, Index: 2. "
            "Number of bytes values is not a multiple of stride length."))

    # An empty-list default_value cannot be reshaped to the element shape.
    self._test(
        {
            "example_names": example_names,
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                aname:
                    parsing_ops.FixedLenSequenceFeature((2, 1),
                                                        dtype=dtypes.float32,
                                                        allow_missing=True,
                                                        default_value=[]),
                bname:
                    parsing_ops.FixedLenSequenceFeature(
                        (2, 1, 1), dtype=dtypes.string, allow_missing=True),
            }
        },
        expected_err=(ValueError,
                      "Cannot reshape a tensor with 0 elements to shape"))

    # FixedLenFeature (not Sequence) rejects an unknown leading dimension.
    self._test(
        {
            "example_names": example_names,
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                aname:
                    parsing_ops.FixedLenFeature(
                        (None, 2, 1), dtype=dtypes.float32),
                bname:
                    parsing_ops.FixedLenSequenceFeature(
                        (2, 1, 1), dtype=dtypes.string, allow_missing=True),
            }
        },
        expected_err=(ValueError,
                      "First dimension of shape for feature a unknown. "
                      "Consider using FixedLenSequenceFeature."))

    # All non-leading dimensions must be fully known.
    self._test(
        {
            "example_names": example_names,
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                cname:
                    parsing_ops.FixedLenFeature(
                        (1, None), dtype=dtypes.int64, default_value=[[1]]),
            }
        },
        expected_err=(ValueError,
                      "All dimensions of shape for feature c need to be known "
                      r"but received \(1, None\)."))

    # FixedLenSequenceFeature currently requires allow_missing=True.
    self._test(
        {
            "example_names": example_names,
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                aname:
                    parsing_ops.FixedLenSequenceFeature(
                        (2, 1), dtype=dtypes.float32, allow_missing=True),
                bname:
                    parsing_ops.FixedLenSequenceFeature(
                        (1, 1, 1), dtype=dtypes.string, allow_missing=True),
                cname:
                    parsing_ops.FixedLenSequenceFeature(
                        shape=[], dtype=dtypes.int64, allow_missing=False),
                dname:
                    parsing_ops.FixedLenSequenceFeature(
                        shape=[], dtype=dtypes.string, allow_missing=True),
            }
        },
        expected_err=(ValueError,
                      "Unsupported: FixedLenSequenceFeature requires "
                      "allow_missing to be True."))
|
|
|
|
  def testSerializedContainingRaggedFeatureWithNoPartitions(self):
    """RaggedFeature with no partitions yields one ragged dimension."""
    original = [
        example(features=features({"rt_c": float_feature([3, 4])})),
        example(
            features=features({
                "rt_c": float_feature([]),  # empty float list
            })),
        example(
            features=features({
                "rt_d": feature(),  # feature with nothing in it
            })),
        example(
            features=features({
                "rt_c": float_feature([1, 2, -1]),
                "rt_d": bytes_feature([b"hi"])
            }))
    ]
    serialized = [m.SerializeToString() for m in original]

    test_features = {
        "rt_c":
            parsing_ops.RaggedFeature(dtype=dtypes.float32),
        "rt_d":
            parsing_ops.RaggedFeature(
                dtype=dtypes.string, row_splits_dtype=dtypes.int64)
    }

    expected_rt_c = ragged_factory_ops.constant(
        [[3.0, 4.0], [], [], [1.0, 2.0, -1.0]],
        dtype=dtypes.float32,
        row_splits_dtype=dtypes.int32)
    expected_rt_d = ragged_factory_ops.constant([[], [], [], [b"hi"]])

    expected_output = {
        "rt_c": expected_rt_c,
        "rt_d": expected_rt_d,
    }

    self._test(
        {
            "serialized": ops.convert_to_tensor(serialized),
            "features": test_features
        }, expected_output)

    # Test with a large enough batch to ensure that the minibatch size is >1.
    batch_serialized = serialized * 64
    self.assertEqual(expected_rt_c.row_splits.dtype, np.int32)
    batch_expected_out = {
        "rt_c": ragged_concat_ops.concat([expected_rt_c] * 64, axis=0),
        "rt_d": ragged_concat_ops.concat([expected_rt_d] * 64, axis=0)
    }
    # Concatenation must preserve the int32 row_splits dtype.
    self.assertEqual(batch_expected_out["rt_c"].row_splits.dtype, dtypes.int32)
    self._test(
        {
            "serialized": ops.convert_to_tensor(batch_serialized),
            "features": test_features
        }, batch_expected_out)
|
|
|
|
  def testSerializedContainingRaggedFeature(self):
    """Every row-partition encoding decodes to the same RaggedTensor."""
    original = [
        example(
            features=features({
                # rt = [[3], [4, 5, 6]]
                "rt_values": float_feature([3, 4, 5, 6]),
                "rt_splits": int64_feature([0, 1, 4]),
                "rt_lengths": int64_feature([1, 3]),
                "rt_starts": int64_feature([0, 1]),
                "rt_limits": int64_feature([1, 4]),
                "rt_rowids": int64_feature([0, 1, 1, 1]),
            })),
        example(
            features=features({
                # rt = []
                "rt_values": float_feature([]),
                "rt_splits": int64_feature([0]),
                "rt_lengths": int64_feature([]),
                "rt_starts": int64_feature([]),
                "rt_limits": int64_feature([]),
                "rt_rowids": int64_feature([]),
            })),
        example(
            features=features({
                # rt = []
                "rt_values": feature(),  # feature with nothing in it
                "rt_splits": int64_feature([0]),
                "rt_lengths": feature(),
                "rt_starts": feature(),
                "rt_limits": feature(),
                "rt_rowids": feature(),
            })),
        example(
            features=features({
                # rt = [[1.0, 2.0, -1.0], [], [8.0, 9.0], [5.0]]
                "rt_values": float_feature([1, 2, -1, 8, 9, 5]),
                "rt_splits": int64_feature([0, 3, 3, 5, 6]),
                "rt_lengths": int64_feature([3, 0, 2, 1]),
                "rt_starts": int64_feature([0, 3, 3, 5]),
                "rt_limits": int64_feature([3, 3, 5, 6]),
                "rt_rowids": int64_feature([0, 0, 0, 2, 2, 3]),
            }))
    ]
    serialized = ops.convert_to_tensor(
        [m.SerializeToString() for m in original])

    # One RaggedFeature per partition encoding, all over the same values.
    test_features = {
        "rt1":
            parsing_ops.RaggedFeature(
                value_key="rt_values",
                partitions=[parsing_ops.RaggedFeature.RowSplits("rt_splits")],
                dtype=dtypes.float32),
        "rt2":
            parsing_ops.RaggedFeature(
                value_key="rt_values",
                partitions=[parsing_ops.RaggedFeature.RowLengths("rt_lengths")],
                dtype=dtypes.float32),
        "rt3":
            parsing_ops.RaggedFeature(
                value_key="rt_values",
                partitions=[parsing_ops.RaggedFeature.RowStarts("rt_starts")],
                dtype=dtypes.float32),
        "rt4":
            parsing_ops.RaggedFeature(
                value_key="rt_values",
                partitions=[parsing_ops.RaggedFeature.RowLimits("rt_limits")],
                dtype=dtypes.float32),
        "rt5":
            parsing_ops.RaggedFeature(
                value_key="rt_values",
                partitions=[parsing_ops.RaggedFeature.ValueRowIds("rt_rowids")],
                dtype=dtypes.float32),
        "uniform1":
            parsing_ops.RaggedFeature(
                value_key="rt_values",
                partitions=[parsing_ops.RaggedFeature.UniformRowLength(2)],
                dtype=dtypes.float32),
        "uniform2":
            parsing_ops.RaggedFeature(
                value_key="rt_values",
                partitions=[
                    parsing_ops.RaggedFeature.UniformRowLength(2),
                    parsing_ops.RaggedFeature.RowSplits("rt_splits")
                ],
                dtype=dtypes.float32),
    }

    expected_rt = ragged_factory_ops.constant(
        [[[3], [4, 5, 6]], [], [], [[1, 2, -1], [], [8, 9], [5]]],
        dtype=dtypes.float32,
        row_splits_dtype=dtypes.int32)

    expected_uniform1 = ragged_factory_ops.constant(
        [[[3, 4], [5, 6]], [], [], [[1, 2], [-1, 8], [9, 5]]],
        ragged_rank=1,
        dtype=dtypes.float32,
        row_splits_dtype=dtypes.int32)

    expected_uniform2 = ragged_factory_ops.constant(
        [[[[3], [4, 5, 6]]], [], [], [[[1, 2, -1], []], [[8, 9], [5]]]],
        dtype=dtypes.float32,
        row_splits_dtype=dtypes.int32)

    expected_output = {
        "rt1": expected_rt,
        "rt2": expected_rt,
        "rt3": expected_rt,
        "rt4": expected_rt,
        "rt5": expected_rt,
        "uniform1": expected_uniform1,
        "uniform2": expected_uniform2,
    }

    self._test({
        "serialized": serialized,
        "features": test_features
    }, expected_output)
|
|
|
|
  def testSerializedContainingNestedRaggedFeature(self):
    """Test a RaggedFeature with 3 partitions (uniform, lengths, splits).

    Parses a batch of two Examples into a single rank-5 RaggedTensor whose
    row partitions come from a fixed UniformRowLength(2), a row-lengths
    feature, and a row-splits feature, in that order (outermost first).
    """
    original = [
        # rt shape: [(batch), 2, None, None]
        example(
            features=features({
                # rt = [[[[1]], [[2, 3], [4]]], [[], [[5, 6, 7]]]]
                "rt_values": float_feature([1, 2, 3, 4, 5, 6, 7]),
                "lengths_axis2": int64_feature([1, 2, 0, 1]),
                "lengths_axis3": int64_feature([1, 2, 1, 3]),
                "splits_axis3": int64_feature([0, 1, 3, 4, 7]),
            })),
        example(
            features=features({
                # rt = [[[[1, 2, 3], [4]], [[5], [6], [7, 8]]]]
                "rt_values": float_feature([1, 2, 3, 4, 5, 6, 7, 8]),
                "lengths_axis2": int64_feature([2, 3]),
                "lengths_axis3": int64_feature([3, 1, 1, 1, 2]),
                "splits_axis3": int64_feature([0, 3, 4, 5, 6, 8]),
            }))
    ]
    serialized = ops.convert_to_tensor(
        [m.SerializeToString() for m in original])

    test_features = {
        "rt1":
            parsing_ops.RaggedFeature(
                value_key="rt_values",
                # Partitions are listed outermost-first.
                partitions=[
                    parsing_ops.RaggedFeature.UniformRowLength(2),
                    parsing_ops.RaggedFeature.RowLengths("lengths_axis2"),
                    parsing_ops.RaggedFeature.RowSplits("splits_axis3"),
                ],
                dtype=dtypes.float32,
                row_splits_dtype=dtypes.int64,
            ),
    }

    expected_rt = ragged_factory_ops.constant(
        [[[[[1]], [[2, 3], [4]]], [[], [[5, 6, 7]]]],
         [[[[1, 2, 3], [4]], [[5], [6], [7, 8]]]]],
        dtype=dtypes.float32,
        row_splits_dtype=dtypes.int64)

    expected_output = {
        "rt1": expected_rt,
    }

    self._test({
        "serialized": serialized,
        "features": test_features
    }, expected_output)
|
|
|
|
|
|
@test_util.run_all_in_graph_and_eager_modes
class ParseSingleExampleTest(test.TestCase):
  """Tests for tf.io.parse_single_example."""

  def _test(self, kwargs, expected_values=None, expected_err=None):
    """Parses a single example and checks outputs or the expected error.

    Args:
      kwargs: Keyword arguments forwarded to parse_single_example.
      expected_values: Dict mapping feature keys to expected parsed values.
      expected_err: Optional (exception_class, message_predicate) tuple; when
        set, the parse is expected to fail and no values are checked.
    """
    if expected_err:
      with self.assertRaisesWithPredicateMatch(expected_err[0],
                                               expected_err[1]):
        self.evaluate(parsing_ops.parse_single_example(**kwargs))
    else:
      out = parsing_ops.parse_single_example(**kwargs)
      _compare_output_to_expected(self, out, expected_values)

      # Check shapes.
      for k, f in kwargs["features"].items():
        if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
          self.assertEqual(
              tuple(out[k].get_shape()), tensor_shape.as_shape(f.shape))
        elif isinstance(f, parsing_ops.VarLenFeature):
          if context.executing_eagerly():
            # In eager mode the sparse component shapes are fully known.
            self.assertEqual(tuple(out[k].indices.shape.as_list()), (2, 1))
            self.assertEqual(tuple(out[k].values.shape.as_list()), (2,))
            self.assertEqual(tuple(out[k].dense_shape.shape.as_list()), (1,))
          else:
            # In graph mode the number of sparse values is unknown (None).
            self.assertEqual(tuple(out[k].indices.shape.as_list()), (None, 1))
            self.assertEqual(tuple(out[k].values.shape.as_list()), (None,))
            self.assertEqual(tuple(out[k].dense_shape.shape.as_list()), (1,))

  def testSingleExampleWithSparseAndSparseFeatureAndDense(self):
    """Parses one Example with VarLen, SparseFeature, and dense features."""
    original = example(
        features=features({
            "c": float_feature([3, 4]),
            "d": float_feature([0.0, 1.0]),
            "val": bytes_feature([b"a", b"b"]),
            "idx": int64_feature([0, 3]),
            "st_a": float_feature([3.0, 4.0])
        }))

    serialized = original.SerializeToString()

    a_default = [1, 2, 3]
    b_default = np.random.rand(3, 3).astype(bytes)
    test_features = {
        "st_a":
            parsing_ops.VarLenFeature(dtypes.float32),
        "sp":
            parsing_ops.SparseFeature(["idx"], "val", dtypes.string, [13]),
        "a":
            parsing_ops.FixedLenFeature((1, 3),
                                        dtypes.int64,
                                        default_value=a_default),
        "b":
            parsing_ops.FixedLenFeature((3, 3),
                                        dtypes.string,
                                        default_value=b_default),
        # Feature "c" must be provided, since it has no default_value.
        "c":
            parsing_ops.FixedLenFeature(2, dtypes.float32),
        "d":
            parsing_ops.FixedLenSequenceFeature([],
                                                dtypes.float32,
                                                allow_missing=True)
    }

    expected_st_a = (
        np.array([[0], [1]], dtype=np.int64),  # indices
        np.array([3.0, 4.0], dtype=np.float32),  # values
        np.array([2], dtype=np.int64))  # shape: max_values = 2

    expected_sp = (  # indices, values, shape
        np.array([[0], [3]], dtype=np.int64), np.array(["a", "b"], dtype="|S"),
        np.array([13], dtype=np.int64))  # max_values = 13

    expected_output = {
        "st_a": expected_st_a,
        "sp": expected_sp,
        "a": [a_default],
        "b": b_default,
        "c": np.array([3, 4], dtype=np.float32),
        "d": np.array([0.0, 1.0], dtype=np.float32),
    }

    self._test(
        {
            "example_names": ops.convert_to_tensor("in1"),
            "serialized": ops.convert_to_tensor(serialized),
            "features": test_features,
        }, expected_output)

    # Note: if example_names is None, then a different code-path gets used.
    self._test(
        {
            "serialized": ops.convert_to_tensor(serialized),
            "features": test_features,
        }, expected_output)

  def testSingleExampleWithAllFeatureTypes(self):
    """Parses one Example exercising every feature config, incl. ragged."""
    original = example(
        features=features({
            # FixLen features
            "c": float_feature([3, 4]),
            "d": float_feature([0.0, 1.0]),
            # Sparse features
            "val": bytes_feature([b"a", b"b"]),  # for sp
            "idx": int64_feature([0, 3]),  # for sp
            "st_a": float_feature([3.0, 4.0]),
            # Ragged features
            "rt_1d": float_feature([3.0, 4.0]),
            "rt_values": float_feature([5, 6, 7]),  # for rt_2d
            "rt_splits": int64_feature([0, 1, 1, 3]),  # for rt_2d
            "rt_lengths": int64_feature([1, 0, 2]),  # for rt_2d
            "rt_starts": int64_feature([0, 1, 1]),  # for rt_2d
            "rt_limits": int64_feature([1, 1, 3]),  # for rt_2d
            "rt_rowids": int64_feature([0, 2, 2]),  # for rt_2d
            "rt_splits2": int64_feature([0, 2, 3]),  # for rt_3d
        }))
    serialized = original.SerializeToString()

    a_default = [1, 2, 3]
    b_default = np.random.rand(3, 3).astype(bytes)
    test_features = {
        "st_a":
            parsing_ops.VarLenFeature(dtypes.float32),
        "sp":
            parsing_ops.SparseFeature(["idx"], "val", dtypes.string, [13]),
        "a":
            parsing_ops.FixedLenFeature((1, 3),
                                        dtypes.int64,
                                        default_value=a_default),
        "b":
            parsing_ops.FixedLenFeature((3, 3),
                                        dtypes.string,
                                        default_value=b_default),
        # Feature "c" must be provided, since it has no default_value.
        "c":
            parsing_ops.FixedLenFeature(2, dtypes.float32),
        "d":
            parsing_ops.FixedLenSequenceFeature([],
                                                dtypes.float32,
                                                allow_missing=True),
        "rt_1d":
            parsing_ops.RaggedFeature(dtypes.float32),
        # The five rt_2d_with_* features below all encode the SAME ragged
        # tensor, using each of the five row-partition encodings.
        "rt_2d_with_splits":
            parsing_ops.RaggedFeature(
                value_key="rt_values",
                partitions=[parsing_ops.RaggedFeature.RowSplits("rt_splits")],
                dtype=dtypes.float32),
        "rt_2d_with_lengths":
            parsing_ops.RaggedFeature(
                value_key="rt_values",
                partitions=[parsing_ops.RaggedFeature.RowLengths("rt_lengths")],
                dtype=dtypes.float32),
        "rt_2d_with_starts":
            parsing_ops.RaggedFeature(
                value_key="rt_values",
                partitions=[parsing_ops.RaggedFeature.RowStarts("rt_starts")],
                dtype=dtypes.float32),
        "rt_2d_with_limits":
            parsing_ops.RaggedFeature(
                value_key="rt_values",
                partitions=[parsing_ops.RaggedFeature.RowLimits("rt_limits")],
                dtype=dtypes.float32),
        "rt_2d_with_rowids":
            parsing_ops.RaggedFeature(
                value_key="rt_values",
                partitions=[parsing_ops.RaggedFeature.ValueRowIds("rt_rowids")],
                dtype=dtypes.float32),
        "rt_2d_with_uniform_row_length":
            parsing_ops.RaggedFeature(
                value_key="rt_values",
                partitions=[parsing_ops.RaggedFeature.UniformRowLength(1)],
                dtype=dtypes.float32),
        "rt_3d":
            parsing_ops.RaggedFeature(
                value_key="rt_values",
                partitions=[
                    parsing_ops.RaggedFeature.RowSplits("rt_splits2"),
                    parsing_ops.RaggedFeature.RowSplits("rt_splits")
                ],
                dtype=dtypes.float32),
        "rt_3d_with_uniform_row_length":
            parsing_ops.RaggedFeature(
                value_key="rt_values",
                partitions=[
                    parsing_ops.RaggedFeature.UniformRowLength(1),
                    parsing_ops.RaggedFeature.RowSplits("rt_splits")
                ],
                dtype=dtypes.float32),
    }

    expected_st_a = (
        np.array([[0], [1]], dtype=np.int64),  # indices
        np.array([3.0, 4.0], dtype=np.float32),  # values
        np.array([2], dtype=np.int64))  # shape: max_values = 2

    expected_sp = (  # indices, values, shape
        np.array([[0], [3]], dtype=np.int64), np.array(["a", "b"], dtype="|S"),
        np.array([13], dtype=np.int64))  # max_values = 13

    expected_rt_1d = constant_op.constant([3, 4], dtypes.float32)

    expected_rt_2d = ragged_factory_ops.constant([[5], [], [6, 7]],
                                                 dtype=dtypes.float32)

    # UniformRowLength(1) yields a dense (non-ragged) tensor.
    expected_rt_2d_uniform = constant_op.constant([[5], [6], [7]],
                                                  dtype=dtypes.float32)

    expected_rt_3d = ragged_factory_ops.constant([[[5], []], [[6, 7]]],
                                                 dtype=dtypes.float32)

    expected_rt_3d_with_uniform = (
        ragged_tensor.RaggedTensor.from_uniform_row_length(
            expected_rt_2d, uniform_row_length=1))

    expected_output = {
        "st_a": expected_st_a,
        "sp": expected_sp,
        "a": [a_default],
        "b": b_default,
        "c": np.array([3, 4], dtype=np.float32),
        "d": np.array([0.0, 1.0], dtype=np.float32),
        "rt_1d": expected_rt_1d,
        "rt_2d_with_splits": expected_rt_2d,
        "rt_2d_with_lengths": expected_rt_2d,
        "rt_2d_with_starts": expected_rt_2d,
        "rt_2d_with_limits": expected_rt_2d,
        "rt_2d_with_rowids": expected_rt_2d,
        "rt_2d_with_uniform_row_length": expected_rt_2d_uniform,
        "rt_3d": expected_rt_3d,
        "rt_3d_with_uniform_row_length": expected_rt_3d_with_uniform,
    }

    self._test(
        {
            "example_names": ops.convert_to_tensor("in1"),
            "serialized": ops.convert_to_tensor(serialized),
            "features": test_features,
        }, expected_output)
|
|
|
|
|
|
@test_util.run_all_in_graph_and_eager_modes
|
|
class ParseSequenceExampleTest(test.TestCase):
|
|
|
|
def testCreateSequenceExample(self):
|
|
value = sequence_example(
|
|
context=features({
|
|
"global_feature": float_feature([1, 2, 3]),
|
|
}),
|
|
feature_lists=feature_lists({
|
|
"repeated_feature_2_frames":
|
|
feature_list([
|
|
bytes_feature([b"a", b"b", b"c"]),
|
|
bytes_feature([b"a", b"d", b"e"])
|
|
]),
|
|
"repeated_feature_3_frames":
|
|
feature_list([
|
|
int64_feature([3, 4, 5, 6, 7]),
|
|
int64_feature([-1, 0, 0, 0, 0]),
|
|
int64_feature([1, 2, 3, 4, 5])
|
|
])
|
|
}))
|
|
value.SerializeToString() # Smoke test
|
|
|
|
  def _test(self,
            kwargs,
            expected_context_values=None,
            expected_feat_list_values=None,
            expected_length_values=None,
            expected_err=None,
            batch=False):
    """Parses sequence example(s) and checks outputs or the expected error.

    Args:
      kwargs: Keyword arguments for parse_sequence_example (when `batch` is
        True) or parse_single_sequence_example (when `batch` is False).
      expected_context_values: Dict mapping context keys to expected values.
      expected_feat_list_values: Dict mapping feature-list keys to expected
        values.
      expected_length_values: Dict mapping keys to expected sequence lengths
        (lengths are only produced by the batch op).
      expected_err: Optional (exception_class, message_predicate) tuple.
      batch: Whether to exercise the batch op or the single-example op.
    """
    expected_context_values = expected_context_values or {}
    expected_feat_list_values = expected_feat_list_values or {}
    expected_length_values = expected_length_values or {}

    if expected_err:
      with self.assertRaisesWithPredicateMatch(expected_err[0],
                                               expected_err[1]):
        if batch:
          self.evaluate(parsing_ops.parse_sequence_example(**kwargs))
        else:
          self.evaluate(parsing_ops.parse_single_sequence_example(**kwargs))
    else:
      if batch:
        (context_out, feat_list_out,
         lengths_out) = parsing_ops.parse_sequence_example(**kwargs)
      else:
        # The single-example op returns no lengths dict.
        (context_out,
         feat_list_out) = parsing_ops.parse_single_sequence_example(**kwargs)
        lengths_out = {}

      # Check values.
      _compare_output_to_expected(self, context_out, expected_context_values)
      _compare_output_to_expected(self, feat_list_out,
                                  expected_feat_list_values)
      _compare_output_to_expected(self, lengths_out, expected_length_values)

      # Check shapes; if serialized is a Tensor we need its size to
      # properly check.
      if "context_features" in kwargs:
        for k, f in kwargs["context_features"].items():
          if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
            if batch:
              # Skip the leading batch dimension.
              self.assertEqual(tuple(context_out[k].shape.as_list()[1:]), f.shape)
            else:
              self.assertEqual(tuple(context_out[k].shape.as_list()), f.shape)
          elif isinstance(f, parsing_ops.VarLenFeature) and batch:
            if context.executing_eagerly():
              context_out[k].indices.shape.assert_is_compatible_with([None, 2])
              context_out[k].values.shape.assert_is_compatible_with([None])
              context_out[k].dense_shape.shape.assert_is_compatible_with([2])
            else:
              self.assertEqual(context_out[k].indices.shape.as_list(), [None, 2])
              self.assertEqual(context_out[k].values.shape.as_list(), [None])
              self.assertEqual(context_out[k].dense_shape.shape.as_list(), [2])
          elif isinstance(f, parsing_ops.VarLenFeature) and not batch:
            if context.executing_eagerly():
              context_out[k].indices.shape.assert_is_compatible_with([None, 1])
              context_out[k].values.shape.assert_is_compatible_with([None])
              context_out[k].dense_shape.shape.assert_is_compatible_with([1])
            else:
              self.assertEqual(context_out[k].indices.shape.as_list(), [None, 1])
              self.assertEqual(context_out[k].values.shape.as_list(), [None])
              self.assertEqual(context_out[k].dense_shape.shape.as_list(), [1])
|
|
|
|
  def _testBoth(self,
                kwargs,
                expected_context_values=None,
                expected_feat_list_values=None,
                expected_err=None):
    """Runs one case via both the single-example op and the batch (size-1) op.

    NOTE: mutates `kwargs` in place (replaces "serialized"/"example_name"
    with their batched equivalents) before the second run.
    """
    # Test using tf.io.parse_single_sequence_example
    self._test(
        kwargs,
        expected_context_values=expected_context_values,
        expected_feat_list_values=expected_feat_list_values,
        expected_err=expected_err,
        batch=False)

    # Convert the input to a batch of size 1, and test using
    # tf.parse_sequence_example.

    # Some replacements are needed for the batch version.
    kwargs["serialized"] = [kwargs.pop("serialized")]
    kwargs["example_names"] = [kwargs.pop("example_name")
                              ] if "example_name" in kwargs else None

    # Add a batch dimension to expected output
    if expected_context_values:
      new_values = {}
      for k in expected_context_values:
        v = expected_context_values[k]
        if isinstance(kwargs["context_features"][k],
                      (parsing_ops.FixedLenFeature, parsing_ops.RaggedFeature)):
          new_values[k] = np.expand_dims(v, axis=0)
        else:
          # Sparse tensor: prepend a batch-index column of zeros to the
          # indices and a leading 1 to the dense shape.
          new_values[k] = (np.insert(v[0], 0, 0,
                                     axis=1), v[1], np.insert(v[2], 0, 1))
      expected_context_values = new_values

    expected_length_values = {}
    if expected_feat_list_values:
      new_values = {}
      for k in expected_feat_list_values:
        v = expected_feat_list_values[k]
        if isinstance(kwargs["sequence_features"][k],
                      parsing_ops.FixedLenSequenceFeature):
          # The batch op also reports the per-example sequence length.
          expected_length_values[k] = [np.shape(v)[0]]
          new_values[k] = np.expand_dims(v, axis=0)
        elif isinstance(kwargs["sequence_features"][k],
                        parsing_ops.RaggedFeature):
          new_values[k] = np.expand_dims(v, axis=0)
        else:
          # Sparse tensor.
          new_values[k] = (np.insert(v[0], 0, 0,
                                     axis=1), v[1], np.insert(v[2], 0, 1))
      expected_feat_list_values = new_values

    self._test(
        kwargs,
        expected_context_values=expected_context_values,
        expected_feat_list_values=expected_feat_list_values,
        expected_length_values=expected_length_values,
        expected_err=expected_err,
        batch=True)
|
|
|
|
  def testSequenceExampleWithSparseAndDenseContext(self):
    """Parses context features: VarLen, defaulted FixedLen, required FixedLen."""
    original = sequence_example(
        context=features({
            "c": float_feature([3, 4]),
            "st_a": float_feature([3.0, 4.0])
        }))

    serialized = original.SerializeToString()

    expected_st_a = (
        np.array([[0], [1]], dtype=np.int64),  # indices
        np.array([3.0, 4.0], dtype=np.float32),  # values
        np.array([2], dtype=np.int64))  # shape: num_features = 2

    a_default = [[1, 2, 3]]
    b_default = np.random.rand(3, 3).astype(bytes)
    expected_context_output = {
        "st_a": expected_st_a,
        # "a" and "b" are absent from the proto, so defaults are returned.
        "a": a_default,
        "b": b_default,
        "c": np.array([3, 4], dtype=np.float32),
    }

    self._testBoth(
        {
            "example_name": "in1",
            "serialized": ops.convert_to_tensor(serialized),
            "context_features": {
                "st_a":
                    parsing_ops.VarLenFeature(dtypes.float32),
                "a":
                    parsing_ops.FixedLenFeature(
                        (1, 3), dtypes.int64, default_value=a_default),
                "b":
                    parsing_ops.FixedLenFeature(
                        (3, 3), dtypes.string, default_value=b_default),
                # Feature "c" must be provided, since it has no default_value.
                "c":
                    parsing_ops.FixedLenFeature((2,), dtypes.float32),
            }
        },
        expected_context_values=expected_context_output)
|
|
|
|
  def testSequenceExampleWithMultipleSizeFeatureLists(self):
    """Parses FixedLenSequenceFeatures with different shapes and lengths."""
    original = sequence_example(
        feature_lists=feature_lists({
            "a":
                feature_list([
                    int64_feature([-1, 0, 1]),
                    int64_feature([2, 3, 4]),
                    int64_feature([5, 6, 7]),
                    int64_feature([8, 9, 10]),
                ]),
            "b":
                feature_list([bytes_feature([b"r00", b"r01", b"r10", b"r11"])]),
            "c":
                feature_list([float_feature([3, 4]),
                              float_feature([-1, 2])]),
        }))

    serialized = original.SerializeToString()

    expected_feature_list_output = {
        "a":
            np.array(
                [  # outer dimension is time.
                    [[-1, 0, 1]],  # inside are 1x3 matrices
                    [[2, 3, 4]],
                    [[5, 6, 7]],
                    [[8, 9, 10]]
                ],
                dtype=np.int64),
        "b":
            np.array(
                [  # outer dimension is time, inside are 2x2 matrices
                    [[b"r00", b"r01"], [b"r10", b"r11"]]
                ],
                dtype=bytes),
        "c":
            np.array(
                [  # outer dimension is time, inside are 2-vectors
                    [3, 4], [-1, 2]
                ],
                dtype=np.float32),
        "d":
            np.empty(shape=(0, 5), dtype=np.float32),  # empty_allowed_missing
    }

    self._testBoth(
        {
            "example_name": "in1",
            "serialized": ops.convert_to_tensor(serialized),
            "sequence_features": {
                "a":
                    parsing_ops.FixedLenSequenceFeature((1, 3), dtypes.int64),
                "b":
                    parsing_ops.FixedLenSequenceFeature((2, 2), dtypes.string),
                "c":
                    parsing_ops.FixedLenSequenceFeature(2, dtypes.float32),
                # "d" is absent from the proto but allow_missing=True.
                "d":
                    parsing_ops.FixedLenSequenceFeature(
                        (5,), dtypes.float32, allow_missing=True),
            }
        },
        expected_feat_list_values=expected_feature_list_output)
|
|
|
|
  def testSequenceExampleWithoutDebugName(self):
    """Same as the sparse+dense feature-list test, but with no example_name."""
    original = sequence_example(
        feature_lists=feature_lists({
            "a":
                feature_list([int64_feature([3, 4]),
                              int64_feature([1, 0])]),
            "st_a":
                feature_list([
                    float_feature([3.0, 4.0]),
                    float_feature([5.0]),
                    float_feature([])
                ]),
            "st_b":
                feature_list([
                    bytes_feature([b"a"]),
                    bytes_feature([]),
                    bytes_feature([]),
                    bytes_feature([b"b", b"c"])
                ])
        }))

    serialized = original.SerializeToString()

    expected_st_a = (
        np.array([[0, 0], [0, 1], [1, 0]], dtype=np.int64),  # indices
        np.array([3.0, 4.0, 5.0], dtype=np.float32),  # values
        np.array([3, 2], dtype=np.int64))  # shape: num_time = 3, max_feat = 2

    expected_st_b = (
        np.array([[0, 0], [3, 0], [3, 1]], dtype=np.int64),  # indices
        np.array(["a", "b", "c"], dtype="|S"),  # values
        np.array([4, 2], dtype=np.int64))  # shape: num_time = 4, max_feat = 2

    # "st_c" is absent from the proto; VarLen yields an empty sparse tensor.
    expected_st_c = (
        np.empty((0, 2), dtype=np.int64),  # indices
        np.empty((0,), dtype=np.int64),  # values
        np.array([0, 0], dtype=np.int64))  # shape: num_time = 0, max_feat = 0

    expected_feature_list_output = {
        "a": np.array([[3, 4], [1, 0]], dtype=np.int64),
        "st_a": expected_st_a,
        "st_b": expected_st_b,
        "st_c": expected_st_c,
    }

    self._testBoth(
        {
            "serialized": ops.convert_to_tensor(serialized),
            "sequence_features": {
                "st_a": parsing_ops.VarLenFeature(dtypes.float32),
                "st_b": parsing_ops.VarLenFeature(dtypes.string),
                "st_c": parsing_ops.VarLenFeature(dtypes.int64),
                "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64),
            }
        },
        expected_feat_list_values=expected_feature_list_output)
|
|
|
|
  def testSequenceExampleWithSparseAndDenseFeatureLists(self):
    """Parses VarLen and FixedLen feature lists (with an example_name set)."""
    original = sequence_example(
        feature_lists=feature_lists({
            "a":
                feature_list([int64_feature([3, 4]),
                              int64_feature([1, 0])]),
            "st_a":
                feature_list([
                    float_feature([3.0, 4.0]),
                    float_feature([5.0]),
                    float_feature([])
                ]),
            "st_b":
                feature_list([
                    bytes_feature([b"a"]),
                    bytes_feature([]),
                    bytes_feature([]),
                    bytes_feature([b"b", b"c"])
                ])
        }))

    serialized = original.SerializeToString()

    expected_st_a = (
        np.array([[0, 0], [0, 1], [1, 0]], dtype=np.int64),  # indices
        np.array([3.0, 4.0, 5.0], dtype=np.float32),  # values
        np.array([3, 2], dtype=np.int64))  # shape: num_time = 3, max_feat = 2

    expected_st_b = (
        np.array([[0, 0], [3, 0], [3, 1]], dtype=np.int64),  # indices
        np.array(["a", "b", "c"], dtype="|S"),  # values
        np.array([4, 2], dtype=np.int64))  # shape: num_time = 4, max_feat = 2

    # "st_c" is absent from the proto; VarLen yields an empty sparse tensor.
    expected_st_c = (
        np.empty((0, 2), dtype=np.int64),  # indices
        np.empty((0,), dtype=np.int64),  # values
        np.array([0, 0], dtype=np.int64))  # shape: num_time = 0, max_feat = 0

    expected_feature_list_output = {
        "a": np.array([[3, 4], [1, 0]], dtype=np.int64),
        "st_a": expected_st_a,
        "st_b": expected_st_b,
        "st_c": expected_st_c,
    }

    self._testBoth(
        {
            "example_name": "in1",
            "serialized": ops.convert_to_tensor(serialized),
            "sequence_features": {
                "st_a": parsing_ops.VarLenFeature(dtypes.float32),
                "st_b": parsing_ops.VarLenFeature(dtypes.string),
                "st_c": parsing_ops.VarLenFeature(dtypes.int64),
                "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64),
            }
        },
        expected_feat_list_values=expected_feature_list_output)
|
|
|
|
def testSequenceExampleWithEmptyFeatureInFeatureLists(self):
|
|
original = sequence_example(
|
|
feature_lists=feature_lists({
|
|
"st_a":
|
|
feature_list([
|
|
float_feature([3.0, 4.0]),
|
|
feature(),
|
|
float_feature([5.0]),
|
|
]),
|
|
}))
|
|
|
|
serialized = original.SerializeToString()
|
|
|
|
expected_st_a = (
|
|
np.array([[0, 0], [0, 1], [2, 0]], dtype=np.int64), # indices
|
|
np.array([3.0, 4.0, 5.0], dtype=np.float32), # values
|
|
np.array([3, 2], dtype=np.int64)) # shape: num_time = 3, max_feat = 2
|
|
|
|
expected_feature_list_output = {
|
|
"st_a": expected_st_a,
|
|
}
|
|
|
|
self._testBoth(
|
|
{
|
|
"example_name": "in1",
|
|
"serialized": ops.convert_to_tensor(serialized),
|
|
"sequence_features": {
|
|
"st_a": parsing_ops.VarLenFeature(dtypes.float32),
|
|
}
|
|
},
|
|
expected_feat_list_values=expected_feature_list_output)
|
|
|
|
def testSequenceExampleListWithInconsistentDataFails(self):
|
|
original = sequence_example(
|
|
feature_lists=feature_lists({
|
|
"a": feature_list([int64_feature([-1, 0]),
|
|
float_feature([2, 3])])
|
|
}))
|
|
|
|
serialized = original.SerializeToString()
|
|
|
|
self._testBoth(
|
|
{
|
|
"example_name": "in1",
|
|
"serialized": ops.convert_to_tensor(serialized),
|
|
"sequence_features": {
|
|
"a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
|
|
}
|
|
},
|
|
expected_err=(errors_impl.OpError, "Feature list: a, Index: 1."
|
|
" Data types don't match. Expected type: int64"))
|
|
|
|
def testSequenceExampleListWithWrongDataTypeFails(self):
|
|
original = sequence_example(
|
|
feature_lists=feature_lists(
|
|
{"a": feature_list([float_feature([2, 3])])}))
|
|
|
|
serialized = original.SerializeToString()
|
|
|
|
self._testBoth(
|
|
{
|
|
"example_name": "in1",
|
|
"serialized": ops.convert_to_tensor(serialized),
|
|
"sequence_features": {
|
|
"a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
|
|
}
|
|
},
|
|
expected_err=(errors_impl.OpError,
|
|
"Feature list: a, Index: 0. Data types don't match."
|
|
" Expected type: int64"))
|
|
|
|
def testSequenceExampleListWithWrongSparseDataTypeFails(self):
|
|
original = sequence_example(
|
|
feature_lists=feature_lists({
|
|
"a":
|
|
feature_list([
|
|
int64_feature([3, 4]),
|
|
int64_feature([1, 2]),
|
|
float_feature([2.0, 3.0])
|
|
])
|
|
}))
|
|
|
|
serialized = original.SerializeToString()
|
|
|
|
self._testBoth(
|
|
{
|
|
"example_name": "in1",
|
|
"serialized": ops.convert_to_tensor(serialized),
|
|
"sequence_features": {
|
|
"a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
|
|
}
|
|
},
|
|
expected_err=(errors_impl.OpError,
|
|
"Name: in1, Feature list: a, Index: 2."
|
|
" Data types don't match. Expected type: int64"))
|
|
|
|
  def testSequenceExampleListWithWrongShapeFails(self):
    """A frame whose length disagrees with the declared shape must fail."""
    original = sequence_example(
        feature_lists=feature_lists({
            "a":
                feature_list([int64_feature([2, 3]),
                              int64_feature([2, 3, 4])]),
        }))

    serialized = original.SerializeToString()

    self._testBoth(
        {
            "example_name": "in1",
            "serialized": ops.convert_to_tensor(serialized),
            "sequence_features": {
                "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
            }
        },
        # The two parse implementations word this error differently, so the
        # expected pattern is an alternation matching either one.
        expected_err=(
            errors_impl.OpError,
            # message from ParseSingleExample.
            r"Name: in1, Key: a, Index: 1."
            r" Number of int64 values != expected."
            r" values size: 3 but output shape: \[2\]"
            # or message from FastParseSequenceExample
            r"|Feature list 'a' has an unexpected number of values. "
            r"Total values size: 5 is not consistent with output "
            r"shape: \[\?,2\]"))
|
|
|
|
  def testSequenceExampleListWithWrongShapeFails2(self):
    """A per-row shape mismatch (correct total count) must also fail."""
    # This exercises a different code path for FastParseSequenceExample than
    # testSequenceExampleListWithWrongShapeFails (in that test, we can tell that
    # the shape is bad based on the total number of values; in this test, we
    # can't tell the shape is bad until we look at individual rows.)
    original = sequence_example(
        feature_lists=feature_lists({
            "a": feature_list([int64_feature([2]),
                               int64_feature([2, 3, 4])]),
        }))

    serialized = original.SerializeToString()

    self._testBoth(
        {
            "example_name": "in1",
            "serialized": ops.convert_to_tensor(serialized),
            "sequence_features": {
                "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
            }
        },
        expected_err=(errors_impl.OpError, r"Name: in1, Key: a, Index: 0."
                      r" Number of (int64 )?values != expected."
                      r" values size: 1 but output shape: \[2\]"))
|
|
|
|
def testSequenceExampleWithMissingFeatureListFails(self):
|
|
original = sequence_example(feature_lists=feature_lists({}))
|
|
|
|
# Test fails because we didn't add:
|
|
# feature_list_dense_defaults = {"a": None}
|
|
self._testBoth(
|
|
{
|
|
"example_name": "in1",
|
|
"serialized": ops.convert_to_tensor(original.SerializeToString()),
|
|
"sequence_features": {
|
|
"a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
|
|
}
|
|
},
|
|
expected_err=(
|
|
errors_impl.OpError,
|
|
"Name: in1, Feature list 'a' is required but could not be found."
|
|
" Did you mean to include it in"
|
|
" feature_list_dense_missing_assumed_empty or"
|
|
" feature_list_dense_defaults?"))
|
|
|
|
  def testSequenceExampleBatch(self):
    """Parses a batch of two SequenceExamples with padding and defaults."""
    first = sequence_example(
        feature_lists=feature_lists({
            "a":
                feature_list([
                    int64_feature([-1, 0, 1]),
                    int64_feature([2, 3, 4]),
                    int64_feature([5, 6, 7]),
                    int64_feature([8, 9, 10]),
                ])
        }))
    second = sequence_example(
        context=features({"c": float_feature([7])}),
        feature_lists=feature_lists({
            "a": feature_list([
                int64_feature([21, 2, 11]),
            ]),
            "b": feature_list([
                int64_feature([5]),
            ]),
        }))

    serialized = [first.SerializeToString(), second.SerializeToString()]

    expected_context_output = {
        # First example has no "c"; the default value -1 is used.
        "c": np.array([-1, 7], dtype=np.float32),
    }
    expected_feature_list_output = {
        "a":
            np.array(
                [  # outermost dimension is example id
                    [  # middle dimension is time.
                        [[-1, 0, 1]],  # inside are 1x3 matrices
                        [[2, 3, 4]],
                        [[5, 6, 7]],
                        [[8, 9, 10]]
                    ],
                    [  # middle dimension is time.
                        [[21, 2, 11]],  # inside are 1x3 matrices
                        [[0, 0, 0]],  # additional entries are padded with 0
                        [[0, 0, 0]],
                        [[0, 0, 0]]
                    ]
                ],
                dtype=np.int64),
        "b":
            np.array([[0], [5]], dtype=np.int64),
        "d":
            np.empty(shape=(2, 0, 5), dtype=np.float32),  # allowed_missing
    }

    self._test(
        {
            "example_names": ops.convert_to_tensor(["in1", "in2"]),
            "serialized": ops.convert_to_tensor(serialized),
            "context_features": {
                "c":
                    parsing_ops.FixedLenFeature(
                        (), dtypes.float32, default_value=-1),
            },
            "sequence_features": {
                "a":
                    parsing_ops.FixedLenSequenceFeature((1, 3), dtypes.int64),
                "b":
                    parsing_ops.FixedLenSequenceFeature(
                        (), dtypes.int64, allow_missing=True),
                "d":
                    parsing_ops.FixedLenSequenceFeature(
                        (5,), dtypes.float32, allow_missing=True),
            }
        },
        expected_context_values=expected_context_output,
        expected_feat_list_values=expected_feature_list_output,
        # Per-example (unpadded) sequence lengths reported by the batch op.
        expected_length_values={
            "a": [4, 1],
            "b": [0, 1],
            "d": [0, 0]
        },
        batch=True)
|
|
|
|
  def testSerializedContainingRaggedFeatureWithNoPartitions(self):
    """Parses partition-less RaggedFeatures in context and feature lists.

    Exercises the batch op (batch sizes 4 and 256) and the single-example op
    on the same fixtures.
    """
    original = [
        sequence_example(
            context=features({"a": float_feature([3, 4])}),
            feature_lists=feature_lists({
                "b": feature_list([float_feature([5]),
                                   float_feature([3])]),
                "c": feature_list([int64_feature([6, 7, 8, 9])])
            })),
        sequence_example(
            context=features({"a": float_feature([9])}),
            feature_lists=feature_lists({
                "b": feature_list([]),
                "c": feature_list([int64_feature([]),
                                   int64_feature([1, 2, 3])])
            })),
        sequence_example(
            feature_lists=feature_lists({
                "b":
                    feature_list([
                        float_feature([1]),
                        float_feature([1, 2]),
                        float_feature([1, 2, 3])
                    ])
            })),
        sequence_example(
            # An explicitly-present but empty feature/frame.
            context=features({"a": feature()}),
            feature_lists=feature_lists({
                "b": feature_list([feature()]),
                "c": feature_list([int64_feature([3, 3, 3])])
            }))
    ]
    serialized = [m.SerializeToString() for m in original]

    context_features = {"a": parsing_ops.RaggedFeature(dtype=dtypes.float32)}
    sequence_features = {
        "b":
            parsing_ops.RaggedFeature(dtype=dtypes.float32),
        "c":
            parsing_ops.RaggedFeature(
                dtype=dtypes.int64, row_splits_dtype=dtypes.int64)
    }

    expected_a = ragged_factory_ops.constant([[3, 4], [9], [], []],
                                             dtype=dtypes.float32,
                                             row_splits_dtype=dtypes.int32)
    expected_b = ragged_factory_ops.constant(
        [[[5], [3]], [], [[1], [1, 2], [1, 2, 3]], [[]]],
        dtype=dtypes.float32,
        row_splits_dtype=dtypes.int32)
    expected_c = ragged_factory_ops.constant(
        [[[6, 7, 8, 9]], [[], [1, 2, 3]], [], [[3, 3, 3]]],
        dtype=dtypes.int64,
        row_splits_dtype=dtypes.int64)

    expected_context_output = dict(a=expected_a)
    expected_feature_list_output = dict(b=expected_b, c=expected_c)

    self._test(
        {
            "serialized": ops.convert_to_tensor(serialized),
            "context_features": context_features,
            "sequence_features": sequence_features,
        },
        expected_context_output,
        expected_feature_list_output,
        batch=True)

    # Single-example path: parse just the first example (no batch dimension).
    self._test(
        {
            "serialized": ops.convert_to_tensor(serialized)[0],
            "context_features": context_features,
            "sequence_features": sequence_features,
        },
        expected_context_values={"a": [3, 4]},
        expected_feat_list_values={
            "b": [[5], [3]],
            "c": [[6, 7, 8, 9]]
        },
        batch=False)

    # Test with a larger batch of examples.
    batch_serialized = serialized * 64
    batch_context_expected_out = {
        "a": ragged_concat_ops.concat([expected_a] * 64, axis=0)
    }
    batch_feature_list_expected_out = {
        "b": ragged_concat_ops.concat([expected_b] * 64, axis=0),
        "c": ragged_concat_ops.concat([expected_c] * 64, axis=0)
    }
    self._test(
        {
            "serialized": ops.convert_to_tensor(batch_serialized),
            "context_features": context_features,
            "sequence_features": sequence_features,
        },
        batch_context_expected_out,
        batch_feature_list_expected_out,
        batch=True)
|
|
|
|
  def testSerializedContainingNestedRaggedFeature(self):
    """Test RaggedFeatures with nested partitions.

    Builds three serialized SequenceExamples whose context feature "a" and
    feature-list feature "b" store flattened values plus explicit partition
    tensors (row lengths / row splits), then checks that parsing reassembles
    them into the expected nested RaggedTensors — once with a batch of
    examples (batch=True) and once with a single scalar example
    (batch=False).
    """
    original = [
        # rt shape: [(batch), 2, None, None]
        sequence_example(
            context=features({
                # a[0] = [[[[1]], [[2, 3], [4]]], [[], [[5, 6, 7]]]]
                "a_values": float_feature([1, 2, 3, 4, 5, 6, 7]),
                "a_lengths_axis2": int64_feature([1, 2, 0, 1]),
                "a_lengths_axis3": int64_feature([1, 2, 1, 3]),
                "a_splits_axis3": int64_feature([0, 1, 3, 4, 7])
            }),
            feature_lists=feature_lists({
                # b[0] = [[[1], [2, 3, 4]], [[2, 4], [6]]]
                "b_values":
                    feature_list(
                        [float_feature([1, 2, 3, 4]),
                         float_feature([2, 4, 6])]),
                "b_splits":
                    feature_list(
                        [int64_feature([0, 1, 4]),
                         int64_feature([0, 2, 3])]),
            })),
        sequence_example(
            # a[1] = []
            # b[1] = []
        ),
        sequence_example(
            context=features({
                # a[2] = [[[[1, 2, 3], [4]], [[5], [6], [7, 8]]]]
                "a_values": float_feature([1, 2, 3, 4, 5, 6, 7, 8]),
                "a_lengths_axis2": int64_feature([2, 3]),
                "a_lengths_axis3": int64_feature([3, 1, 1, 1, 2]),
                "a_splits_axis3": int64_feature([0, 3, 4, 5, 6, 8])
            }),
            feature_lists=feature_lists({
                # b[2] = [[[9], [8, 7, 6], [5]], [[4, 3, 2, 1]], [[0]]]
                "b_values":
                    feature_list([
                        float_feature([9, 8, 7, 6, 5]),
                        float_feature([4, 3, 2, 1]),
                        float_feature([0])
                    ]),
                "b_splits":
                    feature_list([
                        int64_feature([0, 1, 4, 5]),
                        int64_feature([0, 4]),
                        int64_feature([0, 1])
                    ])
            }))
    ]
    serialized = [m.SerializeToString() for m in original]

    # "a" is partitioned (outermost partition first) by a uniform row length
    # of 2, then by per-row lengths, then by row splits — the latter two read
    # from sibling context features named by the partition objects.
    context_features = {
        "a":
            parsing_ops.RaggedFeature(
                value_key="a_values",
                partitions=[
                    parsing_ops.RaggedFeature.UniformRowLength(2),
                    parsing_ops.RaggedFeature.RowLengths("a_lengths_axis2"),
                    parsing_ops.RaggedFeature.RowSplits("a_splits_axis3"),
                ],
                dtype=dtypes.float32,
                row_splits_dtype=dtypes.int64,
            )
    }
    # "b" and "c" share the same underlying values ("b_values"); "c"
    # repartitions them into uniform rows of length 1 instead of using the
    # explicit splits.
    sequence_features = {
        "b":
            parsing_ops.RaggedFeature(
                value_key="b_values",
                dtype=dtypes.float32,
                partitions=[parsing_ops.RaggedFeature.RowSplits("b_splits")]),
        "c":
            parsing_ops.RaggedFeature(
                value_key="b_values",
                dtype=dtypes.float32,
                partitions=[parsing_ops.RaggedFeature.UniformRowLength(1)]),
    }

    expected_context = {
        "a":
            ragged_factory_ops.constant(
                [[[[[1]], [[2, 3], [4]]], [[], [[5, 6, 7]]]], [],
                 [[[[1, 2, 3], [4]], [[5], [6], [7, 8]]]]],
                dtype=dtypes.float32,
                row_splits_dtype=dtypes.int64)
    }
    expected_feature_list = {
        "b":
            ragged_factory_ops.constant(
                [[[[1], [2, 3, 4]], [[2, 4], [6]]], [],
                 [[[9], [8, 7, 6], [5]], [[4, 3, 2, 1]], [[0]]]],
                dtype=dtypes.float32,
                row_splits_dtype=dtypes.int32),
        "c":
            ragged_factory_ops.constant(
                [[[[1], [2], [3], [4]], [[2], [4], [6]]], [],
                 [[[9], [8], [7], [6], [5]], [[4], [3], [2], [1]], [[0]]]],
                ragged_rank=2,
                dtype=dtypes.float32,
                row_splits_dtype=dtypes.int32),
    }

    # Batched parse: all three serialized examples at once.
    self._test(
        dict(
            serialized=ops.convert_to_tensor(serialized),
            context_features=context_features,
            sequence_features=sequence_features),
        expected_context,
        expected_feature_list,
        batch=True)

    # Single-example parse: only the first example, so the expected values
    # are row 0 of each batched expectation.
    self._test(
        dict(
            serialized=ops.convert_to_tensor(serialized)[0],
            context_features=context_features,
            sequence_features=sequence_features),
        {"a": expected_context["a"][0]}, {
            "b": expected_feature_list["b"][0],
            "c": expected_feature_list["c"][0]
        },
        batch=False)
|
|
|
|
def testSerializedContainingMisalignedNestedRaggedFeature(self):
|
|
"""FeatureList with 2 value tensors but only one splits tensor."""
|
|
original = sequence_example(
|
|
feature_lists=feature_lists({
|
|
"b_values":
|
|
feature_list(
|
|
[float_feature([1, 2, 3, 4]),
|
|
float_feature([2, 4, 6])]),
|
|
"b_splits":
|
|
feature_list([int64_feature([0, 1, 4])]),
|
|
}))
|
|
sequence_features = {
|
|
"b":
|
|
parsing_ops.RaggedFeature(
|
|
value_key="b_values",
|
|
dtype=dtypes.float32,
|
|
partitions=[parsing_ops.RaggedFeature.RowSplits("b_splits")],
|
|
validate=True)
|
|
}
|
|
self._testBoth(
|
|
dict(
|
|
serialized=ops.convert_to_tensor(original.SerializeToString()),
|
|
sequence_features=sequence_features),
|
|
expected_err=(
|
|
(errors_impl.InvalidArgumentError, ValueError),
|
|
# Message for batch=true:
|
|
"Feature b: values and partitions are not aligned"
|
|
# Message for batch=false in graph mode:
|
|
"|.* do not form a valid RaggedTensor"
|
|
# Message for batch=false in eager mode:
|
|
"|Incompatible shapes"))
|
|
|
|
|
|
@test_util.run_all_in_graph_and_eager_modes
class DecodeRawTest(test.TestCase):
  """Tests for decode_raw / decode_raw_v1 (string bytes -> uint8 tensor)."""

  def _decode_v1(self, words):
    # Decode with the v1 op, which requires every string to have the same
    # length (no fixed_length padding support).
    with self.cached_session():
      examples = np.array(words)
      example_tensor = constant_op.constant(
          examples, shape=examples.shape, dtype=dtypes.string)
      return self.evaluate(
          parsing_ops.decode_raw_v1(example_tensor, dtypes.uint8))

  def _decode_v2(self, words, fixed_length=None):
    # Decode with the v2 op; fixed_length pads/truncates each string.
    with self.cached_session():
      return self.evaluate(
          parsing_ops.decode_raw(
              np.array(words), dtypes.uint8, fixed_length=fixed_length))

  def _ordinalize(self, words, fixed_length=None):
    # Reference implementation: ord() of each character, zero-padded out to
    # fixed_length (defaults to the length of the first word).
    if fixed_length is None:
      fixed_length = len(words[0])
    return np.array([
        [ord(word[i]) if i < len(word) else 0 for i in range(fixed_length)]
        for word in words
    ])

  def testDecodeRawV1EqualLength(self):
    words = ["string1", "string2"]
    observed = self._decode_v1(words)
    expected = self._ordinalize(words)
    self.assertAllEqual(expected.shape, observed.shape)
    self.assertAllEqual(expected, observed)

  def testDecodeRawV2FallbackEqualLength(self):
    words = ["string1", "string2"]
    observed = self._decode_v2(words)
    expected = self._ordinalize(words)
    self.assertAllEqual(expected.shape, observed.shape)
    self.assertAllEqual(expected, observed)

  def testDecodeRawV1VariableLength(self):
    # Unequal lengths are an error for the v1 op.
    with self.assertRaises(errors_impl.InvalidArgumentError):
      self._decode_v1(["string", "longer_string"])

  def testDecodeRawV2FallbackVariableLength(self):
    # Without fixed_length, v2 behaves like v1 and rejects unequal lengths.
    with self.assertRaises(errors_impl.InvalidArgumentError):
      self._decode_v2(["string", "longer_string"])

  def testDecodeRawV2VariableLength(self):
    words = ["string", "longer_string"]
    observed = self._decode_v2(words, fixed_length=8)
    expected = self._ordinalize(words, fixed_length=8)
    self.assertAllEqual(expected.shape, observed.shape)
    self.assertAllEqual(expected, observed)
|
|
|
|
|
@test_util.run_all_in_graph_and_eager_modes
class DecodeJSONExampleTest(test.TestCase):
  """Tests for decode_json_example: JSON Example -> binary proto round trips."""

  def _testRoundTrip(self, examples):
    """Serializes `examples` to JSON, decodes, and compares proto-for-proto.

    Args:
      examples: an Example proto, or a (possibly nested) list of them; the
        list structure determines the shape of the input string tensor.
    """
    # `dtype=object` (the builtin): `np.object` was deprecated in NumPy 1.20
    # and removed in 1.24, and was only ever an alias for builtin `object`.
    examples = np.array(examples, dtype=object)

    json_tensor = constant_op.constant(
        [json_format.MessageToJson(m) for m in examples.flatten()],
        shape=examples.shape,
        dtype=dtypes.string)
    binary_tensor = parsing_ops.decode_json_example(json_tensor)
    binary_val = self.evaluate(binary_tensor)

    if examples.shape:
      self.assertShapeEqual(binary_val, json_tensor)
      for input_example, output_binary in zip(
          np.array(examples).flatten(), binary_val.flatten()):
        output_example = example_pb2.Example()
        output_example.ParseFromString(output_binary)
        self.assertProtoEquals(input_example, output_example)
    else:
      # Scalar input: binary_val is a single serialized proto.
      output_example = example_pb2.Example()
      output_example.ParseFromString(binary_val)
      self.assertProtoEquals(examples.item(), output_example)

  def testEmptyTensor(self):
    self._testRoundTrip([])
    self._testRoundTrip([[], [], []])

  def testEmptyExamples(self):
    self._testRoundTrip([example(), example(), example()])

  def testDenseFeaturesScalar(self):
    self._testRoundTrip(
        example(features=features({"a": float_feature([1, 1, 3])})))

  def testDenseFeaturesVector(self):
    self._testRoundTrip([
        example(features=features({"a": float_feature([1, 1, 3])})),
        example(features=features({"a": float_feature([-1, -1, 2])})),
    ])

  def testDenseFeaturesMatrix(self):
    self._testRoundTrip([
        [example(features=features({"a": float_feature([1, 1, 3])}))],
        [example(features=features({"a": float_feature([-1, -1, 2])}))],
    ])

  def testSparseFeatures(self):
    self._testRoundTrip([
        example(features=features({"st_c": float_feature([3, 4])})),
        example(features=features({"st_c": float_feature([])})),
        example(features=features({"st_d": feature()})),
        example(
            features=features({
                "st_c": float_feature([1, 2, -1]),
                "st_d": bytes_feature([b"hi"])
            })),
    ])

  def testSerializedContainingBytes(self):
    aname = "a"
    bname = "b*has+a:tricky_name"
    self._testRoundTrip([
        example(
            features=features({
                aname: float_feature([1, 1]),
                bname: bytes_feature([b"b0_str"])
            })),
        example(
            features=features({
                aname: float_feature([-1, -1]),
                bname: bytes_feature([b"b1"])
            })),
    ])

  def testInvalidSyntax(self):
    json_tensor = constant_op.constant(["{]"])
    if context.executing_eagerly():
      # assertRaisesRegex: assertRaisesRegexp is the deprecated Python 2
      # alias (removed in Python 3.12).
      with self.assertRaisesRegex(errors.InvalidArgumentError,
                                  "Error while parsing JSON"):
        parsing_ops.decode_json_example(json_tensor)
    else:
      # In graph mode the error surfaces only when the op is executed.
      binary_tensor = parsing_ops.decode_json_example(json_tensor)
      with self.assertRaisesOpError("Error while parsing JSON"):
        self.evaluate(binary_tensor)
|
|
|
|
|
|
class ParseTensorOpTest(test.TestCase):
  """Tests for parsing_ops.parse_tensor (serialized TensorProto -> tensor)."""

  def _random_input(self, np_dtype):
    """Returns a random (3, 4, 5) array of the given numpy dtype."""
    if np.issubdtype(np_dtype, np.integer):
      # np.random.rand yields floats in [0, 1); casting those to an integer
      # dtype truncates every value to 0, which made the original uint8
      # round trip vacuous.  Use randint so real values are exercised.
      return np.random.randint(0, 256, size=(3, 4, 5)).astype(np_dtype)
    return np.random.rand(3, 4, 5).astype(np_dtype)

  def _assert_round_trip(self, np_dtype, tf_dtype):
    """Serializes a random tensor and checks parse_tensor recovers it."""
    with self.cached_session():
      expected = self._random_input(np_dtype)
      tensor_proto = tensor_util.make_tensor_proto(expected)

      serialized = array_ops.placeholder(dtypes.string)
      tensor = parsing_ops.parse_tensor(serialized, tf_dtype)

      result = tensor.eval(
          feed_dict={serialized: tensor_proto.SerializeToString()})

      self.assertAllEqual(expected, result)

  @test_util.run_deprecated_v1
  def testToFloat32(self):
    self._assert_round_trip(np.float32, dtypes.float32)

  @test_util.run_deprecated_v1
  def testToUint8(self):
    self._assert_round_trip(np.uint8, dtypes.uint8)

  @test_util.run_deprecated_v1
  def testTypeMismatch(self):
    # Serialize a uint8 tensor but ask parse_tensor for uint16: must fail.
    with self.cached_session():
      expected = self._random_input(np.uint8)
      tensor_proto = tensor_util.make_tensor_proto(expected)

      serialized = array_ops.placeholder(dtypes.string)
      tensor = parsing_ops.parse_tensor(serialized, dtypes.uint16)

      with self.assertRaisesOpError(
          r"Type mismatch between parsed tensor \(uint8\) and dtype "
          r"\(uint16\)"):
        tensor.eval(feed_dict={serialized: tensor_proto.SerializeToString()})

  @test_util.run_deprecated_v1
  def testInvalidInput(self):
    with self.cached_session():
      serialized = array_ops.placeholder(dtypes.string)
      tensor = parsing_ops.parse_tensor(serialized, dtypes.uint16)

      # Garbage bytes are not a valid TensorProto.
      with self.assertRaisesOpError(
          "Could not parse `serialized` as TensorProto: 'bogus'"):
        tensor.eval(feed_dict={serialized: "bogus"})

      # The serialized input must be a scalar string, not a vector.
      with self.assertRaisesOpError(
          r"Expected `serialized` to be a scalar, got shape: \[1\]"):
        tensor.eval(feed_dict={serialized: ["bogus"]})
|
|
|
|
|
# Run all test cases in this module when executed as a script.
if __name__ == "__main__":
  test.main()