Move tf.keras.experimental.SequenceFeatures to keras package.
PiperOrigin-RevId: 309421612 Change-Id: I55e17386071dad91cce2e2500f30fc9e3c3cf657
parent a952fa1b1c
commit c53eca0d6a
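For orientation, this is roughly how the layer being moved is exercised through its public symbol, `tf.keras.experimental.SequenceFeatures`, which this commit does not change. A minimal sketch; the feature name and input values are illustrative only:

```python
import tensorflow as tf

# One sequence-aware column; the feature name 'rating' is illustrative.
rating = tf.feature_column.sequence_numeric_column('rating')
sequence_input_layer = tf.keras.experimental.SequenceFeatures([rating])

# Two variable-length examples: [1., 2.] and [3.].
features = {
    'rating': tf.sparse.SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=[1., 2., 3.],
        dense_shape=[2, 2]),
}

sequence_input, sequence_length = sequence_input_layer(features)
print(sequence_input.shape)     # (2, 2, 1)
print(sequence_length.numpy())  # [2 1]
```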
@@ -13,6 +13,7 @@ py_library(
         ":feature_column",
         ":feature_column_v2",
         "//tensorflow/python:util",
+        "//tensorflow/python/keras/feature_column",
     ],
 )
 
@@ -27,4 +27,5 @@ from tensorflow.python.feature_column.feature_column import *
 from tensorflow.python.feature_column.feature_column_v2 import *
 from tensorflow.python.feature_column.sequence_feature_column import *
 from tensorflow.python.feature_column.serialization import *
+from tensorflow.python.keras.feature_column.sequence_feature_column import *
 # pylint: enable=unused-import,line-too-long
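Because the keras implementation is star-imported back into the feature_column package above, the public alias and the new keras module should expose the same class after this change. A quick check along those lines (a sketch; it assumes a TensorFlow build that already contains this commit):

```python
import tensorflow as tf
from tensorflow.python.keras.feature_column import (
    sequence_feature_column as ksfc)

# The public experimental symbol should now resolve to the keras-package class.
assert tf.keras.experimental.SequenceFeatures is ksfc.SequenceFeatures
```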
@@ -316,14 +316,6 @@ class FeatureColumnsIntegrationTest(keras_parameterized.TestCase):
     self.assertIsInstance(revived, fc.DenseFeatures)
     self.assertNotIsInstance(revived, dense_features_v2.DenseFeatures)
 
-  def test_serialization_sequence_features(self):
-    rating = fc.sequence_numeric_column('rating')
-    sequence_feature = fc.SequenceFeatures([rating])
-    config = keras.layers.serialize(sequence_feature)
-
-    revived = keras.layers.deserialize(config)
-    self.assertIsInstance(revived, fc.SequenceFeatures)
-
   # This test is an example for a regression on categorical inputs, i.e.,
   # the output is 0.4, 0.6, 0.9 when input is 'alpha', 'beta', 'gamma'
   # separately.
@@ -30,156 +30,14 @@ from tensorflow.python.feature_column import utils as fc_utils
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
-from tensorflow.python.keras import backend
-from tensorflow.python.keras.layers import serialization as layer_serialization
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import parsing_ops
 from tensorflow.python.ops import sparse_ops
-from tensorflow.python.util.tf_export import keras_export
 from tensorflow.python.util.tf_export import tf_export
 
 
 # pylint: disable=protected-access
-
-
-@keras_export('keras.experimental.SequenceFeatures')
-class SequenceFeatures(fc._BaseFeaturesLayer):
-  """A layer for sequence input.
-
-  All `feature_columns` must be sequence dense columns with the same
-  `sequence_length`. The output of this method can be fed into sequence
-  networks, such as RNN.
-
-  The output of this method is a 3D `Tensor` of shape `[batch_size, T, D]`.
-  `T` is the maximum sequence length for this batch, which could differ from
-  batch to batch.
-
-  If multiple `feature_columns` are given with `Di` `num_elements` each, their
-  outputs are concatenated. So, the final `Tensor` has shape
-  `[batch_size, T, D0 + D1 + ... + Dn]`.
-
-  Example:
-
-  ```python
-  # Behavior of some cells or feature columns may depend on whether we are in
-  # training or inference mode, e.g. applying dropout.
-  training = True
-  rating = sequence_numeric_column('rating')
-  watches = sequence_categorical_column_with_identity(
-      'watches', num_buckets=1000)
-  watches_embedding = embedding_column(watches, dimension=10)
-  columns = [rating, watches_embedding]
-
-  sequence_input_layer = SequenceFeatures(columns)
-  features = tf.io.parse_example(...,
-                                 features=make_parse_example_spec(columns))
-  sequence_input, sequence_length = sequence_input_layer(
-      features, training=training)
-  sequence_length_mask = tf.sequence_mask(sequence_length)
-
-  rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size, training=training)
-  rnn_layer = tf.keras.layers.RNN(rnn_cell, training=training)
-  outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
-  ```
-  """
-
-  def __init__(
-      self,
-      feature_columns,
-      trainable=True,
-      name=None,
-      **kwargs):
-    """"Constructs a SequenceFeatures layer.
-
-    Args:
-      feature_columns: An iterable of dense sequence columns. Valid columns are
-        - `embedding_column` that wraps a `sequence_categorical_column_with_*`
-        - `sequence_numeric_column`.
-      trainable: Boolean, whether the layer's variables will be updated via
-        gradient descent during training.
-      name: Name to give to the SequenceFeatures.
-      **kwargs: Keyword arguments to construct a layer.
-
-    Raises:
-      ValueError: If any of the `feature_columns` is not a
-        `SequenceDenseColumn`.
-    """
-    super(SequenceFeatures, self).__init__(
-        feature_columns=feature_columns,
-        trainable=trainable,
-        name=name,
-        expected_column_type=fc.SequenceDenseColumn,
-        **kwargs)
-
-  @property
-  def _is_feature_layer(self):
-    return True
-
-  def _target_shape(self, input_shape, total_elements):
-    return (input_shape[0], input_shape[1], total_elements)
-
-  def call(self, features, training=None):
-    """Returns sequence input corresponding to the `feature_columns`.
-
-    Args:
-      features: A dict mapping keys to tensors.
-      training: Python boolean or None, indicating whether to the layer is being
-        run in training mode. This argument is passed to the call method of any
-        `FeatureColumn` that takes a `training` argument. For example, if a
-        `FeatureColumn` performed dropout, the column could expose a `training`
-        argument to control whether the dropout should be applied. If `None`,
-        defaults to `tf.keras.backend.learning_phase()`.
-
-
-    Returns:
-      An `(input_layer, sequence_length)` tuple where:
-      - input_layer: A float `Tensor` of shape `[batch_size, T, D]`.
-        `T` is the maximum sequence length for this batch, which could differ
-        from batch to batch. `D` is the sum of `num_elements` for all
-        `feature_columns`.
-      - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence
-        length for each example.
-
-    Raises:
-      ValueError: If features are not a dictionary.
-    """
-    if not isinstance(features, dict):
-      raise ValueError('We expected a dictionary here. Instead we got: ',
-                       features)
-    if training is None:
-      training = backend.learning_phase()
-    transformation_cache = fc.FeatureTransformationCache(features)
-    output_tensors = []
-    sequence_lengths = []
-
-    for column in self._feature_columns:
-      with ops.name_scope(column.name):
-        try:
-          dense_tensor, sequence_length = column.get_sequence_dense_tensor(
-              transformation_cache, self._state_manager, training=training)
-        except TypeError:
-          dense_tensor, sequence_length = column.get_sequence_dense_tensor(
-              transformation_cache, self._state_manager)
-        # Flattens the final dimension to produce a 3D Tensor.
-        output_tensors.append(self._process_dense_tensor(column, dense_tensor))
-        sequence_lengths.append(sequence_length)
-
-    # Check and process sequence lengths.
-    fc._verify_static_batch_size_equality(sequence_lengths,
-                                          self._feature_columns)
-    sequence_length = _assert_all_equal_and_return(sequence_lengths)
-
-    return self._verify_and_concat_tensors(output_tensors), sequence_length
-
-
-layer_serialization.inject_feature_column_v1_objects(
-    'SequenceFeatures', SequenceFeatures)
-layer_serialization.inject_feature_column_v2_objects(
-    'SequenceFeatures', SequenceFeatures)
-
 
 def concatenate_context_input(context_input, sequence_input):
   """Replicates `context_input` across all timesteps of `sequence_input`.
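The `call()` body removed here (and re-added verbatim in the new keras module later in this commit) probes whether each column's `get_sequence_dense_tensor` accepts a `training` argument and falls back to the two-argument form when it does not. A dependency-free sketch of that dispatch pattern; the column classes below are stand-ins, not TensorFlow APIs:

```python
class ColumnWithTraining(object):
  """Stand-in for a newer column whose getter accepts `training`."""

  def get_sequence_dense_tensor(self, cache, state_manager, training=None):
    return 'dense_tensor', training


class ColumnWithoutTraining(object):
  """Stand-in for an older column with the two-argument signature."""

  def get_sequence_dense_tensor(self, cache, state_manager):
    return 'dense_tensor', 'no_training_arg'


def get_dense(column, cache=None, state_manager=None, training=True):
  try:
    # Newer columns accept `training`; forward it.
    return column.get_sequence_dense_tensor(cache, state_manager,
                                            training=training)
  except TypeError:
    # Older columns do not; retry with the two-argument form.
    return column.get_sequence_dense_tensor(cache, state_manager)


print(get_dense(ColumnWithTraining()))     # ('dense_tensor', True)
print(get_dense(ColumnWithoutTraining()))  # ('dense_tensor', 'no_training_arg')
```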
@@ -24,130 +24,13 @@ import tempfile
 from google.protobuf import text_format
 
 from tensorflow.core.example import example_pb2
-from tensorflow.core.example import feature_pb2
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.feature_column import dense_features
 from tensorflow.python.feature_column import feature_column_v2 as fc
 from tensorflow.python.feature_column import sequence_feature_column as sfc
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.framework import test_util
-from tensorflow.python.keras.layers import recurrent
-from tensorflow.python.ops import init_ops_v2
 from tensorflow.python.ops import parsing_ops
-from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 from tensorflow.python.util import compat
 
 
-class SequenceFeatureColumnIntegrationTest(test.TestCase):
-
-  def _make_sequence_example(self):
-    example = example_pb2.SequenceExample()
-    example.context.feature['int_ctx'].int64_list.value.extend([5])
-    example.context.feature['float_ctx'].float_list.value.extend([123.6])
-    for val in range(0, 10, 2):
-      feat = feature_pb2.Feature()
-      feat.int64_list.value.extend([val] * val)
-      example.feature_lists.feature_list['int_list'].feature.extend([feat])
-    for val in range(1, 11, 2):
-      feat = feature_pb2.Feature()
-      feat.bytes_list.value.extend([compat.as_bytes(str(val))] * val)
-      example.feature_lists.feature_list['str_list'].feature.extend([feat])
-
-    return example
-
-  def _build_feature_columns(self):
-    col = fc.categorical_column_with_identity('int_ctx', num_buckets=100)
-    ctx_cols = [
-        fc.embedding_column(col, dimension=10),
-        fc.numeric_column('float_ctx')
-    ]
-
-    identity_col = sfc.sequence_categorical_column_with_identity(
-        'int_list', num_buckets=10)
-    bucket_col = sfc.sequence_categorical_column_with_hash_bucket(
-        'bytes_list', hash_bucket_size=100)
-    seq_cols = [
-        fc.embedding_column(identity_col, dimension=10),
-        fc.embedding_column(bucket_col, dimension=20)
-    ]
-
-    return ctx_cols, seq_cols
-
-  def test_sequence_example_into_input_layer(self):
-    examples = [_make_sequence_example().SerializeToString()] * 100
-    ctx_cols, seq_cols = self._build_feature_columns()
-
-    def _parse_example(example):
-      ctx, seq = parsing_ops.parse_single_sequence_example(
-          example,
-          context_features=fc.make_parse_example_spec_v2(ctx_cols),
-          sequence_features=fc.make_parse_example_spec_v2(seq_cols))
-      ctx.update(seq)
-      return ctx
-
-    ds = dataset_ops.Dataset.from_tensor_slices(examples)
-    ds = ds.map(_parse_example)
-    ds = ds.batch(20)
-
-    # Test on a single batch
-    features = dataset_ops.make_one_shot_iterator(ds).get_next()
-
-    # Tile the context features across the sequence features
-    sequence_input_layer = sfc.SequenceFeatures(seq_cols)
-    seq_layer, _ = sequence_input_layer(features)
-    input_layer = dense_features.DenseFeatures(ctx_cols)
-    ctx_layer = input_layer(features)
-    input_layer = sfc.concatenate_context_input(ctx_layer, seq_layer)
-
-    rnn_layer = recurrent.RNN(recurrent.SimpleRNNCell(10))
-    output = rnn_layer(input_layer)
-
-    with self.cached_session() as sess:
-      sess.run(variables.global_variables_initializer())
-      features_r = sess.run(features)
-      self.assertAllEqual(features_r['int_list'].dense_shape, [20, 3, 6])
-
-      output_r = sess.run(output)
-      self.assertAllEqual(output_r.shape, [20, 10])
-
-  @test_util.run_deprecated_v1
-  def test_shared_sequence_non_sequence_into_input_layer(self):
-    non_seq = fc.categorical_column_with_identity('non_seq',
-                                                  num_buckets=10)
-    seq = sfc.sequence_categorical_column_with_identity('seq',
-                                                        num_buckets=10)
-    shared_non_seq, shared_seq = fc.shared_embedding_columns_v2(
-        [non_seq, seq],
-        dimension=4,
-        combiner='sum',
-        initializer=init_ops_v2.Ones(),
-        shared_embedding_collection_name='shared')
-
-    seq = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [0, 1], [1, 0]],
-        values=[0, 1, 2],
-        dense_shape=[2, 2])
-    non_seq = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [0, 1], [1, 0]],
-        values=[0, 1, 2],
-        dense_shape=[2, 2])
-    features = {'seq': seq, 'non_seq': non_seq}
-
-    # Tile the context features across the sequence features
-    seq_input, seq_length = sfc.SequenceFeatures([shared_seq])(features)
-    non_seq_input = dense_features.DenseFeatures([shared_non_seq])(features)
-
-    with self.cached_session() as sess:
-      sess.run(variables.global_variables_initializer())
-      output_seq, output_seq_length, output_non_seq = sess.run(
-          [seq_input, seq_length, non_seq_input])
-      self.assertAllEqual(output_seq, [[[1, 1, 1, 1], [1, 1, 1, 1]],
-                                       [[1, 1, 1, 1], [0, 0, 0, 0]]])
-      self.assertAllEqual(output_seq_length, [2, 1])
-      self.assertAllEqual(output_non_seq, [[2, 2, 2, 2], [1, 1, 1, 1]])
-
-
 class SequenceExampleParsingTest(test.TestCase):
 
   def test_seq_ex_in_sequence_categorical_column_with_identity(self):
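The integration test removed here builds `SequenceExample` protos by hand before feeding them through a `tf.data` pipeline. A trimmed, runnable sketch of that proto construction (feature names and values are illustrative):

```python
from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2

# A SequenceExample with one context feature and a two-step sequence feature.
example = example_pb2.SequenceExample()
example.context.feature['int_ctx'].int64_list.value.append(5)
for step_values in ([0], [1, 1]):
  feat = feature_pb2.Feature()
  feat.int64_list.value.extend(step_values)
  example.feature_lists.feature_list['int_list'].feature.extend([feat])

print(example)
```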
@@ -29,7 +29,6 @@ from tensorflow.python.feature_column import feature_column_v2 as fc
 from tensorflow.python.feature_column import sequence_feature_column as sfc
 from tensorflow.python.feature_column import serialization
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import test_util
@@ -49,538 +48,6 @@ def _initialized_session(config=None):
   return sess
 
 
-class SequenceFeaturesTest(test.TestCase, parameterized.TestCase):
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'sparse_input_args_a': {
-           # example 0, ids [2]
-           # example 1, ids [0, 1]
-           'indices': ((0, 0), (1, 0), (1, 1)),
-           'values': (2, 0, 1),
-           'dense_shape': (2, 2)},
-       'sparse_input_args_b': {
-           # example 0, ids [1]
-           # example 1, ids [2, 0]
-           'indices': ((0, 0), (1, 0), (1, 1)),
-           'values': (1, 2, 0),
-           'dense_shape': (2, 2)},
-       'expected_input_layer': [
-           # example 0, ids_a [2], ids_b [1]
-           [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]],
-           # example 1, ids_a [0, 1], ids_b [2, 0]
-           [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],],
-       'expected_sequence_length': [1, 2]},
-      {'testcase_name': '3D',
-       'sparse_input_args_a': {
-           # feature 0, ids [[2], [0, 1]]
-           # feature 1, ids [[0, 0], [1]]
-           'indices': (
-               (0, 0, 0), (0, 1, 0), (0, 1, 1),
-               (1, 0, 0), (1, 0, 1), (1, 1, 0)),
-           'values': (2, 0, 1, 0, 0, 1),
-           'dense_shape': (2, 2, 2)},
-       'sparse_input_args_b': {
-           # feature 0, ids [[1, 1], [1]]
-           # feature 1, ids [[2], [0]]
-           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
-           'values': (1, 1, 1, 2, 0),
-           'dense_shape': (2, 2, 2)},
-       'expected_input_layer': [
-           # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -]
-           [[5., 6., 14., 15., 16.], [2., 3., 14., 15., 16.]],
-           # feature 1, [a: 0, 0, b: 2, -], [a: 1, -, b: 0, -]
-           [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]]],
-       'expected_sequence_length': [2, 2]},
-      )
-  @test_util.run_in_graph_and_eager_modes
-  def test_embedding_column(
-      self, sparse_input_args_a, sparse_input_args_b, expected_input_layer,
-      expected_sequence_length):
-
-    sparse_input_a = sparse_tensor.SparseTensorValue(**sparse_input_args_a)
-    sparse_input_b = sparse_tensor.SparseTensorValue(**sparse_input_args_b)
-    vocabulary_size = 3
-    embedding_dimension_a = 2
-    embedding_values_a = (
-        (1., 2.),  # id 0
-        (3., 4.),  # id 1
-        (5., 6.)  # id 2
-    )
-    embedding_dimension_b = 3
-    embedding_values_b = (
-        (11., 12., 13.),  # id 0
-        (14., 15., 16.),  # id 1
-        (17., 18., 19.)  # id 2
-    )
-    def _get_initializer(embedding_dimension, embedding_values):
-
-      def _initializer(shape, dtype, partition_info=None):
-        self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
-        self.assertEqual(dtypes.float32, dtype)
-        self.assertIsNone(partition_info)
-        return embedding_values
-      return _initializer
-
-    categorical_column_a = sfc.sequence_categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    embedding_column_a = fc.embedding_column(
-        categorical_column_a,
-        dimension=embedding_dimension_a,
-        initializer=_get_initializer(embedding_dimension_a, embedding_values_a))
-    categorical_column_b = sfc.sequence_categorical_column_with_identity(
-        key='bbb', num_buckets=vocabulary_size)
-    embedding_column_b = fc.embedding_column(
-        categorical_column_b,
-        dimension=embedding_dimension_b,
-        initializer=_get_initializer(embedding_dimension_b, embedding_values_b))
-
-    # Test that columns are reordered alphabetically.
-    sequence_input_layer = sfc.SequenceFeatures(
-        [embedding_column_b, embedding_column_a])
-    input_layer, sequence_length = sequence_input_layer({
-        'aaa': sparse_input_a, 'bbb': sparse_input_b,})
-
-    self.evaluate(variables_lib.global_variables_initializer())
-    weights = sequence_input_layer.weights
-    self.assertCountEqual(
-        ('sequence_features/aaa_embedding/embedding_weights:0',
-         'sequence_features/bbb_embedding/embedding_weights:0'),
-        tuple([v.name for v in weights]))
-    self.assertAllEqual(embedding_values_a, self.evaluate(weights[0]))
-    self.assertAllEqual(embedding_values_b, self.evaluate(weights[1]))
-    self.assertAllEqual(expected_input_layer, self.evaluate(input_layer))
-    self.assertAllEqual(
-        expected_sequence_length, self.evaluate(sequence_length))
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_embedding_column_with_non_sequence_categorical(self):
-    """Tests that error is raised for non-sequence embedding column."""
-    vocabulary_size = 3
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(2, 0, 1),
-        dense_shape=(2, 2))
-
-    categorical_column_a = fc.categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    embedding_column_a = fc.embedding_column(
-        categorical_column_a, dimension=2)
-    sequence_input_layer = sfc.SequenceFeatures([embedding_column_a])
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'In embedding_column: aaa_embedding\. categorical_column must be of '
-        r'type SequenceCategoricalColumn to use SequenceFeatures\.'):
-      _, _ = sequence_input_layer({'aaa': sparse_input})
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_shared_embedding_column(self):
-    with ops.Graph().as_default():
-      vocabulary_size = 3
-      sparse_input_a = sparse_tensor.SparseTensorValue(
-          # example 0, ids [2]
-          # example 1, ids [0, 1]
-          indices=((0, 0), (1, 0), (1, 1)),
-          values=(2, 0, 1),
-          dense_shape=(2, 2))
-      sparse_input_b = sparse_tensor.SparseTensorValue(
-          # example 0, ids [1]
-          # example 1, ids [2, 0]
-          indices=((0, 0), (1, 0), (1, 1)),
-          values=(1, 2, 0),
-          dense_shape=(2, 2))
-
-      embedding_dimension = 2
-      embedding_values = (
-          (1., 2.),  # id 0
-          (3., 4.),  # id 1
-          (5., 6.)  # id 2
-      )
-
-      def _get_initializer(embedding_dimension, embedding_values):
-
-        def _initializer(shape, dtype, partition_info=None):
-          self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
-          self.assertEqual(dtypes.float32, dtype)
-          self.assertIsNone(partition_info)
-          return embedding_values
-
-        return _initializer
-
-      expected_input_layer = [
-          # example 0, ids_a [2], ids_b [1]
-          [[5., 6., 3., 4.], [0., 0., 0., 0.]],
-          # example 1, ids_a [0, 1], ids_b [2, 0]
-          [[1., 2., 5., 6.], [3., 4., 1., 2.]],
-      ]
-      expected_sequence_length = [1, 2]
-
-      categorical_column_a = sfc.sequence_categorical_column_with_identity(
-          key='aaa', num_buckets=vocabulary_size)
-      categorical_column_b = sfc.sequence_categorical_column_with_identity(
-          key='bbb', num_buckets=vocabulary_size)
-      # Test that columns are reordered alphabetically.
-      shared_embedding_columns = fc.shared_embedding_columns_v2(
-          [categorical_column_b, categorical_column_a],
-          dimension=embedding_dimension,
-          initializer=_get_initializer(embedding_dimension, embedding_values))
-
-      sequence_input_layer = sfc.SequenceFeatures(shared_embedding_columns)
-      input_layer, sequence_length = sequence_input_layer({
-          'aaa': sparse_input_a, 'bbb': sparse_input_b})
-
-      global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-      self.assertCountEqual(
-          ('aaa_bbb_shared_embedding:0',),
-          tuple([v.name for v in global_vars]))
-      with _initialized_session() as sess:
-        self.assertAllEqual(embedding_values,
-                            global_vars[0].eval(session=sess))
-        self.assertAllEqual(expected_input_layer,
-                            input_layer.eval(session=sess))
-        self.assertAllEqual(
-            expected_sequence_length, sequence_length.eval(session=sess))
-
-  @test_util.run_deprecated_v1
-  def test_shared_embedding_column_with_non_sequence_categorical(self):
-    """Tests that error is raised for non-sequence shared embedding column."""
-    vocabulary_size = 3
-    sparse_input_a = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(2, 0, 1),
-        dense_shape=(2, 2))
-    sparse_input_b = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(2, 0, 1),
-        dense_shape=(2, 2))
-
-    categorical_column_a = fc.categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    categorical_column_b = fc.categorical_column_with_identity(
-        key='bbb', num_buckets=vocabulary_size)
-    shared_embedding_columns = fc.shared_embedding_columns_v2(
-        [categorical_column_a, categorical_column_b], dimension=2)
-
-    sequence_input_layer = sfc.SequenceFeatures(shared_embedding_columns)
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'In embedding_column: aaa_shared_embedding\. categorical_column must '
-        r'be of type SequenceCategoricalColumn to use SequenceFeatures\.'):
-      _, _ = sequence_input_layer({'aaa': sparse_input_a,
-                                   'bbb': sparse_input_b})
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'sparse_input_args_a': {
-           # example 0, ids [2]
-           # example 1, ids [0, 1]
-           'indices': ((0, 0), (1, 0), (1, 1)),
-           'values': (2, 0, 1),
-           'dense_shape': (2, 2)},
-       'sparse_input_args_b': {
-           # example 0, ids [1]
-           # example 1, ids [1, 0]
-           'indices': ((0, 0), (1, 0), (1, 1)),
-           'values': (1, 1, 0),
-           'dense_shape': (2, 2)},
-       'expected_input_layer': [
-           # example 0, ids_a [2], ids_b [1]
-           [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]],
-           # example 1, ids_a [0, 1], ids_b [1, 0]
-           [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]],
-       'expected_sequence_length': [1, 2]},
-      {'testcase_name': '3D',
-       'sparse_input_args_a': {
-           # feature 0, ids [[2], [0, 1]]
-           # feature 1, ids [[0, 0], [1]]
-           'indices': (
-               (0, 0, 0), (0, 1, 0), (0, 1, 1),
-               (1, 0, 0), (1, 0, 1), (1, 1, 0)),
-           'values': (2, 0, 1, 0, 0, 1),
-           'dense_shape': (2, 2, 2)},
-       'sparse_input_args_b': {
-           # feature 0, ids [[1, 1], [1]]
-           # feature 1, ids [[1], [0]]
-           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
-           'values': (1, 1, 1, 1, 0),
-           'dense_shape': (2, 2, 2)},
-       'expected_input_layer': [
-           # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -]
-           [[0., 0., 1., 0., 2.], [1., 1., 0., 0., 1.]],
-           # feature 1, [a: 0, 0, b: 1, -], [a: 1, -, b: 0, -]
-           [[2., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]],
-       'expected_sequence_length': [2, 2]},
-      )
-  @test_util.run_in_graph_and_eager_modes
-  def test_indicator_column(
-      self, sparse_input_args_a, sparse_input_args_b, expected_input_layer,
-      expected_sequence_length):
-    sparse_input_a = sparse_tensor.SparseTensorValue(**sparse_input_args_a)
-    sparse_input_b = sparse_tensor.SparseTensorValue(**sparse_input_args_b)
-
-    vocabulary_size_a = 3
-    vocabulary_size_b = 2
-
-    categorical_column_a = sfc.sequence_categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size_a)
-    indicator_column_a = fc.indicator_column(categorical_column_a)
-    categorical_column_b = sfc.sequence_categorical_column_with_identity(
-        key='bbb', num_buckets=vocabulary_size_b)
-    indicator_column_b = fc.indicator_column(categorical_column_b)
-    # Test that columns are reordered alphabetically.
-    sequence_input_layer = sfc.SequenceFeatures(
-        [indicator_column_b, indicator_column_a])
-    input_layer, sequence_length = sequence_input_layer({
-        'aaa': sparse_input_a, 'bbb': sparse_input_b})
-
-    self.assertAllEqual(expected_input_layer, self.evaluate(input_layer))
-    self.assertAllEqual(
-        expected_sequence_length, self.evaluate(sequence_length))
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_indicator_column_with_non_sequence_categorical(self):
-    """Tests that error is raised for non-sequence categorical column."""
-    vocabulary_size = 3
-    sparse_input = sparse_tensor.SparseTensorValue(
-        # example 0, ids [2]
-        # example 1, ids [0, 1]
-        indices=((0, 0), (1, 0), (1, 1)),
-        values=(2, 0, 1),
-        dense_shape=(2, 2))
-
-    categorical_column_a = fc.categorical_column_with_identity(
-        key='aaa', num_buckets=vocabulary_size)
-    indicator_column_a = fc.indicator_column(categorical_column_a)
-
-    sequence_input_layer = sfc.SequenceFeatures([indicator_column_a])
-    with self.assertRaisesRegexp(
-        ValueError,
-        r'In indicator_column: aaa_indicator\. categorical_column must be of '
-        r'type SequenceCategoricalColumn to use SequenceFeatures\.'):
-      _, _ = sequence_input_layer({'aaa': sparse_input})
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'sparse_input_args': {
-           # example 0, values [0., 1]
-           # example 1, [10.]
-           'indices': ((0, 0), (0, 1), (1, 0)),
-           'values': (0., 1., 10.),
-           'dense_shape': (2, 2)},
-       'expected_input_layer': [
-           [[0.], [1.]],
-           [[10.], [0.]]],
-       'expected_sequence_length': [2, 1]},
-      {'testcase_name': '3D',
-       'sparse_input_args': {
-           # feature 0, ids [[20, 3], [5]]
-           # feature 1, ids [[3], [8]]
-           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
-           'values': (20., 3., 5., 3., 8.),
-           'dense_shape': (2, 2, 2)},
-       'expected_input_layer': [
-           [[20.], [3.], [5.], [0.]],
-           [[3.], [0.], [8.], [0.]]],
-       'expected_sequence_length': [2, 2]},
-      )
-  @test_util.run_in_graph_and_eager_modes
-  def test_numeric_column(
-      self, sparse_input_args, expected_input_layer, expected_sequence_length):
-    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
-
-    numeric_column = sfc.sequence_numeric_column('aaa')
-
-    sequence_input_layer = sfc.SequenceFeatures([numeric_column])
-    input_layer, sequence_length = sequence_input_layer({'aaa': sparse_input})
-
-    self.assertAllEqual(expected_input_layer, self.evaluate(input_layer))
-    self.assertAllEqual(
-        expected_sequence_length, self.evaluate(sequence_length))
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'sparse_input_args': {
-           # example 0, values [0., 1., 2., 3., 4., 5., 6., 7.]
-           # example 1, [10., 11., 12., 13.]
-           'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
-                       (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
-           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           'dense_shape': (2, 8)},
-       'expected_input_layer': [
-           # The output of numeric_column._get_dense_tensor should be flattened.
-           [[0., 1., 2., 3.], [4., 5., 6., 7.]],
-           [[10., 11., 12., 13.], [0., 0., 0., 0.]]],
-       'expected_sequence_length': [2, 1]},
-      {'testcase_name': '3D',
-       'sparse_input_args': {
-           # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]]
-           # example 1, [[10., 11., 12., 13.], []]
-           'indices': ((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
-                       (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3),
-                       (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)),
-           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           'dense_shape': (2, 2, 4)},
-       'expected_input_layer': [
-           # The output of numeric_column._get_dense_tensor should be flattened.
-           [[0., 1., 2., 3.], [4., 5., 6., 7.]],
-           [[10., 11., 12., 13.], [0., 0., 0., 0.]]],
-       'expected_sequence_length': [2, 1]},
-      )
-  @test_util.run_in_graph_and_eager_modes
-  def test_numeric_column_multi_dim(
-      self, sparse_input_args, expected_input_layer, expected_sequence_length):
-    """Tests SequenceFeatures for multi-dimensional numeric_column."""
-    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
-
-    numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
-
-    sequence_input_layer = sfc.SequenceFeatures([numeric_column])
-    input_layer, sequence_length = sequence_input_layer({'aaa': sparse_input})
-
-    self.assertAllEqual(expected_input_layer, self.evaluate(input_layer))
-    self.assertAllEqual(
-        expected_sequence_length, self.evaluate(sequence_length))
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_sequence_length_not_equal(self):
-    """Tests that an error is raised when sequence lengths are not equal."""
-    # Input a with sequence_length = [2, 1]
-    sparse_input_a = sparse_tensor.SparseTensorValue(
-        indices=((0, 0), (0, 1), (1, 0)),
-        values=(0., 1., 10.),
-        dense_shape=(2, 2))
-    # Input b with sequence_length = [1, 1]
-    sparse_input_b = sparse_tensor.SparseTensorValue(
-        indices=((0, 0), (1, 0)),
-        values=(1., 10.),
-        dense_shape=(2, 2))
-    numeric_column_a = sfc.sequence_numeric_column('aaa')
-    numeric_column_b = sfc.sequence_numeric_column('bbb')
-
-    sequence_input_layer = sfc.SequenceFeatures(
-        [numeric_column_a, numeric_column_b])
-
-    with self.assertRaisesRegexp(
-        errors.InvalidArgumentError, r'Condition x == y did not hold.*'):
-      _, sequence_length = sequence_input_layer({
-          'aaa': sparse_input_a,
-          'bbb': sparse_input_b
-      })
-      self.evaluate(sequence_length)
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'sparse_input_args': {
-           # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]]
-           # example 1, [[[10., 11.], [12., 13.]]]
-           'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
-                       (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
-           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           'dense_shape': (2, 8)},
-       'expected_shape': [2, 2, 4]},
-      {'testcase_name': '3D',
-       'sparse_input_args': {
-           # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]]
-           # example 1, [[10., 11., 12., 13.], []]
-           'indices': ((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
-                       (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3),
-                       (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)),
-           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
-           'dense_shape': (2, 2, 4)},
-       'expected_shape': [2, 2, 4]},
-      )
-  @test_util.run_in_graph_and_eager_modes
-  def test_static_shape_from_tensors_numeric(
-      self, sparse_input_args, expected_shape):
-    """Tests that we return a known static shape when we have one."""
-    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
-    numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
-
-    sequence_input_layer = sfc.SequenceFeatures([numeric_column])
-    input_layer, _ = sequence_input_layer({'aaa': sparse_input})
-    shape = input_layer.get_shape()
-    self.assertEqual(shape, expected_shape)
-
-  @parameterized.named_parameters(
-      {'testcase_name': '2D',
-       'sparse_input_args': {
-           # example 0, ids [2]
-           # example 1, ids [0, 1]
-           # example 2, ids []
-           # example 3, ids [1]
-           'indices': ((0, 0), (1, 0), (1, 1), (3, 0)),
-           'values': (2, 0, 1, 1),
-           'dense_shape': (4, 2)},
-       'expected_shape': [4, 2, 3]},
-      {'testcase_name': '3D',
-       'sparse_input_args': {
-           # example 0, ids [[2]]
-           # example 1, ids [[0, 1], [2]]
-           # example 2, ids []
-           # example 3, ids [[1], [0, 2]]
-           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
-                       (3, 0, 0), (3, 1, 0), (3, 1, 1)),
-           'values': (2, 0, 1, 2, 1, 0, 2),
-           'dense_shape': (4, 2, 2)},
-       'expected_shape': [4, 2, 3]}
-      )
-  @test_util.run_in_graph_and_eager_modes
-  def test_static_shape_from_tensors_indicator(
-      self, sparse_input_args, expected_shape):
-    """Tests that we return a known static shape when we have one."""
-    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
-    categorical_column = sfc.sequence_categorical_column_with_identity(
-        key='aaa', num_buckets=3)
-    indicator_column = fc.indicator_column(categorical_column)
-
-    sequence_input_layer = sfc.SequenceFeatures([indicator_column])
-    input_layer, _ = sequence_input_layer({'aaa': sparse_input})
-    shape = input_layer.get_shape()
-    self.assertEqual(shape, expected_shape)
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_compute_output_shape(self):
-    price1 = sfc.sequence_numeric_column('price1', shape=2)
-    price2 = sfc.sequence_numeric_column('price2')
-    features = {
-        'price1': sparse_tensor.SparseTensor(
-            indices=[[0, 0, 0], [0, 0, 1],
-                     [0, 1, 0], [0, 1, 1],
-                     [1, 0, 0], [1, 0, 1],
-                     [2, 0, 0], [2, 0, 1],
-                     [3, 0, 0], [3, 0, 1]],
-            values=[0., 1., 10., 11., 100., 101., 200., 201., 300., 301.],
-            dense_shape=(4, 3, 2)),
-        'price2': sparse_tensor.SparseTensor(
-            indices=[[0, 0],
-                     [0, 1],
-                     [1, 0],
-                     [2, 0],
-                     [3, 0]],
-            values=[10., 11., 20., 30., 40.],
-            dense_shape=(4, 3))}
-    sequence_features = sfc.SequenceFeatures([price1, price2])
-    seq_input, seq_len = sequence_features(features)
-    self.assertEqual(
-        sequence_features.compute_output_shape((None, None)),
-        (None, None, 3))
-    self.evaluate(variables_lib.global_variables_initializer())
-    self.evaluate(lookup_ops.tables_initializer())
-
-    self.assertAllClose([[[0., 1., 10.], [10., 11., 11.], [0., 0., 0.]],
-                         [[100., 101., 20.], [0., 0., 0.], [0., 0., 0.]],
-                         [[200., 201., 30.], [0., 0., 0.], [0., 0., 0.]],
-                         [[300., 301., 40.], [0., 0., 0.], [0., 0., 0.]]],
-                        self.evaluate(seq_input))
-    self.assertAllClose([2, 1, 1, 1], self.evaluate(seq_len))
-
-
 @test_util.run_all_in_graph_and_eager_modes
 class ConcatenateContextInputTest(test.TestCase, parameterized.TestCase):
   """Tests the utility fn concatenate_context_input."""
@@ -22,7 +22,6 @@ from absl.testing import parameterized
 
 from tensorflow.python.feature_column import dense_features
 from tensorflow.python.feature_column import feature_column_v2 as fc
-from tensorflow.python.feature_column import sequence_feature_column as sfc
 from tensorflow.python.feature_column import serialization
 from tensorflow.python.framework import test_util
 from tensorflow.python.platform import test
@@ -180,40 +179,6 @@ class DenseFeaturesSerializationTest(test.TestCase, parameterized.TestCase):
     self.assertEqual(new_layer._feature_columns[0].name, 'a_X_b_indicator')
 
 
-@test_util.run_all_in_graph_and_eager_modes
-class SequenceFeaturesSerializationTest(test.TestCase, parameterized.TestCase):
-
-  @parameterized.named_parameters(('default', None, None),
-                                  ('trainable', True, 'trainable'),
-                                  ('not_trainable', False, 'frozen'))
-  def test_get_config(self, trainable, name):
-    cols = [sfc.sequence_numeric_column('a')]
-    orig_layer = sfc.SequenceFeatures(cols, trainable=trainable, name=name)
-    config = orig_layer.get_config()
-
-    self.assertEqual(config['name'], orig_layer.name)
-    self.assertEqual(config['trainable'], trainable)
-    self.assertLen(config['feature_columns'], 1)
-    self.assertEqual(config['feature_columns'][0]['class_name'],
-                     'SequenceNumericColumn')
-    self.assertEqual(config['feature_columns'][0]['config']['shape'], (1,))
-
-  @parameterized.named_parameters(('default', None, None),
-                                  ('trainable', True, 'trainable'),
-                                  ('not_trainable', False, 'frozen'))
-  def test_from_config(self, trainable, name):
-    cols = [sfc.sequence_numeric_column('a')]
-    orig_layer = sfc.SequenceFeatures(cols, trainable=trainable, name=name)
-    config = orig_layer.get_config()
-
-    new_layer = sfc.SequenceFeatures.from_config(config)
-
-    self.assertEqual(new_layer.name, orig_layer.name)
-    self.assertEqual(new_layer.trainable, trainable)
-    self.assertLen(new_layer._feature_columns, 1)
-    self.assertEqual(new_layer._feature_columns[0].name, 'a')
-
-
 @test_util.run_all_in_graph_and_eager_modes
 class LinearModelLayerSerializationTest(test.TestCase, parameterized.TestCase):
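The serialization tests removed here round-trip the layer through `get_config()`/`from_config()`. The same behaviour can be sketched against the public symbol (a sketch mirroring the removed tests; it assumes a TF 2.x build exposing `tf.keras.experimental.SequenceFeatures`):

```python
import tensorflow as tf

cols = [tf.feature_column.sequence_numeric_column('a')]
layer = tf.keras.experimental.SequenceFeatures(
    cols, trainable=False, name='frozen')

config = layer.get_config()   # nested dict, including serialized columns
restored = tf.keras.experimental.SequenceFeatures.from_config(config)

assert restored.name == layer.name
assert restored.trainable is False
```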
@@ -28,6 +28,7 @@ py_library(
         "//tensorflow/python/eager:monitoring",
         "//tensorflow/python/keras/applications",
         "//tensorflow/python/keras/datasets",
+        "//tensorflow/python/keras/feature_column",
        "//tensorflow/python/keras/layers",
         "//tensorflow/python/keras/mixed_precision/experimental:mixed_precision_experimental",
         "//tensorflow/python/keras/optimizer_v2",
@@ -47,6 +47,7 @@ keras_packages = [
     "tensorflow.python.keras.engine.sequential",
     "tensorflow.python.keras.engine.training",
     "tensorflow.python.keras.estimator",
+    "tensorflow.python.keras.feature_column.sequence_feature_column",
     "tensorflow.python.keras.initializers",
     "tensorflow.python.keras.initializers.initializers_v1",
     "tensorflow.python.keras.initializers.initializers_v2",
tensorflow/python/keras/feature_column/BUILD (new file, 74 lines)
@@ -0,0 +1,74 @@
+load("//tensorflow:tensorflow.bzl", "py_test", "tf_py_test")
+
+package(
+    default_visibility = [
+        "//tensorflow/python/feature_column:__subpackages__",
+        "//tensorflow/python/keras:__subpackages__",
+    ],
+    licenses = ["notice"],  # Apache 2.0
+)
+
+exports_files(["LICENSE"])
+
+py_library(
+    name = "feature_column",
+    deps = [
+        ":sequence_feature_column",
+    ],
+)
+
+py_library(
+    name = "sequence_feature_column",
+    srcs = ["sequence_feature_column.py"],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:check_ops",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:tf_export",
+        "//tensorflow/python/feature_column:feature_column_v2",
+        "//tensorflow/python/keras:backend",
+    ],
+)
+
+tf_py_test(
+    name = "sequence_feature_column_test",
+    srcs = ["sequence_feature_column_test.py"],
+    deps = [
+        ":sequence_feature_column",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:extra_py_tests_deps",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:lookup_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/feature_column:feature_column_v2",
+        "//tensorflow/python/keras",
+        "//tensorflow/python/keras:combinations",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
+py_test(
+    name = "sequence_feature_column_integration_test",
+    srcs = ["sequence_feature_column_integration_test.py"],
+    python_version = "PY3",
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        ":sequence_feature_column",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:init_ops_v2",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:util",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/feature_column:feature_column_v2",
+        "//tensorflow/python/keras/layers:recurrent",
+    ],
+)
@ -0,0 +1,173 @@
|
|||||||
|
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ==============================================================================
|
||||||
|
"""This API defines FeatureColumn for sequential input.
|
||||||
|
|
||||||
|
NOTE: This API is a work in progress and will likely be changing frequently.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
from tensorflow.python.feature_column import feature_column_v2 as fc
|
||||||
|
from tensorflow.python.framework import ops
|
||||||
|
from tensorflow.python.keras import backend
|
||||||
|
from tensorflow.python.ops import array_ops
|
||||||
|
from tensorflow.python.ops import check_ops
|
||||||
|
from tensorflow.python.util.tf_export import keras_export
|
||||||
|
|
||||||
|
# pylint: disable=protected-access
|
||||||
|
|
||||||
|
|
||||||
|
@keras_export('keras.experimental.SequenceFeatures')
|
||||||
|
class SequenceFeatures(fc._BaseFeaturesLayer):
|
||||||
|
"""A layer for sequence input.
|
||||||
|
|
||||||
|
All `feature_columns` must be sequence dense columns with the same
|
||||||
|
`sequence_length`. The output of this method can be fed into sequence
|
||||||
|
networks, such as RNN.
|
||||||
|
|
||||||
|
The output of this method is a 3D `Tensor` of shape `[batch_size, T, D]`.
|
||||||
|
`T` is the maximum sequence length for this batch, which could differ from
|
||||||
|
batch to batch.
|
||||||
|
|
||||||
|
If multiple `feature_columns` are given with `Di` `num_elements` each, their
|
||||||
|
outputs are concatenated. So, the final `Tensor` has shape
|
||||||
|
`[batch_size, T, D0 + D1 + ... + Dn]`.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Behavior of some cells or feature columns may depend on whether we are in
|
||||||
|
# training or inference mode, e.g. applying dropout.
|
||||||
|
training = True
|
||||||
|
rating = sequence_numeric_column('rating')
|
||||||
|
watches = sequence_categorical_column_with_identity(
|
||||||
|
'watches', num_buckets=1000)
|
||||||
|
watches_embedding = embedding_column(watches, dimension=10)
|
||||||
|
columns = [rating, watches_embedding]
|
||||||
|
|
||||||
|
sequence_input_layer = SequenceFeatures(columns)
|
||||||
|
features = tf.io.parse_example(...,
|
||||||
|
features=make_parse_example_spec(columns))
|
||||||
|
sequence_input, sequence_length = sequence_input_layer(
|
||||||
|
features, training=training)
|
||||||
|
sequence_length_mask = tf.sequence_mask(sequence_length)
|
||||||
|
|
||||||
|
rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size, training=training)
|
||||||
|
rnn_layer = tf.keras.layers.RNN(rnn_cell, training=training)
|
||||||
|
  outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
  ```
  """

  def __init__(
      self,
      feature_columns,
      trainable=True,
      name=None,
      **kwargs):
    """Constructs a SequenceFeatures layer.

    Args:
      feature_columns: An iterable of dense sequence columns. Valid columns are
        - `embedding_column` that wraps a `sequence_categorical_column_with_*`
        - `sequence_numeric_column`.
      trainable: Boolean, whether the layer's variables will be updated via
        gradient descent during training.
      name: Name to give to the SequenceFeatures.
      **kwargs: Keyword arguments to construct a layer.

    Raises:
      ValueError: If any of the `feature_columns` is not a
        `SequenceDenseColumn`.
    """
    super(SequenceFeatures, self).__init__(
        feature_columns=feature_columns,
        trainable=trainable,
        name=name,
        expected_column_type=fc.SequenceDenseColumn,
        **kwargs)

  @property
  def _is_feature_layer(self):
    return True

  def _target_shape(self, input_shape, total_elements):
    return (input_shape[0], input_shape[1], total_elements)

  def call(self, features, training=None):
    """Returns sequence input corresponding to the `feature_columns`.

    Args:
      features: A dict mapping keys to tensors.
      training: Python boolean or None, indicating whether the layer is being
        run in training mode. This argument is passed to the call method of any
        `FeatureColumn` that takes a `training` argument. For example, if a
        `FeatureColumn` performed dropout, the column could expose a `training`
        argument to control whether the dropout should be applied. If `None`,
        defaults to `tf.keras.backend.learning_phase()`.

    Returns:
      An `(input_layer, sequence_length)` tuple where:
      - input_layer: A float `Tensor` of shape `[batch_size, T, D]`.
        `T` is the maximum sequence length for this batch, which could differ
        from batch to batch. `D` is the sum of `num_elements` for all
        `feature_columns`.
      - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence
        length for each example.

    Raises:
      ValueError: If features are not a dictionary.
    """
    if not isinstance(features, dict):
      raise ValueError('We expected a dictionary here. Instead we got: ',
                       features)
    if training is None:
      training = backend.learning_phase()
    transformation_cache = fc.FeatureTransformationCache(features)
    output_tensors = []
    sequence_lengths = []

    for column in self._feature_columns:
      with ops.name_scope(column.name):
        try:
          dense_tensor, sequence_length = column.get_sequence_dense_tensor(
              transformation_cache, self._state_manager, training=training)
        except TypeError:
          dense_tensor, sequence_length = column.get_sequence_dense_tensor(
              transformation_cache, self._state_manager)
        # Flattens the final dimension to produce a 3D Tensor.
        output_tensors.append(self._process_dense_tensor(column, dense_tensor))
        sequence_lengths.append(sequence_length)

    # Check and process sequence lengths.
    fc._verify_static_batch_size_equality(sequence_lengths,
                                          self._feature_columns)
    sequence_length = _assert_all_equal_and_return(sequence_lengths)

    return self._verify_and_concat_tensors(output_tensors), sequence_length


def _assert_all_equal_and_return(tensors, name=None):
  """Asserts that all tensors are equal and returns the first one."""
  with ops.name_scope(name, 'assert_all_equal', values=tensors):
    if len(tensors) == 1:
      return tensors[0]
    assert_equal_ops = []
    for t in tensors[1:]:
      assert_equal_ops.append(check_ops.assert_equal(tensors[0], t))
    with ops.control_dependencies(assert_equal_ops):
      return array_ops.identity(tensors[0])
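A minimal usage sketch of the relocated layer, modeled on its docstring example and the tests added in this change; the feature names, bucket size, cell size, and toy inputs below are illustrative assumptions, not part of the commit:

```python
import tensorflow as tf

# Toy batch of two variable-length examples (names and values are made up).
features = {
    'rating': tf.sparse.SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]], values=[4.0, 5.0, 3.0],
        dense_shape=[2, 2]),
    'watches': tf.sparse.SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=tf.constant([7, 42, 3], dtype=tf.int64),
        dense_shape=[2, 2]),
}

rating = tf.feature_column.sequence_numeric_column('rating')
watches = tf.feature_column.embedding_column(
    tf.feature_column.sequence_categorical_column_with_identity(
        'watches', num_buckets=1000),
    dimension=10)

# The layer concatenates all columns into a [batch, T, D] dense tensor and
# also returns the true length of each sequence in the batch.
sequence_input_layer = tf.keras.experimental.SequenceFeatures([rating, watches])
sequence_input, sequence_length = sequence_input_layer(features)

# The lengths become a mask for a downstream recurrent layer.
rnn_layer = tf.keras.layers.RNN(tf.keras.layers.SimpleRNNCell(32))
outputs = rnn_layer(sequence_input, mask=tf.sequence_mask(sequence_length))
```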
@ -0,0 +1,259 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Integration test for sequence feature columns with SequenceExamples."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


from google.protobuf import text_format

from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.feature_column import dense_features
from tensorflow.python.feature_column import feature_column_v2 as fc
from tensorflow.python.feature_column import sequence_feature_column as sfc
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import test_util
from tensorflow.python.keras.feature_column import sequence_feature_column as ksfc
from tensorflow.python.keras.layers import recurrent
from tensorflow.python.ops import init_ops_v2
from tensorflow.python.ops import parsing_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
from tensorflow.python.util import compat


class SequenceFeatureColumnIntegrationTest(test.TestCase):

  def _make_sequence_example(self):
    example = example_pb2.SequenceExample()
    example.context.feature['int_ctx'].int64_list.value.extend([5])
    example.context.feature['float_ctx'].float_list.value.extend([123.6])
    for val in range(0, 10, 2):
      feat = feature_pb2.Feature()
      feat.int64_list.value.extend([val] * val)
      example.feature_lists.feature_list['int_list'].feature.extend([feat])
    for val in range(1, 11, 2):
      feat = feature_pb2.Feature()
      feat.bytes_list.value.extend([compat.as_bytes(str(val))] * val)
      example.feature_lists.feature_list['str_list'].feature.extend([feat])

    return example

  def _build_feature_columns(self):
    col = fc.categorical_column_with_identity('int_ctx', num_buckets=100)
    ctx_cols = [
        fc.embedding_column(col, dimension=10),
        fc.numeric_column('float_ctx')
    ]

    identity_col = sfc.sequence_categorical_column_with_identity(
        'int_list', num_buckets=10)
    bucket_col = sfc.sequence_categorical_column_with_hash_bucket(
        'bytes_list', hash_bucket_size=100)
    seq_cols = [
        fc.embedding_column(identity_col, dimension=10),
        fc.embedding_column(bucket_col, dimension=20)
    ]

    return ctx_cols, seq_cols

  def test_sequence_example_into_input_layer(self):
    examples = [_make_sequence_example().SerializeToString()] * 100
    ctx_cols, seq_cols = self._build_feature_columns()

    def _parse_example(example):
      ctx, seq = parsing_ops.parse_single_sequence_example(
          example,
          context_features=fc.make_parse_example_spec_v2(ctx_cols),
          sequence_features=fc.make_parse_example_spec_v2(seq_cols))
      ctx.update(seq)
      return ctx

    ds = dataset_ops.Dataset.from_tensor_slices(examples)
    ds = ds.map(_parse_example)
    ds = ds.batch(20)

    # Test on a single batch
    features = dataset_ops.make_one_shot_iterator(ds).get_next()

    # Tile the context features across the sequence features
    sequence_input_layer = ksfc.SequenceFeatures(seq_cols)
    seq_layer, _ = sequence_input_layer(features)
    input_layer = dense_features.DenseFeatures(ctx_cols)
    ctx_layer = input_layer(features)
    input_layer = sfc.concatenate_context_input(ctx_layer, seq_layer)

    rnn_layer = recurrent.RNN(recurrent.SimpleRNNCell(10))
    output = rnn_layer(input_layer)

    with self.cached_session() as sess:
      sess.run(variables.global_variables_initializer())
      features_r = sess.run(features)
      self.assertAllEqual(features_r['int_list'].dense_shape, [20, 3, 6])

      output_r = sess.run(output)
      self.assertAllEqual(output_r.shape, [20, 10])

  @test_util.run_deprecated_v1
  def test_shared_sequence_non_sequence_into_input_layer(self):
    non_seq = fc.categorical_column_with_identity('non_seq',
                                                  num_buckets=10)
    seq = sfc.sequence_categorical_column_with_identity('seq',
                                                        num_buckets=10)
    shared_non_seq, shared_seq = fc.shared_embedding_columns_v2(
        [non_seq, seq],
        dimension=4,
        combiner='sum',
        initializer=init_ops_v2.Ones(),
        shared_embedding_collection_name='shared')

    seq = sparse_tensor.SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=[0, 1, 2],
        dense_shape=[2, 2])
    non_seq = sparse_tensor.SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=[0, 1, 2],
        dense_shape=[2, 2])
    features = {'seq': seq, 'non_seq': non_seq}

    # Tile the context features across the sequence features
    seq_input, seq_length = ksfc.SequenceFeatures([shared_seq])(features)
    non_seq_input = dense_features.DenseFeatures([shared_non_seq])(features)

    with self.cached_session() as sess:
      sess.run(variables.global_variables_initializer())
      output_seq, output_seq_length, output_non_seq = sess.run(
          [seq_input, seq_length, non_seq_input])
      self.assertAllEqual(output_seq, [[[1, 1, 1, 1], [1, 1, 1, 1]],
                                       [[1, 1, 1, 1], [0, 0, 0, 0]]])
      self.assertAllEqual(output_seq_length, [2, 1])
      self.assertAllEqual(output_non_seq, [[2, 2, 2, 2], [1, 1, 1, 1]])


_SEQ_EX_PROTO = """
context {
  feature {
    key: "float_ctx"
    value {
      float_list {
        value: 123.6
      }
    }
  }
  feature {
    key: "int_ctx"
    value {
      int64_list {
        value: 5
      }
    }
  }
}
feature_lists {
  feature_list {
    key: "bytes_list"
    value {
      feature {
        bytes_list {
          value: "a"
        }
      }
      feature {
        bytes_list {
          value: "b"
          value: "c"
        }
      }
      feature {
        bytes_list {
          value: "d"
          value: "e"
          value: "f"
          value: "g"
        }
      }
    }
  }
  feature_list {
    key: "float_list"
    value {
      feature {
        float_list {
          value: 1.0
        }
      }
      feature {
        float_list {
          value: 3.0
          value: 3.0
          value: 3.0
        }
      }
      feature {
        float_list {
          value: 5.0
          value: 5.0
          value: 5.0
          value: 5.0
          value: 5.0
        }
      }
    }
  }
  feature_list {
    key: "int_list"
    value {
      feature {
        int64_list {
          value: 2
          value: 2
        }
      }
      feature {
        int64_list {
          value: 4
          value: 4
          value: 4
          value: 4
        }
      }
      feature {
        int64_list {
          value: 6
          value: 6
          value: 6
          value: 6
          value: 6
          value: 6
        }
      }
    }
  }
}
"""


def _make_sequence_example():
  example = example_pb2.SequenceExample()
  return text_format.Parse(_SEQ_EX_PROTO, example)


if __name__ == '__main__':
  test.main()
@ -0,0 +1,687 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for sequential_feature_column."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


from absl.testing import parameterized
import numpy as np

from tensorflow.python import keras
from tensorflow.python.client import session
from tensorflow.python.eager import context
from tensorflow.python.feature_column import feature_column_v2 as fc
from tensorflow.python.feature_column import sequence_feature_column as sfc
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import test_util
from tensorflow.python.keras import combinations
from tensorflow.python.keras.feature_column import sequence_feature_column as ksfc
from tensorflow.python.keras.saving import model_config
from tensorflow.python.ops import lookup_ops
from tensorflow.python.ops import variables as variables_lib
from tensorflow.python.platform import test


def _initialized_session(config=None):
  sess = session.Session(config=config)
  sess.run(variables_lib.global_variables_initializer())
  sess.run(lookup_ops.tables_initializer())
  return sess


class SequenceFeaturesTest(test.TestCase, parameterized.TestCase):

  @parameterized.named_parameters(
      {'testcase_name': '2D',
       'sparse_input_args_a': {
           # example 0, ids [2]
           # example 1, ids [0, 1]
           'indices': ((0, 0), (1, 0), (1, 1)),
           'values': (2, 0, 1),
           'dense_shape': (2, 2)},
       'sparse_input_args_b': {
           # example 0, ids [1]
           # example 1, ids [2, 0]
           'indices': ((0, 0), (1, 0), (1, 1)),
           'values': (1, 2, 0),
           'dense_shape': (2, 2)},
       'expected_input_layer': [
           # example 0, ids_a [2], ids_b [1]
           [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]],
           # example 1, ids_a [0, 1], ids_b [2, 0]
           [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],],
       'expected_sequence_length': [1, 2]},
      {'testcase_name': '3D',
       'sparse_input_args_a': {
           # feature 0, ids [[2], [0, 1]]
           # feature 1, ids [[0, 0], [1]]
           'indices': (
               (0, 0, 0), (0, 1, 0), (0, 1, 1),
               (1, 0, 0), (1, 0, 1), (1, 1, 0)),
           'values': (2, 0, 1, 0, 0, 1),
           'dense_shape': (2, 2, 2)},
       'sparse_input_args_b': {
           # feature 0, ids [[1, 1], [1]]
           # feature 1, ids [[2], [0]]
           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
           'values': (1, 1, 1, 2, 0),
           'dense_shape': (2, 2, 2)},
       'expected_input_layer': [
           # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -]
           [[5., 6., 14., 15., 16.], [2., 3., 14., 15., 16.]],
           # feature 1, [a: 0, 0, b: 2, -], [a: 1, -, b: 0, -]
           [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]]],
       'expected_sequence_length': [2, 2]},
      )
  @test_util.run_in_graph_and_eager_modes
  def test_embedding_column(
      self, sparse_input_args_a, sparse_input_args_b, expected_input_layer,
      expected_sequence_length):

    sparse_input_a = sparse_tensor.SparseTensorValue(**sparse_input_args_a)
    sparse_input_b = sparse_tensor.SparseTensorValue(**sparse_input_args_b)
    vocabulary_size = 3
    embedding_dimension_a = 2
    embedding_values_a = (
        (1., 2.),  # id 0
        (3., 4.),  # id 1
        (5., 6.)  # id 2
    )
    embedding_dimension_b = 3
    embedding_values_b = (
        (11., 12., 13.),  # id 0
        (14., 15., 16.),  # id 1
        (17., 18., 19.)  # id 2
    )
    def _get_initializer(embedding_dimension, embedding_values):

      def _initializer(shape, dtype, partition_info=None):
        self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
        self.assertEqual(dtypes.float32, dtype)
        self.assertIsNone(partition_info)
        return embedding_values
      return _initializer

    categorical_column_a = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    embedding_column_a = fc.embedding_column(
        categorical_column_a,
        dimension=embedding_dimension_a,
        initializer=_get_initializer(embedding_dimension_a, embedding_values_a))
    categorical_column_b = sfc.sequence_categorical_column_with_identity(
        key='bbb', num_buckets=vocabulary_size)
    embedding_column_b = fc.embedding_column(
        categorical_column_b,
        dimension=embedding_dimension_b,
        initializer=_get_initializer(embedding_dimension_b, embedding_values_b))

    # Test that columns are reordered alphabetically.
    sequence_input_layer = ksfc.SequenceFeatures(
        [embedding_column_b, embedding_column_a])
    input_layer, sequence_length = sequence_input_layer({
        'aaa': sparse_input_a, 'bbb': sparse_input_b,})

    self.evaluate(variables_lib.global_variables_initializer())
    weights = sequence_input_layer.weights
    self.assertCountEqual(
        ('sequence_features/aaa_embedding/embedding_weights:0',
         'sequence_features/bbb_embedding/embedding_weights:0'),
        tuple([v.name for v in weights]))
    self.assertAllEqual(embedding_values_a, self.evaluate(weights[0]))
    self.assertAllEqual(embedding_values_b, self.evaluate(weights[1]))
    self.assertAllEqual(expected_input_layer, self.evaluate(input_layer))
    self.assertAllEqual(
        expected_sequence_length, self.evaluate(sequence_length))

  @test_util.run_in_graph_and_eager_modes
  def test_embedding_column_with_non_sequence_categorical(self):
    """Tests that error is raised for non-sequence embedding column."""
    vocabulary_size = 3
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        indices=((0, 0), (1, 0), (1, 1)),
        values=(2, 0, 1),
        dense_shape=(2, 2))

    categorical_column_a = fc.categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    embedding_column_a = fc.embedding_column(
        categorical_column_a, dimension=2)
    sequence_input_layer = ksfc.SequenceFeatures([embedding_column_a])
    with self.assertRaisesRegexp(
        ValueError,
        r'In embedding_column: aaa_embedding\. categorical_column must be of '
        r'type SequenceCategoricalColumn to use SequenceFeatures\.'):
      _, _ = sequence_input_layer({'aaa': sparse_input})

  @test_util.run_in_graph_and_eager_modes
  def test_shared_embedding_column(self):
    with ops.Graph().as_default():
      vocabulary_size = 3
      sparse_input_a = sparse_tensor.SparseTensorValue(
          # example 0, ids [2]
          # example 1, ids [0, 1]
          indices=((0, 0), (1, 0), (1, 1)),
          values=(2, 0, 1),
          dense_shape=(2, 2))
      sparse_input_b = sparse_tensor.SparseTensorValue(
          # example 0, ids [1]
          # example 1, ids [2, 0]
          indices=((0, 0), (1, 0), (1, 1)),
          values=(1, 2, 0),
          dense_shape=(2, 2))

      embedding_dimension = 2
      embedding_values = (
          (1., 2.),  # id 0
          (3., 4.),  # id 1
          (5., 6.)  # id 2
      )

      def _get_initializer(embedding_dimension, embedding_values):

        def _initializer(shape, dtype, partition_info=None):
          self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
          self.assertEqual(dtypes.float32, dtype)
          self.assertIsNone(partition_info)
          return embedding_values

        return _initializer

      expected_input_layer = [
          # example 0, ids_a [2], ids_b [1]
          [[5., 6., 3., 4.], [0., 0., 0., 0.]],
          # example 1, ids_a [0, 1], ids_b [2, 0]
          [[1., 2., 5., 6.], [3., 4., 1., 2.]],
      ]
      expected_sequence_length = [1, 2]

      categorical_column_a = sfc.sequence_categorical_column_with_identity(
          key='aaa', num_buckets=vocabulary_size)
      categorical_column_b = sfc.sequence_categorical_column_with_identity(
          key='bbb', num_buckets=vocabulary_size)
      # Test that columns are reordered alphabetically.
      shared_embedding_columns = fc.shared_embedding_columns_v2(
          [categorical_column_b, categorical_column_a],
          dimension=embedding_dimension,
          initializer=_get_initializer(embedding_dimension, embedding_values))

      sequence_input_layer = ksfc.SequenceFeatures(shared_embedding_columns)
      input_layer, sequence_length = sequence_input_layer({
          'aaa': sparse_input_a, 'bbb': sparse_input_b})

      global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
      self.assertCountEqual(
          ('aaa_bbb_shared_embedding:0',),
          tuple([v.name for v in global_vars]))
      with _initialized_session() as sess:
        self.assertAllEqual(embedding_values,
                            global_vars[0].eval(session=sess))
        self.assertAllEqual(expected_input_layer,
                            input_layer.eval(session=sess))
        self.assertAllEqual(
            expected_sequence_length, sequence_length.eval(session=sess))

  @test_util.run_deprecated_v1
  def test_shared_embedding_column_with_non_sequence_categorical(self):
    """Tests that error is raised for non-sequence shared embedding column."""
    vocabulary_size = 3
    sparse_input_a = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        indices=((0, 0), (1, 0), (1, 1)),
        values=(2, 0, 1),
        dense_shape=(2, 2))
    sparse_input_b = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        indices=((0, 0), (1, 0), (1, 1)),
        values=(2, 0, 1),
        dense_shape=(2, 2))

    categorical_column_a = fc.categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    categorical_column_b = fc.categorical_column_with_identity(
        key='bbb', num_buckets=vocabulary_size)
    shared_embedding_columns = fc.shared_embedding_columns_v2(
        [categorical_column_a, categorical_column_b], dimension=2)

    sequence_input_layer = ksfc.SequenceFeatures(shared_embedding_columns)
    with self.assertRaisesRegexp(
        ValueError,
        r'In embedding_column: aaa_shared_embedding\. categorical_column must '
        r'be of type SequenceCategoricalColumn to use SequenceFeatures\.'):
      _, _ = sequence_input_layer({'aaa': sparse_input_a,
                                   'bbb': sparse_input_b})

  @parameterized.named_parameters(
      {'testcase_name': '2D',
       'sparse_input_args_a': {
           # example 0, ids [2]
           # example 1, ids [0, 1]
           'indices': ((0, 0), (1, 0), (1, 1)),
           'values': (2, 0, 1),
           'dense_shape': (2, 2)},
       'sparse_input_args_b': {
           # example 0, ids [1]
           # example 1, ids [1, 0]
           'indices': ((0, 0), (1, 0), (1, 1)),
           'values': (1, 1, 0),
           'dense_shape': (2, 2)},
       'expected_input_layer': [
           # example 0, ids_a [2], ids_b [1]
           [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]],
           # example 1, ids_a [0, 1], ids_b [1, 0]
           [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]],
       'expected_sequence_length': [1, 2]},
      {'testcase_name': '3D',
       'sparse_input_args_a': {
           # feature 0, ids [[2], [0, 1]]
           # feature 1, ids [[0, 0], [1]]
           'indices': (
               (0, 0, 0), (0, 1, 0), (0, 1, 1),
               (1, 0, 0), (1, 0, 1), (1, 1, 0)),
           'values': (2, 0, 1, 0, 0, 1),
           'dense_shape': (2, 2, 2)},
       'sparse_input_args_b': {
           # feature 0, ids [[1, 1], [1]]
           # feature 1, ids [[1], [0]]
           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
           'values': (1, 1, 1, 1, 0),
           'dense_shape': (2, 2, 2)},
       'expected_input_layer': [
           # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -]
           [[0., 0., 1., 0., 2.], [1., 1., 0., 0., 1.]],
           # feature 1, [a: 0, 0, b: 1, -], [a: 1, -, b: 0, -]
           [[2., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]],
       'expected_sequence_length': [2, 2]},
      )
  @test_util.run_in_graph_and_eager_modes
  def test_indicator_column(
      self, sparse_input_args_a, sparse_input_args_b, expected_input_layer,
      expected_sequence_length):
    sparse_input_a = sparse_tensor.SparseTensorValue(**sparse_input_args_a)
    sparse_input_b = sparse_tensor.SparseTensorValue(**sparse_input_args_b)

    vocabulary_size_a = 3
    vocabulary_size_b = 2

    categorical_column_a = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size_a)
    indicator_column_a = fc.indicator_column(categorical_column_a)
    categorical_column_b = sfc.sequence_categorical_column_with_identity(
        key='bbb', num_buckets=vocabulary_size_b)
    indicator_column_b = fc.indicator_column(categorical_column_b)
    # Test that columns are reordered alphabetically.
    sequence_input_layer = ksfc.SequenceFeatures(
        [indicator_column_b, indicator_column_a])
    input_layer, sequence_length = sequence_input_layer({
        'aaa': sparse_input_a, 'bbb': sparse_input_b})

    self.assertAllEqual(expected_input_layer, self.evaluate(input_layer))
    self.assertAllEqual(
        expected_sequence_length, self.evaluate(sequence_length))

  @test_util.run_in_graph_and_eager_modes
  def test_indicator_column_with_non_sequence_categorical(self):
    """Tests that error is raised for non-sequence categorical column."""
    vocabulary_size = 3
    sparse_input = sparse_tensor.SparseTensorValue(
        # example 0, ids [2]
        # example 1, ids [0, 1]
        indices=((0, 0), (1, 0), (1, 1)),
        values=(2, 0, 1),
        dense_shape=(2, 2))

    categorical_column_a = fc.categorical_column_with_identity(
        key='aaa', num_buckets=vocabulary_size)
    indicator_column_a = fc.indicator_column(categorical_column_a)

    sequence_input_layer = ksfc.SequenceFeatures([indicator_column_a])
    with self.assertRaisesRegexp(
        ValueError,
        r'In indicator_column: aaa_indicator\. categorical_column must be of '
        r'type SequenceCategoricalColumn to use SequenceFeatures\.'):
      _, _ = sequence_input_layer({'aaa': sparse_input})

  @parameterized.named_parameters(
      {'testcase_name': '2D',
       'sparse_input_args': {
           # example 0, values [0., 1]
           # example 1, [10.]
           'indices': ((0, 0), (0, 1), (1, 0)),
           'values': (0., 1., 10.),
           'dense_shape': (2, 2)},
       'expected_input_layer': [
           [[0.], [1.]],
           [[10.], [0.]]],
       'expected_sequence_length': [2, 1]},
      {'testcase_name': '3D',
       'sparse_input_args': {
           # feature 0, ids [[20, 3], [5]]
           # feature 1, ids [[3], [8]]
           'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)),
           'values': (20., 3., 5., 3., 8.),
           'dense_shape': (2, 2, 2)},
       'expected_input_layer': [
           [[20.], [3.], [5.], [0.]],
           [[3.], [0.], [8.], [0.]]],
       'expected_sequence_length': [2, 2]},
      )
  @test_util.run_in_graph_and_eager_modes
  def test_numeric_column(
      self, sparse_input_args, expected_input_layer, expected_sequence_length):
    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)

    numeric_column = sfc.sequence_numeric_column('aaa')

    sequence_input_layer = ksfc.SequenceFeatures([numeric_column])
    input_layer, sequence_length = sequence_input_layer({'aaa': sparse_input})

    self.assertAllEqual(expected_input_layer, self.evaluate(input_layer))
    self.assertAllEqual(
        expected_sequence_length, self.evaluate(sequence_length))

  @parameterized.named_parameters(
      {'testcase_name': '2D',
       'sparse_input_args': {
           # example 0, values [0., 1., 2., 3., 4., 5., 6., 7.]
           # example 1, [10., 11., 12., 13.]
           'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
                       (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
           'dense_shape': (2, 8)},
       'expected_input_layer': [
           # The output of numeric_column._get_dense_tensor should be flattened.
           [[0., 1., 2., 3.], [4., 5., 6., 7.]],
           [[10., 11., 12., 13.], [0., 0., 0., 0.]]],
       'expected_sequence_length': [2, 1]},
      {'testcase_name': '3D',
       'sparse_input_args': {
           # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]]
           # example 1, [[10., 11., 12., 13.], []]
           'indices': ((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
                       (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3),
                       (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)),
           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
           'dense_shape': (2, 2, 4)},
       'expected_input_layer': [
           # The output of numeric_column._get_dense_tensor should be flattened.
           [[0., 1., 2., 3.], [4., 5., 6., 7.]],
           [[10., 11., 12., 13.], [0., 0., 0., 0.]]],
       'expected_sequence_length': [2, 1]},
      )
  @test_util.run_in_graph_and_eager_modes
  def test_numeric_column_multi_dim(
      self, sparse_input_args, expected_input_layer, expected_sequence_length):
    """Tests SequenceFeatures for multi-dimensional numeric_column."""
    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)

    numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))

    sequence_input_layer = ksfc.SequenceFeatures([numeric_column])
    input_layer, sequence_length = sequence_input_layer({'aaa': sparse_input})

    self.assertAllEqual(expected_input_layer, self.evaluate(input_layer))
    self.assertAllEqual(
        expected_sequence_length, self.evaluate(sequence_length))

  @test_util.run_in_graph_and_eager_modes
  def test_sequence_length_not_equal(self):
    """Tests that an error is raised when sequence lengths are not equal."""
    # Input a with sequence_length = [2, 1]
    sparse_input_a = sparse_tensor.SparseTensorValue(
        indices=((0, 0), (0, 1), (1, 0)),
        values=(0., 1., 10.),
        dense_shape=(2, 2))
    # Input b with sequence_length = [1, 1]
    sparse_input_b = sparse_tensor.SparseTensorValue(
        indices=((0, 0), (1, 0)),
        values=(1., 10.),
        dense_shape=(2, 2))
    numeric_column_a = sfc.sequence_numeric_column('aaa')
    numeric_column_b = sfc.sequence_numeric_column('bbb')

    sequence_input_layer = ksfc.SequenceFeatures(
        [numeric_column_a, numeric_column_b])

    with self.assertRaisesRegexp(
        errors.InvalidArgumentError, r'Condition x == y did not hold.*'):
      _, sequence_length = sequence_input_layer({
          'aaa': sparse_input_a,
          'bbb': sparse_input_b
      })
      self.evaluate(sequence_length)

  @parameterized.named_parameters(
      {'testcase_name': '2D',
       'sparse_input_args': {
           # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]]
           # example 1, [[[10., 11.], [12., 13.]]]
           'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
                       (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
           'dense_shape': (2, 8)},
       'expected_shape': [2, 2, 4]},
      {'testcase_name': '3D',
       'sparse_input_args': {
           # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]]
           # example 1, [[10., 11., 12., 13.], []]
           'indices': ((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
                       (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3),
                       (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)),
           'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
           'dense_shape': (2, 2, 4)},
       'expected_shape': [2, 2, 4]},
      )
  @test_util.run_in_graph_and_eager_modes
  def test_static_shape_from_tensors_numeric(
      self, sparse_input_args, expected_shape):
    """Tests that we return a known static shape when we have one."""
    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
    numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))

    sequence_input_layer = ksfc.SequenceFeatures([numeric_column])
    input_layer, _ = sequence_input_layer({'aaa': sparse_input})
    shape = input_layer.get_shape()
    self.assertEqual(shape, expected_shape)

  @parameterized.named_parameters(
      {'testcase_name': '2D',
       'sparse_input_args': {
           # example 0, ids [2]
           # example 1, ids [0, 1]
           # example 2, ids []
           # example 3, ids [1]
           'indices': ((0, 0), (1, 0), (1, 1), (3, 0)),
           'values': (2, 0, 1, 1),
           'dense_shape': (4, 2)},
       'expected_shape': [4, 2, 3]},
      {'testcase_name': '3D',
       'sparse_input_args': {
           # example 0, ids [[2]]
           # example 1, ids [[0, 1], [2]]
           # example 2, ids []
           # example 3, ids [[1], [0, 2]]
           'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0),
                       (3, 0, 0), (3, 1, 0), (3, 1, 1)),
           'values': (2, 0, 1, 2, 1, 0, 2),
           'dense_shape': (4, 2, 2)},
       'expected_shape': [4, 2, 3]}
      )
  @test_util.run_in_graph_and_eager_modes
  def test_static_shape_from_tensors_indicator(
      self, sparse_input_args, expected_shape):
    """Tests that we return a known static shape when we have one."""
    sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args)
    categorical_column = sfc.sequence_categorical_column_with_identity(
        key='aaa', num_buckets=3)
    indicator_column = fc.indicator_column(categorical_column)

    sequence_input_layer = ksfc.SequenceFeatures([indicator_column])
    input_layer, _ = sequence_input_layer({'aaa': sparse_input})
    shape = input_layer.get_shape()
    self.assertEqual(shape, expected_shape)

  @test_util.run_in_graph_and_eager_modes
  def test_compute_output_shape(self):
    price1 = sfc.sequence_numeric_column('price1', shape=2)
    price2 = sfc.sequence_numeric_column('price2')
    features = {
        'price1': sparse_tensor.SparseTensor(
            indices=[[0, 0, 0], [0, 0, 1],
                     [0, 1, 0], [0, 1, 1],
                     [1, 0, 0], [1, 0, 1],
                     [2, 0, 0], [2, 0, 1],
                     [3, 0, 0], [3, 0, 1]],
            values=[0., 1., 10., 11., 100., 101., 200., 201., 300., 301.],
            dense_shape=(4, 3, 2)),
        'price2': sparse_tensor.SparseTensor(
            indices=[[0, 0],
                     [0, 1],
                     [1, 0],
                     [2, 0],
                     [3, 0]],
            values=[10., 11., 20., 30., 40.],
            dense_shape=(4, 3))}
    sequence_features = ksfc.SequenceFeatures([price1, price2])
    seq_input, seq_len = sequence_features(features)
    self.assertEqual(
        sequence_features.compute_output_shape((None, None)),
        (None, None, 3))
    self.evaluate(variables_lib.global_variables_initializer())
    self.evaluate(lookup_ops.tables_initializer())

    self.assertAllClose([[[0., 1., 10.], [10., 11., 11.], [0., 0., 0.]],
                         [[100., 101., 20.], [0., 0., 0.], [0., 0., 0.]],
                         [[200., 201., 30.], [0., 0., 0.], [0., 0., 0.]],
                         [[300., 301., 40.], [0., 0., 0.], [0., 0., 0.]]],
                        self.evaluate(seq_input))
    self.assertAllClose([2, 1, 1, 1], self.evaluate(seq_len))


@test_util.run_all_in_graph_and_eager_modes
class SequenceFeaturesSerializationTest(test.TestCase, parameterized.TestCase):

  @parameterized.named_parameters(('default', None, None),
                                  ('trainable', True, 'trainable'),
                                  ('not_trainable', False, 'frozen'))
  def test_get_config(self, trainable, name):
    cols = [sfc.sequence_numeric_column('a')]
    orig_layer = ksfc.SequenceFeatures(cols, trainable=trainable, name=name)
    config = orig_layer.get_config()

    self.assertEqual(config['name'], orig_layer.name)
    self.assertEqual(config['trainable'], trainable)
    self.assertLen(config['feature_columns'], 1)
    self.assertEqual(config['feature_columns'][0]['class_name'],
                     'SequenceNumericColumn')
    self.assertEqual(config['feature_columns'][0]['config']['shape'], (1,))

  @parameterized.named_parameters(('default', None, None),
                                  ('trainable', True, 'trainable'),
                                  ('not_trainable', False, 'frozen'))
  def test_from_config(self, trainable, name):
    cols = [sfc.sequence_numeric_column('a')]
    orig_layer = ksfc.SequenceFeatures(cols, trainable=trainable, name=name)
    config = orig_layer.get_config()

    new_layer = ksfc.SequenceFeatures.from_config(config)

    self.assertEqual(new_layer.name, orig_layer.name)
    self.assertEqual(new_layer.trainable, trainable)
    self.assertLen(new_layer._feature_columns, 1)
    self.assertEqual(new_layer._feature_columns[0].name, 'a')

  def test_serialization_sequence_features(self):
    rating = sfc.sequence_numeric_column('rating')
    sequence_feature = ksfc.SequenceFeatures([rating])
    config = keras.layers.serialize(sequence_feature)

    revived = keras.layers.deserialize(config)
    self.assertIsInstance(revived, ksfc.SequenceFeatures)


class SequenceFeaturesSavingTest(test.TestCase, parameterized.TestCase):

  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
  def test_saving_with_sequence_features(self):
    cols = [
        sfc.sequence_numeric_column('a'),
        fc.indicator_column(
            sfc.sequence_categorical_column_with_vocabulary_list(
                'b', ['one', 'two']))
    ]
    input_layers = {
        'a':
            keras.layers.Input(shape=(None, 1), sparse=True, name='a'),
        'b':
            keras.layers.Input(
                shape=(None, 1), sparse=True, name='b', dtype='string')
    }

    fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers)
    # TODO(tibell): Figure out the right dtype and apply masking.
    # sequence_length_mask = array_ops.sequence_mask(sequence_length)
    # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask)
    x = keras.layers.GRU(32)(fc_layer)
    output = keras.layers.Dense(10)(x)

    model = keras.models.Model(input_layers, output)

    model.compile(
        loss=keras.losses.MSE,
        optimizer='rmsprop',
        metrics=[keras.metrics.categorical_accuracy])

    config = model.to_json()
    loaded_model = model_config.model_from_json(config)

    batch_size = 10
    timesteps = 1

    values_a = np.arange(10, dtype=np.float32)
    indices_a = np.zeros((10, 3), dtype=np.int64)
    indices_a[:, 0] = np.arange(10)
    inputs_a = sparse_tensor.SparseTensor(indices_a, values_a,
                                          (batch_size, timesteps, 1))

    values_b = np.zeros(10, dtype=np.str)
    indices_b = np.zeros((10, 3), dtype=np.int64)
    indices_b[:, 0] = np.arange(10)
    inputs_b = sparse_tensor.SparseTensor(indices_b, values_b,
                                          (batch_size, timesteps, 1))

    with self.cached_session():
      # Initialize tables for V1 lookup.
      if not context.executing_eagerly():
        self.evaluate(lookup_ops.tables_initializer())

      self.assertLen(
          loaded_model.predict({
              'a': inputs_a,
              'b': inputs_b
          }, steps=1), batch_size)


if __name__ == '__main__':
  test.main()
@ -122,11 +122,13 @@ def populate_deserializable_objects():
  from tensorflow.python.keras import models  # pylint: disable=g-import-not-at-top
  from tensorflow.python.keras.premade.linear import LinearModel  # pylint: disable=g-import-not-at-top
  from tensorflow.python.keras.premade.wide_deep import WideDeepModel  # pylint: disable=g-import-not-at-top
  from tensorflow.python.keras.feature_column.sequence_feature_column import SequenceFeatures  # pylint: disable=g-import-not-at-top

  LOCAL.ALL_OBJECTS['Input'] = input_layer.Input
  LOCAL.ALL_OBJECTS['InputSpec'] = input_spec.InputSpec
  LOCAL.ALL_OBJECTS['Network'] = models.Network
  LOCAL.ALL_OBJECTS['Model'] = models.Model
  LOCAL.ALL_OBJECTS['SequenceFeatures'] = SequenceFeatures
  LOCAL.ALL_OBJECTS['Sequential'] = models.Sequential
  LOCAL.ALL_OBJECTS['LinearModel'] = LinearModel
  LOCAL.ALL_OBJECTS['WideDeepModel'] = WideDeepModel
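The registration above is what lets the public serialization round-trip resolve the Keras-owned class; a short sketch of the intended effect, mirroring the serialization test added in this change (the 'rating' column name is illustrative):

```python
import tensorflow as tf

layer = tf.keras.experimental.SequenceFeatures(
    [tf.feature_column.sequence_numeric_column('rating')])

# serialize() looks the class up in the registry populated above;
# deserialize() reconstructs the layer from the resulting config dict.
config = tf.keras.layers.serialize(layer)
revived = tf.keras.layers.deserialize(config)
assert isinstance(revived, tf.keras.experimental.SequenceFeatures)
```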
@ -165,5 +165,6 @@ class LayerSerializationTest(parameterized.TestCase, test.TestCase):
    self.assertIsInstance(new_layer, rnn_v1.GRU)
    self.assertNotIsInstance(new_layer, rnn_v2.GRU)


if __name__ == '__main__':
  test.main()
@ -1,6 +1,6 @@
path: "tensorflow.keras.experimental.SequenceFeatures"
tf_class {
-  is_instance: "<class \'tensorflow.python.feature_column.sequence_feature_column.SequenceFeatures\'>"
+  is_instance: "<class \'tensorflow.python.keras.feature_column.sequence_feature_column.SequenceFeatures\'>"
  is_instance: "<class \'tensorflow.python.feature_column.feature_column_v2._BaseFeaturesLayer\'>"
  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
  is_instance: "<class \'tensorflow.python.module.module.Module\'>"
@ -1,6 +1,6 @@
path: "tensorflow.keras.experimental.SequenceFeatures"
tf_class {
-  is_instance: "<class \'tensorflow.python.feature_column.sequence_feature_column.SequenceFeatures\'>"
+  is_instance: "<class \'tensorflow.python.keras.feature_column.sequence_feature_column.SequenceFeatures\'>"
  is_instance: "<class \'tensorflow.python.feature_column.feature_column_v2._BaseFeaturesLayer\'>"
  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
  is_instance: "<class \'tensorflow.python.module.module.Module\'>"