Embedding feature column performance optimization.
PiperOrigin-RevId: 292193767 Change-Id: I92006247b40fa0025bab6f35ac74e44ef43c2397
This commit is contained in:
parent
7db3d7abe3
commit
f1e95d1ba1
@ -821,7 +821,8 @@ def _embedding_column(categorical_column,
|
||||
ckpt_to_load_from=None,
|
||||
tensor_name_in_ckpt=None,
|
||||
max_norm=None,
|
||||
trainable=True):
|
||||
trainable=True,
|
||||
use_safe_embedding_lookup=True):
|
||||
"""`_DenseColumn` that converts from sparse, categorical input.
|
||||
|
||||
Use this when your inputs are sparse, but you want to convert them to a dense
|
||||
@ -882,6 +883,13 @@ def _embedding_column(categorical_column,
|
||||
not `None`.
|
||||
max_norm: If not `None`, embedding values are l2-normalized to this value.
|
||||
trainable: Whether or not the embedding is trainable. Default is True.
|
||||
use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
|
||||
instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
|
||||
there are no empty rows and all weights and ids are positive at the
|
||||
expense of extra compute cost. This only applies to rank 2 (NxM) shaped
|
||||
input tensors. Defaults to true, consider turning off if the above checks
|
||||
are not needed. Note that having empty rows will not trigger any error
|
||||
though the output result might be 0 or omitted.
|
||||
|
||||
Returns:
|
||||
`_DenseColumn` that converts from sparse input.
|
||||
@ -926,7 +934,8 @@ def _embedding_column(categorical_column,
|
||||
ckpt_to_load_from=ckpt_to_load_from,
|
||||
tensor_name_in_ckpt=tensor_name_in_ckpt,
|
||||
max_norm=max_norm,
|
||||
trainable=trainable)
|
||||
trainable=trainable,
|
||||
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||
|
||||
|
||||
def _numeric_column(key,
|
||||
@ -2444,9 +2453,32 @@ class _EmbeddingColumn(
|
||||
collections.namedtuple(
|
||||
'_EmbeddingColumn',
|
||||
('categorical_column', 'dimension', 'combiner', 'layer_creator',
|
||||
'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable'))):
|
||||
'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable',
|
||||
'use_safe_embedding_lookup'))):
|
||||
"""See `embedding_column`."""
|
||||
|
||||
def __new__(cls,
            categorical_column,
            dimension,
            combiner,
            layer_creator,
            ckpt_to_load_from,
            tensor_name_in_ckpt,
            max_norm,
            trainable,
            use_safe_embedding_lookup=True):
  """Creates the column tuple; `use_safe_embedding_lookup` may be omitted.

  Every argument is stored verbatim in the namedtuple field of the same
  name. `use_safe_embedding_lookup` is the only field with a default
  (`True`), so callers that do not pass it still get a fully populated tuple.
  """
  field_values = dict(
      categorical_column=categorical_column,
      dimension=dimension,
      combiner=combiner,
      layer_creator=layer_creator,
      ckpt_to_load_from=ckpt_to_load_from,
      tensor_name_in_ckpt=tensor_name_in_ckpt,
      max_norm=max_norm,
      trainable=trainable,
      use_safe_embedding_lookup=use_safe_embedding_lookup)
  return super(_EmbeddingColumn, cls).__new__(cls, **field_values)
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
if not hasattr(self, '_name'):
|
||||
@ -2489,11 +2521,17 @@ class _EmbeddingColumn(
|
||||
self.tensor_name_in_ckpt: to_restore
|
||||
})
|
||||
|
||||
sparse_id_rank = tensor_shape.dimension_value(
|
||||
sparse_ids.dense_shape.get_shape()[0])
|
||||
embedding_lookup_sparse = embedding_ops.safe_embedding_lookup_sparse
|
||||
if (not self.use_safe_embedding_lookup and sparse_id_rank is not None and
|
||||
sparse_id_rank <= 2):
|
||||
embedding_lookup_sparse = embedding_ops.embedding_lookup_sparse
|
||||
# Return embedding lookup result.
|
||||
return embedding_ops.safe_embedding_lookup_sparse(
|
||||
embedding_weights=embedding_weights,
|
||||
sparse_ids=sparse_ids,
|
||||
sparse_weights=sparse_weights,
|
||||
return embedding_lookup_sparse(
|
||||
embedding_weights,
|
||||
sparse_ids,
|
||||
sparse_weights,
|
||||
combiner=self.combiner,
|
||||
name='%s_weights' % self.name,
|
||||
max_norm=self.max_norm)
|
||||
@ -2551,7 +2589,8 @@ class _SharedEmbeddingColumn(
|
||||
'_SharedEmbeddingColumn',
|
||||
('categorical_column', 'dimension', 'combiner', 'initializer',
|
||||
'shared_embedding_collection_name', 'ckpt_to_load_from',
|
||||
'tensor_name_in_ckpt', 'max_norm', 'trainable'))):
|
||||
'tensor_name_in_ckpt', 'max_norm', 'trainable',
|
||||
'use_safe_embedding_lookup'))):
|
||||
"""See `embedding_column`."""
|
||||
|
||||
@property
|
||||
@ -2632,11 +2671,17 @@ class _SharedEmbeddingColumn(
|
||||
self.tensor_name_in_ckpt: to_restore
|
||||
})
|
||||
|
||||
sparse_id_rank = tensor_shape.dimension_value(
|
||||
sparse_ids.dense_shape.get_shape()[0])
|
||||
embedding_lookup_sparse = embedding_ops.safe_embedding_lookup_sparse
|
||||
if (not self.use_safe_embedding_lookup and sparse_id_rank is not None and
|
||||
sparse_id_rank <= 2):
|
||||
embedding_lookup_sparse = embedding_ops.embedding_lookup_sparse
|
||||
# Return embedding lookup result.
|
||||
return embedding_ops.safe_embedding_lookup_sparse(
|
||||
embedding_weights=embedding_weights,
|
||||
sparse_ids=sparse_ids,
|
||||
sparse_weights=sparse_weights,
|
||||
return embedding_lookup_sparse(
|
||||
embedding_weights,
|
||||
sparse_ids,
|
||||
sparse_weights,
|
||||
combiner=self.combiner,
|
||||
name='%s_weights' % self.name,
|
||||
max_norm=self.max_norm)
|
||||
|
@ -850,7 +850,8 @@ def embedding_column(categorical_column,
|
||||
ckpt_to_load_from=None,
|
||||
tensor_name_in_ckpt=None,
|
||||
max_norm=None,
|
||||
trainable=True):
|
||||
trainable=True,
|
||||
use_safe_embedding_lookup=True):
|
||||
"""`DenseColumn` that converts from sparse, categorical input.
|
||||
|
||||
Use this when your inputs are sparse, but you want to convert them to a dense
|
||||
@ -911,6 +912,13 @@ def embedding_column(categorical_column,
|
||||
`None`.
|
||||
max_norm: If not `None`, embedding values are l2-normalized to this value.
|
||||
trainable: Whether or not the embedding is trainable. Default is True.
|
||||
use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
|
||||
instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
|
||||
there are no empty rows and all weights and ids are positive at the
|
||||
expense of extra compute cost. This only applies to rank 2 (NxM) shaped
|
||||
input tensors. Defaults to true, consider turning off if the above checks
|
||||
are not needed. Note that having empty rows will not trigger any error
|
||||
though the output result might be 0 or omitted.
|
||||
|
||||
Returns:
|
||||
`DenseColumn` that converts from sparse input.
|
||||
@ -944,7 +952,8 @@ def embedding_column(categorical_column,
|
||||
ckpt_to_load_from=ckpt_to_load_from,
|
||||
tensor_name_in_ckpt=tensor_name_in_ckpt,
|
||||
max_norm=max_norm,
|
||||
trainable=trainable)
|
||||
trainable=trainable,
|
||||
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||
|
||||
|
||||
@tf_export(v1=['feature_column.shared_embedding_columns'])
|
||||
@ -956,7 +965,8 @@ def shared_embedding_columns(categorical_columns,
|
||||
ckpt_to_load_from=None,
|
||||
tensor_name_in_ckpt=None,
|
||||
max_norm=None,
|
||||
trainable=True):
|
||||
trainable=True,
|
||||
use_safe_embedding_lookup=True):
|
||||
"""List of dense columns that convert from sparse, categorical input.
|
||||
|
||||
This is similar to `embedding_column`, except that it produces a list of
|
||||
@ -1039,6 +1049,13 @@ def shared_embedding_columns(categorical_columns,
|
||||
max_norm: If not `None`, each embedding is clipped if its l2-norm is larger
|
||||
than this value, before combining.
|
||||
trainable: Whether or not the embedding is trainable. Default is True.
|
||||
use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
|
||||
instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
|
||||
there are no empty rows and all weights and ids are positive at the
|
||||
expense of extra compute cost. This only applies to rank 2 (NxM) shaped
|
||||
input tensors. Defaults to true, consider turning off if the above checks
|
||||
are not needed. Note that having empty rows will not trigger any error
|
||||
though the output result might be 0 or omitted.
|
||||
|
||||
Returns:
|
||||
A list of dense columns that converts from sparse input. The order of
|
||||
@ -1117,7 +1134,8 @@ def shared_embedding_columns(categorical_columns,
|
||||
ckpt_to_load_from=ckpt_to_load_from,
|
||||
tensor_name_in_ckpt=tensor_name_in_ckpt,
|
||||
max_norm=max_norm,
|
||||
trainable=trainable))
|
||||
trainable=trainable,
|
||||
use_safe_embedding_lookup=use_safe_embedding_lookup))
|
||||
|
||||
return result
|
||||
|
||||
@ -1131,7 +1149,8 @@ def shared_embedding_columns_v2(categorical_columns,
|
||||
ckpt_to_load_from=None,
|
||||
tensor_name_in_ckpt=None,
|
||||
max_norm=None,
|
||||
trainable=True):
|
||||
trainable=True,
|
||||
use_safe_embedding_lookup=True):
|
||||
"""List of dense columns that convert from sparse, categorical input.
|
||||
|
||||
This is similar to `embedding_column`, except that it produces a list of
|
||||
@ -1213,6 +1232,13 @@ def shared_embedding_columns_v2(categorical_columns,
|
||||
max_norm: If not `None`, each embedding is clipped if its l2-norm is
|
||||
larger than this value, before combining.
|
||||
trainable: Whether or not the embedding is trainable. Default is True.
|
||||
use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
|
||||
instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
|
||||
there are no empty rows and all weights and ids are positive at the
|
||||
expense of extra compute cost. This only applies to rank 2 (NxM) shaped
|
||||
input tensors. Defaults to true, consider turning off if the above checks
|
||||
are not needed. Note that having empty rows will not trigger any error
|
||||
though the output result might be 0 or omitted.
|
||||
|
||||
Returns:
|
||||
A list of dense columns that converts from sparse input. The order of
|
||||
@ -1277,7 +1303,8 @@ def shared_embedding_columns_v2(categorical_columns,
|
||||
|
||||
column_creator = SharedEmbeddingColumnCreator(
|
||||
dimension, initializer, ckpt_to_load_from, tensor_name_in_ckpt,
|
||||
num_buckets, trainable, shared_embedding_collection_name)
|
||||
num_buckets, trainable, shared_embedding_collection_name,
|
||||
use_safe_embedding_lookup)
|
||||
|
||||
result = []
|
||||
for column in categorical_columns:
|
||||
@ -3082,9 +3109,32 @@ class EmbeddingColumn(
|
||||
collections.namedtuple(
|
||||
'EmbeddingColumn',
|
||||
('categorical_column', 'dimension', 'combiner', 'initializer',
|
||||
'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable'))):
|
||||
'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable',
|
||||
'use_safe_embedding_lookup'))):
|
||||
"""See `embedding_column`."""
|
||||
|
||||
def __new__(cls,
            categorical_column,
            dimension,
            combiner,
            initializer,
            ckpt_to_load_from,
            tensor_name_in_ckpt,
            max_norm,
            trainable,
            use_safe_embedding_lookup=True):
  """Creates the column tuple; `use_safe_embedding_lookup` may be omitted.

  Every argument is stored verbatim in the namedtuple field of the same
  name. `use_safe_embedding_lookup` is the only field with a default
  (`True`), so callers that do not pass it still get a fully populated tuple.
  """
  field_values = dict(
      categorical_column=categorical_column,
      dimension=dimension,
      combiner=combiner,
      initializer=initializer,
      ckpt_to_load_from=ckpt_to_load_from,
      tensor_name_in_ckpt=tensor_name_in_ckpt,
      max_norm=max_norm,
      trainable=trainable,
      use_safe_embedding_lookup=use_safe_embedding_lookup)
  return super(EmbeddingColumn, cls).__new__(cls, **field_values)
|
||||
|
||||
@property
|
||||
def _is_v2_column(self):
|
||||
return (isinstance(self.categorical_column, FeatureColumn) and
|
||||
@ -3156,11 +3206,17 @@ class EmbeddingColumn(
|
||||
self.tensor_name_in_ckpt: to_restore
|
||||
})
|
||||
|
||||
sparse_id_rank = tensor_shape.dimension_value(
|
||||
sparse_ids.dense_shape.get_shape()[0])
|
||||
embedding_lookup_sparse = embedding_ops.safe_embedding_lookup_sparse
|
||||
if (not self.use_safe_embedding_lookup and sparse_id_rank is not None and
|
||||
sparse_id_rank <= 2):
|
||||
embedding_lookup_sparse = embedding_ops.embedding_lookup_sparse
|
||||
# Return embedding lookup result.
|
||||
return embedding_ops.safe_embedding_lookup_sparse(
|
||||
embedding_weights=embedding_weights,
|
||||
sparse_ids=sparse_ids,
|
||||
sparse_weights=sparse_weights,
|
||||
return embedding_lookup_sparse(
|
||||
embedding_weights,
|
||||
sparse_ids,
|
||||
sparse_weights,
|
||||
combiner=self.combiner,
|
||||
name='%s_weights' % self.name,
|
||||
max_norm=self.max_norm)
|
||||
@ -3301,6 +3357,8 @@ class EmbeddingColumn(
|
||||
@classmethod
|
||||
def from_config(cls, config, custom_objects=None, columns_by_name=None):
|
||||
"""See 'FeatureColumn` base class."""
|
||||
if 'use_safe_embedding_lookup' not in config:
|
||||
config['use_safe_embedding_lookup'] = True
|
||||
from tensorflow.python.feature_column.serialization import deserialize_feature_column # pylint: disable=g-import-not-at-top
|
||||
_check_config_keys(config, cls._fields)
|
||||
kwargs = _standardize_and_copy_config(config)
|
||||
@ -3326,7 +3384,8 @@ class SharedEmbeddingColumnCreator(tracking.AutoTrackable):
|
||||
tensor_name_in_ckpt,
|
||||
num_buckets,
|
||||
trainable,
|
||||
name='shared_embedding_column_creator'):
|
||||
name='shared_embedding_column_creator',
|
||||
use_safe_embedding_lookup=True):
|
||||
self._dimension = dimension
|
||||
self._initializer = initializer
|
||||
self._ckpt_to_load_from = ckpt_to_load_from
|
||||
@ -3334,11 +3393,13 @@ class SharedEmbeddingColumnCreator(tracking.AutoTrackable):
|
||||
self._num_buckets = num_buckets
|
||||
self._trainable = trainable
|
||||
self._name = name
|
||||
self._use_safe_embedding_lookup = use_safe_embedding_lookup
|
||||
# Map from graph keys to embedding_weight variables.
|
||||
self._embedding_weights = {}
|
||||
|
||||
def __call__(self, categorical_column, combiner, max_norm):
  """Builds a `SharedEmbeddingColumn` bound to this creator.

  Args:
    categorical_column: The categorical column the new column wraps.
    combiner: Passed through to the `SharedEmbeddingColumn`.
    max_norm: Passed through to the `SharedEmbeddingColumn`.

  Returns:
    A `SharedEmbeddingColumn` that references this creator and carries the
    creator's `use_safe_embedding_lookup` setting.
  """
  # The creator-level flag must be forwarded explicitly; otherwise the column
  # falls back to its own default (True) and the caller's choice is lost.
  return SharedEmbeddingColumn(categorical_column, self, combiner, max_norm,
                               self._use_safe_embedding_lookup)
|
||||
|
||||
@property
|
||||
def embedding_weights(self):
|
||||
@ -3374,9 +3435,23 @@ class SharedEmbeddingColumn(
|
||||
collections.namedtuple(
|
||||
'SharedEmbeddingColumn',
|
||||
('categorical_column', 'shared_embedding_column_creator', 'combiner',
|
||||
'max_norm'))):
|
||||
'max_norm', 'use_safe_embedding_lookup'))):
|
||||
"""See `embedding_column`."""
|
||||
|
||||
def __new__(cls,
            categorical_column,
            shared_embedding_column_creator,
            combiner,
            max_norm,
            use_safe_embedding_lookup=True):
  """Creates the column tuple; `use_safe_embedding_lookup` may be omitted.

  Every argument is stored verbatim in the namedtuple field of the same
  name. `use_safe_embedding_lookup` is the only field with a default
  (`True`), so callers that do not pass it still get a fully populated tuple.
  """
  field_values = dict(
      categorical_column=categorical_column,
      shared_embedding_column_creator=shared_embedding_column_creator,
      combiner=combiner,
      max_norm=max_norm,
      use_safe_embedding_lookup=use_safe_embedding_lookup)
  return super(SharedEmbeddingColumn, cls).__new__(cls, **field_values)
|
||||
|
||||
@property
|
||||
def _is_v2_column(self):
|
||||
return True
|
||||
@ -3426,11 +3501,17 @@ class SharedEmbeddingColumn(
|
||||
|
||||
embedding_weights = self.shared_embedding_column_creator.embedding_weights
|
||||
|
||||
sparse_id_rank = tensor_shape.dimension_value(
|
||||
sparse_ids.dense_shape.get_shape()[0])
|
||||
embedding_lookup_sparse = embedding_ops.safe_embedding_lookup_sparse
|
||||
if (not self.use_safe_embedding_lookup and sparse_id_rank is not None and
|
||||
sparse_id_rank <= 2):
|
||||
embedding_lookup_sparse = (embedding_ops.embedding_lookup_sparse)
|
||||
# Return embedding lookup result.
|
||||
return embedding_ops.safe_embedding_lookup_sparse(
|
||||
embedding_weights=embedding_weights,
|
||||
sparse_ids=sparse_ids,
|
||||
sparse_weights=sparse_weights,
|
||||
return embedding_lookup_sparse(
|
||||
embedding_weights,
|
||||
sparse_ids,
|
||||
sparse_weights,
|
||||
combiner=self.combiner,
|
||||
name='%s_weights' % self.name,
|
||||
max_norm=self.max_norm)
|
||||
|
@ -21,6 +21,7 @@ from __future__ import print_function
|
||||
import collections
|
||||
import copy
|
||||
|
||||
from absl.testing import parameterized
|
||||
import numpy as np
|
||||
|
||||
from tensorflow.core.example import example_pb2
|
||||
@ -5704,7 +5705,7 @@ class _TestStateManager(fc.StateManager):
|
||||
raise ValueError('Could not find variable.')
|
||||
|
||||
|
||||
class EmbeddingColumnTest(test.TestCase):
|
||||
class EmbeddingColumnTest(test.TestCase, parameterized.TestCase):
|
||||
|
||||
@test_util.run_deprecated_v1
|
||||
def test_defaults(self):
|
||||
@ -6272,8 +6273,16 @@ class EmbeddingColumnTest(test.TestCase):
|
||||
self.assertAllClose(((94.,), (29.,), (0.,), (42.,)),
|
||||
self.evaluate(predictions))
|
||||
|
||||
@parameterized.named_parameters(
|
||||
{
|
||||
'testcase_name': 'use_safe_embedding_lookup',
|
||||
'use_safe_embedding_lookup': True
|
||||
}, {
|
||||
'testcase_name': 'dont_use_safe_embedding_lookup',
|
||||
'use_safe_embedding_lookup': False
|
||||
})
|
||||
@test_util.run_deprecated_v1
|
||||
def test_dense_features(self):
|
||||
def test_dense_features(self, use_safe_embedding_lookup):
|
||||
# Inputs.
|
||||
vocabulary_size = 3
|
||||
sparse_input = sparse_tensor.SparseTensorValue(
|
||||
@ -6317,7 +6326,8 @@ class EmbeddingColumnTest(test.TestCase):
|
||||
embedding_column = fc.embedding_column(
|
||||
categorical_column,
|
||||
dimension=embedding_dimension,
|
||||
initializer=_initializer)
|
||||
initializer=_initializer,
|
||||
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||
|
||||
# Provide sparse input and get dense result.
|
||||
l = df.DenseFeatures((embedding_column,))
|
||||
@ -6339,6 +6349,14 @@ class EmbeddingColumnTest(test.TestCase):
|
||||
self.assertAllEqual(embedding_values, self.evaluate(trainable_vars[0]))
|
||||
self.assertAllEqual(expected_lookups, self.evaluate(dense_features))
|
||||
|
||||
if use_safe_embedding_lookup:
|
||||
self.assertIn('SparseFillEmptyRows',
|
||||
[x.type for x in ops.get_default_graph().get_operations()])
|
||||
else:
|
||||
self.assertNotIn(
|
||||
'SparseFillEmptyRows',
|
||||
[x.type for x in ops.get_default_graph().get_operations()])
|
||||
|
||||
@test_util.run_deprecated_v1
|
||||
def test_dense_features_not_trainable(self):
|
||||
# Inputs.
|
||||
@ -6646,31 +6664,33 @@ class EmbeddingColumnTest(test.TestCase):
|
||||
self.assertEqual([categorical_column], embedding_column.parents)
|
||||
|
||||
config = embedding_column.get_config()
|
||||
self.assertEqual({
|
||||
'categorical_column': {
|
||||
'class_name': 'IdentityCategoricalColumn',
|
||||
'config': {
|
||||
'number_buckets': 3,
|
||||
'key': 'aaa',
|
||||
'default_value': None
|
||||
}
|
||||
},
|
||||
'ckpt_to_load_from': None,
|
||||
'combiner': 'mean',
|
||||
'dimension': 2,
|
||||
'initializer': {
|
||||
'class_name': 'TruncatedNormal',
|
||||
'config': {
|
||||
'dtype': 'float32',
|
||||
'stddev': 0.7071067811865475,
|
||||
'seed': None,
|
||||
'mean': 0.0
|
||||
}
|
||||
},
|
||||
'max_norm': None,
|
||||
'tensor_name_in_ckpt': None,
|
||||
'trainable': True
|
||||
}, config)
|
||||
self.assertEqual(
|
||||
{
|
||||
'categorical_column': {
|
||||
'class_name': 'IdentityCategoricalColumn',
|
||||
'config': {
|
||||
'number_buckets': 3,
|
||||
'key': 'aaa',
|
||||
'default_value': None
|
||||
}
|
||||
},
|
||||
'ckpt_to_load_from': None,
|
||||
'combiner': 'mean',
|
||||
'dimension': 2,
|
||||
'initializer': {
|
||||
'class_name': 'TruncatedNormal',
|
||||
'config': {
|
||||
'dtype': 'float32',
|
||||
'stddev': 0.7071067811865475,
|
||||
'seed': None,
|
||||
'mean': 0.0
|
||||
}
|
||||
},
|
||||
'max_norm': None,
|
||||
'tensor_name_in_ckpt': None,
|
||||
'trainable': True,
|
||||
'use_safe_embedding_lookup': True
|
||||
}, config)
|
||||
|
||||
custom_objects = {'TruncatedNormal': init_ops.TruncatedNormal}
|
||||
new_embedding_column = fc.EmbeddingColumn.from_config(
|
||||
@ -6707,28 +6727,33 @@ class EmbeddingColumnTest(test.TestCase):
|
||||
self.assertEqual([categorical_column], embedding_column.parents)
|
||||
|
||||
config = embedding_column.get_config()
|
||||
self.assertEqual({
|
||||
'categorical_column': {
|
||||
'class_name': 'IdentityCategoricalColumn',
|
||||
'config': {
|
||||
'number_buckets': 3,
|
||||
'key': 'aaa',
|
||||
'default_value': None
|
||||
}
|
||||
},
|
||||
'ckpt_to_load_from': None,
|
||||
'combiner': 'mean',
|
||||
'dimension': 2,
|
||||
'initializer': '_initializer',
|
||||
'max_norm': None,
|
||||
'tensor_name_in_ckpt': None,
|
||||
'trainable': True
|
||||
}, config)
|
||||
self.assertEqual(
|
||||
{
|
||||
'categorical_column': {
|
||||
'class_name': 'IdentityCategoricalColumn',
|
||||
'config': {
|
||||
'number_buckets': 3,
|
||||
'key': 'aaa',
|
||||
'default_value': None
|
||||
}
|
||||
},
|
||||
'ckpt_to_load_from': None,
|
||||
'combiner': 'mean',
|
||||
'dimension': 2,
|
||||
'initializer': '_initializer',
|
||||
'max_norm': None,
|
||||
'tensor_name_in_ckpt': None,
|
||||
'trainable': True,
|
||||
'use_safe_embedding_lookup': True
|
||||
}, config)
|
||||
|
||||
custom_objects = {
|
||||
'_initializer': _initializer,
|
||||
}
|
||||
|
||||
# use_safe_embedding_lookup might not be populated for legacy reasons.
|
||||
del config['use_safe_embedding_lookup']
|
||||
|
||||
new_embedding_column = fc.EmbeddingColumn.from_config(
|
||||
config, custom_objects=custom_objects)
|
||||
self.assertEqual(embedding_column, new_embedding_column)
|
||||
@ -6746,7 +6771,7 @@ class EmbeddingColumnTest(test.TestCase):
|
||||
self.assertIs(categorical_column, new_embedding_column.categorical_column)
|
||||
|
||||
|
||||
class SharedEmbeddingColumnTest(test.TestCase):
|
||||
class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase):
|
||||
|
||||
@test_util.run_deprecated_v1
|
||||
def test_defaults(self):
|
||||
@ -6952,8 +6977,16 @@ class SharedEmbeddingColumnTest(test.TestCase):
|
||||
_assert_sparse_tensor_value(self, self.evaluate(output_b),
|
||||
self.evaluate(output_b_embedded))
|
||||
|
||||
@parameterized.named_parameters(
|
||||
{
|
||||
'testcase_name': 'use_safe_embedding_lookup',
|
||||
'use_safe_embedding_lookup': True
|
||||
}, {
|
||||
'testcase_name': 'dont_use_safe_embedding_lookup',
|
||||
'use_safe_embedding_lookup': False
|
||||
})
|
||||
@test_util.run_deprecated_v1
|
||||
def test_get_dense_tensor(self):
|
||||
def test_get_dense_tensor(self, use_safe_embedding_lookup):
|
||||
# Inputs.
|
||||
vocabulary_size = 3
|
||||
# -1 values are ignored.
|
||||
@ -6988,12 +7021,18 @@ class SharedEmbeddingColumnTest(test.TestCase):
|
||||
# example 1:
|
||||
(2., 3.5), # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
|
||||
)
|
||||
expected_lookups_b = (
|
||||
# example 0:
|
||||
(1., 2.), # ids [0], embedding = [1, 2]
|
||||
# example 1:
|
||||
(0., 0.), # ids [], embedding = [0, 0]
|
||||
)
|
||||
if use_safe_embedding_lookup:
|
||||
expected_lookups_b = (
|
||||
# example 0:
|
||||
(1., 2.), # ids [0], embedding = [1, 2]
|
||||
# example 1:
|
||||
(0., 0.), # ids [], embedding = [0, 0]
|
||||
)
|
||||
else:
|
||||
expected_lookups_b = (
|
||||
# example 0:
|
||||
(1., 2.), # ids [0], embedding = [1, 2]
|
||||
)
|
||||
|
||||
# Build columns.
|
||||
categorical_column_a = fc.categorical_column_with_identity(
|
||||
@ -7003,7 +7042,8 @@ class SharedEmbeddingColumnTest(test.TestCase):
|
||||
embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2(
|
||||
[categorical_column_a, categorical_column_b],
|
||||
dimension=embedding_dimension,
|
||||
initializer=_initializer)
|
||||
initializer=_initializer,
|
||||
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||
|
||||
# Provide sparse input and get dense result.
|
||||
embedding_lookup_a = embedding_column_a.get_dense_tensor(
|
||||
@ -7024,8 +7064,112 @@ class SharedEmbeddingColumnTest(test.TestCase):
|
||||
self.assertAllEqual(expected_lookups_a, self.evaluate(embedding_lookup_a))
|
||||
self.assertAllEqual(expected_lookups_b, self.evaluate(embedding_lookup_b))
|
||||
|
||||
if use_safe_embedding_lookup:
|
||||
self.assertIn('SparseFillEmptyRows',
|
||||
[x.type for x in ops.get_default_graph().get_operations()])
|
||||
else:
|
||||
self.assertNotIn(
|
||||
'SparseFillEmptyRows',
|
||||
[x.type for x in ops.get_default_graph().get_operations()])
|
||||
|
||||
@parameterized.named_parameters(
    {
        'testcase_name': 'use_safe_embedding_lookup',
        'use_safe_embedding_lookup': True
    }, {
        'testcase_name': 'dont_use_safe_embedding_lookup',
        'use_safe_embedding_lookup': False
    })
@test_util.run_deprecated_v1
def test_get_dense_tensor_valid(self, use_safe_embedding_lookup):
  """Checks shared embedding lookups when every example has a valid id.

  Both inputs contain at least one non-negative id per row, so the expected
  lookups are the same for either setting of `use_safe_embedding_lookup`.
  The test also asserts that a `SparseFillEmptyRows` op is present in the
  graph exactly when `use_safe_embedding_lookup` is True.

  Args:
    use_safe_embedding_lookup: Value forwarded to
      `fc.shared_embedding_columns_v2`.
  """
  # Inputs.
  vocabulary_size = 3
  # -1 values are ignored.
  input_a = np.array([
      [2, 1],  # example 0, ids [2, 1]
      [0, -1]  # example 1, ids [0]
  ])
  input_b = np.array([
      [1, -1],  # example 0, ids [1]
      [1, 2]  # example 1, ids [1, 2]
  ])
  input_features = {'aaa': input_a, 'bbb': input_b}

  # Embedding variable.
  embedding_dimension = 2
  embedding_values = (
      (1., 2.),  # id 0
      (3., 5.),  # id 1
      (7., 11.)  # id 2
  )

  def _initializer(shape, dtype, partition_info=None):
    # Validates the arguments the column passes before returning fixed values.
    self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
    self.assertEqual(dtypes.float32, dtype)
    self.assertIsNone(partition_info)
    return embedding_values

  # Expected lookup result, using combiner='mean'.
  expected_lookups_a = (
      # example 0:
      (5., 8.),  # ids [2, 1], embedding = mean([3, 5] + [7, 11]) = [5, 8]
      # example 1:
      (1., 2.),  # ids [0], embedding = [1, 2]
  )
  expected_lookups_b = (
      # example 0:
      (3., 5.),  # ids [1], embedding = [3, 5]
      # example 1:
      (5., 8.),  # ids [1, 2], embedding = mean([3, 5] + [7, 11]) = [5, 8]
  )

  # Build columns.
  categorical_column_a = fc.categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  categorical_column_b = fc.categorical_column_with_identity(
      key='bbb', num_buckets=vocabulary_size)
  embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2(
      [categorical_column_a, categorical_column_b],
      dimension=embedding_dimension,
      initializer=_initializer,
      use_safe_embedding_lookup=use_safe_embedding_lookup)

  # Provide sparse input and get dense result.
  embedding_lookup_a = embedding_column_a.get_dense_tensor(
      fc.FeatureTransformationCache(input_features), None)
  embedding_lookup_b = embedding_column_b.get_dense_tensor(
      fc.FeatureTransformationCache(input_features), None)

  # Assert expected embedding variable and lookups.
  global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
  self.assertCountEqual(('aaa_bbb_shared_embedding:0',),
                        tuple([v.name for v in global_vars]))
  embedding_var = global_vars[0]

  self.evaluate(variables_lib.global_variables_initializer())
  self.evaluate(lookup_ops.tables_initializer())

  self.assertAllEqual(embedding_values, self.evaluate(embedding_var))
  self.assertAllEqual(expected_lookups_a, self.evaluate(embedding_lookup_a))
  self.assertAllEqual(expected_lookups_b, self.evaluate(embedding_lookup_b))

  # The presence of SparseFillEmptyRows in the graph is used as the marker
  # that the safe lookup path was taken.
  graph_op_types = [
      x.type for x in ops.get_default_graph().get_operations()
  ]
  if use_safe_embedding_lookup:
    self.assertIn('SparseFillEmptyRows', graph_op_types)
  else:
    self.assertNotIn('SparseFillEmptyRows', graph_op_types)
|
||||
|
||||
@parameterized.named_parameters(
|
||||
{
|
||||
'testcase_name': 'use_safe_embedding_lookup',
|
||||
'use_safe_embedding_lookup': True
|
||||
}, {
|
||||
'testcase_name': 'dont_use_safe_embedding_lookup',
|
||||
'use_safe_embedding_lookup': False
|
||||
})
|
||||
@test_util.run_deprecated_v1
|
||||
def test_get_dense_tensor_placeholder_inputs(self, use_safe_embedding_lookup):
|
||||
# Inputs.
|
||||
vocabulary_size = 3
|
||||
# -1 values are ignored.
|
||||
@ -7073,13 +7217,21 @@ class SharedEmbeddingColumnTest(test.TestCase):
|
||||
embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2(
|
||||
[categorical_column_a, categorical_column_b],
|
||||
dimension=embedding_dimension,
|
||||
initializer=_initializer)
|
||||
initializer=_initializer,
|
||||
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||
|
||||
# Provide sparse input and get dense result.
|
||||
embedding_lookup_a = embedding_column_a.get_dense_tensor(
|
||||
fc.FeatureTransformationCache(input_features), None)
|
||||
embedding_lookup_b = embedding_column_b.get_dense_tensor(
|
||||
fc.FeatureTransformationCache(input_features), None)
|
||||
if use_safe_embedding_lookup:
|
||||
self.assertIn('SparseFillEmptyRows',
|
||||
[x.type for x in ops.get_default_graph().get_operations()])
|
||||
else:
|
||||
self.assertNotIn(
|
||||
'SparseFillEmptyRows',
|
||||
[x.type for x in ops.get_default_graph().get_operations()])
|
||||
|
||||
with _initialized_session() as sess:
|
||||
sess.run([embedding_lookup_a, embedding_lookup_b], feed_dict=feed_dict)
|
||||
|
@ -57,7 +57,8 @@ def embedding_column(categorical_column,
|
||||
combiner='mean',
|
||||
initializer=None,
|
||||
max_sequence_length=0,
|
||||
learning_rate_fn=None):
|
||||
learning_rate_fn=None,
|
||||
use_safe_embedding_lookup=True):
|
||||
"""TPU embedding_column for `tf.feature_column.embedding_column`.
|
||||
|
||||
Note that the interface for TPU embedding_column is different from the non-TPU
|
||||
@ -86,6 +87,13 @@ def embedding_column(categorical_column,
|
||||
sequence features and 0 for non-sequence features.
|
||||
learning_rate_fn: A function that takes global step and returns learning
|
||||
rate for the embedding table.
|
||||
use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
|
||||
instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
|
||||
there are no empty rows and all weights and ids are positive at the
|
||||
expense of extra compute cost. This only applies to rank 2 (NxM) shaped
|
||||
input tensors. Defaults to true, consider turning off if the above checks
|
||||
are not needed. Note that having empty rows will not trigger any error
|
||||
though the output result might be 0 or omitted.
|
||||
|
||||
Returns:
|
||||
A _TPUEmbeddingColumn.
|
||||
@ -137,7 +145,8 @@ def embedding_column(categorical_column,
|
||||
max_norm=None,
|
||||
trainable=True,
|
||||
max_sequence_length=max_sequence_length,
|
||||
learning_rate_fn=learning_rate_fn)
|
||||
learning_rate_fn=learning_rate_fn,
|
||||
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||
# For Embedding column, the initializer is hidden inside the creator Fn, which
|
||||
# is not accessible later. So, we attach it to a special field. Also note
|
||||
# that non-TPU Embedding column and non-TPU shared Embedding column handle the
|
||||
@ -152,7 +161,8 @@ def shared_embedding_columns(categorical_columns,
|
||||
initializer=None,
|
||||
shared_embedding_collection_name=None,
|
||||
max_sequence_lengths=None,
|
||||
learning_rate_fn=None):
|
||||
learning_rate_fn=None,
|
||||
use_safe_embedding_lookup=True):
|
||||
"""List of dense columns that convert from sparse, categorical input.
|
||||
|
||||
Note that the interface for TPU embedding_column is different from the non-TPU
|
||||
@ -187,6 +197,13 @@ def shared_embedding_columns(categorical_columns,
|
||||
sequence longer will be truncated.
|
||||
learning_rate_fn: A function that takes global step and returns learning
|
||||
rate for the embedding table.
|
||||
use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
|
||||
instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
|
||||
there are no empty rows and all weights and ids are positive at the
|
||||
expense of extra compute cost. This only applies to rank 2 (NxM) shaped
|
||||
input tensors. Defaults to true, consider turning off if the above checks
|
||||
are not needed. Note that having empty rows will not trigger any error
|
||||
though the output result might be 0 or omitted.
|
||||
|
||||
Returns:
|
||||
A _TPUEmbeddingColumn.
|
||||
@ -261,7 +278,8 @@ def shared_embedding_columns(categorical_columns,
|
||||
max_norm=None,
|
||||
trainable=True,
|
||||
max_sequence_length=max_sequence_length,
|
||||
learning_rate_fn=learning_rate_fn)
|
||||
learning_rate_fn=learning_rate_fn,
|
||||
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||
tpu_columns.append(column)
|
||||
|
||||
return tpu_columns
|
||||
@ -347,7 +365,8 @@ class _TPUEmbeddingColumn(_TPUBaseEmbeddingColumn, fc._EmbeddingColumn):
|
||||
max_norm=None,
|
||||
trainable=True,
|
||||
max_sequence_length=0,
|
||||
learning_rate_fn=None):
|
||||
learning_rate_fn=None,
|
||||
use_safe_embedding_lookup=True):
|
||||
# Note, args ckpt_to_load_from, tensor_name_in_ckpt, max_norm and trainable
|
||||
# are not supported on TPU. They are solely for matching the signature of
|
||||
# __new__ of parent class fc._EmbeddingColumn.
|
||||
@ -360,7 +379,8 @@ class _TPUEmbeddingColumn(_TPUBaseEmbeddingColumn, fc._EmbeddingColumn):
|
||||
ckpt_to_load_from=ckpt_to_load_from,
|
||||
tensor_name_in_ckpt=tensor_name_in_ckpt,
|
||||
max_norm=max_norm,
|
||||
trainable=trainable)
|
||||
trainable=trainable,
|
||||
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||
|
||||
def __init__(self,
|
||||
categorical_column,
|
||||
@ -372,7 +392,8 @@ class _TPUEmbeddingColumn(_TPUBaseEmbeddingColumn, fc._EmbeddingColumn):
|
||||
max_norm=None,
|
||||
trainable=True,
|
||||
max_sequence_length=0,
|
||||
learning_rate_fn=None):
|
||||
learning_rate_fn=None,
|
||||
use_safe_embedding_lookup=True):
|
||||
_TPUBaseEmbeddingColumn.__init__(
|
||||
self,
|
||||
categorical_column,
|
||||
@ -479,7 +500,8 @@ class _TPUSharedEmbeddingColumn(_TPUBaseEmbeddingColumn,
|
||||
max_norm=None,
|
||||
trainable=True,
|
||||
max_sequence_length=0,
|
||||
learning_rate_fn=None):
|
||||
learning_rate_fn=None,
|
||||
use_safe_embedding_lookup=True):
|
||||
return fc._SharedEmbeddingColumn.__new__(
|
||||
cls,
|
||||
categorical_column,
|
||||
@ -490,7 +512,8 @@ class _TPUSharedEmbeddingColumn(_TPUBaseEmbeddingColumn,
|
||||
ckpt_to_load_from=ckpt_to_load_from,
|
||||
tensor_name_in_ckpt=tensor_name_in_ckpt,
|
||||
max_norm=max_norm,
|
||||
trainable=trainable)
|
||||
trainable=trainable,
|
||||
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||
|
||||
def __init__(self,
|
||||
categorical_column,
|
||||
@ -503,7 +526,8 @@ class _TPUSharedEmbeddingColumn(_TPUBaseEmbeddingColumn,
|
||||
max_norm=None,
|
||||
trainable=True,
|
||||
max_sequence_length=0,
|
||||
learning_rate_fn=None):
|
||||
learning_rate_fn=None,
|
||||
use_safe_embedding_lookup=True):
|
||||
|
||||
_TPUBaseEmbeddingColumn.__init__(
|
||||
self,
|
||||
|
@ -56,7 +56,8 @@ def embedding_column_v2(categorical_column,
|
||||
max_sequence_length=0,
|
||||
learning_rate_fn=None,
|
||||
embedding_lookup_device=None,
|
||||
tensor_core_shape=None):
|
||||
tensor_core_shape=None,
|
||||
use_safe_embedding_lookup=True):
|
||||
"""TPU version of `tf.compat.v1.feature_column.embedding_column`.
|
||||
|
||||
Note that the interface for `tf.tpu.experimental.embedding_column` is
|
||||
@ -122,6 +123,13 @@ def embedding_column_v2(categorical_column,
|
||||
the intended dense shape to run embedding lookup for this feature on
|
||||
TensorCore. The batch dimension can be left None or -1 to indicate
|
||||
a dynamic shape. Only rank 2 shapes currently supported.
|
||||
use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
|
||||
instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
|
||||
there are no empty rows and all weights and ids are positive at the
|
||||
expense of extra compute cost. This only applies to rank 2 (NxM) shaped
|
||||
input tensors. Defaults to true, consider turning off if the above checks
|
||||
are not needed. Note that having empty rows will not trigger any error
|
||||
though the output result might be 0 or omitted.
|
||||
|
||||
Returns:
|
||||
A `_TPUEmbeddingColumnV2`.
|
||||
@ -175,7 +183,8 @@ def embedding_column_v2(categorical_column,
|
||||
combiner=combiner,
|
||||
initializer=initializer,
|
||||
max_sequence_length=max_sequence_length,
|
||||
learning_rate_fn=learning_rate_fn)
|
||||
learning_rate_fn=learning_rate_fn,
|
||||
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||
else:
|
||||
return _TPUDeviceSpecificEmbeddingColumnV2(
|
||||
categorical_column=categorical_column,
|
||||
@ -185,7 +194,8 @@ def embedding_column_v2(categorical_column,
|
||||
max_sequence_length=max_sequence_length,
|
||||
learning_rate_fn=learning_rate_fn,
|
||||
embedding_lookup_device=embedding_lookup_device,
|
||||
tensor_core_shape=tensor_core_shape)
|
||||
tensor_core_shape=tensor_core_shape,
|
||||
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||
|
||||
|
||||
@tf_export(v1=['tpu.experimental.shared_embedding_columns'])
|
||||
@ -197,7 +207,8 @@ def shared_embedding_columns_v2(categorical_columns,
|
||||
max_sequence_lengths=None,
|
||||
learning_rate_fn=None,
|
||||
embedding_lookup_device=None,
|
||||
tensor_core_shape=None):
|
||||
tensor_core_shape=None,
|
||||
use_safe_embedding_lookup=True):
|
||||
"""TPU version of `tf.compat.v1.feature_column.shared_embedding_columns`.
|
||||
|
||||
Note that the interface for `tf.tpu.experimental.shared_embedding_columns` is
|
||||
@ -271,6 +282,13 @@ def shared_embedding_columns_v2(categorical_columns,
|
||||
intended dense shape to run embedding lookup for this feature on
|
||||
TensorCore. The batch dimension can be left None or -1 to indicate a
|
||||
dynamic shape. Only rank 2 shapes currently supported.
|
||||
use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
|
||||
instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
|
||||
there are no empty rows and all weights and ids are positive at the
|
||||
expense of extra compute cost. This only applies to rank 2 (NxM) shaped
|
||||
input tensors. Defaults to true, consider turning off if the above checks
|
||||
are not needed. Note that having empty rows will not trigger any error
|
||||
though the output result might be 0 or omitted.
|
||||
|
||||
Returns:
|
||||
A list of `_TPUSharedEmbeddingColumnV2`.
|
||||
@ -364,7 +382,8 @@ def shared_embedding_columns_v2(categorical_columns,
|
||||
initializer=initializer,
|
||||
shared_embedding_collection_name=shared_embedding_collection_name,
|
||||
max_sequence_length=max_sequence_length,
|
||||
learning_rate_fn=learning_rate_fn)
|
||||
learning_rate_fn=learning_rate_fn,
|
||||
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||
else:
|
||||
column = _TPUSharedDeviceSpecificEmbeddingColumnV2(
|
||||
categorical_column=categorical_column,
|
||||
@ -375,7 +394,8 @@ def shared_embedding_columns_v2(categorical_columns,
|
||||
max_sequence_length=max_sequence_length,
|
||||
learning_rate_fn=learning_rate_fn,
|
||||
embedding_lookup_device=embedding_lookup_device,
|
||||
tensor_core_shape=tensor_core_shape)
|
||||
tensor_core_shape=tensor_core_shape,
|
||||
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||
tpu_columns.append(column)
|
||||
|
||||
return tpu_columns
|
||||
@ -390,7 +410,8 @@ class _TPUEmbeddingColumnV2(_TPUBaseEmbeddingColumn, fc_lib.EmbeddingColumn):
|
||||
combiner='mean',
|
||||
initializer=None,
|
||||
max_sequence_length=0,
|
||||
learning_rate_fn=None):
|
||||
learning_rate_fn=None,
|
||||
use_safe_embedding_lookup=True):
|
||||
return fc_lib.EmbeddingColumn.__new__(
|
||||
cls,
|
||||
categorical_column,
|
||||
@ -400,7 +421,8 @@ class _TPUEmbeddingColumnV2(_TPUBaseEmbeddingColumn, fc_lib.EmbeddingColumn):
|
||||
ckpt_to_load_from=None,
|
||||
tensor_name_in_ckpt=None,
|
||||
max_norm=None,
|
||||
trainable=True)
|
||||
trainable=True,
|
||||
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||
|
||||
def __getnewargs__(self):
|
||||
return (self._tpu_categorical_column, self.dimension, self.combiner,
|
||||
@ -416,7 +438,8 @@ class _TPUEmbeddingColumnV2(_TPUBaseEmbeddingColumn, fc_lib.EmbeddingColumn):
|
||||
combiner='mean',
|
||||
initializer=None,
|
||||
max_sequence_length=0,
|
||||
learning_rate_fn=None):
|
||||
learning_rate_fn=None,
|
||||
use_safe_embedding_lookup=True):
|
||||
_TPUBaseEmbeddingColumn.__init__(
|
||||
self,
|
||||
categorical_column,
|
||||
@ -573,13 +596,15 @@ class _TPUSharedEmbeddingColumnV2(_TPUBaseEmbeddingColumn,
|
||||
initializer=None,
|
||||
shared_embedding_collection_name=None,
|
||||
max_sequence_length=0,
|
||||
learning_rate_fn=None):
|
||||
learning_rate_fn=None,
|
||||
use_safe_embedding_lookup=True):
|
||||
return fc_lib.SharedEmbeddingColumn.__new__(
|
||||
cls,
|
||||
categorical_column,
|
||||
combiner=combiner,
|
||||
shared_embedding_column_creator=shared_embedding_column_creator,
|
||||
max_norm=None)
|
||||
max_norm=None,
|
||||
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||
|
||||
def __getnewargs__(self):
|
||||
return (self._tpu_categorical_column, self.shared_embedding_column_creator,
|
||||
@ -598,7 +623,8 @@ class _TPUSharedEmbeddingColumnV2(_TPUBaseEmbeddingColumn,
|
||||
initializer=None,
|
||||
shared_embedding_collection_name=None,
|
||||
max_sequence_length=0,
|
||||
learning_rate_fn=None):
|
||||
learning_rate_fn=None,
|
||||
use_safe_embedding_lookup=True):
|
||||
|
||||
_TPUBaseEmbeddingColumn.__init__(
|
||||
self,
|
||||
|
@ -43,7 +43,7 @@ def _initialized_session():
|
||||
return sess
|
||||
|
||||
|
||||
class EmbeddingColumnTestV2(test.TestCase):
|
||||
class EmbeddingColumnTestV2(test.TestCase, parameterized.TestCase):
|
||||
|
||||
def test_defaults(self):
|
||||
categorical_column = fc_lib.categorical_column_with_identity(
|
||||
@ -77,8 +77,16 @@ class EmbeddingColumnTestV2(test.TestCase):
|
||||
'aaa': parsing_ops.VarLenFeature(dtypes.int64)
|
||||
}, embedding_column._parse_example_spec)
|
||||
|
||||
@parameterized.named_parameters(
|
||||
{
|
||||
'testcase_name': 'use_safe_embedding_lookup',
|
||||
'use_safe_embedding_lookup': True,
|
||||
}, {
|
||||
'testcase_name': 'dont_use_safe_embedding_lookup',
|
||||
'use_safe_embedding_lookup': False,
|
||||
})
|
||||
@test_util.deprecated_graph_mode_only
|
||||
def test_feature_layer_cpu(self):
|
||||
def test_feature_layer_cpu(self, use_safe_embedding_lookup):
|
||||
# Inputs.
|
||||
vocabulary_size = 3
|
||||
sparse_input = sparse_tensor.SparseTensorValue(
|
||||
@ -135,12 +143,14 @@ class EmbeddingColumnTestV2(test.TestCase):
|
||||
embedding_column = tpu_fc.embedding_column_v2(
|
||||
categorical_column,
|
||||
dimension=embedding_dimension,
|
||||
initializer=_initializer)
|
||||
initializer=_initializer,
|
||||
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||
sequence_embedding_column = tpu_fc.embedding_column_v2(
|
||||
sequence_categorical_column,
|
||||
dimension=embedding_dimension,
|
||||
initializer=_initializer,
|
||||
max_sequence_length=2)
|
||||
max_sequence_length=2,
|
||||
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||
|
||||
# Provide sparse input and get dense result.
|
||||
features = {'aaa': sparse_input, 'bbb': sparse_input}
|
||||
@ -160,6 +170,16 @@ class EmbeddingColumnTestV2(test.TestCase):
|
||||
self.assertAllEqual(expected_lookups, embedding_lookup.eval())
|
||||
self.assertAllEqual(expected_lookups_sequence,
|
||||
sequence_embedding_lookup[0].eval())
|
||||
# The graph will still have SparseFillEmptyRows due to sequence being
|
||||
# a Rank3 embedding lookup.
|
||||
if use_safe_embedding_lookup:
|
||||
self.assertEqual(2, [
|
||||
x.type for x in ops.get_default_graph().get_operations()
|
||||
].count('SparseFillEmptyRows'))
|
||||
else:
|
||||
self.assertEqual(1, [
|
||||
x.type for x in ops.get_default_graph().get_operations()
|
||||
].count('SparseFillEmptyRows'))
|
||||
|
||||
def test_deepcopy(self):
|
||||
categorical_column = fc_lib.categorical_column_with_identity(
|
||||
@ -173,7 +193,7 @@ class EmbeddingColumnTestV2(test.TestCase):
|
||||
embedding_column_copy._max_sequence_length)
|
||||
|
||||
|
||||
class SharedEmbeddingColumnTestV2(test.TestCase):
|
||||
class SharedEmbeddingColumnTestV2(test.TestCase, parameterized.TestCase):
|
||||
|
||||
@test_util.deprecated_graph_mode_only
|
||||
def test_defaults(self):
|
||||
@ -238,8 +258,16 @@ class SharedEmbeddingColumnTestV2(test.TestCase):
|
||||
self.assertEqual((embedding_dimension,), embedding_column_a.variable_shape)
|
||||
self.assertEqual((embedding_dimension,), embedding_column_b.variable_shape)
|
||||
|
||||
@parameterized.named_parameters(
|
||||
{
|
||||
'testcase_name': 'use_safe_embedding_lookup',
|
||||
'use_safe_embedding_lookup': True
|
||||
}, {
|
||||
'testcase_name': 'dont_use_safe_embedding_lookup',
|
||||
'use_safe_embedding_lookup': False
|
||||
})
|
||||
@test_util.deprecated_graph_mode_only
|
||||
def test_feature_layer_cpu(self):
|
||||
def test_feature_layer_cpu(self, use_safe_embedding_lookup):
|
||||
# Inputs.
|
||||
vocabulary_size = 3
|
||||
input_a = sparse_tensor.SparseTensorValue(
|
||||
@ -296,7 +324,8 @@ class SharedEmbeddingColumnTestV2(test.TestCase):
|
||||
[categorical_column_a, categorical_column_b],
|
||||
dimension=embedding_dimension,
|
||||
initializer=_initializer,
|
||||
max_sequence_lengths=[0, 2])
|
||||
max_sequence_lengths=[0, 2],
|
||||
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||
|
||||
# Provide sparse input and get dense result.
|
||||
dense_features = fc_lib.DenseFeatures([embedding_column_a])
|
||||
@ -315,6 +344,16 @@ class SharedEmbeddingColumnTestV2(test.TestCase):
|
||||
self.assertAllEqual(expected_lookups_a, embedding_lookup_a.eval())
|
||||
self.assertAllEqual(expected_lookups_b,
|
||||
embedding_lookup_b[0].eval())
|
||||
# The graph will still have SparseFillEmptyRows due to sequence being
|
||||
# a Rank3 embedding lookup.
|
||||
if use_safe_embedding_lookup:
|
||||
self.assertEqual(2, [
|
||||
x.type for x in ops.get_default_graph().get_operations()
|
||||
].count('SparseFillEmptyRows'))
|
||||
else:
|
||||
self.assertEqual(1, [
|
||||
x.type for x in ops.get_default_graph().get_operations()
|
||||
].count('SparseFillEmptyRows'))
|
||||
|
||||
def test_deepcopy(self):
|
||||
vocabulary_size = 3
|
||||
|
@ -26,7 +26,7 @@ tf_module {
|
||||
}
|
||||
member_method {
|
||||
name: "embedding_column"
|
||||
argspec: "args=[\'categorical_column\', \'dimension\', \'combiner\', \'initializer\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'True\'], "
|
||||
argspec: "args=[\'categorical_column\', \'dimension\', \'combiner\', \'initializer\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\', \'use_safe_embedding_lookup\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'True\', \'True\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "indicator_column"
|
||||
@ -70,7 +70,7 @@ tf_module {
|
||||
}
|
||||
member_method {
|
||||
name: "shared_embedding_columns"
|
||||
argspec: "args=[\'categorical_columns\', \'dimension\', \'combiner\', \'initializer\', \'shared_embedding_collection_name\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\'], "
|
||||
argspec: "args=[\'categorical_columns\', \'dimension\', \'combiner\', \'initializer\', \'shared_embedding_collection_name\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\', \'use_safe_embedding_lookup\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\', \'True\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "weighted_categorical_column"
|
||||
|
@ -22,7 +22,7 @@ tf_module {
|
||||
}
|
||||
member_method {
|
||||
name: "embedding_column"
|
||||
argspec: "args=[\'categorical_column\', \'dimension\', \'combiner\', \'initializer\', \'max_sequence_length\', \'learning_rate_fn\', \'embedding_lookup_device\', \'tensor_core_shape\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'0\', \'None\', \'None\', \'None\'], "
|
||||
argspec: "args=[\'categorical_column\', \'dimension\', \'combiner\', \'initializer\', \'max_sequence_length\', \'learning_rate_fn\', \'embedding_lookup_device\', \'tensor_core_shape\', \'use_safe_embedding_lookup\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'0\', \'None\', \'None\', \'None\', \'True\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "initialize_tpu_system"
|
||||
@ -30,7 +30,7 @@ tf_module {
|
||||
}
|
||||
member_method {
|
||||
name: "shared_embedding_columns"
|
||||
argspec: "args=[\'categorical_columns\', \'dimension\', \'combiner\', \'initializer\', \'shared_embedding_collection_name\', \'max_sequence_lengths\', \'learning_rate_fn\', \'embedding_lookup_device\', \'tensor_core_shape\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
|
||||
argspec: "args=[\'categorical_columns\', \'dimension\', \'combiner\', \'initializer\', \'shared_embedding_collection_name\', \'max_sequence_lengths\', \'learning_rate_fn\', \'embedding_lookup_device\', \'tensor_core_shape\', \'use_safe_embedding_lookup\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "shutdown_tpu_system"
|
||||
|
@ -26,7 +26,7 @@ tf_module {
|
||||
}
|
||||
member_method {
|
||||
name: "embedding_column"
|
||||
argspec: "args=[\'categorical_column\', \'dimension\', \'combiner\', \'initializer\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'True\'], "
|
||||
argspec: "args=[\'categorical_column\', \'dimension\', \'combiner\', \'initializer\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\', \'use_safe_embedding_lookup\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'True\', \'True\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "indicator_column"
|
||||
@ -62,7 +62,7 @@ tf_module {
|
||||
}
|
||||
member_method {
|
||||
name: "shared_embeddings"
|
||||
argspec: "args=[\'categorical_columns\', \'dimension\', \'combiner\', \'initializer\', \'shared_embedding_collection_name\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\'], "
|
||||
argspec: "args=[\'categorical_columns\', \'dimension\', \'combiner\', \'initializer\', \'shared_embedding_collection_name\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\', \'use_safe_embedding_lookup\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\', \'True\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "weighted_categorical_column"
|
||||
|
Loading…
Reference in New Issue
Block a user