Embedding feature column performance optimization.

PiperOrigin-RevId: 292193767
Change-Id: I92006247b40fa0025bab6f35ac74e44ef43c2397
A. Unique TensorFlower 2020-01-29 12:19:21 -08:00 committed by TensorFlower Gardener
parent 7db3d7abe3
commit f1e95d1ba1
9 changed files with 488 additions and 121 deletions

View File

@@ -821,7 +821,8 @@ def _embedding_column(categorical_column,
                        ckpt_to_load_from=None,
                        tensor_name_in_ckpt=None,
                        max_norm=None,
-                       trainable=True):
+                       trainable=True,
+                       use_safe_embedding_lookup=True):
   """`_DenseColumn` that converts from sparse, categorical input.
 
   Use this when your inputs are sparse, but you want to convert them to a dense
@@ -882,6 +883,13 @@ def _embedding_column(categorical_column,
       not `None`.
     max_norm: If not `None`, embedding values are l2-normalized to this value.
     trainable: Whether or not the embedding is trainable. Default is True.
+    use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
+      instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
+      there are no empty rows and all weights and ids are positive at the
+      expense of extra compute cost. This only applies to rank 2 (NxM) shaped
+      input tensors. Defaults to true, consider turning off if the above checks
+      are not needed. Note that having empty rows will not trigger any error
+      though the output result might be 0 or omitted.
 
   Returns:
     `_DenseColumn` that converts from sparse input.
@@ -926,7 +934,8 @@ def _embedding_column(categorical_column,
       ckpt_to_load_from=ckpt_to_load_from,
       tensor_name_in_ckpt=tensor_name_in_ckpt,
       max_norm=max_norm,
-      trainable=trainable)
+      trainable=trainable,
+      use_safe_embedding_lookup=use_safe_embedding_lookup)
 
 
 def _numeric_column(key,
@@ -2444,9 +2453,32 @@ class _EmbeddingColumn(
     collections.namedtuple(
         '_EmbeddingColumn',
         ('categorical_column', 'dimension', 'combiner', 'layer_creator',
-         'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable'))):
+         'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable',
+         'use_safe_embedding_lookup'))):
   """See `embedding_column`."""
 
+  def __new__(cls,
+              categorical_column,
+              dimension,
+              combiner,
+              layer_creator,
+              ckpt_to_load_from,
+              tensor_name_in_ckpt,
+              max_norm,
+              trainable,
+              use_safe_embedding_lookup=True):
+    return super(_EmbeddingColumn, cls).__new__(
+        cls,
+        categorical_column=categorical_column,
+        dimension=dimension,
+        combiner=combiner,
+        layer_creator=layer_creator,
+        ckpt_to_load_from=ckpt_to_load_from,
+        tensor_name_in_ckpt=tensor_name_in_ckpt,
+        max_norm=max_norm,
+        trainable=trainable,
+        use_safe_embedding_lookup=use_safe_embedding_lookup)
+
   @property
   def name(self):
     if not hasattr(self, '_name'):
@@ -2489,11 +2521,17 @@ class _EmbeddingColumn(
           self.tensor_name_in_ckpt: to_restore
       })
 
+    sparse_id_rank = tensor_shape.dimension_value(
+        sparse_ids.dense_shape.get_shape()[0])
+    embedding_lookup_sparse = embedding_ops.safe_embedding_lookup_sparse
+    if (not self.use_safe_embedding_lookup and sparse_id_rank is not None and
+        sparse_id_rank <= 2):
+      embedding_lookup_sparse = embedding_ops.embedding_lookup_sparse
     # Return embedding lookup result.
-    return embedding_ops.safe_embedding_lookup_sparse(
-        embedding_weights=embedding_weights,
-        sparse_ids=sparse_ids,
-        sparse_weights=sparse_weights,
+    return embedding_lookup_sparse(
+        embedding_weights,
+        sparse_ids,
+        sparse_weights,
         combiner=self.combiner,
         name='%s_weights' % self.name,
         max_norm=self.max_norm)
@@ -2551,7 +2589,8 @@ class _SharedEmbeddingColumn(
         '_SharedEmbeddingColumn',
         ('categorical_column', 'dimension', 'combiner', 'initializer',
          'shared_embedding_collection_name', 'ckpt_to_load_from',
-         'tensor_name_in_ckpt', 'max_norm', 'trainable'))):
+         'tensor_name_in_ckpt', 'max_norm', 'trainable',
+         'use_safe_embedding_lookup'))):
   """See `embedding_column`."""
 
   @property
@@ -2632,11 +2671,17 @@ class _SharedEmbeddingColumn(
           self.tensor_name_in_ckpt: to_restore
      })
 
+    sparse_id_rank = tensor_shape.dimension_value(
+        sparse_ids.dense_shape.get_shape()[0])
+    embedding_lookup_sparse = embedding_ops.safe_embedding_lookup_sparse
+    if (not self.use_safe_embedding_lookup and sparse_id_rank is not None and
+        sparse_id_rank <= 2):
+      embedding_lookup_sparse = embedding_ops.embedding_lookup_sparse
     # Return embedding lookup result.
-    return embedding_ops.safe_embedding_lookup_sparse(
-        embedding_weights=embedding_weights,
-        sparse_ids=sparse_ids,
-        sparse_weights=sparse_weights,
+    return embedding_lookup_sparse(
+        embedding_weights,
+        sparse_ids,
+        sparse_weights,
         combiner=self.combiner,
         name='%s_weights' % self.name,
         max_norm=self.max_norm)
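
Note on the dispatch introduced above: sparse_ids.dense_shape is a 1-D tensor with one entry per dimension, so the static length of its shape equals the rank of the sparse input (or None when the rank is dynamic). The cheaper embedding_lookup_sparse is substituted only when the caller has opted out of the safety checks and the rank is statically known to be at most 2. A minimal standalone sketch of the same selection logic; pick_lookup is a hypothetical helper, while the two tf.nn lookup functions and tensor_shape.dimension_value are real APIs:

import tensorflow.compat.v1 as tf
from tensorflow.python.framework import tensor_shape


def pick_lookup(sparse_ids, use_safe_embedding_lookup=True):
  # dense_shape has one element per dimension, so its statically known
  # length is the rank of sparse_ids (None when the rank is dynamic).
  sparse_id_rank = tensor_shape.dimension_value(
      sparse_ids.dense_shape.get_shape()[0])
  if (not use_safe_embedding_lookup and sparse_id_rank is not None and
      sparse_id_rank <= 2):
    return tf.nn.embedding_lookup_sparse  # skips the validation ops
  return tf.nn.safe_embedding_lookup_sparse  # fills empty rows, checks ids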

View File

@@ -850,7 +850,8 @@ def embedding_column(categorical_column,
                      ckpt_to_load_from=None,
                      tensor_name_in_ckpt=None,
                      max_norm=None,
-                     trainable=True):
+                     trainable=True,
+                     use_safe_embedding_lookup=True):
   """`DenseColumn` that converts from sparse, categorical input.
 
   Use this when your inputs are sparse, but you want to convert them to a dense
@@ -911,6 +912,13 @@ def embedding_column(categorical_column,
       `None`.
     max_norm: If not `None`, embedding values are l2-normalized to this value.
     trainable: Whether or not the embedding is trainable. Default is True.
+    use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
+      instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
+      there are no empty rows and all weights and ids are positive at the
+      expense of extra compute cost. This only applies to rank 2 (NxM) shaped
+      input tensors. Defaults to true, consider turning off if the above checks
+      are not needed. Note that having empty rows will not trigger any error
+      though the output result might be 0 or omitted.
 
   Returns:
     `DenseColumn` that converts from sparse input.
@@ -944,7 +952,8 @@ def embedding_column(categorical_column,
       ckpt_to_load_from=ckpt_to_load_from,
       tensor_name_in_ckpt=tensor_name_in_ckpt,
       max_norm=max_norm,
-      trainable=trainable)
+      trainable=trainable,
+      use_safe_embedding_lookup=use_safe_embedding_lookup)
 
 
 @tf_export(v1=['feature_column.shared_embedding_columns'])
@@ -956,7 +965,8 @@ def shared_embedding_columns(categorical_columns,
                              ckpt_to_load_from=None,
                              tensor_name_in_ckpt=None,
                              max_norm=None,
-                             trainable=True):
+                             trainable=True,
+                             use_safe_embedding_lookup=True):
   """List of dense columns that convert from sparse, categorical input.
 
   This is similar to `embedding_column`, except that it produces a list of
@@ -1039,6 +1049,13 @@ def shared_embedding_columns(categorical_columns,
     max_norm: If not `None`, each embedding is clipped if its l2-norm is larger
       than this value, before combining.
     trainable: Whether or not the embedding is trainable. Default is True.
+    use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
+      instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
+      there are no empty rows and all weights and ids are positive at the
+      expense of extra compute cost. This only applies to rank 2 (NxM) shaped
+      input tensors. Defaults to true, consider turning off if the above checks
+      are not needed. Note that having empty rows will not trigger any error
+      though the output result might be 0 or omitted.
 
   Returns:
     A list of dense columns that converts from sparse input. The order of
@@ -1117,7 +1134,8 @@ def shared_embedding_columns(categorical_columns,
         ckpt_to_load_from=ckpt_to_load_from,
         tensor_name_in_ckpt=tensor_name_in_ckpt,
         max_norm=max_norm,
-        trainable=trainable))
+        trainable=trainable,
+        use_safe_embedding_lookup=use_safe_embedding_lookup))
 
   return result
@@ -1131,7 +1149,8 @@ def shared_embedding_columns_v2(categorical_columns,
                                 ckpt_to_load_from=None,
                                 tensor_name_in_ckpt=None,
                                 max_norm=None,
-                                trainable=True):
+                                trainable=True,
+                                use_safe_embedding_lookup=True):
   """List of dense columns that convert from sparse, categorical input.
 
   This is similar to `embedding_column`, except that it produces a list of
@@ -1213,6 +1232,13 @@ def shared_embedding_columns_v2(categorical_columns,
     max_norm: If not `None`, each embedding is clipped if its l2-norm is
       larger than this value, before combining.
     trainable: Whether or not the embedding is trainable. Default is True.
+    use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
+      instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
+      there are no empty rows and all weights and ids are positive at the
+      expense of extra compute cost. This only applies to rank 2 (NxM) shaped
+      input tensors. Defaults to true, consider turning off if the above checks
+      are not needed. Note that having empty rows will not trigger any error
+      though the output result might be 0 or omitted.
 
   Returns:
     A list of dense columns that converts from sparse input. The order of
@@ -1277,7 +1303,8 @@ def shared_embedding_columns_v2(categorical_columns,
   column_creator = SharedEmbeddingColumnCreator(
       dimension, initializer, ckpt_to_load_from, tensor_name_in_ckpt,
-      num_buckets, trainable, shared_embedding_collection_name)
+      num_buckets, trainable, shared_embedding_collection_name,
+      use_safe_embedding_lookup)
 
   result = []
   for column in categorical_columns:
@@ -3082,9 +3109,32 @@ class EmbeddingColumn(
     collections.namedtuple(
         'EmbeddingColumn',
         ('categorical_column', 'dimension', 'combiner', 'initializer',
-         'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable'))):
+         'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable',
+         'use_safe_embedding_lookup'))):
   """See `embedding_column`."""
 
+  def __new__(cls,
+              categorical_column,
+              dimension,
+              combiner,
+              initializer,
+              ckpt_to_load_from,
+              tensor_name_in_ckpt,
+              max_norm,
+              trainable,
+              use_safe_embedding_lookup=True):
+    return super(EmbeddingColumn, cls).__new__(
+        cls,
+        categorical_column=categorical_column,
+        dimension=dimension,
+        combiner=combiner,
+        initializer=initializer,
+        ckpt_to_load_from=ckpt_to_load_from,
+        tensor_name_in_ckpt=tensor_name_in_ckpt,
+        max_norm=max_norm,
+        trainable=trainable,
+        use_safe_embedding_lookup=use_safe_embedding_lookup)
+
   @property
   def _is_v2_column(self):
     return (isinstance(self.categorical_column, FeatureColumn) and
@@ -3156,11 +3206,17 @@ class EmbeddingColumn(
           self.tensor_name_in_ckpt: to_restore
       })
 
+    sparse_id_rank = tensor_shape.dimension_value(
+        sparse_ids.dense_shape.get_shape()[0])
+    embedding_lookup_sparse = embedding_ops.safe_embedding_lookup_sparse
+    if (not self.use_safe_embedding_lookup and sparse_id_rank is not None and
+        sparse_id_rank <= 2):
+      embedding_lookup_sparse = embedding_ops.embedding_lookup_sparse
     # Return embedding lookup result.
-    return embedding_ops.safe_embedding_lookup_sparse(
-        embedding_weights=embedding_weights,
-        sparse_ids=sparse_ids,
-        sparse_weights=sparse_weights,
+    return embedding_lookup_sparse(
+        embedding_weights,
+        sparse_ids,
+        sparse_weights,
         combiner=self.combiner,
         name='%s_weights' % self.name,
         max_norm=self.max_norm)
@@ -3301,6 +3357,8 @@ class EmbeddingColumn(
   @classmethod
   def from_config(cls, config, custom_objects=None, columns_by_name=None):
     """See `FeatureColumn` base class."""
+    if 'use_safe_embedding_lookup' not in config:
+      config['use_safe_embedding_lookup'] = True
     from tensorflow.python.feature_column.serialization import deserialize_feature_column  # pylint: disable=g-import-not-at-top
     _check_config_keys(config, cls._fields)
     kwargs = _standardize_and_copy_config(config)
@@ -3326,7 +3384,8 @@ class SharedEmbeddingColumnCreator(tracking.AutoTrackable):
                tensor_name_in_ckpt,
                num_buckets,
                trainable,
-               name='shared_embedding_column_creator'):
+               name='shared_embedding_column_creator',
+               use_safe_embedding_lookup=True):
     self._dimension = dimension
     self._initializer = initializer
     self._ckpt_to_load_from = ckpt_to_load_from
@@ -3334,11 +3393,13 @@ class SharedEmbeddingColumnCreator(tracking.AutoTrackable):
     self._num_buckets = num_buckets
     self._trainable = trainable
     self._name = name
+    self._use_safe_embedding_lookup = use_safe_embedding_lookup
     # Map from graph keys to embedding_weight variables.
     self._embedding_weights = {}
 
   def __call__(self, categorical_column, combiner, max_norm):
-    return SharedEmbeddingColumn(categorical_column, self, combiner, max_norm)
+    return SharedEmbeddingColumn(categorical_column, self, combiner, max_norm,
+                                 self._use_safe_embedding_lookup)
 
   @property
   def embedding_weights(self):
@@ -3374,9 +3435,23 @@ class SharedEmbeddingColumn(
     collections.namedtuple(
         'SharedEmbeddingColumn',
         ('categorical_column', 'shared_embedding_column_creator', 'combiner',
-         'max_norm'))):
+         'max_norm', 'use_safe_embedding_lookup'))):
   """See `embedding_column`."""
 
+  def __new__(cls,
+              categorical_column,
+              shared_embedding_column_creator,
+              combiner,
+              max_norm,
+              use_safe_embedding_lookup=True):
+    return super(SharedEmbeddingColumn, cls).__new__(
+        cls,
+        categorical_column=categorical_column,
+        shared_embedding_column_creator=shared_embedding_column_creator,
+        combiner=combiner,
+        max_norm=max_norm,
+        use_safe_embedding_lookup=use_safe_embedding_lookup)
+
   @property
   def _is_v2_column(self):
     return True
@@ -3426,11 +3501,17 @@ class SharedEmbeddingColumn(
     embedding_weights = self.shared_embedding_column_creator.embedding_weights
 
+    sparse_id_rank = tensor_shape.dimension_value(
+        sparse_ids.dense_shape.get_shape()[0])
+    embedding_lookup_sparse = embedding_ops.safe_embedding_lookup_sparse
+    if (not self.use_safe_embedding_lookup and sparse_id_rank is not None and
+        sparse_id_rank <= 2):
+      embedding_lookup_sparse = embedding_ops.embedding_lookup_sparse
     # Return embedding lookup result.
-    return embedding_ops.safe_embedding_lookup_sparse(
-        embedding_weights=embedding_weights,
-        sparse_ids=sparse_ids,
-        sparse_weights=sparse_weights,
+    return embedding_lookup_sparse(
+        embedding_weights,
+        sparse_ids,
+        sparse_weights,
         combiner=self.combiner,
         name='%s_weights' % self.name,
         max_norm=self.max_norm)
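
For the public API above, opting out looks like the following. A usage sketch; the 'video_id' column is illustrative, and it assumes a TensorFlow build that includes this change:

import tensorflow as tf

video_id = tf.feature_column.categorical_column_with_identity(
    key='video_id', num_buckets=1000000)

# Default: safety checks on (empty rows filled, ids validated).
safe_col = tf.feature_column.embedding_column(video_id, dimension=32)

# Opt out when the input pipeline already guarantees rank-2 input with
# no empty rows and valid, non-negative ids:
fast_col = tf.feature_column.embedding_column(
    video_id, dimension=32, use_safe_embedding_lookup=False)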

View File

@@ -21,6 +21,7 @@ from __future__ import print_function
 import collections
 import copy
 
+from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.core.example import example_pb2
@@ -5704,7 +5705,7 @@ class _TestStateManager(fc.StateManager):
     raise ValueError('Could not find variable.')
 
 
-class EmbeddingColumnTest(test.TestCase):
+class EmbeddingColumnTest(test.TestCase, parameterized.TestCase):
 
   @test_util.run_deprecated_v1
   def test_defaults(self):
@@ -6272,8 +6273,16 @@ class EmbeddingColumnTest(test.TestCase):
     self.assertAllClose(((94.,), (29.,), (0.,), (42.,)),
                         self.evaluate(predictions))
 
+  @parameterized.named_parameters(
+      {
+          'testcase_name': 'use_safe_embedding_lookup',
+          'use_safe_embedding_lookup': True
+      }, {
+          'testcase_name': 'dont_use_safe_embedding_lookup',
+          'use_safe_embedding_lookup': False
+      })
   @test_util.run_deprecated_v1
-  def test_dense_features(self):
+  def test_dense_features(self, use_safe_embedding_lookup):
     # Inputs.
     vocabulary_size = 3
     sparse_input = sparse_tensor.SparseTensorValue(
@@ -6317,7 +6326,8 @@ class EmbeddingColumnTest(test.TestCase):
     embedding_column = fc.embedding_column(
         categorical_column,
         dimension=embedding_dimension,
-        initializer=_initializer)
+        initializer=_initializer,
+        use_safe_embedding_lookup=use_safe_embedding_lookup)
 
     # Provide sparse input and get dense result.
     l = df.DenseFeatures((embedding_column,))
@@ -6339,6 +6349,14 @@ class EmbeddingColumnTest(test.TestCase):
     self.assertAllEqual(embedding_values, self.evaluate(trainable_vars[0]))
     self.assertAllEqual(expected_lookups, self.evaluate(dense_features))
 
+    if use_safe_embedding_lookup:
+      self.assertIn('SparseFillEmptyRows',
+                    [x.type for x in ops.get_default_graph().get_operations()])
+    else:
+      self.assertNotIn(
+          'SparseFillEmptyRows',
+          [x.type for x in ops.get_default_graph().get_operations()])
+
   @test_util.run_deprecated_v1
   def test_dense_features_not_trainable(self):
     # Inputs.
@@ -6646,31 +6664,33 @@ class EmbeddingColumnTest(test.TestCase):
     self.assertEqual([categorical_column], embedding_column.parents)
 
     config = embedding_column.get_config()
-    self.assertEqual({
-        'categorical_column': {
-            'class_name': 'IdentityCategoricalColumn',
-            'config': {
-                'number_buckets': 3,
-                'key': 'aaa',
-                'default_value': None
-            }
-        },
-        'ckpt_to_load_from': None,
-        'combiner': 'mean',
-        'dimension': 2,
-        'initializer': {
-            'class_name': 'TruncatedNormal',
-            'config': {
-                'dtype': 'float32',
-                'stddev': 0.7071067811865475,
-                'seed': None,
-                'mean': 0.0
-            }
-        },
-        'max_norm': None,
-        'tensor_name_in_ckpt': None,
-        'trainable': True
-    }, config)
+    self.assertEqual(
+        {
+            'categorical_column': {
+                'class_name': 'IdentityCategoricalColumn',
+                'config': {
+                    'number_buckets': 3,
+                    'key': 'aaa',
+                    'default_value': None
+                }
+            },
+            'ckpt_to_load_from': None,
+            'combiner': 'mean',
+            'dimension': 2,
+            'initializer': {
+                'class_name': 'TruncatedNormal',
+                'config': {
+                    'dtype': 'float32',
+                    'stddev': 0.7071067811865475,
+                    'seed': None,
+                    'mean': 0.0
+                }
+            },
+            'max_norm': None,
+            'tensor_name_in_ckpt': None,
+            'trainable': True,
+            'use_safe_embedding_lookup': True
+        }, config)
 
     custom_objects = {'TruncatedNormal': init_ops.TruncatedNormal}
     new_embedding_column = fc.EmbeddingColumn.from_config(
@@ -6707,28 +6727,33 @@ class EmbeddingColumnTest(test.TestCase):
     self.assertEqual([categorical_column], embedding_column.parents)
 
     config = embedding_column.get_config()
-    self.assertEqual({
-        'categorical_column': {
-            'class_name': 'IdentityCategoricalColumn',
-            'config': {
-                'number_buckets': 3,
-                'key': 'aaa',
-                'default_value': None
-            }
-        },
-        'ckpt_to_load_from': None,
-        'combiner': 'mean',
-        'dimension': 2,
-        'initializer': '_initializer',
-        'max_norm': None,
-        'tensor_name_in_ckpt': None,
-        'trainable': True
-    }, config)
+    self.assertEqual(
+        {
+            'categorical_column': {
+                'class_name': 'IdentityCategoricalColumn',
+                'config': {
+                    'number_buckets': 3,
+                    'key': 'aaa',
+                    'default_value': None
+                }
+            },
+            'ckpt_to_load_from': None,
+            'combiner': 'mean',
+            'dimension': 2,
+            'initializer': '_initializer',
+            'max_norm': None,
+            'tensor_name_in_ckpt': None,
+            'trainable': True,
+            'use_safe_embedding_lookup': True
+        }, config)
 
     custom_objects = {
         '_initializer': _initializer,
     }
 
+    # use_safe_embedding_lookup might not be populated for legacy reasons.
+    del config['use_safe_embedding_lookup']
     new_embedding_column = fc.EmbeddingColumn.from_config(
         config, custom_objects=custom_objects)
     self.assertEqual(embedding_column, new_embedding_column)
@@ -6746,7 +6771,7 @@ class EmbeddingColumnTest(test.TestCase):
     self.assertIs(categorical_column, new_embedding_column.categorical_column)
 
 
-class SharedEmbeddingColumnTest(test.TestCase):
+class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase):
 
   @test_util.run_deprecated_v1
   def test_defaults(self):
@@ -6952,8 +6977,16 @@ class SharedEmbeddingColumnTest(test.TestCase):
     _assert_sparse_tensor_value(self, self.evaluate(output_b),
                                 self.evaluate(output_b_embedded))
 
+  @parameterized.named_parameters(
+      {
+          'testcase_name': 'use_safe_embedding_lookup',
+          'use_safe_embedding_lookup': True
+      }, {
+          'testcase_name': 'dont_use_safe_embedding_lookup',
+          'use_safe_embedding_lookup': False
+      })
   @test_util.run_deprecated_v1
-  def test_get_dense_tensor(self):
+  def test_get_dense_tensor(self, use_safe_embedding_lookup):
     # Inputs.
     vocabulary_size = 3
     # -1 values are ignored.
@@ -6988,12 +7021,18 @@ class SharedEmbeddingColumnTest(test.TestCase):
         # example 1:
         (2., 3.5),  # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
     )
-    expected_lookups_b = (
-        # example 0:
-        (1., 2.),  # ids [0], embedding = [1, 2]
-        # example 1:
-        (0., 0.),  # ids [], embedding = [0, 0]
-    )
+    if use_safe_embedding_lookup:
+      expected_lookups_b = (
+          # example 0:
+          (1., 2.),  # ids [0], embedding = [1, 2]
+          # example 1:
+          (0., 0.),  # ids [], embedding = [0, 0]
+      )
+    else:
+      expected_lookups_b = (
+          # example 0:
+          (1., 2.),  # ids [0], embedding = [1, 2]
+      )
 
     # Build columns.
     categorical_column_a = fc.categorical_column_with_identity(
@@ -7003,7 +7042,8 @@ class SharedEmbeddingColumnTest(test.TestCase):
     embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2(
         [categorical_column_a, categorical_column_b],
         dimension=embedding_dimension,
-        initializer=_initializer)
+        initializer=_initializer,
+        use_safe_embedding_lookup=use_safe_embedding_lookup)
 
     # Provide sparse input and get dense result.
     embedding_lookup_a = embedding_column_a.get_dense_tensor(
@@ -7024,8 +7064,112 @@ class SharedEmbeddingColumnTest(test.TestCase):
     self.assertAllEqual(expected_lookups_a, self.evaluate(embedding_lookup_a))
     self.assertAllEqual(expected_lookups_b, self.evaluate(embedding_lookup_b))
 
+    if use_safe_embedding_lookup:
+      self.assertIn('SparseFillEmptyRows',
+                    [x.type for x in ops.get_default_graph().get_operations()])
+    else:
+      self.assertNotIn(
+          'SparseFillEmptyRows',
+          [x.type for x in ops.get_default_graph().get_operations()])
+
+  @parameterized.named_parameters(
+      {
+          'testcase_name': 'use_safe_embedding_lookup',
+          'use_safe_embedding_lookup': True
+      }, {
+          'testcase_name': 'dont_use_safe_embedding_lookup',
+          'use_safe_embedding_lookup': False
+      })
   @test_util.run_deprecated_v1
-  def test_get_dense_tensor_placeholder_inputs(self):
+  def test_get_dense_tensor_valid(self, use_safe_embedding_lookup):
+    # Inputs.
+    vocabulary_size = 3
+    # -1 values are ignored.
+    input_a = np.array([
+        [2, 1],  # example 0, ids [2, 1]
+        [0, -1]
+    ])  # example 1, ids [0]
+    input_b = np.array([
+        [1, -1],  # example 0, ids [1]
+        [1, 2]
+    ])  # example 1, ids [1, 2]
+    input_features = {'aaa': input_a, 'bbb': input_b}
+
+    # Embedding variable.
+    embedding_dimension = 2
+    embedding_values = (
+        (1., 2.),  # id 0
+        (3., 5.),  # id 1
+        (7., 11.)  # id 2
+    )
+
+    def _initializer(shape, dtype, partition_info=None):
+      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return embedding_values
+
+    # Expected lookup result, using combiner='mean'.
+    expected_lookups_a = (
+        # example 0:
+        (5., 8.),  # ids [2, 1], embedding = mean([3, 5] + [7, 11]) = [5, 8]
+        # example 1:
+        (1., 2),  # ids [0], embedding = [1, 2]
+    )
+    expected_lookups_b = (
+        # example 0:
+        (3., 5.),  # ids [1], embedding = [3, 5]
+        # example 1:
+        (5., 8.),  # ids [1, 2], embedding = mean([3, 5] + [7, 11]) = [5, 8]
+    )
+
+    # Build columns.
+    categorical_column_a = fc.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    categorical_column_b = fc.categorical_column_with_identity(
+        key='bbb', num_buckets=vocabulary_size)
+    embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2(
+        [categorical_column_a, categorical_column_b],
+        dimension=embedding_dimension,
+        initializer=_initializer,
+        use_safe_embedding_lookup=use_safe_embedding_lookup)
+
+    # Provide sparse input and get dense result.
+    embedding_lookup_a = embedding_column_a.get_dense_tensor(
+        fc.FeatureTransformationCache(input_features), None)
+    embedding_lookup_b = embedding_column_b.get_dense_tensor(
+        fc.FeatureTransformationCache(input_features), None)
+
+    # Assert expected embedding variable and lookups.
+    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+    self.assertCountEqual(('aaa_bbb_shared_embedding:0',),
+                          tuple([v.name for v in global_vars]))
+    embedding_var = global_vars[0]
+    self.evaluate(variables_lib.global_variables_initializer())
+    self.evaluate(lookup_ops.tables_initializer())
+    self.assertAllEqual(embedding_values, self.evaluate(embedding_var))
+    self.assertAllEqual(expected_lookups_a, self.evaluate(embedding_lookup_a))
+    self.assertAllEqual(expected_lookups_b, self.evaluate(embedding_lookup_b))
+
+    if use_safe_embedding_lookup:
+      self.assertIn('SparseFillEmptyRows',
+                    [x.type for x in ops.get_default_graph().get_operations()])
+    else:
+      self.assertNotIn(
+          'SparseFillEmptyRows',
+          [x.type for x in ops.get_default_graph().get_operations()])
+
+  @parameterized.named_parameters(
+      {
+          'testcase_name': 'use_safe_embedding_lookup',
+          'use_safe_embedding_lookup': True
+      }, {
+          'testcase_name': 'dont_use_safe_embedding_lookup',
+          'use_safe_embedding_lookup': False
+      })
+  @test_util.run_deprecated_v1
+  def test_get_dense_tensor_placeholder_inputs(self,
+                                               use_safe_embedding_lookup):
     # Inputs.
     vocabulary_size = 3
     # -1 values are ignored.
@@ -7073,13 +7217,21 @@ class SharedEmbeddingColumnTest(test.TestCase):
     embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2(
         [categorical_column_a, categorical_column_b],
         dimension=embedding_dimension,
-        initializer=_initializer)
+        initializer=_initializer,
+        use_safe_embedding_lookup=use_safe_embedding_lookup)
 
     # Provide sparse input and get dense result.
     embedding_lookup_a = embedding_column_a.get_dense_tensor(
         fc.FeatureTransformationCache(input_features), None)
     embedding_lookup_b = embedding_column_b.get_dense_tensor(
         fc.FeatureTransformationCache(input_features), None)
 
+    if use_safe_embedding_lookup:
+      self.assertIn('SparseFillEmptyRows',
+                    [x.type for x in ops.get_default_graph().get_operations()])
+    else:
+      self.assertNotIn(
+          'SparseFillEmptyRows',
+          [x.type for x in ops.get_default_graph().get_operations()])
+
     with _initialized_session() as sess:
       sess.run([embedding_lookup_a, embedding_lookup_b], feed_dict=feed_dict)
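
The expected_lookups_b branch above is the observable difference between the two lookup ops: with the checks off, a row with no ids is dropped from the output rather than returned as zeros. A small graph-mode sketch of that behavior using the underlying tf.nn ops (illustrative values):

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()  # the tests above also run in graph mode

embedding_weights = tf.constant([[1., 2.], [3., 5.], [7., 11.]])
# Batch of 2 examples; example 1 has no ids (an empty row).
sparse_ids = tf.SparseTensor(
    indices=[[0, 0]],
    values=tf.constant([0], dtype=tf.int64),
    dense_shape=[2, 1])

safe = tf.nn.safe_embedding_lookup_sparse(
    embedding_weights, sparse_ids, None, combiner='mean')
plain = tf.nn.embedding_lookup_sparse(
    embedding_weights, sparse_ids, None, combiner='mean')

with tf.Session() as sess:
  print(sess.run(safe))   # [[1. 2.], [0. 0.]] -- empty row becomes zeros
  print(sess.run(plain))  # [[1. 2.]]          -- empty row is omitted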

View File

@@ -57,7 +57,8 @@ def embedding_column(categorical_column,
                      combiner='mean',
                      initializer=None,
                      max_sequence_length=0,
-                     learning_rate_fn=None):
+                     learning_rate_fn=None,
+                     use_safe_embedding_lookup=True):
   """TPU embedding_column for `tf.feature_column.embedding_column`.
 
   Note that the interface for TPU embedding_column is different from the non-TPU
@@ -86,6 +87,13 @@ def embedding_column(categorical_column,
       sequence features and 0 for non-sequence features.
     learning_rate_fn: A function that takes global step and returns learning
       rate for the embedding table.
+    use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
+      instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
+      there are no empty rows and all weights and ids are positive at the
+      expense of extra compute cost. This only applies to rank 2 (NxM) shaped
+      input tensors. Defaults to true, consider turning off if the above checks
+      are not needed. Note that having empty rows will not trigger any error
+      though the output result might be 0 or omitted.
 
   Returns:
     A _TPUEmbeddingColumn.
@@ -137,7 +145,8 @@ def embedding_column(categorical_column,
       max_norm=None,
       trainable=True,
       max_sequence_length=max_sequence_length,
-      learning_rate_fn=learning_rate_fn)
+      learning_rate_fn=learning_rate_fn,
+      use_safe_embedding_lookup=use_safe_embedding_lookup)
   # For Embedding column, the initializer is hidden inside the creator Fn, which
   # is not accessible later. So, we attach it to a special field. Also note
   # that non-TPU Embedding column and non-TPU shared Embedding column handle the
@@ -152,7 +161,8 @@ def shared_embedding_columns(categorical_columns,
                              initializer=None,
                              shared_embedding_collection_name=None,
                              max_sequence_lengths=None,
-                             learning_rate_fn=None):
+                             learning_rate_fn=None,
+                             use_safe_embedding_lookup=True):
   """List of dense columns that convert from sparse, categorical input.
 
   Note that the interface for TPU embedding_column is different from the non-TPU
@@ -187,6 +197,13 @@ def shared_embedding_columns(categorical_columns,
       sequence longer will be truncated.
     learning_rate_fn: A function that takes global step and returns learning
       rate for the embedding table.
+    use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
+      instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
+      there are no empty rows and all weights and ids are positive at the
+      expense of extra compute cost. This only applies to rank 2 (NxM) shaped
+      input tensors. Defaults to true, consider turning off if the above checks
+      are not needed. Note that having empty rows will not trigger any error
+      though the output result might be 0 or omitted.
 
   Returns:
     A _TPUEmbeddingColumn.
@@ -261,7 +278,8 @@ def shared_embedding_columns(categorical_columns,
         max_norm=None,
         trainable=True,
         max_sequence_length=max_sequence_length,
-        learning_rate_fn=learning_rate_fn)
+        learning_rate_fn=learning_rate_fn,
+        use_safe_embedding_lookup=use_safe_embedding_lookup)
     tpu_columns.append(column)
 
   return tpu_columns
@@ -347,7 +365,8 @@ class _TPUEmbeddingColumn(_TPUBaseEmbeddingColumn, fc._EmbeddingColumn):
               max_norm=None,
              trainable=True,
               max_sequence_length=0,
-              learning_rate_fn=None):
+              learning_rate_fn=None,
+              use_safe_embedding_lookup=True):
     # Note, args ckpt_to_load_from, tensor_name_in_ckpt, max_norm and trainable
     # are not supported on TPU. They are solely for matching the signature of
     # __new__ of parent class fc._EmbeddingColumn.
@@ -360,7 +379,8 @@ class _TPUEmbeddingColumn(_TPUBaseEmbeddingColumn, fc._EmbeddingColumn):
         ckpt_to_load_from=ckpt_to_load_from,
         tensor_name_in_ckpt=tensor_name_in_ckpt,
         max_norm=max_norm,
-        trainable=trainable)
+        trainable=trainable,
+        use_safe_embedding_lookup=use_safe_embedding_lookup)
 
   def __init__(self,
                categorical_column,
@@ -372,7 +392,8 @@ class _TPUEmbeddingColumn(_TPUBaseEmbeddingColumn, fc._EmbeddingColumn):
               max_norm=None,
               trainable=True,
               max_sequence_length=0,
-               learning_rate_fn=None):
+               learning_rate_fn=None,
+               use_safe_embedding_lookup=True):
     _TPUBaseEmbeddingColumn.__init__(
         self,
         categorical_column,
@@ -479,7 +500,8 @@ class _TPUSharedEmbeddingColumn(_TPUBaseEmbeddingColumn,
               max_norm=None,
               trainable=True,
               max_sequence_length=0,
-              learning_rate_fn=None):
+              learning_rate_fn=None,
+              use_safe_embedding_lookup=True):
     return fc._SharedEmbeddingColumn.__new__(
         cls,
         categorical_column,
@@ -490,7 +512,8 @@ class _TPUSharedEmbeddingColumn(_TPUBaseEmbeddingColumn,
         ckpt_to_load_from=ckpt_to_load_from,
         tensor_name_in_ckpt=tensor_name_in_ckpt,
         max_norm=max_norm,
-        trainable=trainable)
+        trainable=trainable,
+        use_safe_embedding_lookup=use_safe_embedding_lookup)
 
   def __init__(self,
                categorical_column,
@@ -503,7 +526,8 @@ class _TPUSharedEmbeddingColumn(_TPUBaseEmbeddingColumn,
                max_norm=None,
                trainable=True,
                max_sequence_length=0,
-               learning_rate_fn=None):
+               learning_rate_fn=None,
+               use_safe_embedding_lookup=True):
     _TPUBaseEmbeddingColumn.__init__(
         self,

View File

@@ -56,7 +56,8 @@ def embedding_column_v2(categorical_column,
                         max_sequence_length=0,
                         learning_rate_fn=None,
                         embedding_lookup_device=None,
-                        tensor_core_shape=None):
+                        tensor_core_shape=None,
+                        use_safe_embedding_lookup=True):
   """TPU version of `tf.compat.v1.feature_column.embedding_column`.
 
   Note that the interface for `tf.tpu.experimental.embedding_column` is
@@ -122,6 +123,13 @@ def embedding_column_v2(categorical_column,
       the intended dense shape to run embedding lookup for this feature on
       TensorCore. The batch dimension can be left None or -1 to indicate
       a dynamic shape. Only rank 2 shapes currently supported.
+    use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
+      instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
+      there are no empty rows and all weights and ids are positive at the
+      expense of extra compute cost. This only applies to rank 2 (NxM) shaped
+      input tensors. Defaults to true, consider turning off if the above checks
+      are not needed. Note that having empty rows will not trigger any error
+      though the output result might be 0 or omitted.
 
   Returns:
     A `_TPUEmbeddingColumnV2`.
@@ -175,7 +183,8 @@ def embedding_column_v2(categorical_column,
         combiner=combiner,
         initializer=initializer,
         max_sequence_length=max_sequence_length,
-        learning_rate_fn=learning_rate_fn)
+        learning_rate_fn=learning_rate_fn,
+        use_safe_embedding_lookup=use_safe_embedding_lookup)
   else:
     return _TPUDeviceSpecificEmbeddingColumnV2(
         categorical_column=categorical_column,
@@ -185,7 +194,8 @@ def embedding_column_v2(categorical_column,
         max_sequence_length=max_sequence_length,
         learning_rate_fn=learning_rate_fn,
         embedding_lookup_device=embedding_lookup_device,
-        tensor_core_shape=tensor_core_shape)
+        tensor_core_shape=tensor_core_shape,
+        use_safe_embedding_lookup=use_safe_embedding_lookup)
 
 
 @tf_export(v1=['tpu.experimental.shared_embedding_columns'])
@@ -197,7 +207,8 @@ def shared_embedding_columns_v2(categorical_columns,
                                 max_sequence_lengths=None,
                                 learning_rate_fn=None,
                                 embedding_lookup_device=None,
-                                tensor_core_shape=None):
+                                tensor_core_shape=None,
+                                use_safe_embedding_lookup=True):
   """TPU version of `tf.compat.v1.feature_column.shared_embedding_columns`.
 
   Note that the interface for `tf.tpu.experimental.shared_embedding_columns` is
@@ -271,6 +282,13 @@ def shared_embedding_columns_v2(categorical_columns,
       intended dense shape to run embedding lookup for this feature on
       TensorCore. The batch dimension can be left None or -1 to indicate a
       dynamic shape. Only rank 2 shapes currently supported.
+    use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
+      instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
+      there are no empty rows and all weights and ids are positive at the
+      expense of extra compute cost. This only applies to rank 2 (NxM) shaped
+      input tensors. Defaults to true, consider turning off if the above checks
+      are not needed. Note that having empty rows will not trigger any error
+      though the output result might be 0 or omitted.
 
   Returns:
     A list of `_TPUSharedEmbeddingColumnV2`.
@@ -364,7 +382,8 @@ def shared_embedding_columns_v2(categorical_columns,
           initializer=initializer,
          shared_embedding_collection_name=shared_embedding_collection_name,
           max_sequence_length=max_sequence_length,
-          learning_rate_fn=learning_rate_fn)
+          learning_rate_fn=learning_rate_fn,
+          use_safe_embedding_lookup=use_safe_embedding_lookup)
     else:
       column = _TPUSharedDeviceSpecificEmbeddingColumnV2(
           categorical_column=categorical_column,
@@ -375,7 +394,8 @@ def shared_embedding_columns_v2(categorical_columns,
          max_sequence_length=max_sequence_length,
          learning_rate_fn=learning_rate_fn,
          embedding_lookup_device=embedding_lookup_device,
-          tensor_core_shape=tensor_core_shape)
+          tensor_core_shape=tensor_core_shape,
+          use_safe_embedding_lookup=use_safe_embedding_lookup)
     tpu_columns.append(column)
 
   return tpu_columns
@@ -390,7 +410,8 @@ class _TPUEmbeddingColumnV2(_TPUBaseEmbeddingColumn, fc_lib.EmbeddingColumn):
               combiner='mean',
               initializer=None,
               max_sequence_length=0,
-              learning_rate_fn=None):
+              learning_rate_fn=None,
+              use_safe_embedding_lookup=True):
     return fc_lib.EmbeddingColumn.__new__(
         cls,
         categorical_column,
@@ -400,7 +421,8 @@ class _TPUEmbeddingColumnV2(_TPUBaseEmbeddingColumn, fc_lib.EmbeddingColumn):
         ckpt_to_load_from=None,
         tensor_name_in_ckpt=None,
         max_norm=None,
-        trainable=True)
+        trainable=True,
+        use_safe_embedding_lookup=use_safe_embedding_lookup)
 
   def __getnewargs__(self):
     return (self._tpu_categorical_column, self.dimension, self.combiner,
@@ -416,7 +438,8 @@ class _TPUEmbeddingColumnV2(_TPUBaseEmbeddingColumn, fc_lib.EmbeddingColumn):
               combiner='mean',
               initializer=None,
               max_sequence_length=0,
-              learning_rate_fn=None):
+              learning_rate_fn=None,
+              use_safe_embedding_lookup=True):
     _TPUBaseEmbeddingColumn.__init__(
         self,
         categorical_column,
@@ -573,13 +596,15 @@ class _TPUSharedEmbeddingColumnV2(_TPUBaseEmbeddingColumn,
               initializer=None,
               shared_embedding_collection_name=None,
               max_sequence_length=0,
-              learning_rate_fn=None):
+              learning_rate_fn=None,
+              use_safe_embedding_lookup=True):
     return fc_lib.SharedEmbeddingColumn.__new__(
         cls,
         categorical_column,
        combiner=combiner,
        shared_embedding_column_creator=shared_embedding_column_creator,
-        max_norm=None)
+        max_norm=None,
+        use_safe_embedding_lookup=use_safe_embedding_lookup)
 
   def __getnewargs__(self):
     return (self._tpu_categorical_column, self.shared_embedding_column_creator,
@@ -598,7 +623,8 @@ class _TPUSharedEmbeddingColumnV2(_TPUBaseEmbeddingColumn,
               initializer=None,
               shared_embedding_collection_name=None,
               max_sequence_length=0,
-              learning_rate_fn=None):
+              learning_rate_fn=None,
+              use_safe_embedding_lookup=True):
     _TPUBaseEmbeddingColumn.__init__(
         self,
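
The TPU wrappers in this file add no lookup logic of their own; they thread use_safe_embedding_lookup through to the underlying feature column, which is what performs the standard (CPU) lookup. A usage sketch against the exported v1 symbol; the 'watched' column is illustrative:

import tensorflow.compat.v1 as tf

watched = tf.feature_column.categorical_column_with_identity(
    key='watched', num_buckets=100000)

# The flag is forwarded to the underlying feature column, so it takes
# effect when the column is evaluated via the standard lookup path.
tpu_col = tf.tpu.experimental.embedding_column(
    watched, dimension=16, use_safe_embedding_lookup=False)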

View File

@@ -43,7 +43,7 @@ def _initialized_session():
   return sess
 
 
-class EmbeddingColumnTestV2(test.TestCase):
+class EmbeddingColumnTestV2(test.TestCase, parameterized.TestCase):
 
   def test_defaults(self):
     categorical_column = fc_lib.categorical_column_with_identity(
@@ -77,8 +77,16 @@ class EmbeddingColumnTestV2(test.TestCase):
         'aaa': parsing_ops.VarLenFeature(dtypes.int64)
     }, embedding_column._parse_example_spec)
 
+  @parameterized.named_parameters(
+      {
+          'testcase_name': 'use_safe_embedding_lookup',
+          'use_safe_embedding_lookup': True,
+      }, {
+          'testcase_name': 'dont_use_safe_embedding_lookup',
+          'use_safe_embedding_lookup': False,
+      })
   @test_util.deprecated_graph_mode_only
-  def test_feature_layer_cpu(self):
+  def test_feature_layer_cpu(self, use_safe_embedding_lookup):
     # Inputs.
     vocabulary_size = 3
     sparse_input = sparse_tensor.SparseTensorValue(
@@ -135,12 +143,14 @@ class EmbeddingColumnTestV2(test.TestCase):
     embedding_column = tpu_fc.embedding_column_v2(
         categorical_column,
         dimension=embedding_dimension,
-        initializer=_initializer)
+        initializer=_initializer,
+        use_safe_embedding_lookup=use_safe_embedding_lookup)
     sequence_embedding_column = tpu_fc.embedding_column_v2(
         sequence_categorical_column,
         dimension=embedding_dimension,
         initializer=_initializer,
-        max_sequence_length=2)
+        max_sequence_length=2,
+        use_safe_embedding_lookup=use_safe_embedding_lookup)
 
     # Provide sparse input and get dense result.
     features = {'aaa': sparse_input, 'bbb': sparse_input}
@@ -160,6 +170,16 @@ class EmbeddingColumnTestV2(test.TestCase):
       self.assertAllEqual(expected_lookups, embedding_lookup.eval())
       self.assertAllEqual(expected_lookups_sequence,
                           sequence_embedding_lookup[0].eval())
+      # The graph will still have SparseFillEmptyRows due to sequence being
+      # a Rank3 embedding lookup.
+      if use_safe_embedding_lookup:
+        self.assertEqual(2, [
+            x.type for x in ops.get_default_graph().get_operations()
+        ].count('SparseFillEmptyRows'))
+      else:
+        self.assertEqual(1, [
+            x.type for x in ops.get_default_graph().get_operations()
+        ].count('SparseFillEmptyRows'))
 
   def test_deepcopy(self):
     categorical_column = fc_lib.categorical_column_with_identity(
@@ -173,7 +193,7 @@ class EmbeddingColumnTestV2(test.TestCase):
                      embedding_column_copy._max_sequence_length)
 
 
-class SharedEmbeddingColumnTestV2(test.TestCase):
+class SharedEmbeddingColumnTestV2(test.TestCase, parameterized.TestCase):
 
   @test_util.deprecated_graph_mode_only
   def test_defaults(self):
@@ -238,8 +258,16 @@ class SharedEmbeddingColumnTestV2(test.TestCase):
     self.assertEqual((embedding_dimension,), embedding_column_a.variable_shape)
     self.assertEqual((embedding_dimension,), embedding_column_b.variable_shape)
 
+  @parameterized.named_parameters(
+      {
+          'testcase_name': 'use_safe_embedding_lookup',
+          'use_safe_embedding_lookup': True
+      }, {
+          'testcase_name': 'dont_use_safe_embedding_lookup',
+          'use_safe_embedding_lookup': False
+      })
   @test_util.deprecated_graph_mode_only
-  def test_feature_layer_cpu(self):
+  def test_feature_layer_cpu(self, use_safe_embedding_lookup):
     # Inputs.
     vocabulary_size = 3
     input_a = sparse_tensor.SparseTensorValue(
@@ -296,7 +324,8 @@ class SharedEmbeddingColumnTestV2(test.TestCase):
         [categorical_column_a, categorical_column_b],
         dimension=embedding_dimension,
         initializer=_initializer,
-        max_sequence_lengths=[0, 2])
+        max_sequence_lengths=[0, 2],
+        use_safe_embedding_lookup=use_safe_embedding_lookup)
 
     # Provide sparse input and get dense result.
     dense_features = fc_lib.DenseFeatures([embedding_column_a])
@@ -315,6 +344,16 @@ class SharedEmbeddingColumnTestV2(test.TestCase):
      self.assertAllEqual(expected_lookups_a, embedding_lookup_a.eval())
       self.assertAllEqual(expected_lookups_b,
                           embedding_lookup_b[0].eval())
+      # The graph will still have SparseFillEmptyRows due to sequence being
+      # a Rank3 embedding lookup.
+      if use_safe_embedding_lookup:
+        self.assertEqual(2, [
+            x.type for x in ops.get_default_graph().get_operations()
+        ].count('SparseFillEmptyRows'))
+      else:
+        self.assertEqual(1, [
+            x.type for x in ops.get_default_graph().get_operations()
+        ].count('SparseFillEmptyRows'))
 
   def test_deepcopy(self):
     vocabulary_size = 3
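
The op-count assertions above work because SparseFillEmptyRows is emitted only by the safe lookup, and the sequence column's rank-3 input always takes the safe path regardless of the flag (the bypass applies only to inputs of rank 2 or less). The counting pattern itself, usable in any graph-mode test (sketch):

op_types = [op.type
            for op in tf.compat.v1.get_default_graph().get_operations()]
num_safe_lookups = op_types.count('SparseFillEmptyRows')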

View File

@@ -26,7 +26,7 @@ tf_module {
   }
   member_method {
     name: "embedding_column"
-    argspec: "args=[\'categorical_column\', \'dimension\', \'combiner\', \'initializer\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'True\'], "
+    argspec: "args=[\'categorical_column\', \'dimension\', \'combiner\', \'initializer\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\', \'use_safe_embedding_lookup\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'True\', \'True\'], "
   }
   member_method {
     name: "indicator_column"
@@ -70,7 +70,7 @@ tf_module {
   }
   member_method {
     name: "shared_embedding_columns"
-    argspec: "args=[\'categorical_columns\', \'dimension\', \'combiner\', \'initializer\', \'shared_embedding_collection_name\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\'], "
+    argspec: "args=[\'categorical_columns\', \'dimension\', \'combiner\', \'initializer\', \'shared_embedding_collection_name\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\', \'use_safe_embedding_lookup\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\', \'True\'], "
   }
   member_method {
     name: "weighted_categorical_column"

View File

@@ -22,7 +22,7 @@ tf_module {
   }
   member_method {
     name: "embedding_column"
-    argspec: "args=[\'categorical_column\', \'dimension\', \'combiner\', \'initializer\', \'max_sequence_length\', \'learning_rate_fn\', \'embedding_lookup_device\', \'tensor_core_shape\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'0\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'categorical_column\', \'dimension\', \'combiner\', \'initializer\', \'max_sequence_length\', \'learning_rate_fn\', \'embedding_lookup_device\', \'tensor_core_shape\', \'use_safe_embedding_lookup\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'0\', \'None\', \'None\', \'None\', \'True\'], "
   }
   member_method {
     name: "initialize_tpu_system"
@@ -30,7 +30,7 @@ tf_module {
   }
   member_method {
     name: "shared_embedding_columns"
-    argspec: "args=[\'categorical_columns\', \'dimension\', \'combiner\', \'initializer\', \'shared_embedding_collection_name\', \'max_sequence_lengths\', \'learning_rate_fn\', \'embedding_lookup_device\', \'tensor_core_shape\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'categorical_columns\', \'dimension\', \'combiner\', \'initializer\', \'shared_embedding_collection_name\', \'max_sequence_lengths\', \'learning_rate_fn\', \'embedding_lookup_device\', \'tensor_core_shape\', \'use_safe_embedding_lookup\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\'], "
   }
   member_method {
     name: "shutdown_tpu_system"

View File

@@ -26,7 +26,7 @@ tf_module {
   }
   member_method {
     name: "embedding_column"
-    argspec: "args=[\'categorical_column\', \'dimension\', \'combiner\', \'initializer\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'True\'], "
+    argspec: "args=[\'categorical_column\', \'dimension\', \'combiner\', \'initializer\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\', \'use_safe_embedding_lookup\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'True\', \'True\'], "
   }
   member_method {
     name: "indicator_column"
@@ -62,7 +62,7 @@ tf_module {
   }
   member_method {
     name: "shared_embeddings"
-    argspec: "args=[\'categorical_columns\', \'dimension\', \'combiner\', \'initializer\', \'shared_embedding_collection_name\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\'], "
+    argspec: "args=[\'categorical_columns\', \'dimension\', \'combiner\', \'initializer\', \'shared_embedding_collection_name\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\', \'use_safe_embedding_lookup\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\', \'True\'], "
  }
   member_method {
     name: "weighted_categorical_column"