Embedding feature column performance optimization.
PiperOrigin-RevId: 292193767 Change-Id: I92006247b40fa0025bab6f35ac74e44ef43c2397
This commit is contained in:
parent
7db3d7abe3
commit
f1e95d1ba1
@ -821,7 +821,8 @@ def _embedding_column(categorical_column,
|
|||||||
ckpt_to_load_from=None,
|
ckpt_to_load_from=None,
|
||||||
tensor_name_in_ckpt=None,
|
tensor_name_in_ckpt=None,
|
||||||
max_norm=None,
|
max_norm=None,
|
||||||
trainable=True):
|
trainable=True,
|
||||||
|
use_safe_embedding_lookup=True):
|
||||||
"""`_DenseColumn` that converts from sparse, categorical input.
|
"""`_DenseColumn` that converts from sparse, categorical input.
|
||||||
|
|
||||||
Use this when your inputs are sparse, but you want to convert them to a dense
|
Use this when your inputs are sparse, but you want to convert them to a dense
|
||||||
@ -882,6 +883,13 @@ def _embedding_column(categorical_column,
|
|||||||
not `None`.
|
not `None`.
|
||||||
max_norm: If not `None`, embedding values are l2-normalized to this value.
|
max_norm: If not `None`, embedding values are l2-normalized to this value.
|
||||||
trainable: Whether or not the embedding is trainable. Default is True.
|
trainable: Whether or not the embedding is trainable. Default is True.
|
||||||
|
use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
|
||||||
|
instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
|
||||||
|
there are no empty rows and all weights and ids are positive at the
|
||||||
|
expense of extra compute cost. This only applies to rank 2 (NxM) shaped
|
||||||
|
input tensors. Defaults to true; consider turning it off if the above
|
||||||
|
checks are not needed. Note that having empty rows will not trigger any
|
||||||
|
error, though the output result might be 0 or omitted.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
`_DenseColumn` that converts from sparse input.
|
`_DenseColumn` that converts from sparse input.
|
||||||
@ -926,7 +934,8 @@ def _embedding_column(categorical_column,
|
|||||||
ckpt_to_load_from=ckpt_to_load_from,
|
ckpt_to_load_from=ckpt_to_load_from,
|
||||||
tensor_name_in_ckpt=tensor_name_in_ckpt,
|
tensor_name_in_ckpt=tensor_name_in_ckpt,
|
||||||
max_norm=max_norm,
|
max_norm=max_norm,
|
||||||
trainable=trainable)
|
trainable=trainable,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
|
|
||||||
|
|
||||||
def _numeric_column(key,
|
def _numeric_column(key,
|
||||||
@ -2444,9 +2453,32 @@ class _EmbeddingColumn(
|
|||||||
collections.namedtuple(
|
collections.namedtuple(
|
||||||
'_EmbeddingColumn',
|
'_EmbeddingColumn',
|
||||||
('categorical_column', 'dimension', 'combiner', 'layer_creator',
|
('categorical_column', 'dimension', 'combiner', 'layer_creator',
|
||||||
'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable'))):
|
'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable',
|
||||||
|
'use_safe_embedding_lookup'))):
|
||||||
"""See `embedding_column`."""
|
"""See `embedding_column`."""
|
||||||
|
|
||||||
|
def __new__(cls,
|
||||||
|
categorical_column,
|
||||||
|
dimension,
|
||||||
|
combiner,
|
||||||
|
layer_creator,
|
||||||
|
ckpt_to_load_from,
|
||||||
|
tensor_name_in_ckpt,
|
||||||
|
max_norm,
|
||||||
|
trainable,
|
||||||
|
use_safe_embedding_lookup=True):
|
||||||
|
return super(_EmbeddingColumn, cls).__new__(
|
||||||
|
cls,
|
||||||
|
categorical_column=categorical_column,
|
||||||
|
dimension=dimension,
|
||||||
|
combiner=combiner,
|
||||||
|
layer_creator=layer_creator,
|
||||||
|
ckpt_to_load_from=ckpt_to_load_from,
|
||||||
|
tensor_name_in_ckpt=tensor_name_in_ckpt,
|
||||||
|
max_norm=max_norm,
|
||||||
|
trainable=trainable,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def name(self):
|
def name(self):
|
||||||
if not hasattr(self, '_name'):
|
if not hasattr(self, '_name'):
|
||||||
@ -2489,11 +2521,17 @@ class _EmbeddingColumn(
|
|||||||
self.tensor_name_in_ckpt: to_restore
|
self.tensor_name_in_ckpt: to_restore
|
||||||
})
|
})
|
||||||
|
|
||||||
|
sparse_id_rank = tensor_shape.dimension_value(
|
||||||
|
sparse_ids.dense_shape.get_shape()[0])
|
||||||
|
embedding_lookup_sparse = embedding_ops.safe_embedding_lookup_sparse
|
||||||
|
if (not self.use_safe_embedding_lookup and sparse_id_rank is not None and
|
||||||
|
sparse_id_rank <= 2):
|
||||||
|
embedding_lookup_sparse = embedding_ops.embedding_lookup_sparse
|
||||||
# Return embedding lookup result.
|
# Return embedding lookup result.
|
||||||
return embedding_ops.safe_embedding_lookup_sparse(
|
return embedding_lookup_sparse(
|
||||||
embedding_weights=embedding_weights,
|
embedding_weights,
|
||||||
sparse_ids=sparse_ids,
|
sparse_ids,
|
||||||
sparse_weights=sparse_weights,
|
sparse_weights,
|
||||||
combiner=self.combiner,
|
combiner=self.combiner,
|
||||||
name='%s_weights' % self.name,
|
name='%s_weights' % self.name,
|
||||||
max_norm=self.max_norm)
|
max_norm=self.max_norm)
|
||||||
@ -2551,7 +2589,8 @@ class _SharedEmbeddingColumn(
|
|||||||
'_SharedEmbeddingColumn',
|
'_SharedEmbeddingColumn',
|
||||||
('categorical_column', 'dimension', 'combiner', 'initializer',
|
('categorical_column', 'dimension', 'combiner', 'initializer',
|
||||||
'shared_embedding_collection_name', 'ckpt_to_load_from',
|
'shared_embedding_collection_name', 'ckpt_to_load_from',
|
||||||
'tensor_name_in_ckpt', 'max_norm', 'trainable'))):
|
'tensor_name_in_ckpt', 'max_norm', 'trainable',
|
||||||
|
'use_safe_embedding_lookup'))):
|
||||||
"""See `embedding_column`."""
|
"""See `embedding_column`."""
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -2632,11 +2671,17 @@ class _SharedEmbeddingColumn(
|
|||||||
self.tensor_name_in_ckpt: to_restore
|
self.tensor_name_in_ckpt: to_restore
|
||||||
})
|
})
|
||||||
|
|
||||||
|
sparse_id_rank = tensor_shape.dimension_value(
|
||||||
|
sparse_ids.dense_shape.get_shape()[0])
|
||||||
|
embedding_lookup_sparse = embedding_ops.safe_embedding_lookup_sparse
|
||||||
|
if (not self.use_safe_embedding_lookup and sparse_id_rank is not None and
|
||||||
|
sparse_id_rank <= 2):
|
||||||
|
embedding_lookup_sparse = embedding_ops.embedding_lookup_sparse
|
||||||
# Return embedding lookup result.
|
# Return embedding lookup result.
|
||||||
return embedding_ops.safe_embedding_lookup_sparse(
|
return embedding_lookup_sparse(
|
||||||
embedding_weights=embedding_weights,
|
embedding_weights,
|
||||||
sparse_ids=sparse_ids,
|
sparse_ids,
|
||||||
sparse_weights=sparse_weights,
|
sparse_weights,
|
||||||
combiner=self.combiner,
|
combiner=self.combiner,
|
||||||
name='%s_weights' % self.name,
|
name='%s_weights' % self.name,
|
||||||
max_norm=self.max_norm)
|
max_norm=self.max_norm)
|
||||||
|
@ -850,7 +850,8 @@ def embedding_column(categorical_column,
|
|||||||
ckpt_to_load_from=None,
|
ckpt_to_load_from=None,
|
||||||
tensor_name_in_ckpt=None,
|
tensor_name_in_ckpt=None,
|
||||||
max_norm=None,
|
max_norm=None,
|
||||||
trainable=True):
|
trainable=True,
|
||||||
|
use_safe_embedding_lookup=True):
|
||||||
"""`DenseColumn` that converts from sparse, categorical input.
|
"""`DenseColumn` that converts from sparse, categorical input.
|
||||||
|
|
||||||
Use this when your inputs are sparse, but you want to convert them to a dense
|
Use this when your inputs are sparse, but you want to convert them to a dense
|
||||||
@ -911,6 +912,13 @@ def embedding_column(categorical_column,
|
|||||||
`None`.
|
`None`.
|
||||||
max_norm: If not `None`, embedding values are l2-normalized to this value.
|
max_norm: If not `None`, embedding values are l2-normalized to this value.
|
||||||
trainable: Whether or not the embedding is trainable. Default is True.
|
trainable: Whether or not the embedding is trainable. Default is True.
|
||||||
|
use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
|
||||||
|
instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
|
||||||
|
there are no empty rows and all weights and ids are positive at the
|
||||||
|
expense of extra compute cost. This only applies to rank 2 (NxM) shaped
|
||||||
|
input tensors. Defaults to true; consider turning it off if the above
|
||||||
|
checks are not needed. Note that having empty rows will not trigger any
|
||||||
|
error, though the output result might be 0 or omitted.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
`DenseColumn` that converts from sparse input.
|
`DenseColumn` that converts from sparse input.
|
||||||
@ -944,7 +952,8 @@ def embedding_column(categorical_column,
|
|||||||
ckpt_to_load_from=ckpt_to_load_from,
|
ckpt_to_load_from=ckpt_to_load_from,
|
||||||
tensor_name_in_ckpt=tensor_name_in_ckpt,
|
tensor_name_in_ckpt=tensor_name_in_ckpt,
|
||||||
max_norm=max_norm,
|
max_norm=max_norm,
|
||||||
trainable=trainable)
|
trainable=trainable,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
|
|
||||||
|
|
||||||
@tf_export(v1=['feature_column.shared_embedding_columns'])
|
@tf_export(v1=['feature_column.shared_embedding_columns'])
|
||||||
@ -956,7 +965,8 @@ def shared_embedding_columns(categorical_columns,
|
|||||||
ckpt_to_load_from=None,
|
ckpt_to_load_from=None,
|
||||||
tensor_name_in_ckpt=None,
|
tensor_name_in_ckpt=None,
|
||||||
max_norm=None,
|
max_norm=None,
|
||||||
trainable=True):
|
trainable=True,
|
||||||
|
use_safe_embedding_lookup=True):
|
||||||
"""List of dense columns that convert from sparse, categorical input.
|
"""List of dense columns that convert from sparse, categorical input.
|
||||||
|
|
||||||
This is similar to `embedding_column`, except that it produces a list of
|
This is similar to `embedding_column`, except that it produces a list of
|
||||||
@ -1039,6 +1049,13 @@ def shared_embedding_columns(categorical_columns,
|
|||||||
max_norm: If not `None`, each embedding is clipped if its l2-norm is larger
|
max_norm: If not `None`, each embedding is clipped if its l2-norm is larger
|
||||||
than this value, before combining.
|
than this value, before combining.
|
||||||
trainable: Whether or not the embedding is trainable. Default is True.
|
trainable: Whether or not the embedding is trainable. Default is True.
|
||||||
|
use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
|
||||||
|
instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
|
||||||
|
there are no empty rows and all weights and ids are positive at the
|
||||||
|
expense of extra compute cost. This only applies to rank 2 (NxM) shaped
|
||||||
|
input tensors. Defaults to true; consider turning it off if the above
|
||||||
|
checks are not needed. Note that having empty rows will not trigger any
|
||||||
|
error, though the output result might be 0 or omitted.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A list of dense columns that converts from sparse input. The order of
|
A list of dense columns that converts from sparse input. The order of
|
||||||
@ -1117,7 +1134,8 @@ def shared_embedding_columns(categorical_columns,
|
|||||||
ckpt_to_load_from=ckpt_to_load_from,
|
ckpt_to_load_from=ckpt_to_load_from,
|
||||||
tensor_name_in_ckpt=tensor_name_in_ckpt,
|
tensor_name_in_ckpt=tensor_name_in_ckpt,
|
||||||
max_norm=max_norm,
|
max_norm=max_norm,
|
||||||
trainable=trainable))
|
trainable=trainable,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup))
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@ -1131,7 +1149,8 @@ def shared_embedding_columns_v2(categorical_columns,
|
|||||||
ckpt_to_load_from=None,
|
ckpt_to_load_from=None,
|
||||||
tensor_name_in_ckpt=None,
|
tensor_name_in_ckpt=None,
|
||||||
max_norm=None,
|
max_norm=None,
|
||||||
trainable=True):
|
trainable=True,
|
||||||
|
use_safe_embedding_lookup=True):
|
||||||
"""List of dense columns that convert from sparse, categorical input.
|
"""List of dense columns that convert from sparse, categorical input.
|
||||||
|
|
||||||
This is similar to `embedding_column`, except that it produces a list of
|
This is similar to `embedding_column`, except that it produces a list of
|
||||||
@ -1213,6 +1232,13 @@ def shared_embedding_columns_v2(categorical_columns,
|
|||||||
max_norm: If not `None`, each embedding is clipped if its l2-norm is
|
max_norm: If not `None`, each embedding is clipped if its l2-norm is
|
||||||
larger than this value, before combining.
|
larger than this value, before combining.
|
||||||
trainable: Whether or not the embedding is trainable. Default is True.
|
trainable: Whether or not the embedding is trainable. Default is True.
|
||||||
|
use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
|
||||||
|
instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
|
||||||
|
there are no empty rows and all weights and ids are positive at the
|
||||||
|
expense of extra compute cost. This only applies to rank 2 (NxM) shaped
|
||||||
|
input tensors. Defaults to true; consider turning it off if the above
|
||||||
|
checks are not needed. Note that having empty rows will not trigger any
|
||||||
|
error, though the output result might be 0 or omitted.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A list of dense columns that converts from sparse input. The order of
|
A list of dense columns that converts from sparse input. The order of
|
||||||
@ -1277,7 +1303,8 @@ def shared_embedding_columns_v2(categorical_columns,
|
|||||||
|
|
||||||
column_creator = SharedEmbeddingColumnCreator(
|
column_creator = SharedEmbeddingColumnCreator(
|
||||||
dimension, initializer, ckpt_to_load_from, tensor_name_in_ckpt,
|
dimension, initializer, ckpt_to_load_from, tensor_name_in_ckpt,
|
||||||
num_buckets, trainable, shared_embedding_collection_name)
|
num_buckets, trainable, shared_embedding_collection_name,
|
||||||
|
use_safe_embedding_lookup)
|
||||||
|
|
||||||
result = []
|
result = []
|
||||||
for column in categorical_columns:
|
for column in categorical_columns:
|
||||||
@ -3082,9 +3109,32 @@ class EmbeddingColumn(
|
|||||||
collections.namedtuple(
|
collections.namedtuple(
|
||||||
'EmbeddingColumn',
|
'EmbeddingColumn',
|
||||||
('categorical_column', 'dimension', 'combiner', 'initializer',
|
('categorical_column', 'dimension', 'combiner', 'initializer',
|
||||||
'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable'))):
|
'ckpt_to_load_from', 'tensor_name_in_ckpt', 'max_norm', 'trainable',
|
||||||
|
'use_safe_embedding_lookup'))):
|
||||||
"""See `embedding_column`."""
|
"""See `embedding_column`."""
|
||||||
|
|
||||||
|
def __new__(cls,
|
||||||
|
categorical_column,
|
||||||
|
dimension,
|
||||||
|
combiner,
|
||||||
|
initializer,
|
||||||
|
ckpt_to_load_from,
|
||||||
|
tensor_name_in_ckpt,
|
||||||
|
max_norm,
|
||||||
|
trainable,
|
||||||
|
use_safe_embedding_lookup=True):
|
||||||
|
return super(EmbeddingColumn, cls).__new__(
|
||||||
|
cls,
|
||||||
|
categorical_column=categorical_column,
|
||||||
|
dimension=dimension,
|
||||||
|
combiner=combiner,
|
||||||
|
initializer=initializer,
|
||||||
|
ckpt_to_load_from=ckpt_to_load_from,
|
||||||
|
tensor_name_in_ckpt=tensor_name_in_ckpt,
|
||||||
|
max_norm=max_norm,
|
||||||
|
trainable=trainable,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def _is_v2_column(self):
|
def _is_v2_column(self):
|
||||||
return (isinstance(self.categorical_column, FeatureColumn) and
|
return (isinstance(self.categorical_column, FeatureColumn) and
|
||||||
@ -3156,11 +3206,17 @@ class EmbeddingColumn(
|
|||||||
self.tensor_name_in_ckpt: to_restore
|
self.tensor_name_in_ckpt: to_restore
|
||||||
})
|
})
|
||||||
|
|
||||||
|
sparse_id_rank = tensor_shape.dimension_value(
|
||||||
|
sparse_ids.dense_shape.get_shape()[0])
|
||||||
|
embedding_lookup_sparse = embedding_ops.safe_embedding_lookup_sparse
|
||||||
|
if (not self.use_safe_embedding_lookup and sparse_id_rank is not None and
|
||||||
|
sparse_id_rank <= 2):
|
||||||
|
embedding_lookup_sparse = embedding_ops.embedding_lookup_sparse
|
||||||
# Return embedding lookup result.
|
# Return embedding lookup result.
|
||||||
return embedding_ops.safe_embedding_lookup_sparse(
|
return embedding_lookup_sparse(
|
||||||
embedding_weights=embedding_weights,
|
embedding_weights,
|
||||||
sparse_ids=sparse_ids,
|
sparse_ids,
|
||||||
sparse_weights=sparse_weights,
|
sparse_weights,
|
||||||
combiner=self.combiner,
|
combiner=self.combiner,
|
||||||
name='%s_weights' % self.name,
|
name='%s_weights' % self.name,
|
||||||
max_norm=self.max_norm)
|
max_norm=self.max_norm)
|
||||||
@ -3301,6 +3357,8 @@ class EmbeddingColumn(
|
|||||||
@classmethod
|
@classmethod
|
||||||
def from_config(cls, config, custom_objects=None, columns_by_name=None):
|
def from_config(cls, config, custom_objects=None, columns_by_name=None):
|
||||||
"""See `FeatureColumn` base class."""
|
"""See `FeatureColumn` base class."""
|
||||||
|
if 'use_safe_embedding_lookup' not in config:
|
||||||
|
config['use_safe_embedding_lookup'] = True
|
||||||
from tensorflow.python.feature_column.serialization import deserialize_feature_column # pylint: disable=g-import-not-at-top
|
from tensorflow.python.feature_column.serialization import deserialize_feature_column # pylint: disable=g-import-not-at-top
|
||||||
_check_config_keys(config, cls._fields)
|
_check_config_keys(config, cls._fields)
|
||||||
kwargs = _standardize_and_copy_config(config)
|
kwargs = _standardize_and_copy_config(config)
|
||||||
@ -3326,7 +3384,8 @@ class SharedEmbeddingColumnCreator(tracking.AutoTrackable):
|
|||||||
tensor_name_in_ckpt,
|
tensor_name_in_ckpt,
|
||||||
num_buckets,
|
num_buckets,
|
||||||
trainable,
|
trainable,
|
||||||
name='shared_embedding_column_creator'):
|
name='shared_embedding_column_creator',
|
||||||
|
use_safe_embedding_lookup=True):
|
||||||
self._dimension = dimension
|
self._dimension = dimension
|
||||||
self._initializer = initializer
|
self._initializer = initializer
|
||||||
self._ckpt_to_load_from = ckpt_to_load_from
|
self._ckpt_to_load_from = ckpt_to_load_from
|
||||||
@ -3334,11 +3393,13 @@ class SharedEmbeddingColumnCreator(tracking.AutoTrackable):
|
|||||||
self._num_buckets = num_buckets
|
self._num_buckets = num_buckets
|
||||||
self._trainable = trainable
|
self._trainable = trainable
|
||||||
self._name = name
|
self._name = name
|
||||||
|
self._use_safe_embedding_lookup = use_safe_embedding_lookup
|
||||||
# Map from graph keys to embedding_weight variables.
|
# Map from graph keys to embedding_weight variables.
|
||||||
self._embedding_weights = {}
|
self._embedding_weights = {}
|
||||||
|
|
||||||
def __call__(self, categorical_column, combiner, max_norm):
|
def __call__(self, categorical_column, combiner, max_norm):
|
||||||
return SharedEmbeddingColumn(categorical_column, self, combiner, max_norm)
|
return SharedEmbeddingColumn(categorical_column, self, combiner, max_norm,
|
||||||
|
self._use_safe_embedding_lookup)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def embedding_weights(self):
|
def embedding_weights(self):
|
||||||
@ -3374,9 +3435,23 @@ class SharedEmbeddingColumn(
|
|||||||
collections.namedtuple(
|
collections.namedtuple(
|
||||||
'SharedEmbeddingColumn',
|
'SharedEmbeddingColumn',
|
||||||
('categorical_column', 'shared_embedding_column_creator', 'combiner',
|
('categorical_column', 'shared_embedding_column_creator', 'combiner',
|
||||||
'max_norm'))):
|
'max_norm', 'use_safe_embedding_lookup'))):
|
||||||
"""See `embedding_column`."""
|
"""See `embedding_column`."""
|
||||||
|
|
||||||
|
def __new__(cls,
|
||||||
|
categorical_column,
|
||||||
|
shared_embedding_column_creator,
|
||||||
|
combiner,
|
||||||
|
max_norm,
|
||||||
|
use_safe_embedding_lookup=True):
|
||||||
|
return super(SharedEmbeddingColumn, cls).__new__(
|
||||||
|
cls,
|
||||||
|
categorical_column=categorical_column,
|
||||||
|
shared_embedding_column_creator=shared_embedding_column_creator,
|
||||||
|
combiner=combiner,
|
||||||
|
max_norm=max_norm,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def _is_v2_column(self):
|
def _is_v2_column(self):
|
||||||
return True
|
return True
|
||||||
@ -3426,11 +3501,17 @@ class SharedEmbeddingColumn(
|
|||||||
|
|
||||||
embedding_weights = self.shared_embedding_column_creator.embedding_weights
|
embedding_weights = self.shared_embedding_column_creator.embedding_weights
|
||||||
|
|
||||||
|
sparse_id_rank = tensor_shape.dimension_value(
|
||||||
|
sparse_ids.dense_shape.get_shape()[0])
|
||||||
|
embedding_lookup_sparse = embedding_ops.safe_embedding_lookup_sparse
|
||||||
|
if (not self.use_safe_embedding_lookup and sparse_id_rank is not None and
|
||||||
|
sparse_id_rank <= 2):
|
||||||
|
embedding_lookup_sparse = (embedding_ops.embedding_lookup_sparse)
|
||||||
# Return embedding lookup result.
|
# Return embedding lookup result.
|
||||||
return embedding_ops.safe_embedding_lookup_sparse(
|
return embedding_lookup_sparse(
|
||||||
embedding_weights=embedding_weights,
|
embedding_weights,
|
||||||
sparse_ids=sparse_ids,
|
sparse_ids,
|
||||||
sparse_weights=sparse_weights,
|
sparse_weights,
|
||||||
combiner=self.combiner,
|
combiner=self.combiner,
|
||||||
name='%s_weights' % self.name,
|
name='%s_weights' % self.name,
|
||||||
max_norm=self.max_norm)
|
max_norm=self.max_norm)
|
||||||
|
@ -21,6 +21,7 @@ from __future__ import print_function
|
|||||||
import collections
|
import collections
|
||||||
import copy
|
import copy
|
||||||
|
|
||||||
|
from absl.testing import parameterized
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from tensorflow.core.example import example_pb2
|
from tensorflow.core.example import example_pb2
|
||||||
@ -5704,7 +5705,7 @@ class _TestStateManager(fc.StateManager):
|
|||||||
raise ValueError('Could not find variable.')
|
raise ValueError('Could not find variable.')
|
||||||
|
|
||||||
|
|
||||||
class EmbeddingColumnTest(test.TestCase):
|
class EmbeddingColumnTest(test.TestCase, parameterized.TestCase):
|
||||||
|
|
||||||
@test_util.run_deprecated_v1
|
@test_util.run_deprecated_v1
|
||||||
def test_defaults(self):
|
def test_defaults(self):
|
||||||
@ -6272,8 +6273,16 @@ class EmbeddingColumnTest(test.TestCase):
|
|||||||
self.assertAllClose(((94.,), (29.,), (0.,), (42.,)),
|
self.assertAllClose(((94.,), (29.,), (0.,), (42.,)),
|
||||||
self.evaluate(predictions))
|
self.evaluate(predictions))
|
||||||
|
|
||||||
|
@parameterized.named_parameters(
|
||||||
|
{
|
||||||
|
'testcase_name': 'use_safe_embedding_lookup',
|
||||||
|
'use_safe_embedding_lookup': True
|
||||||
|
}, {
|
||||||
|
'testcase_name': 'dont_use_safe_embedding_lookup',
|
||||||
|
'use_safe_embedding_lookup': False
|
||||||
|
})
|
||||||
@test_util.run_deprecated_v1
|
@test_util.run_deprecated_v1
|
||||||
def test_dense_features(self):
|
def test_dense_features(self, use_safe_embedding_lookup):
|
||||||
# Inputs.
|
# Inputs.
|
||||||
vocabulary_size = 3
|
vocabulary_size = 3
|
||||||
sparse_input = sparse_tensor.SparseTensorValue(
|
sparse_input = sparse_tensor.SparseTensorValue(
|
||||||
@ -6317,7 +6326,8 @@ class EmbeddingColumnTest(test.TestCase):
|
|||||||
embedding_column = fc.embedding_column(
|
embedding_column = fc.embedding_column(
|
||||||
categorical_column,
|
categorical_column,
|
||||||
dimension=embedding_dimension,
|
dimension=embedding_dimension,
|
||||||
initializer=_initializer)
|
initializer=_initializer,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
|
|
||||||
# Provide sparse input and get dense result.
|
# Provide sparse input and get dense result.
|
||||||
l = df.DenseFeatures((embedding_column,))
|
l = df.DenseFeatures((embedding_column,))
|
||||||
@ -6339,6 +6349,14 @@ class EmbeddingColumnTest(test.TestCase):
|
|||||||
self.assertAllEqual(embedding_values, self.evaluate(trainable_vars[0]))
|
self.assertAllEqual(embedding_values, self.evaluate(trainable_vars[0]))
|
||||||
self.assertAllEqual(expected_lookups, self.evaluate(dense_features))
|
self.assertAllEqual(expected_lookups, self.evaluate(dense_features))
|
||||||
|
|
||||||
|
if use_safe_embedding_lookup:
|
||||||
|
self.assertIn('SparseFillEmptyRows',
|
||||||
|
[x.type for x in ops.get_default_graph().get_operations()])
|
||||||
|
else:
|
||||||
|
self.assertNotIn(
|
||||||
|
'SparseFillEmptyRows',
|
||||||
|
[x.type for x in ops.get_default_graph().get_operations()])
|
||||||
|
|
||||||
@test_util.run_deprecated_v1
|
@test_util.run_deprecated_v1
|
||||||
def test_dense_features_not_trainable(self):
|
def test_dense_features_not_trainable(self):
|
||||||
# Inputs.
|
# Inputs.
|
||||||
@ -6646,31 +6664,33 @@ class EmbeddingColumnTest(test.TestCase):
|
|||||||
self.assertEqual([categorical_column], embedding_column.parents)
|
self.assertEqual([categorical_column], embedding_column.parents)
|
||||||
|
|
||||||
config = embedding_column.get_config()
|
config = embedding_column.get_config()
|
||||||
self.assertEqual({
|
self.assertEqual(
|
||||||
'categorical_column': {
|
{
|
||||||
'class_name': 'IdentityCategoricalColumn',
|
'categorical_column': {
|
||||||
'config': {
|
'class_name': 'IdentityCategoricalColumn',
|
||||||
'number_buckets': 3,
|
'config': {
|
||||||
'key': 'aaa',
|
'number_buckets': 3,
|
||||||
'default_value': None
|
'key': 'aaa',
|
||||||
}
|
'default_value': None
|
||||||
},
|
}
|
||||||
'ckpt_to_load_from': None,
|
},
|
||||||
'combiner': 'mean',
|
'ckpt_to_load_from': None,
|
||||||
'dimension': 2,
|
'combiner': 'mean',
|
||||||
'initializer': {
|
'dimension': 2,
|
||||||
'class_name': 'TruncatedNormal',
|
'initializer': {
|
||||||
'config': {
|
'class_name': 'TruncatedNormal',
|
||||||
'dtype': 'float32',
|
'config': {
|
||||||
'stddev': 0.7071067811865475,
|
'dtype': 'float32',
|
||||||
'seed': None,
|
'stddev': 0.7071067811865475,
|
||||||
'mean': 0.0
|
'seed': None,
|
||||||
}
|
'mean': 0.0
|
||||||
},
|
}
|
||||||
'max_norm': None,
|
},
|
||||||
'tensor_name_in_ckpt': None,
|
'max_norm': None,
|
||||||
'trainable': True
|
'tensor_name_in_ckpt': None,
|
||||||
}, config)
|
'trainable': True,
|
||||||
|
'use_safe_embedding_lookup': True
|
||||||
|
}, config)
|
||||||
|
|
||||||
custom_objects = {'TruncatedNormal': init_ops.TruncatedNormal}
|
custom_objects = {'TruncatedNormal': init_ops.TruncatedNormal}
|
||||||
new_embedding_column = fc.EmbeddingColumn.from_config(
|
new_embedding_column = fc.EmbeddingColumn.from_config(
|
||||||
@ -6707,28 +6727,33 @@ class EmbeddingColumnTest(test.TestCase):
|
|||||||
self.assertEqual([categorical_column], embedding_column.parents)
|
self.assertEqual([categorical_column], embedding_column.parents)
|
||||||
|
|
||||||
config = embedding_column.get_config()
|
config = embedding_column.get_config()
|
||||||
self.assertEqual({
|
self.assertEqual(
|
||||||
'categorical_column': {
|
{
|
||||||
'class_name': 'IdentityCategoricalColumn',
|
'categorical_column': {
|
||||||
'config': {
|
'class_name': 'IdentityCategoricalColumn',
|
||||||
'number_buckets': 3,
|
'config': {
|
||||||
'key': 'aaa',
|
'number_buckets': 3,
|
||||||
'default_value': None
|
'key': 'aaa',
|
||||||
}
|
'default_value': None
|
||||||
},
|
}
|
||||||
'ckpt_to_load_from': None,
|
},
|
||||||
'combiner': 'mean',
|
'ckpt_to_load_from': None,
|
||||||
'dimension': 2,
|
'combiner': 'mean',
|
||||||
'initializer': '_initializer',
|
'dimension': 2,
|
||||||
'max_norm': None,
|
'initializer': '_initializer',
|
||||||
'tensor_name_in_ckpt': None,
|
'max_norm': None,
|
||||||
'trainable': True
|
'tensor_name_in_ckpt': None,
|
||||||
}, config)
|
'trainable': True,
|
||||||
|
'use_safe_embedding_lookup': True
|
||||||
|
}, config)
|
||||||
|
|
||||||
custom_objects = {
|
custom_objects = {
|
||||||
'_initializer': _initializer,
|
'_initializer': _initializer,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# use_safe_embedding_lookup might not be populated for legacy reasons.
|
||||||
|
del config['use_safe_embedding_lookup']
|
||||||
|
|
||||||
new_embedding_column = fc.EmbeddingColumn.from_config(
|
new_embedding_column = fc.EmbeddingColumn.from_config(
|
||||||
config, custom_objects=custom_objects)
|
config, custom_objects=custom_objects)
|
||||||
self.assertEqual(embedding_column, new_embedding_column)
|
self.assertEqual(embedding_column, new_embedding_column)
|
||||||
@ -6746,7 +6771,7 @@ class EmbeddingColumnTest(test.TestCase):
|
|||||||
self.assertIs(categorical_column, new_embedding_column.categorical_column)
|
self.assertIs(categorical_column, new_embedding_column.categorical_column)
|
||||||
|
|
||||||
|
|
||||||
class SharedEmbeddingColumnTest(test.TestCase):
|
class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase):
|
||||||
|
|
||||||
@test_util.run_deprecated_v1
|
@test_util.run_deprecated_v1
|
||||||
def test_defaults(self):
|
def test_defaults(self):
|
||||||
@ -6952,8 +6977,16 @@ class SharedEmbeddingColumnTest(test.TestCase):
|
|||||||
_assert_sparse_tensor_value(self, self.evaluate(output_b),
|
_assert_sparse_tensor_value(self, self.evaluate(output_b),
|
||||||
self.evaluate(output_b_embedded))
|
self.evaluate(output_b_embedded))
|
||||||
|
|
||||||
|
@parameterized.named_parameters(
|
||||||
|
{
|
||||||
|
'testcase_name': 'use_safe_embedding_lookup',
|
||||||
|
'use_safe_embedding_lookup': True
|
||||||
|
}, {
|
||||||
|
'testcase_name': 'dont_use_safe_embedding_lookup',
|
||||||
|
'use_safe_embedding_lookup': False
|
||||||
|
})
|
||||||
@test_util.run_deprecated_v1
|
@test_util.run_deprecated_v1
|
||||||
def test_get_dense_tensor(self):
|
def test_get_dense_tensor(self, use_safe_embedding_lookup):
|
||||||
# Inputs.
|
# Inputs.
|
||||||
vocabulary_size = 3
|
vocabulary_size = 3
|
||||||
# -1 values are ignored.
|
# -1 values are ignored.
|
||||||
@ -6988,12 +7021,18 @@ class SharedEmbeddingColumnTest(test.TestCase):
|
|||||||
# example 1:
|
# example 1:
|
||||||
(2., 3.5), # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
|
(2., 3.5), # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
|
||||||
)
|
)
|
||||||
expected_lookups_b = (
|
if use_safe_embedding_lookup:
|
||||||
# example 0:
|
expected_lookups_b = (
|
||||||
(1., 2.), # ids [0], embedding = [1, 2]
|
# example 0:
|
||||||
# example 1:
|
(1., 2.), # ids [0], embedding = [1, 2]
|
||||||
(0., 0.), # ids [], embedding = [0, 0]
|
# example 1:
|
||||||
)
|
(0., 0.), # ids [], embedding = [0, 0]
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
expected_lookups_b = (
|
||||||
|
# example 0:
|
||||||
|
(1., 2.), # ids [0], embedding = [1, 2]
|
||||||
|
)
|
||||||
|
|
||||||
# Build columns.
|
# Build columns.
|
||||||
categorical_column_a = fc.categorical_column_with_identity(
|
categorical_column_a = fc.categorical_column_with_identity(
|
||||||
@ -7003,7 +7042,8 @@ class SharedEmbeddingColumnTest(test.TestCase):
|
|||||||
embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2(
|
embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2(
|
||||||
[categorical_column_a, categorical_column_b],
|
[categorical_column_a, categorical_column_b],
|
||||||
dimension=embedding_dimension,
|
dimension=embedding_dimension,
|
||||||
initializer=_initializer)
|
initializer=_initializer,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
|
|
||||||
# Provide sparse input and get dense result.
|
# Provide sparse input and get dense result.
|
||||||
embedding_lookup_a = embedding_column_a.get_dense_tensor(
|
embedding_lookup_a = embedding_column_a.get_dense_tensor(
|
||||||
@ -7024,8 +7064,112 @@ class SharedEmbeddingColumnTest(test.TestCase):
|
|||||||
self.assertAllEqual(expected_lookups_a, self.evaluate(embedding_lookup_a))
|
self.assertAllEqual(expected_lookups_a, self.evaluate(embedding_lookup_a))
|
||||||
self.assertAllEqual(expected_lookups_b, self.evaluate(embedding_lookup_b))
|
self.assertAllEqual(expected_lookups_b, self.evaluate(embedding_lookup_b))
|
||||||
|
|
||||||
|
if use_safe_embedding_lookup:
|
||||||
|
self.assertIn('SparseFillEmptyRows',
|
||||||
|
[x.type for x in ops.get_default_graph().get_operations()])
|
||||||
|
else:
|
||||||
|
self.assertNotIn(
|
||||||
|
'SparseFillEmptyRows',
|
||||||
|
[x.type for x in ops.get_default_graph().get_operations()])
|
||||||
|
|
||||||
|
@parameterized.named_parameters(
|
||||||
|
{
|
||||||
|
'testcase_name': 'use_safe_embedding_lookup',
|
||||||
|
'use_safe_embedding_lookup': True
|
||||||
|
}, {
|
||||||
|
'testcase_name': 'dont_use_safe_embedding_lookup',
|
||||||
|
'use_safe_embedding_lookup': False
|
||||||
|
})
|
||||||
@test_util.run_deprecated_v1
|
@test_util.run_deprecated_v1
|
||||||
def test_get_dense_tensor_placeholder_inputs(self):
|
def test_get_dense_tensor_valid(self, use_safe_embedding_lookup):
|
||||||
|
# Inputs.
|
||||||
|
vocabulary_size = 3
|
||||||
|
# -1 values are ignored.
|
||||||
|
input_a = np.array([
|
||||||
|
[2, 1], # example 0, ids [2, 1]
|
||||||
|
[0, -1]
|
||||||
|
]) # example 1, ids [0]
|
||||||
|
input_b = np.array([
|
||||||
|
[1, -1], # example 0, ids [1]
|
||||||
|
[1, 2]
|
||||||
|
]) # example 1, ids [1, 2]
|
||||||
|
input_features = {'aaa': input_a, 'bbb': input_b}
|
||||||
|
|
||||||
|
# Embedding variable.
|
||||||
|
embedding_dimension = 2
|
||||||
|
embedding_values = (
|
||||||
|
(1., 2.), # id 0
|
||||||
|
(3., 5.), # id 1
|
||||||
|
(7., 11.) # id 2
|
||||||
|
)
|
||||||
|
|
||||||
|
def _initializer(shape, dtype, partition_info=None):
|
||||||
|
self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
|
||||||
|
self.assertEqual(dtypes.float32, dtype)
|
||||||
|
self.assertIsNone(partition_info)
|
||||||
|
return embedding_values
|
||||||
|
|
||||||
|
# Expected lookup result, using combiner='mean'.
|
||||||
|
expected_lookups_a = (
|
||||||
|
# example 0:
|
||||||
|
(5., 8.), # ids [2, 1], embedding = mean([3, 5] + [7, 11]) = [5, 8]
|
||||||
|
# example 1:
|
||||||
|
(1., 2), # ids [0], embedding = [1, 2]
|
||||||
|
)
|
||||||
|
expected_lookups_b = (
|
||||||
|
# example 0:
|
||||||
|
(3., 5.), # ids [1], embedding = [3, 5]
|
||||||
|
# example 1:
|
||||||
|
(5., 8.), # ids [1, 2], embedding = mean([3, 5] + [7, 11]) = [5, 8]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Build columns.
|
||||||
|
categorical_column_a = fc.categorical_column_with_identity(
|
||||||
|
key='aaa', num_buckets=vocabulary_size)
|
||||||
|
categorical_column_b = fc.categorical_column_with_identity(
|
||||||
|
key='bbb', num_buckets=vocabulary_size)
|
||||||
|
embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2(
|
||||||
|
[categorical_column_a, categorical_column_b],
|
||||||
|
dimension=embedding_dimension,
|
||||||
|
initializer=_initializer,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
|
|
||||||
|
# Provide sparse input and get dense result.
|
||||||
|
embedding_lookup_a = embedding_column_a.get_dense_tensor(
|
||||||
|
fc.FeatureTransformationCache(input_features), None)
|
||||||
|
embedding_lookup_b = embedding_column_b.get_dense_tensor(
|
||||||
|
fc.FeatureTransformationCache(input_features), None)
|
||||||
|
|
||||||
|
# Assert expected embedding variable and lookups.
|
||||||
|
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
|
||||||
|
self.assertCountEqual(('aaa_bbb_shared_embedding:0',),
|
||||||
|
tuple([v.name for v in global_vars]))
|
||||||
|
embedding_var = global_vars[0]
|
||||||
|
|
||||||
|
self.evaluate(variables_lib.global_variables_initializer())
|
||||||
|
self.evaluate(lookup_ops.tables_initializer())
|
||||||
|
|
||||||
|
self.assertAllEqual(embedding_values, self.evaluate(embedding_var))
|
||||||
|
self.assertAllEqual(expected_lookups_a, self.evaluate(embedding_lookup_a))
|
||||||
|
self.assertAllEqual(expected_lookups_b, self.evaluate(embedding_lookup_b))
|
||||||
|
if use_safe_embedding_lookup:
|
||||||
|
self.assertIn('SparseFillEmptyRows',
|
||||||
|
[x.type for x in ops.get_default_graph().get_operations()])
|
||||||
|
else:
|
||||||
|
self.assertNotIn(
|
||||||
|
'SparseFillEmptyRows',
|
||||||
|
[x.type for x in ops.get_default_graph().get_operations()])
|
||||||
|
|
||||||
|
@parameterized.named_parameters(
|
||||||
|
{
|
||||||
|
'testcase_name': 'use_safe_embedding_lookup',
|
||||||
|
'use_safe_embedding_lookup': True
|
||||||
|
}, {
|
||||||
|
'testcase_name': 'dont_use_safe_embedding_lookup',
|
||||||
|
'use_safe_embedding_lookup': False
|
||||||
|
})
|
||||||
|
@test_util.run_deprecated_v1
|
||||||
|
def test_get_dense_tensor_placeholder_inputs(self, use_safe_embedding_lookup):
|
||||||
# Inputs.
|
# Inputs.
|
||||||
vocabulary_size = 3
|
vocabulary_size = 3
|
||||||
# -1 values are ignored.
|
# -1 values are ignored.
|
||||||
@ -7073,13 +7217,21 @@ class SharedEmbeddingColumnTest(test.TestCase):
|
|||||||
embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2(
|
embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2(
|
||||||
[categorical_column_a, categorical_column_b],
|
[categorical_column_a, categorical_column_b],
|
||||||
dimension=embedding_dimension,
|
dimension=embedding_dimension,
|
||||||
initializer=_initializer)
|
initializer=_initializer,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
|
|
||||||
# Provide sparse input and get dense result.
|
# Provide sparse input and get dense result.
|
||||||
embedding_lookup_a = embedding_column_a.get_dense_tensor(
|
embedding_lookup_a = embedding_column_a.get_dense_tensor(
|
||||||
fc.FeatureTransformationCache(input_features), None)
|
fc.FeatureTransformationCache(input_features), None)
|
||||||
embedding_lookup_b = embedding_column_b.get_dense_tensor(
|
embedding_lookup_b = embedding_column_b.get_dense_tensor(
|
||||||
fc.FeatureTransformationCache(input_features), None)
|
fc.FeatureTransformationCache(input_features), None)
|
||||||
|
if use_safe_embedding_lookup:
|
||||||
|
self.assertIn('SparseFillEmptyRows',
|
||||||
|
[x.type for x in ops.get_default_graph().get_operations()])
|
||||||
|
else:
|
||||||
|
self.assertNotIn(
|
||||||
|
'SparseFillEmptyRows',
|
||||||
|
[x.type for x in ops.get_default_graph().get_operations()])
|
||||||
|
|
||||||
with _initialized_session() as sess:
|
with _initialized_session() as sess:
|
||||||
sess.run([embedding_lookup_a, embedding_lookup_b], feed_dict=feed_dict)
|
sess.run([embedding_lookup_a, embedding_lookup_b], feed_dict=feed_dict)
|
||||||
|
@ -57,7 +57,8 @@ def embedding_column(categorical_column,
|
|||||||
combiner='mean',
|
combiner='mean',
|
||||||
initializer=None,
|
initializer=None,
|
||||||
max_sequence_length=0,
|
max_sequence_length=0,
|
||||||
learning_rate_fn=None):
|
learning_rate_fn=None,
|
||||||
|
use_safe_embedding_lookup=True):
|
||||||
"""TPU embedding_column for `tf.feature_column.embedding_column`.
|
"""TPU embedding_column for `tf.feature_column.embedding_column`.
|
||||||
|
|
||||||
Note that the interface for TPU embedding_column is different from the non-TPU
|
Note that the interface for TPU embedding_column is different from the non-TPU
|
||||||
@ -86,6 +87,13 @@ def embedding_column(categorical_column,
|
|||||||
sequence features and 0 for non-sequence features.
|
sequence features and 0 for non-sequence features.
|
||||||
learning_rate_fn: A function that takes global step and returns learning
|
learning_rate_fn: A function that takes global step and returns learning
|
||||||
rate for the embedding table.
|
rate for the embedding table.
|
||||||
|
use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
|
||||||
|
instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
|
||||||
|
there are no empty rows and all weights and ids are positive at the
|
||||||
|
expense of extra compute cost. This only applies to rank 2 (NxM) shaped
|
||||||
|
input tensors. Defaults to true; consider turning off if the above checks
|
||||||
|
are not needed. Note that having empty rows will not trigger any error
|
||||||
|
though the output result might be 0 or omitted.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A _TPUEmbeddingColumn.
|
A _TPUEmbeddingColumn.
|
||||||
@ -137,7 +145,8 @@ def embedding_column(categorical_column,
|
|||||||
max_norm=None,
|
max_norm=None,
|
||||||
trainable=True,
|
trainable=True,
|
||||||
max_sequence_length=max_sequence_length,
|
max_sequence_length=max_sequence_length,
|
||||||
learning_rate_fn=learning_rate_fn)
|
learning_rate_fn=learning_rate_fn,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
# For Embedding column, the initializer is hidden inside the creator Fn, which
|
# For Embedding column, the initializer is hidden inside the creator Fn, which
|
||||||
# is not accessible later. So, we attach it to a special field. Also note
|
# is not accessible later. So, we attach it to a special field. Also note
|
||||||
# that non-TPU Embedding column and non-TPU shared Embedding column handle the
|
# that non-TPU Embedding column and non-TPU shared Embedding column handle the
|
||||||
@ -152,7 +161,8 @@ def shared_embedding_columns(categorical_columns,
|
|||||||
initializer=None,
|
initializer=None,
|
||||||
shared_embedding_collection_name=None,
|
shared_embedding_collection_name=None,
|
||||||
max_sequence_lengths=None,
|
max_sequence_lengths=None,
|
||||||
learning_rate_fn=None):
|
learning_rate_fn=None,
|
||||||
|
use_safe_embedding_lookup=True):
|
||||||
"""List of dense columns that convert from sparse, categorical input.
|
"""List of dense columns that convert from sparse, categorical input.
|
||||||
|
|
||||||
Note that the interface for TPU embedding_column is different from the non-TPU
|
Note that the interface for TPU embedding_column is different from the non-TPU
|
||||||
@ -187,6 +197,13 @@ def shared_embedding_columns(categorical_columns,
|
|||||||
sequence longer will be truncated.
|
sequence longer will be truncated.
|
||||||
learning_rate_fn: A function that takes global step and returns learning
|
learning_rate_fn: A function that takes global step and returns learning
|
||||||
rate for the embedding table.
|
rate for the embedding table.
|
||||||
|
use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
|
||||||
|
instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
|
||||||
|
there are no empty rows and all weights and ids are positive at the
|
||||||
|
expense of extra compute cost. This only applies to rank 2 (NxM) shaped
|
||||||
|
input tensors. Defaults to true; consider turning off if the above checks
|
||||||
|
are not needed. Note that having empty rows will not trigger any error
|
||||||
|
though the output result might be 0 or omitted.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A _TPUEmbeddingColumn.
|
A _TPUEmbeddingColumn.
|
||||||
@ -261,7 +278,8 @@ def shared_embedding_columns(categorical_columns,
|
|||||||
max_norm=None,
|
max_norm=None,
|
||||||
trainable=True,
|
trainable=True,
|
||||||
max_sequence_length=max_sequence_length,
|
max_sequence_length=max_sequence_length,
|
||||||
learning_rate_fn=learning_rate_fn)
|
learning_rate_fn=learning_rate_fn,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
tpu_columns.append(column)
|
tpu_columns.append(column)
|
||||||
|
|
||||||
return tpu_columns
|
return tpu_columns
|
||||||
@ -347,7 +365,8 @@ class _TPUEmbeddingColumn(_TPUBaseEmbeddingColumn, fc._EmbeddingColumn):
|
|||||||
max_norm=None,
|
max_norm=None,
|
||||||
trainable=True,
|
trainable=True,
|
||||||
max_sequence_length=0,
|
max_sequence_length=0,
|
||||||
learning_rate_fn=None):
|
learning_rate_fn=None,
|
||||||
|
use_safe_embedding_lookup=True):
|
||||||
# Note, args ckpt_to_load_from, tensor_name_in_ckpt, max_norm and trainable
|
# Note, args ckpt_to_load_from, tensor_name_in_ckpt, max_norm and trainable
|
||||||
# are not supported on TPU. They are solely for matching the signature of
|
# are not supported on TPU. They are solely for matching the signature of
|
||||||
# __new__ of parent class fc._EmbeddingColumn.
|
# __new__ of parent class fc._EmbeddingColumn.
|
||||||
@ -360,7 +379,8 @@ class _TPUEmbeddingColumn(_TPUBaseEmbeddingColumn, fc._EmbeddingColumn):
|
|||||||
ckpt_to_load_from=ckpt_to_load_from,
|
ckpt_to_load_from=ckpt_to_load_from,
|
||||||
tensor_name_in_ckpt=tensor_name_in_ckpt,
|
tensor_name_in_ckpt=tensor_name_in_ckpt,
|
||||||
max_norm=max_norm,
|
max_norm=max_norm,
|
||||||
trainable=trainable)
|
trainable=trainable,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
categorical_column,
|
categorical_column,
|
||||||
@ -372,7 +392,8 @@ class _TPUEmbeddingColumn(_TPUBaseEmbeddingColumn, fc._EmbeddingColumn):
|
|||||||
max_norm=None,
|
max_norm=None,
|
||||||
trainable=True,
|
trainable=True,
|
||||||
max_sequence_length=0,
|
max_sequence_length=0,
|
||||||
learning_rate_fn=None):
|
learning_rate_fn=None,
|
||||||
|
use_safe_embedding_lookup=True):
|
||||||
_TPUBaseEmbeddingColumn.__init__(
|
_TPUBaseEmbeddingColumn.__init__(
|
||||||
self,
|
self,
|
||||||
categorical_column,
|
categorical_column,
|
||||||
@ -479,7 +500,8 @@ class _TPUSharedEmbeddingColumn(_TPUBaseEmbeddingColumn,
|
|||||||
max_norm=None,
|
max_norm=None,
|
||||||
trainable=True,
|
trainable=True,
|
||||||
max_sequence_length=0,
|
max_sequence_length=0,
|
||||||
learning_rate_fn=None):
|
learning_rate_fn=None,
|
||||||
|
use_safe_embedding_lookup=True):
|
||||||
return fc._SharedEmbeddingColumn.__new__(
|
return fc._SharedEmbeddingColumn.__new__(
|
||||||
cls,
|
cls,
|
||||||
categorical_column,
|
categorical_column,
|
||||||
@ -490,7 +512,8 @@ class _TPUSharedEmbeddingColumn(_TPUBaseEmbeddingColumn,
|
|||||||
ckpt_to_load_from=ckpt_to_load_from,
|
ckpt_to_load_from=ckpt_to_load_from,
|
||||||
tensor_name_in_ckpt=tensor_name_in_ckpt,
|
tensor_name_in_ckpt=tensor_name_in_ckpt,
|
||||||
max_norm=max_norm,
|
max_norm=max_norm,
|
||||||
trainable=trainable)
|
trainable=trainable,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
categorical_column,
|
categorical_column,
|
||||||
@ -503,7 +526,8 @@ class _TPUSharedEmbeddingColumn(_TPUBaseEmbeddingColumn,
|
|||||||
max_norm=None,
|
max_norm=None,
|
||||||
trainable=True,
|
trainable=True,
|
||||||
max_sequence_length=0,
|
max_sequence_length=0,
|
||||||
learning_rate_fn=None):
|
learning_rate_fn=None,
|
||||||
|
use_safe_embedding_lookup=True):
|
||||||
|
|
||||||
_TPUBaseEmbeddingColumn.__init__(
|
_TPUBaseEmbeddingColumn.__init__(
|
||||||
self,
|
self,
|
||||||
|
@ -56,7 +56,8 @@ def embedding_column_v2(categorical_column,
|
|||||||
max_sequence_length=0,
|
max_sequence_length=0,
|
||||||
learning_rate_fn=None,
|
learning_rate_fn=None,
|
||||||
embedding_lookup_device=None,
|
embedding_lookup_device=None,
|
||||||
tensor_core_shape=None):
|
tensor_core_shape=None,
|
||||||
|
use_safe_embedding_lookup=True):
|
||||||
"""TPU version of `tf.compat.v1.feature_column.embedding_column`.
|
"""TPU version of `tf.compat.v1.feature_column.embedding_column`.
|
||||||
|
|
||||||
Note that the interface for `tf.tpu.experimental.embedding_column` is
|
Note that the interface for `tf.tpu.experimental.embedding_column` is
|
||||||
@ -122,6 +123,13 @@ def embedding_column_v2(categorical_column,
|
|||||||
the intended dense shape to run embedding lookup for this feature on
|
the intended dense shape to run embedding lookup for this feature on
|
||||||
TensorCore. The batch dimension can be left None or -1 to indicate
|
TensorCore. The batch dimension can be left None or -1 to indicate
|
||||||
a dynamic shape. Only rank 2 shapes currently supported.
|
a dynamic shape. Only rank 2 shapes currently supported.
|
||||||
|
use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
|
||||||
|
instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
|
||||||
|
there are no empty rows and all weights and ids are positive at the
|
||||||
|
expense of extra compute cost. This only applies to rank 2 (NxM) shaped
|
||||||
|
input tensors. Defaults to true; consider turning off if the above checks
|
||||||
|
are not needed. Note that having empty rows will not trigger any error
|
||||||
|
though the output result might be 0 or omitted.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A `_TPUEmbeddingColumnV2`.
|
A `_TPUEmbeddingColumnV2`.
|
||||||
@ -175,7 +183,8 @@ def embedding_column_v2(categorical_column,
|
|||||||
combiner=combiner,
|
combiner=combiner,
|
||||||
initializer=initializer,
|
initializer=initializer,
|
||||||
max_sequence_length=max_sequence_length,
|
max_sequence_length=max_sequence_length,
|
||||||
learning_rate_fn=learning_rate_fn)
|
learning_rate_fn=learning_rate_fn,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
else:
|
else:
|
||||||
return _TPUDeviceSpecificEmbeddingColumnV2(
|
return _TPUDeviceSpecificEmbeddingColumnV2(
|
||||||
categorical_column=categorical_column,
|
categorical_column=categorical_column,
|
||||||
@ -185,7 +194,8 @@ def embedding_column_v2(categorical_column,
|
|||||||
max_sequence_length=max_sequence_length,
|
max_sequence_length=max_sequence_length,
|
||||||
learning_rate_fn=learning_rate_fn,
|
learning_rate_fn=learning_rate_fn,
|
||||||
embedding_lookup_device=embedding_lookup_device,
|
embedding_lookup_device=embedding_lookup_device,
|
||||||
tensor_core_shape=tensor_core_shape)
|
tensor_core_shape=tensor_core_shape,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
|
|
||||||
|
|
||||||
@tf_export(v1=['tpu.experimental.shared_embedding_columns'])
|
@tf_export(v1=['tpu.experimental.shared_embedding_columns'])
|
||||||
@ -197,7 +207,8 @@ def shared_embedding_columns_v2(categorical_columns,
|
|||||||
max_sequence_lengths=None,
|
max_sequence_lengths=None,
|
||||||
learning_rate_fn=None,
|
learning_rate_fn=None,
|
||||||
embedding_lookup_device=None,
|
embedding_lookup_device=None,
|
||||||
tensor_core_shape=None):
|
tensor_core_shape=None,
|
||||||
|
use_safe_embedding_lookup=True):
|
||||||
"""TPU version of `tf.compat.v1.feature_column.shared_embedding_columns`.
|
"""TPU version of `tf.compat.v1.feature_column.shared_embedding_columns`.
|
||||||
|
|
||||||
Note that the interface for `tf.tpu.experimental.shared_embedding_columns` is
|
Note that the interface for `tf.tpu.experimental.shared_embedding_columns` is
|
||||||
@ -271,6 +282,13 @@ def shared_embedding_columns_v2(categorical_columns,
|
|||||||
intended dense shape to run embedding lookup for this feature on
|
intended dense shape to run embedding lookup for this feature on
|
||||||
TensorCore. The batch dimension can be left None or -1 to indicate a
|
TensorCore. The batch dimension can be left None or -1 to indicate a
|
||||||
dynamic shape. Only rank 2 shapes currently supported.
|
dynamic shape. Only rank 2 shapes currently supported.
|
||||||
|
use_safe_embedding_lookup: If true, uses safe_embedding_lookup_sparse
|
||||||
|
instead of embedding_lookup_sparse. safe_embedding_lookup_sparse ensures
|
||||||
|
there are no empty rows and all weights and ids are positive at the
|
||||||
|
expense of extra compute cost. This only applies to rank 2 (NxM) shaped
|
||||||
|
input tensors. Defaults to true; consider turning off if the above checks
|
||||||
|
are not needed. Note that having empty rows will not trigger any error
|
||||||
|
though the output result might be 0 or omitted.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A list of `_TPUSharedEmbeddingColumnV2`.
|
A list of `_TPUSharedEmbeddingColumnV2`.
|
||||||
@ -364,7 +382,8 @@ def shared_embedding_columns_v2(categorical_columns,
|
|||||||
initializer=initializer,
|
initializer=initializer,
|
||||||
shared_embedding_collection_name=shared_embedding_collection_name,
|
shared_embedding_collection_name=shared_embedding_collection_name,
|
||||||
max_sequence_length=max_sequence_length,
|
max_sequence_length=max_sequence_length,
|
||||||
learning_rate_fn=learning_rate_fn)
|
learning_rate_fn=learning_rate_fn,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
else:
|
else:
|
||||||
column = _TPUSharedDeviceSpecificEmbeddingColumnV2(
|
column = _TPUSharedDeviceSpecificEmbeddingColumnV2(
|
||||||
categorical_column=categorical_column,
|
categorical_column=categorical_column,
|
||||||
@ -375,7 +394,8 @@ def shared_embedding_columns_v2(categorical_columns,
|
|||||||
max_sequence_length=max_sequence_length,
|
max_sequence_length=max_sequence_length,
|
||||||
learning_rate_fn=learning_rate_fn,
|
learning_rate_fn=learning_rate_fn,
|
||||||
embedding_lookup_device=embedding_lookup_device,
|
embedding_lookup_device=embedding_lookup_device,
|
||||||
tensor_core_shape=tensor_core_shape)
|
tensor_core_shape=tensor_core_shape,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
tpu_columns.append(column)
|
tpu_columns.append(column)
|
||||||
|
|
||||||
return tpu_columns
|
return tpu_columns
|
||||||
@ -390,7 +410,8 @@ class _TPUEmbeddingColumnV2(_TPUBaseEmbeddingColumn, fc_lib.EmbeddingColumn):
|
|||||||
combiner='mean',
|
combiner='mean',
|
||||||
initializer=None,
|
initializer=None,
|
||||||
max_sequence_length=0,
|
max_sequence_length=0,
|
||||||
learning_rate_fn=None):
|
learning_rate_fn=None,
|
||||||
|
use_safe_embedding_lookup=True):
|
||||||
return fc_lib.EmbeddingColumn.__new__(
|
return fc_lib.EmbeddingColumn.__new__(
|
||||||
cls,
|
cls,
|
||||||
categorical_column,
|
categorical_column,
|
||||||
@ -400,7 +421,8 @@ class _TPUEmbeddingColumnV2(_TPUBaseEmbeddingColumn, fc_lib.EmbeddingColumn):
|
|||||||
ckpt_to_load_from=None,
|
ckpt_to_load_from=None,
|
||||||
tensor_name_in_ckpt=None,
|
tensor_name_in_ckpt=None,
|
||||||
max_norm=None,
|
max_norm=None,
|
||||||
trainable=True)
|
trainable=True,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
|
|
||||||
def __getnewargs__(self):
|
def __getnewargs__(self):
|
||||||
return (self._tpu_categorical_column, self.dimension, self.combiner,
|
return (self._tpu_categorical_column, self.dimension, self.combiner,
|
||||||
@ -416,7 +438,8 @@ class _TPUEmbeddingColumnV2(_TPUBaseEmbeddingColumn, fc_lib.EmbeddingColumn):
|
|||||||
combiner='mean',
|
combiner='mean',
|
||||||
initializer=None,
|
initializer=None,
|
||||||
max_sequence_length=0,
|
max_sequence_length=0,
|
||||||
learning_rate_fn=None):
|
learning_rate_fn=None,
|
||||||
|
use_safe_embedding_lookup=True):
|
||||||
_TPUBaseEmbeddingColumn.__init__(
|
_TPUBaseEmbeddingColumn.__init__(
|
||||||
self,
|
self,
|
||||||
categorical_column,
|
categorical_column,
|
||||||
@ -573,13 +596,15 @@ class _TPUSharedEmbeddingColumnV2(_TPUBaseEmbeddingColumn,
|
|||||||
initializer=None,
|
initializer=None,
|
||||||
shared_embedding_collection_name=None,
|
shared_embedding_collection_name=None,
|
||||||
max_sequence_length=0,
|
max_sequence_length=0,
|
||||||
learning_rate_fn=None):
|
learning_rate_fn=None,
|
||||||
|
use_safe_embedding_lookup=True):
|
||||||
return fc_lib.SharedEmbeddingColumn.__new__(
|
return fc_lib.SharedEmbeddingColumn.__new__(
|
||||||
cls,
|
cls,
|
||||||
categorical_column,
|
categorical_column,
|
||||||
combiner=combiner,
|
combiner=combiner,
|
||||||
shared_embedding_column_creator=shared_embedding_column_creator,
|
shared_embedding_column_creator=shared_embedding_column_creator,
|
||||||
max_norm=None)
|
max_norm=None,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
|
|
||||||
def __getnewargs__(self):
|
def __getnewargs__(self):
|
||||||
return (self._tpu_categorical_column, self.shared_embedding_column_creator,
|
return (self._tpu_categorical_column, self.shared_embedding_column_creator,
|
||||||
@ -598,7 +623,8 @@ class _TPUSharedEmbeddingColumnV2(_TPUBaseEmbeddingColumn,
|
|||||||
initializer=None,
|
initializer=None,
|
||||||
shared_embedding_collection_name=None,
|
shared_embedding_collection_name=None,
|
||||||
max_sequence_length=0,
|
max_sequence_length=0,
|
||||||
learning_rate_fn=None):
|
learning_rate_fn=None,
|
||||||
|
use_safe_embedding_lookup=True):
|
||||||
|
|
||||||
_TPUBaseEmbeddingColumn.__init__(
|
_TPUBaseEmbeddingColumn.__init__(
|
||||||
self,
|
self,
|
||||||
|
@ -43,7 +43,7 @@ def _initialized_session():
|
|||||||
return sess
|
return sess
|
||||||
|
|
||||||
|
|
||||||
class EmbeddingColumnTestV2(test.TestCase):
|
class EmbeddingColumnTestV2(test.TestCase, parameterized.TestCase):
|
||||||
|
|
||||||
def test_defaults(self):
|
def test_defaults(self):
|
||||||
categorical_column = fc_lib.categorical_column_with_identity(
|
categorical_column = fc_lib.categorical_column_with_identity(
|
||||||
@ -77,8 +77,16 @@ class EmbeddingColumnTestV2(test.TestCase):
|
|||||||
'aaa': parsing_ops.VarLenFeature(dtypes.int64)
|
'aaa': parsing_ops.VarLenFeature(dtypes.int64)
|
||||||
}, embedding_column._parse_example_spec)
|
}, embedding_column._parse_example_spec)
|
||||||
|
|
||||||
|
@parameterized.named_parameters(
|
||||||
|
{
|
||||||
|
'testcase_name': 'use_safe_embedding_lookup',
|
||||||
|
'use_safe_embedding_lookup': True,
|
||||||
|
}, {
|
||||||
|
'testcase_name': 'dont_use_safe_embedding_lookup',
|
||||||
|
'use_safe_embedding_lookup': False,
|
||||||
|
})
|
||||||
@test_util.deprecated_graph_mode_only
|
@test_util.deprecated_graph_mode_only
|
||||||
def test_feature_layer_cpu(self):
|
def test_feature_layer_cpu(self, use_safe_embedding_lookup):
|
||||||
# Inputs.
|
# Inputs.
|
||||||
vocabulary_size = 3
|
vocabulary_size = 3
|
||||||
sparse_input = sparse_tensor.SparseTensorValue(
|
sparse_input = sparse_tensor.SparseTensorValue(
|
||||||
@ -135,12 +143,14 @@ class EmbeddingColumnTestV2(test.TestCase):
|
|||||||
embedding_column = tpu_fc.embedding_column_v2(
|
embedding_column = tpu_fc.embedding_column_v2(
|
||||||
categorical_column,
|
categorical_column,
|
||||||
dimension=embedding_dimension,
|
dimension=embedding_dimension,
|
||||||
initializer=_initializer)
|
initializer=_initializer,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
sequence_embedding_column = tpu_fc.embedding_column_v2(
|
sequence_embedding_column = tpu_fc.embedding_column_v2(
|
||||||
sequence_categorical_column,
|
sequence_categorical_column,
|
||||||
dimension=embedding_dimension,
|
dimension=embedding_dimension,
|
||||||
initializer=_initializer,
|
initializer=_initializer,
|
||||||
max_sequence_length=2)
|
max_sequence_length=2,
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
|
|
||||||
# Provide sparse input and get dense result.
|
# Provide sparse input and get dense result.
|
||||||
features = {'aaa': sparse_input, 'bbb': sparse_input}
|
features = {'aaa': sparse_input, 'bbb': sparse_input}
|
||||||
@ -160,6 +170,16 @@ class EmbeddingColumnTestV2(test.TestCase):
|
|||||||
self.assertAllEqual(expected_lookups, embedding_lookup.eval())
|
self.assertAllEqual(expected_lookups, embedding_lookup.eval())
|
||||||
self.assertAllEqual(expected_lookups_sequence,
|
self.assertAllEqual(expected_lookups_sequence,
|
||||||
sequence_embedding_lookup[0].eval())
|
sequence_embedding_lookup[0].eval())
|
||||||
|
# The graph will still have SparseFillEmptyRows due to sequence being
|
||||||
|
# a Rank3 embedding lookup.
|
||||||
|
if use_safe_embedding_lookup:
|
||||||
|
self.assertEqual(2, [
|
||||||
|
x.type for x in ops.get_default_graph().get_operations()
|
||||||
|
].count('SparseFillEmptyRows'))
|
||||||
|
else:
|
||||||
|
self.assertEqual(1, [
|
||||||
|
x.type for x in ops.get_default_graph().get_operations()
|
||||||
|
].count('SparseFillEmptyRows'))
|
||||||
|
|
||||||
def test_deepcopy(self):
|
def test_deepcopy(self):
|
||||||
categorical_column = fc_lib.categorical_column_with_identity(
|
categorical_column = fc_lib.categorical_column_with_identity(
|
||||||
@ -173,7 +193,7 @@ class EmbeddingColumnTestV2(test.TestCase):
|
|||||||
embedding_column_copy._max_sequence_length)
|
embedding_column_copy._max_sequence_length)
|
||||||
|
|
||||||
|
|
||||||
class SharedEmbeddingColumnTestV2(test.TestCase):
|
class SharedEmbeddingColumnTestV2(test.TestCase, parameterized.TestCase):
|
||||||
|
|
||||||
@test_util.deprecated_graph_mode_only
|
@test_util.deprecated_graph_mode_only
|
||||||
def test_defaults(self):
|
def test_defaults(self):
|
||||||
@ -238,8 +258,16 @@ class SharedEmbeddingColumnTestV2(test.TestCase):
|
|||||||
self.assertEqual((embedding_dimension,), embedding_column_a.variable_shape)
|
self.assertEqual((embedding_dimension,), embedding_column_a.variable_shape)
|
||||||
self.assertEqual((embedding_dimension,), embedding_column_b.variable_shape)
|
self.assertEqual((embedding_dimension,), embedding_column_b.variable_shape)
|
||||||
|
|
||||||
|
@parameterized.named_parameters(
|
||||||
|
{
|
||||||
|
'testcase_name': 'use_safe_embedding_lookup',
|
||||||
|
'use_safe_embedding_lookup': True
|
||||||
|
}, {
|
||||||
|
'testcase_name': 'dont_use_safe_embedding_lookup',
|
||||||
|
'use_safe_embedding_lookup': False
|
||||||
|
})
|
||||||
@test_util.deprecated_graph_mode_only
|
@test_util.deprecated_graph_mode_only
|
||||||
def test_feature_layer_cpu(self):
|
def test_feature_layer_cpu(self, use_safe_embedding_lookup):
|
||||||
# Inputs.
|
# Inputs.
|
||||||
vocabulary_size = 3
|
vocabulary_size = 3
|
||||||
input_a = sparse_tensor.SparseTensorValue(
|
input_a = sparse_tensor.SparseTensorValue(
|
||||||
@ -296,7 +324,8 @@ class SharedEmbeddingColumnTestV2(test.TestCase):
|
|||||||
[categorical_column_a, categorical_column_b],
|
[categorical_column_a, categorical_column_b],
|
||||||
dimension=embedding_dimension,
|
dimension=embedding_dimension,
|
||||||
initializer=_initializer,
|
initializer=_initializer,
|
||||||
max_sequence_lengths=[0, 2])
|
max_sequence_lengths=[0, 2],
|
||||||
|
use_safe_embedding_lookup=use_safe_embedding_lookup)
|
||||||
|
|
||||||
# Provide sparse input and get dense result.
|
# Provide sparse input and get dense result.
|
||||||
dense_features = fc_lib.DenseFeatures([embedding_column_a])
|
dense_features = fc_lib.DenseFeatures([embedding_column_a])
|
||||||
@ -315,6 +344,16 @@ class SharedEmbeddingColumnTestV2(test.TestCase):
|
|||||||
self.assertAllEqual(expected_lookups_a, embedding_lookup_a.eval())
|
self.assertAllEqual(expected_lookups_a, embedding_lookup_a.eval())
|
||||||
self.assertAllEqual(expected_lookups_b,
|
self.assertAllEqual(expected_lookups_b,
|
||||||
embedding_lookup_b[0].eval())
|
embedding_lookup_b[0].eval())
|
||||||
|
# The graph will still have SparseFillEmptyRows due to sequence being
|
||||||
|
# a Rank3 embedding lookup.
|
||||||
|
if use_safe_embedding_lookup:
|
||||||
|
self.assertEqual(2, [
|
||||||
|
x.type for x in ops.get_default_graph().get_operations()
|
||||||
|
].count('SparseFillEmptyRows'))
|
||||||
|
else:
|
||||||
|
self.assertEqual(1, [
|
||||||
|
x.type for x in ops.get_default_graph().get_operations()
|
||||||
|
].count('SparseFillEmptyRows'))
|
||||||
|
|
||||||
def test_deepcopy(self):
|
def test_deepcopy(self):
|
||||||
vocabulary_size = 3
|
vocabulary_size = 3
|
||||||
|
@ -26,7 +26,7 @@ tf_module {
|
|||||||
}
|
}
|
||||||
member_method {
|
member_method {
|
||||||
name: "embedding_column"
|
name: "embedding_column"
|
||||||
argspec: "args=[\'categorical_column\', \'dimension\', \'combiner\', \'initializer\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'True\'], "
|
argspec: "args=[\'categorical_column\', \'dimension\', \'combiner\', \'initializer\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\', \'use_safe_embedding_lookup\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'True\', \'True\'], "
|
||||||
}
|
}
|
||||||
member_method {
|
member_method {
|
||||||
name: "indicator_column"
|
name: "indicator_column"
|
||||||
@ -70,7 +70,7 @@ tf_module {
|
|||||||
}
|
}
|
||||||
member_method {
|
member_method {
|
||||||
name: "shared_embedding_columns"
|
name: "shared_embedding_columns"
|
||||||
argspec: "args=[\'categorical_columns\', \'dimension\', \'combiner\', \'initializer\', \'shared_embedding_collection_name\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\'], "
|
argspec: "args=[\'categorical_columns\', \'dimension\', \'combiner\', \'initializer\', \'shared_embedding_collection_name\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\', \'use_safe_embedding_lookup\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\', \'True\'], "
|
||||||
}
|
}
|
||||||
member_method {
|
member_method {
|
||||||
name: "weighted_categorical_column"
|
name: "weighted_categorical_column"
|
||||||
|
@ -22,7 +22,7 @@ tf_module {
|
|||||||
}
|
}
|
||||||
member_method {
|
member_method {
|
||||||
name: "embedding_column"
|
name: "embedding_column"
|
||||||
argspec: "args=[\'categorical_column\', \'dimension\', \'combiner\', \'initializer\', \'max_sequence_length\', \'learning_rate_fn\', \'embedding_lookup_device\', \'tensor_core_shape\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'0\', \'None\', \'None\', \'None\'], "
|
argspec: "args=[\'categorical_column\', \'dimension\', \'combiner\', \'initializer\', \'max_sequence_length\', \'learning_rate_fn\', \'embedding_lookup_device\', \'tensor_core_shape\', \'use_safe_embedding_lookup\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'0\', \'None\', \'None\', \'None\', \'True\'], "
|
||||||
}
|
}
|
||||||
member_method {
|
member_method {
|
||||||
name: "initialize_tpu_system"
|
name: "initialize_tpu_system"
|
||||||
@ -30,7 +30,7 @@ tf_module {
|
|||||||
}
|
}
|
||||||
member_method {
|
member_method {
|
||||||
name: "shared_embedding_columns"
|
name: "shared_embedding_columns"
|
||||||
argspec: "args=[\'categorical_columns\', \'dimension\', \'combiner\', \'initializer\', \'shared_embedding_collection_name\', \'max_sequence_lengths\', \'learning_rate_fn\', \'embedding_lookup_device\', \'tensor_core_shape\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
|
argspec: "args=[\'categorical_columns\', \'dimension\', \'combiner\', \'initializer\', \'shared_embedding_collection_name\', \'max_sequence_lengths\', \'learning_rate_fn\', \'embedding_lookup_device\', \'tensor_core_shape\', \'use_safe_embedding_lookup\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\'], "
|
||||||
}
|
}
|
||||||
member_method {
|
member_method {
|
||||||
name: "shutdown_tpu_system"
|
name: "shutdown_tpu_system"
|
||||||
|
@ -26,7 +26,7 @@ tf_module {
|
|||||||
}
|
}
|
||||||
member_method {
|
member_method {
|
||||||
name: "embedding_column"
|
name: "embedding_column"
|
||||||
argspec: "args=[\'categorical_column\', \'dimension\', \'combiner\', \'initializer\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'True\'], "
|
argspec: "args=[\'categorical_column\', \'dimension\', \'combiner\', \'initializer\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\', \'use_safe_embedding_lookup\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'True\', \'True\'], "
|
||||||
}
|
}
|
||||||
member_method {
|
member_method {
|
||||||
name: "indicator_column"
|
name: "indicator_column"
|
||||||
@ -62,7 +62,7 @@ tf_module {
|
|||||||
}
|
}
|
||||||
member_method {
|
member_method {
|
||||||
name: "shared_embeddings"
|
name: "shared_embeddings"
|
||||||
argspec: "args=[\'categorical_columns\', \'dimension\', \'combiner\', \'initializer\', \'shared_embedding_collection_name\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\'], "
|
argspec: "args=[\'categorical_columns\', \'dimension\', \'combiner\', \'initializer\', \'shared_embedding_collection_name\', \'ckpt_to_load_from\', \'tensor_name_in_ckpt\', \'max_norm\', \'trainable\', \'use_safe_embedding_lookup\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\', \'True\'], "
|
||||||
}
|
}
|
||||||
member_method {
|
member_method {
|
||||||
name: "weighted_categorical_column"
|
name: "weighted_categorical_column"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user