Fix the case where an embedding column with use_safe_embedding_lookup=False is used with variable partitioning.

PiperOrigin-RevId: 311782693
Change-Id: I38b59943a25adbe77e9f3f01c49a713876cc3f22
This commit is contained in:
A. Unique TensorFlower 2020-05-15 12:40:36 -07:00 committed by TensorFlower Gardener
parent 321d3d9fd0
commit 2db0d85d05
4 changed files with 336 additions and 177 deletions

View File

@ -2546,7 +2546,7 @@ class _EmbeddingColumn(
embedding_lookup_sparse = embedding_ops.safe_embedding_lookup_sparse
if (not self.use_safe_embedding_lookup and sparse_id_rank is not None and
sparse_id_rank <= 2):
embedding_lookup_sparse = embedding_ops.embedding_lookup_sparse
embedding_lookup_sparse = embedding_ops.embedding_lookup_sparse_v2
# Return embedding lookup result.
return embedding_lookup_sparse(
embedding_weights,
@ -2696,7 +2696,7 @@ class _SharedEmbeddingColumn(
embedding_lookup_sparse = embedding_ops.safe_embedding_lookup_sparse
if (not self.use_safe_embedding_lookup and sparse_id_rank is not None and
sparse_id_rank <= 2):
embedding_lookup_sparse = embedding_ops.embedding_lookup_sparse
embedding_lookup_sparse = embedding_ops.embedding_lookup_sparse_v2
# Return embedding lookup result.
return embedding_lookup_sparse(
embedding_weights,

View File

@ -21,6 +21,7 @@ from __future__ import print_function
import collections
import copy
from absl.testing import parameterized
import numpy as np
from tensorflow.core.example import example_pb2
@ -852,9 +853,9 @@ class HashedCategoricalColumnTest(test.TestCase):
'aaa': inputs
}), weight_collections=('my_weights',))
self.assertItemsEqual(
[], ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))
self.assertItemsEqual([], ops.get_collection('my_weights'))
self.assertCountEqual([],
ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))
self.assertCountEqual([], ops.get_collection('my_weights'))
@test_util.run_deprecated_v1
def test_get_sparse_tensors_dense_input(self):
@ -1714,10 +1715,10 @@ class LinearModelTest(test.TestCase):
# We check the mapping by checking that we have the right keys,
# and that the values (output_tensors) were indeed the ones used to
# form the input layer.
self.assertItemsEqual(all_cols, cols_to_output_tensors.keys())
self.assertCountEqual(all_cols, cols_to_output_tensors.keys())
input_layer_inputs = [tensor for tensor in input_layer.op.inputs[:-1]]
output_tensors = [tensor for tensor in cols_to_output_tensors.values()]
self.assertItemsEqual(input_layer_inputs, output_tensors)
self.assertCountEqual(input_layer_inputs, output_tensors)
def test_dense_collection(self):
price = fc._numeric_column('price')
@ -2841,7 +2842,7 @@ class FunctionalInputLayerTest(test.TestCase):
cols_to_vars = {}
all_cols = [price1, dense_feature_bucketized, some_embedding_column]
fc.input_layer(features, all_cols, cols_to_vars=cols_to_vars)
self.assertItemsEqual(list(cols_to_vars.keys()), all_cols)
self.assertCountEqual(list(cols_to_vars.keys()), all_cols)
self.assertEqual(0, len(cols_to_vars[price1]))
self.assertEqual(0, len(cols_to_vars[dense_feature_bucketized]))
self.assertEqual(1, len(cols_to_vars[some_embedding_column]))
@ -2891,7 +2892,7 @@ class FunctionalInputLayerTest(test.TestCase):
shared_embedding_a, shared_embedding_b
]
fc.input_layer(features, all_cols, cols_to_vars=cols_to_vars)
self.assertItemsEqual(list(cols_to_vars.keys()), all_cols)
self.assertCountEqual(list(cols_to_vars.keys()), all_cols)
self.assertEqual(0, len(cols_to_vars[price1]))
self.assertEqual(0, len(cols_to_vars[dense_feature_bucketized]))
self.assertEqual(1, len(cols_to_vars[some_embedding_column]))
@ -2927,7 +2928,7 @@ class FunctionalInputLayerTest(test.TestCase):
'input_from_feature_columns',
partitioner=partitioned_variables.fixed_size_partitioner(3, axis=0)):
fc.input_layer(features, all_cols, cols_to_vars=cols_to_vars)
self.assertItemsEqual(list(cols_to_vars.keys()), all_cols)
self.assertCountEqual(list(cols_to_vars.keys()), all_cols)
self.assertEqual(0, len(cols_to_vars[price1]))
self.assertEqual(0, len(cols_to_vars[dense_feature_bucketized]))
self.assertEqual(3, len(cols_to_vars[some_embedding_column]))
@ -3043,7 +3044,7 @@ class FunctionalInputLayerTest(test.TestCase):
'input_layer/sparse_feature_embedding/embedding_weights:0',
'input_layer_1/sparse_feature_embedding/embedding_weights:0'
]
self.assertItemsEqual(
self.assertCountEqual(
expected_var_names,
[v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
@ -3077,7 +3078,7 @@ class FunctionalInputLayerTest(test.TestCase):
# Make sure that only 1 variable gets created in this case.
self.assertEqual(1, len(
ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
self.assertItemsEqual(
self.assertCountEqual(
['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'],
[v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
@ -3129,7 +3130,7 @@ class FunctionalInputLayerTest(test.TestCase):
# Make sure that only 1 variable gets created in this case.
self.assertEqual(1, len(
ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
self.assertItemsEqual(
self.assertCountEqual(
['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'],
[v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
@ -3618,9 +3619,9 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
'aaa': inputs
}), weight_collections=('my_weights',))
self.assertItemsEqual(
[], ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))
self.assertItemsEqual([], ops.get_collection('my_weights'))
self.assertCountEqual([],
ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))
self.assertCountEqual([], ops.get_collection('my_weights'))
@test_util.run_deprecated_v1
def test_get_sparse_tensors_dense_input(self):
@ -4058,9 +4059,9 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
'aaa': inputs
}), weight_collections=('my_weights',))
self.assertItemsEqual(
[], ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))
self.assertItemsEqual([], ops.get_collection('my_weights'))
self.assertCountEqual([],
ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))
self.assertCountEqual([], ops.get_collection('my_weights'))
@test_util.run_deprecated_v1
def test_get_sparse_tensors_dense_input(self):
@ -4363,9 +4364,9 @@ class IdentityCategoricalColumnTest(test.TestCase):
'aaa': inputs
}), weight_collections=('my_weights',))
self.assertItemsEqual(
[], ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))
self.assertItemsEqual([], ops.get_collection('my_weights'))
self.assertCountEqual([],
ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))
self.assertCountEqual([], ops.get_collection('my_weights'))
@test_util.run_deprecated_v1
def test_get_sparse_tensors_dense_input(self):
@ -4820,7 +4821,7 @@ class IndicatorColumnTest(test.TestCase):
self.assertAllClose([[0., 1., 1., 0.]], self.evaluate(net))
class EmbeddingColumnTest(test.TestCase):
class EmbeddingColumnTest(test.TestCase, parameterized.TestCase):
@test_util.run_deprecated_v1
def test_defaults(self):
@ -4956,10 +4957,29 @@ class EmbeddingColumnTest(test.TestCase):
_assert_sparse_tensor_value(self, self.evaluate(output_a),
self.evaluate(output_embedded))
@parameterized.named_parameters(
{
'testcase_name': 'use_safe_embedding_lookup',
'use_safe_embedding_lookup': True,
'partition_variables': False,
}, {
'testcase_name': 'dont_use_safe_embedding_lookup',
'use_safe_embedding_lookup': False,
'partition_variables': False,
}, {
'testcase_name': 'use_safe_embedding_lookup_partitioned',
'use_safe_embedding_lookup': True,
'partition_variables': True,
}, {
'testcase_name': 'dont_use_safe_embedding_lookup_partitioned',
'use_safe_embedding_lookup': False,
'partition_variables': True,
})
@test_util.run_deprecated_v1
def test_get_dense_tensor(self):
def test_get_dense_tensor(self, use_safe_embedding_lookup,
partition_variables):
# Inputs.
vocabulary_size = 3
vocabulary_size = 4
sparse_input = sparse_tensor.SparseTensorValue(
# example 0, ids [2]
# example 1, ids [0, 1]
@ -4974,12 +4994,20 @@ class EmbeddingColumnTest(test.TestCase):
embedding_values = (
(1., 2.), # id 0
(3., 5.), # id 1
(7., 11.) # id 2
(7., 11.), # id 2
(9., 13.) # id 3
)
def _initializer(shape, dtype, partition_info):
def _initializer(shape, dtype, partition_info=None):
if partition_variables:
self.assertEqual([vocabulary_size, embedding_dimension],
partition_info.full_shape)
self.assertAllEqual((2, embedding_dimension), shape)
else:
self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
self.assertEqual(dtypes.float32, dtype)
self.assertIsNone(partition_info)
self.assertEqual(dtypes.float32, dtype)
return embedding_values
# Expected lookup result, using combiner='mean'.
@ -4997,25 +5025,43 @@ class EmbeddingColumnTest(test.TestCase):
# Build columns.
categorical_column = fc._categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
partitioner = None
if partition_variables:
partitioner = partitioned_variables.fixed_size_partitioner(2, axis=0)
with variable_scope.variable_scope('vars', partitioner=partitioner):
embedding_column = fc._embedding_column(
categorical_column,
dimension=embedding_dimension,
initializer=_initializer)
initializer=_initializer,
use_safe_embedding_lookup=use_safe_embedding_lookup)
# Provide sparse input and get dense result.
embedding_lookup = embedding_column._get_dense_tensor(
_LazyBuilder({
'aaa': sparse_input
}))
_LazyBuilder({'aaa': sparse_input}))
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertItemsEqual(('embedding_weights:0',),
if partition_variables:
self.assertCountEqual(('vars/embedding_weights/part_0:0',
'vars/embedding_weights/part_1:0'),
tuple([v.name for v in global_vars]))
else:
self.assertCountEqual(('vars/embedding_weights:0',),
tuple([v.name for v in global_vars]))
for v in global_vars:
self.assertIsInstance(v, variables_lib.Variable)
with _initialized_session():
self.assertAllEqual(embedding_values, global_vars[0].eval())
self.assertAllEqual(expected_lookups, self.evaluate(embedding_lookup))
if use_safe_embedding_lookup:
self.assertIn('SparseFillEmptyRows',
[x.type for x in ops.get_default_graph().get_operations()])
else:
self.assertNotIn(
'SparseFillEmptyRows',
[x.type for x in ops.get_default_graph().get_operations()])
@test_util.run_deprecated_v1
def test_get_dense_tensor_3d(self):
# Inputs.
@ -5072,7 +5118,7 @@ class EmbeddingColumnTest(test.TestCase):
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertItemsEqual(('embedding_weights:0',),
self.assertCountEqual(('embedding_weights:0',),
tuple([v.name for v in global_vars]))
with _initialized_session():
self.assertAllEqual(embedding_values, global_vars[0].eval())
@ -5102,11 +5148,11 @@ class EmbeddingColumnTest(test.TestCase):
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertItemsEqual(('embedding_weights:0',),
self.assertCountEqual(('embedding_weights:0',),
tuple([v.name for v in global_vars]))
my_vars = ops.get_collection('my_vars')
self.assertItemsEqual(
('embedding_weights:0',), tuple([v.name for v in my_vars]))
self.assertCountEqual(('embedding_weights:0',),
tuple([v.name for v in my_vars]))
@test_util.run_deprecated_v1
def test_get_dense_tensor_placeholder_inputs(self):
@ -5169,8 +5215,8 @@ class EmbeddingColumnTest(test.TestCase):
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertItemsEqual(
('embedding_weights:0',), tuple([v.name for v in global_vars]))
self.assertCountEqual(('embedding_weights:0',),
tuple([v.name for v in global_vars]))
with _initialized_session():
self.assertAllEqual(embedding_values, global_vars[0].eval())
self.assertAllEqual(expected_lookups, embedding_lookup.eval(
@ -5233,8 +5279,8 @@ class EmbeddingColumnTest(test.TestCase):
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertItemsEqual(
('embedding_weights:0',), tuple([v.name for v in global_vars]))
self.assertCountEqual(('embedding_weights:0',),
tuple([v.name for v in global_vars]))
with _initialized_session():
self.assertAllEqual(embedding_values, global_vars[0].eval())
self.assertAllEqual(expected_lookups, self.evaluate(embedding_lookup))
@ -5280,14 +5326,14 @@ class EmbeddingColumnTest(test.TestCase):
'linear_model/aaa_embedding/weights:0',
'linear_model/aaa_embedding/embedding_weights:0',
)
self.assertItemsEqual(
self.assertCountEqual(
expected_var_names,
[v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
trainable_vars = {
v.name: v for v in ops.get_collection(
ops.GraphKeys.TRAINABLE_VARIABLES)
}
self.assertItemsEqual(expected_var_names, trainable_vars.keys())
self.assertCountEqual(expected_var_names, trainable_vars.keys())
bias = trainable_vars['linear_model/bias_weights:0']
embedding_weights = trainable_vars[
'linear_model/aaa_embedding/embedding_weights:0']
@ -5361,14 +5407,14 @@ class EmbeddingColumnTest(test.TestCase):
'linear_model/aaa_embedding/weights:0',
'linear_model/aaa_embedding/embedding_weights:0',
)
self.assertItemsEqual(
self.assertCountEqual(
expected_var_names,
[v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
trainable_vars = {
v.name: v
for v in ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
}
self.assertItemsEqual(expected_var_names, trainable_vars.keys())
self.assertCountEqual(expected_var_names, trainable_vars.keys())
bias = trainable_vars['linear_model/bias_weights:0']
embedding_weights = trainable_vars[
'linear_model/aaa_embedding/embedding_weights:0']
@ -5450,12 +5496,10 @@ class EmbeddingColumnTest(test.TestCase):
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertItemsEqual(
('input_layer/aaa_embedding/embedding_weights:0',),
self.assertCountEqual(('input_layer/aaa_embedding/embedding_weights:0',),
tuple([v.name for v in global_vars]))
trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
self.assertItemsEqual(
('input_layer/aaa_embedding/embedding_weights:0',),
self.assertCountEqual(('input_layer/aaa_embedding/embedding_weights:0',),
tuple([v.name for v in trainable_vars]))
with _initialized_session():
self.assertAllEqual(embedding_values, trainable_vars[0].eval())
@ -5513,17 +5557,16 @@ class EmbeddingColumnTest(test.TestCase):
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertItemsEqual(
('input_layer/aaa_embedding/embedding_weights:0',),
self.assertCountEqual(('input_layer/aaa_embedding/embedding_weights:0',),
tuple([v.name for v in global_vars]))
self.assertItemsEqual(
[], ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))
self.assertCountEqual([],
ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))
with _initialized_session():
self.assertAllEqual(embedding_values, global_vars[0].eval())
self.assertAllEqual(expected_lookups, self.evaluate(input_layer))
class SharedEmbeddingColumnTest(test.TestCase):
class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase):
@test_util.run_deprecated_v1
def test_defaults(self):
@ -5772,33 +5815,59 @@ class SharedEmbeddingColumnTest(test.TestCase):
_assert_sparse_tensor_value(self, self.evaluate(output_b),
self.evaluate(output_b_embedded))
@parameterized.named_parameters(
{
'testcase_name': 'use_safe_embedding_lookup',
'use_safe_embedding_lookup': True,
'partition_variables': False,
}, {
'testcase_name': 'dont_use_safe_embedding_lookup',
'use_safe_embedding_lookup': False,
'partition_variables': False,
}, {
'testcase_name': 'use_safe_embedding_lookup_partitioned',
'use_safe_embedding_lookup': True,
'partition_variables': True,
}, {
'testcase_name': 'dont_use_safe_embedding_lookup_partitioned',
'use_safe_embedding_lookup': False,
'partition_variables': True,
})
@test_util.run_deprecated_v1
def test_get_dense_tensor(self):
def test_get_dense_tensor(self, use_safe_embedding_lookup,
partition_variables):
# Inputs.
vocabulary_size = 3
vocabulary_size = 4
# -1 values are ignored.
input_a = np.array(
[[2, -1, -1], # example 0, ids [2]
[0, 1, -1]]) # example 1, ids [0, 1]
input_b = np.array(
[[0, -1, -1], # example 0, ids [0]
[-1, -1, -1]]) # example 1, ids []
input_features = {
'aaa': input_a,
'bbb': input_b
}
input_a = np.array([
[2, -1, -1], # example 0, ids [2]
[0, 1, -1]
]) # example 1, ids [0, 1]
input_b = np.array([
[0, -1, -1], # example 0, ids [0]
[-1, -1, -1]
]) # example 1, ids []
input_features = {'aaa': input_a, 'bbb': input_b}
# Embedding variable.
embedding_dimension = 2
embedding_values = (
(1., 2.), # id 0
(3., 5.), # id 1
(7., 11.) # id 2
(7., 11.), # id 2
(9., 13.) # id 3
)
def _initializer(shape, dtype, partition_info):
def _initializer(shape, dtype, partition_info=None):
if partition_variables:
self.assertEqual([vocabulary_size, embedding_dimension],
partition_info.full_shape)
self.assertAllEqual((2, embedding_dimension), shape)
else:
self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
self.assertEqual(dtypes.float32, dtype)
self.assertIsNone(partition_info)
self.assertEqual(dtypes.float32, dtype)
return embedding_values
# Expected lookup result, using combiner='mean'.
@ -5808,39 +5877,66 @@ class SharedEmbeddingColumnTest(test.TestCase):
# example 1:
(2., 3.5), # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
)
if use_safe_embedding_lookup:
expected_lookups_b = (
# example 0:
(1., 2.), # ids [0], embedding = [1, 2]
# example 1:
(0., 0.), # ids [], embedding = [0, 0]
)
else:
expected_lookups_b = (
# example 0:
(1., 2.), # ids [0], embedding = [1, 2]
)
# Build columns.
categorical_column_a = fc._categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
categorical_column_b = fc._categorical_column_with_identity(
key='bbb', num_buckets=vocabulary_size)
partitioner = None
if partition_variables:
partitioner = partitioned_variables.fixed_size_partitioner(2, axis=0)
with variable_scope.variable_scope('vars', partitioner=partitioner):
embedding_column_a, embedding_column_b = fc_new.shared_embedding_columns(
[categorical_column_a, categorical_column_b],
dimension=embedding_dimension,
initializer=_initializer)
initializer=_initializer,
use_safe_embedding_lookup=use_safe_embedding_lookup)
# Provide sparse input and get dense result.
embedding_lookup_a = embedding_column_a._get_dense_tensor(
_LazyBuilder(input_features))
embedding_lookup_b = embedding_column_b._get_dense_tensor(
_LazyBuilder(input_features))
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertItemsEqual(('embedding_weights:0',),
if partition_variables:
self.assertCountEqual(('vars/embedding_weights/part_0:0',
'vars/embedding_weights/part_1:0'),
tuple([v.name for v in global_vars]))
else:
self.assertCountEqual(('vars/embedding_weights:0',),
tuple([v.name for v in global_vars]))
embedding_var = global_vars[0]
with _initialized_session():
self.evaluate(variables_lib.global_variables_initializer())
self.evaluate(lookup_ops.tables_initializer())
self.assertAllEqual(embedding_values, self.evaluate(embedding_var))
self.assertAllEqual(expected_lookups_a, self.evaluate(embedding_lookup_a))
self.assertAllEqual(expected_lookups_b, self.evaluate(embedding_lookup_b))
if use_safe_embedding_lookup:
self.assertIn('SparseFillEmptyRows',
[x.type for x in ops.get_default_graph().get_operations()])
else:
self.assertNotIn(
'SparseFillEmptyRows',
[x.type for x in ops.get_default_graph().get_operations()])
@test_util.run_deprecated_v1
def test_get_dense_tensor_weight_collections(self):
# Inputs.
@ -5886,11 +5982,11 @@ class SharedEmbeddingColumnTest(test.TestCase):
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertItemsEqual(
self.assertCountEqual(
('input_layer/aaa_bbb_shared_embedding/embedding_weights:0',),
tuple(v.name for v in global_vars))
my_vars = ops.get_collection('my_vars')
self.assertItemsEqual(
self.assertCountEqual(
('input_layer/aaa_bbb_shared_embedding/embedding_weights:0',),
tuple(v.name for v in my_vars))
@ -5997,14 +6093,14 @@ class SharedEmbeddingColumnTest(test.TestCase):
'linear_model/aaa_bbb_shared_embedding/embedding_weights:0',
'linear_model/aaa_bbb_shared_embedding_1/weights:0',
)
self.assertItemsEqual(
self.assertCountEqual(
expected_var_names,
[v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
trainable_vars = {
v.name: v for v in ops.get_collection(
ops.GraphKeys.TRAINABLE_VARIABLES)
}
self.assertItemsEqual(expected_var_names, trainable_vars.keys())
self.assertCountEqual(expected_var_names, trainable_vars.keys())
bias = trainable_vars['linear_model/bias_weights:0']
embedding_weights = trainable_vars[
'linear_model/aaa_bbb_shared_embedding/embedding_weights:0']
@ -6091,14 +6187,14 @@ class SharedEmbeddingColumnTest(test.TestCase):
'linear_model/aaa_bbb_shared_embedding/embedding_weights:0',
'linear_model/aaa_bbb_shared_embedding_1/weights:0',
)
self.assertItemsEqual(
self.assertCountEqual(
expected_var_names,
[v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
trainable_vars = {
v.name: v
for v in ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
}
self.assertItemsEqual(expected_var_names, trainable_vars.keys())
self.assertCountEqual(expected_var_names, trainable_vars.keys())
bias = trainable_vars['linear_model/bias_weights:0']
embedding_weights = trainable_vars[
'linear_model/aaa_bbb_shared_embedding/embedding_weights:0']
@ -6195,16 +6291,16 @@ class SharedEmbeddingColumnTest(test.TestCase):
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertItemsEqual(
self.assertCountEqual(
['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'],
tuple([v.name for v in global_vars]))
trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
if trainable:
self.assertItemsEqual(
self.assertCountEqual(
['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'],
tuple([v.name for v in trainable_vars]))
else:
self.assertItemsEqual([], tuple([v.name for v in trainable_vars]))
self.assertCountEqual([], tuple([v.name for v in trainable_vars]))
shared_embedding_vars = global_vars
with _initialized_session():
self.assertAllEqual(embedding_values, shared_embedding_vars[0].eval())

View File

@ -3263,7 +3263,7 @@ class EmbeddingColumn(
embedding_lookup_sparse = embedding_ops.safe_embedding_lookup_sparse
if (not self.use_safe_embedding_lookup and sparse_id_rank is not None and
sparse_id_rank <= 2):
embedding_lookup_sparse = embedding_ops.embedding_lookup_sparse
embedding_lookup_sparse = embedding_ops.embedding_lookup_sparse_v2
# Return embedding lookup result.
return embedding_lookup_sparse(
embedding_weights,
@ -3558,7 +3558,7 @@ class SharedEmbeddingColumn(
embedding_lookup_sparse = embedding_ops.safe_embedding_lookup_sparse
if (not self.use_safe_embedding_lookup and sparse_id_rank is not None and
sparse_id_rank <= 2):
embedding_lookup_sparse = (embedding_ops.embedding_lookup_sparse)
embedding_lookup_sparse = embedding_ops.embedding_lookup_sparse_v2
# Return embedding lookup result.
return embedding_lookup_sparse(
embedding_weights,

View File

@ -2087,7 +2087,7 @@ class LinearModelTest(test.TestCase):
for var in model.variables:
self.assertIsInstance(var, variables_lib.VariableV1)
variable_names = [var.name for var in model.variables]
self.assertItemsEqual([
self.assertCountEqual([
'linear_model/dense_feature_bucketized/weights:0',
'linear_model/price1/weights:0',
'linear_model/sparse_feature_embedding/embedding_weights:0',
@ -2731,10 +2731,10 @@ class OldLinearModelTest(test.TestCase):
# We check the mapping by checking that we have the right keys,
# and that the values (output_tensors) were indeed the ones used to
# form the input layer.
self.assertItemsEqual(all_cols, cols_to_output_tensors.keys())
self.assertCountEqual(all_cols, cols_to_output_tensors.keys())
input_layer_inputs = [tensor for tensor in input_layer.op.inputs[:-1]]
output_tensors = [tensor for tensor in cols_to_output_tensors.values()]
self.assertItemsEqual(input_layer_inputs, output_tensors)
self.assertCountEqual(input_layer_inputs, output_tensors)
def test_dense_collection(self):
price = fc.numeric_column('price')
@ -3411,7 +3411,7 @@ class FunctionalInputLayerTest(test.TestCase):
cols_to_vars = {}
all_cols = [price1, dense_feature_bucketized, some_embedding_column]
fc_old.input_layer(features, all_cols, cols_to_vars=cols_to_vars)
self.assertItemsEqual(list(cols_to_vars.keys()), all_cols)
self.assertCountEqual(list(cols_to_vars.keys()), all_cols)
self.assertEqual(0, len(cols_to_vars[price1]))
self.assertEqual(0, len(cols_to_vars[dense_feature_bucketized]))
self.assertEqual(1, len(cols_to_vars[some_embedding_column]))
@ -3461,7 +3461,7 @@ class FunctionalInputLayerTest(test.TestCase):
shared_embedding_a, shared_embedding_b
]
fc_old.input_layer(features, all_cols, cols_to_vars=cols_to_vars)
self.assertItemsEqual(list(cols_to_vars.keys()), all_cols)
self.assertCountEqual(list(cols_to_vars.keys()), all_cols)
self.assertEqual(0, len(cols_to_vars[price1]))
self.assertEqual(0, len(cols_to_vars[dense_feature_bucketized]))
self.assertEqual(1, len(cols_to_vars[some_embedding_column]))
@ -3497,7 +3497,7 @@ class FunctionalInputLayerTest(test.TestCase):
'input_from_feature_columns',
partitioner=partitioned_variables.fixed_size_partitioner(3, axis=0)):
fc_old.input_layer(features, all_cols, cols_to_vars=cols_to_vars)
self.assertItemsEqual(list(cols_to_vars.keys()), all_cols)
self.assertCountEqual(list(cols_to_vars.keys()), all_cols)
self.assertEqual(0, len(cols_to_vars[price1]))
self.assertEqual(0, len(cols_to_vars[dense_feature_bucketized]))
self.assertEqual(3, len(cols_to_vars[some_embedding_column]))
@ -3616,7 +3616,7 @@ class FunctionalInputLayerTest(test.TestCase):
'input_layer/sparse_feature_embedding/embedding_weights:0',
'input_layer_1/sparse_feature_embedding/embedding_weights:0'
]
self.assertItemsEqual(
self.assertCountEqual(
expected_var_names,
[v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
@ -5904,7 +5904,7 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase):
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertItemsEqual(('embedding_weights:0',),
self.assertCountEqual(('embedding_weights:0',),
tuple([v.name for v in global_vars]))
self.evaluate(variables_lib.global_variables_initializer())
@ -5968,7 +5968,7 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase):
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertItemsEqual(('embedding_weights:0',),
self.assertCountEqual(('embedding_weights:0',),
tuple([v.name for v in global_vars]))
self.evaluate(variables_lib.global_variables_initializer())
@ -6036,7 +6036,7 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase):
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertItemsEqual(('embedding_weights:0',),
self.assertCountEqual(('embedding_weights:0',),
tuple([v.name for v in global_vars]))
self.evaluate(variables_lib.global_variables_initializer())
@ -6109,7 +6109,7 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase):
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertItemsEqual(('embedding_weights:0',),
self.assertCountEqual(('embedding_weights:0',),
tuple([v.name for v in global_vars]))
self.evaluate(variables_lib.global_variables_initializer())
@ -6180,7 +6180,7 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase):
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertItemsEqual(('embedding_weights:0',),
self.assertCountEqual(('embedding_weights:0',),
tuple([v.name for v in global_vars]))
self.evaluate(variables_lib.global_variables_initializer())
@ -6230,14 +6230,14 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase):
'linear_model/aaa_embedding/weights:0',
'linear_model/aaa_embedding/embedding_weights:0',
)
self.assertItemsEqual(
self.assertCountEqual(
expected_var_names,
[v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
trainable_vars = {
v.name: v
for v in ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
}
self.assertItemsEqual(expected_var_names, trainable_vars.keys())
self.assertCountEqual(expected_var_names, trainable_vars.keys())
bias = trainable_vars['linear_model/bias_weights:0']
embedding_weights = trainable_vars[
'linear_model/aaa_embedding/embedding_weights:0']
@ -6274,15 +6274,25 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase):
@parameterized.named_parameters(
{
'testcase_name': 'use_safe_embedding_lookup',
'use_safe_embedding_lookup': True
'use_safe_embedding_lookup': True,
'partition_variables': False,
}, {
'testcase_name': 'dont_use_safe_embedding_lookup',
'use_safe_embedding_lookup': False
'use_safe_embedding_lookup': False,
'partition_variables': False,
}, {
'testcase_name': 'use_safe_embedding_lookup_partitioned',
'use_safe_embedding_lookup': True,
'partition_variables': True,
}, {
'testcase_name': 'dont_use_safe_embedding_lookup_partitioned',
'use_safe_embedding_lookup': False,
'partition_variables': True,
})
@test_util.run_deprecated_v1
def test_dense_features(self, use_safe_embedding_lookup):
def test_dense_features(self, use_safe_embedding_lookup, partition_variables):
# Inputs.
vocabulary_size = 3
vocabulary_size = 4
sparse_input = sparse_tensor.SparseTensorValue(
# example 0, ids [2]
# example 1, ids [0, 1]
@ -6297,13 +6307,20 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase):
embedding_values = (
(1., 2.), # id 0
(3., 5.), # id 1
(7., 11.) # id 2
(7., 11.), # id 2
(9., 13.) # id 3
)
def _initializer(shape, dtype, partition_info=None):
if partition_variables:
self.assertEqual([vocabulary_size, embedding_dimension],
partition_info.full_shape)
self.assertAllEqual((2, embedding_dimension), shape)
else:
self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
self.assertEqual(dtypes.float32, dtype)
self.assertIsNone(partition_info)
self.assertEqual(dtypes.float32, dtype)
return embedding_values
# Expected lookup result, using combiner='mean'.
@ -6321,6 +6338,10 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase):
# Build columns.
categorical_column = fc.categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
partitioner = None
if partition_variables:
partitioner = partitioned_variables.fixed_size_partitioner(2, axis=0)
with variable_scope.variable_scope('vars', partitioner=partitioner):
embedding_column = fc.embedding_column(
categorical_column,
dimension=embedding_dimension,
@ -6333,12 +6354,26 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase):
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertItemsEqual(('dense_features/aaa_embedding/embedding_weights:0',),
if partition_variables:
self.assertCountEqual(
('vars/dense_features/aaa_embedding/embedding_weights/part_0:0',
'vars/dense_features/aaa_embedding/embedding_weights/part_1:0'),
tuple([v.name for v in global_vars]))
else:
self.assertCountEqual(
('vars/dense_features/aaa_embedding/embedding_weights:0',),
tuple([v.name for v in global_vars]))
for v in global_vars:
self.assertIsInstance(v, variables_lib.Variable)
trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
self.assertItemsEqual(('dense_features/aaa_embedding/embedding_weights:0',),
if partition_variables:
self.assertCountEqual(
('vars/dense_features/aaa_embedding/embedding_weights/part_0:0',
'vars/dense_features/aaa_embedding/embedding_weights/part_1:0'),
tuple([v.name for v in trainable_vars]))
else:
self.assertCountEqual(
('vars/dense_features/aaa_embedding/embedding_weights:0',),
tuple([v.name for v in trainable_vars]))
self.evaluate(variables_lib.global_variables_initializer())
@ -6410,9 +6445,9 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase):
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertItemsEqual(('dense_features/aaa_embedding/embedding_weights:0',),
self.assertCountEqual(('dense_features/aaa_embedding/embedding_weights:0',),
tuple([v.name for v in global_vars]))
self.assertItemsEqual([],
self.assertCountEqual([],
ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))
self.evaluate(variables_lib.global_variables_initializer())
@ -6475,10 +6510,10 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase):
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertItemsEqual(('input_layer/aaa_embedding/embedding_weights:0',),
self.assertCountEqual(('input_layer/aaa_embedding/embedding_weights:0',),
tuple([v.name for v in global_vars]))
trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
self.assertItemsEqual(('input_layer/aaa_embedding/embedding_weights:0',),
self.assertCountEqual(('input_layer/aaa_embedding/embedding_weights:0',),
tuple([v.name for v in trainable_vars]))
self.evaluate(variables_lib.global_variables_initializer())
@ -6528,14 +6563,14 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase):
'linear_model/aaa_embedding/weights:0',
'linear_model/aaa_embedding/embedding_weights:0',
)
self.assertItemsEqual(
self.assertCountEqual(
expected_var_names,
[v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
trainable_vars = {
v.name: v
for v in ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
}
self.assertItemsEqual(expected_var_names, trainable_vars.keys())
self.assertCountEqual(expected_var_names, trainable_vars.keys())
bias = trainable_vars['linear_model/bias_weights:0']
embedding_weights = trainable_vars[
'linear_model/aaa_embedding/embedding_weights:0']
@ -6610,14 +6645,14 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase):
'linear_model/aaa_embedding/weights:0',
'linear_model/aaa_embedding/embedding_weights:0',
)
self.assertItemsEqual(
self.assertCountEqual(
expected_var_names,
[v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
trainable_vars = {
v.name: v
for v in ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
}
self.assertItemsEqual(expected_var_names, trainable_vars.keys())
self.assertCountEqual(expected_var_names, trainable_vars.keys())
bias = trainable_vars['linear_model/bias_weights:0']
embedding_weights = trainable_vars[
'linear_model/aaa_embedding/embedding_weights:0']
@ -6972,15 +7007,26 @@ class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase):
@parameterized.named_parameters(
{
'testcase_name': 'use_safe_embedding_lookup',
'use_safe_embedding_lookup': True
'use_safe_embedding_lookup': True,
'partition_variables': False,
}, {
'testcase_name': 'dont_use_safe_embedding_lookup',
'use_safe_embedding_lookup': False
'use_safe_embedding_lookup': False,
'partition_variables': False,
}, {
'testcase_name': 'use_safe_embedding_lookup_partitioned',
'use_safe_embedding_lookup': True,
'partition_variables': True,
}, {
'testcase_name': 'dont_use_safe_embedding_lookup_partitioned',
'use_safe_embedding_lookup': False,
'partition_variables': True,
})
@test_util.run_deprecated_v1
def test_get_dense_tensor(self, use_safe_embedding_lookup):
def test_get_dense_tensor(self, use_safe_embedding_lookup,
partition_variables):
# Inputs.
vocabulary_size = 3
vocabulary_size = 4
# -1 values are ignored.
input_a = np.array([
[2, -1, -1], # example 0, ids [2]
@ -6997,13 +7043,20 @@ class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase):
embedding_values = (
(1., 2.), # id 0
(3., 5.), # id 1
(7., 11.) # id 2
(7., 11.), # id 2
(9., 13.) # id 3
)
def _initializer(shape, dtype, partition_info=None):
if partition_variables:
self.assertEqual([vocabulary_size, embedding_dimension],
partition_info.full_shape)
self.assertAllEqual((2, embedding_dimension), shape)
else:
self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
self.assertEqual(dtypes.float32, dtype)
self.assertIsNone(partition_info)
self.assertEqual(dtypes.float32, dtype)
return embedding_values
# Expected lookup result, using combiner='mean'.
@ -7031,12 +7084,17 @@ class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase):
key='aaa', num_buckets=vocabulary_size)
categorical_column_b = fc.categorical_column_with_identity(
key='bbb', num_buckets=vocabulary_size)
partitioner = None
if partition_variables:
partitioner = partitioned_variables.fixed_size_partitioner(2, axis=0)
with variable_scope.variable_scope('vars', partitioner=partitioner):
embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2(
[categorical_column_a, categorical_column_b],
dimension=embedding_dimension,
initializer=_initializer,
use_safe_embedding_lookup=use_safe_embedding_lookup)
# Provide sparse input and get dense result.
embedding_lookup_a = embedding_column_a.get_dense_tensor(
fc.FeatureTransformationCache(input_features), None)
@ -7045,7 +7103,12 @@ class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase):
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertItemsEqual(('aaa_bbb_shared_embedding:0',),
if partition_variables:
self.assertCountEqual(('vars/aaa_bbb_shared_embedding/part_0:0',
'vars/aaa_bbb_shared_embedding/part_1:0'),
tuple([v.name for v in global_vars]))
else:
self.assertCountEqual(('vars/aaa_bbb_shared_embedding:0',),
tuple([v.name for v in global_vars]))
embedding_var = global_vars[0]
@ -7279,14 +7342,14 @@ class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase):
'aaa_bbb_shared_embedding:0',
'linear_model/bbb_shared_embedding/weights:0',
)
self.assertItemsEqual(
self.assertCountEqual(
expected_var_names,
[v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])
trainable_vars = {
v.name: v
for v in ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
}
self.assertItemsEqual(expected_var_names, trainable_vars.keys())
self.assertCountEqual(expected_var_names, trainable_vars.keys())
bias = trainable_vars['linear_model/bias_weights:0']
embedding_weights = trainable_vars['aaa_bbb_shared_embedding:0']
linear_weights_a = trainable_vars[
@ -7420,18 +7483,18 @@ class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase):
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertItemsEqual(
self.assertCountEqual(
['aaa_bbb_shared_embedding:0', 'ccc_ddd_shared_embedding:0'],
tuple([v.name for v in global_vars]))
for v in global_vars:
self.assertIsInstance(v, variables_lib.Variable)
trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
if trainable:
self.assertItemsEqual(
self.assertCountEqual(
['aaa_bbb_shared_embedding:0', 'ccc_ddd_shared_embedding:0'],
tuple([v.name for v in trainable_vars]))
else:
self.assertItemsEqual([], tuple([v.name for v in trainable_vars]))
self.assertCountEqual([], tuple([v.name for v in trainable_vars]))
shared_embedding_vars = global_vars
self.evaluate(variables_lib.global_variables_initializer())