Move tf.keras.layers.featureDenseFeature back to Keras package.

PiperOrigin-RevId: 312201284
Change-Id: I8e51198c62a8e79ef493a173d7f4f8ab65f300eb
This commit is contained in:
Scott Zhu 2020-05-18 19:27:50 -07:00 committed by TensorFlower Gardener
parent efd77d2e45
commit d98a0e6017
17 changed files with 545 additions and 498 deletions

View File

@ -55,8 +55,6 @@ py_library(
py_library(
name = "feature_column_v2",
srcs = [
"dense_features.py",
"dense_features_v2.py",
"feature_column_v2.py",
"sequence_feature_column.py",
"serialization.py",
@ -126,15 +124,6 @@ tf_py_test(
],
)
tf_py_test(
name = "dense_features_test",
srcs = ["dense_features_test.py"],
tags = ["no_pip"],
deps = [
":feature_column_test_main_lib",
],
)
py_library(
name = "feature_column_test_main_lib",
srcs = ["feature_column_test.py"],
@ -177,15 +166,6 @@ tf_py_test(
deps = [":feature_column_v2_test_main_lib"],
)
tf_py_test(
name = "dense_features_v2_test",
srcs = ["dense_features_v2_test.py"],
tags = ["no_pip"],
deps = [
":feature_column_v2_test_main_lib",
],
)
py_library(
name = "feature_column_v2_test_main_lib",
srcs = ["feature_column_v2_test.py"],

View File

@ -19,13 +19,13 @@ from __future__ import division
from __future__ import print_function
# pylint: disable=unused-import,line-too-long,wildcard-import,g-bad-import-order
# We import dense_features_v2 first so that the V1 DenseFeatures is the default
# if users directly import feature_column_lib.
from tensorflow.python.feature_column.dense_features_v2 import *
from tensorflow.python.feature_column.dense_features import *
from tensorflow.python.feature_column.feature_column import *
from tensorflow.python.feature_column.feature_column_v2 import *
from tensorflow.python.feature_column.sequence_feature_column import *
from tensorflow.python.feature_column.serialization import *
# We import dense_features_v2 first so that the V1 DenseFeatures is the default
# if users directly import feature_column_lib.
from tensorflow.python.keras.feature_column.dense_features_v2 import *
from tensorflow.python.keras.feature_column.dense_features import *
from tensorflow.python.keras.feature_column.sequence_feature_column import *
# pylint: enable=unused-import,line-too-long

View File

@ -31,7 +31,6 @@ from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.python.client import session
from tensorflow.python.eager import backprop
from tensorflow.python.eager import context
from tensorflow.python.feature_column import dense_features as df
from tensorflow.python.feature_column import feature_column as fc_old
from tensorflow.python.feature_column import feature_column_v2 as fc
from tensorflow.python.feature_column import serialization
@ -5582,23 +5581,6 @@ class IndicatorColumnTest(test.TestCase):
self.evaluate(weight_var.assign([[1.], [2.], [3.], [4.]]))
self.assertAllClose([[2. + 3.]], self.evaluate(predictions))
@test_util.run_deprecated_v1
def test_dense_features(self):
animal = fc.indicator_column(
fc.categorical_column_with_identity('animal', num_buckets=4))
with ops.Graph().as_default():
features = {
'animal':
sparse_tensor.SparseTensor(
indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
}
net = df.DenseFeatures([animal])(features)
self.evaluate(variables_lib.global_variables_initializer())
self.evaluate(lookup_ops.tables_initializer())
self.assertAllClose([[0., 1., 1., 0.]], self.evaluate(net))
@test_util.run_deprecated_v1
def test_input_layer(self):
animal = fc.indicator_column(
@ -6271,191 +6253,6 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase):
self.assertAllClose(((94.,), (29.,), (0.,), (42.,)),
self.evaluate(predictions))
@parameterized.named_parameters(
{
'testcase_name': 'use_safe_embedding_lookup',
'use_safe_embedding_lookup': True,
'partition_variables': False,
}, {
'testcase_name': 'dont_use_safe_embedding_lookup',
'use_safe_embedding_lookup': False,
'partition_variables': False,
}, {
'testcase_name': 'use_safe_embedding_lookup_partitioned',
'use_safe_embedding_lookup': True,
'partition_variables': True,
}, {
'testcase_name': 'dont_use_safe_embedding_lookup_partitioned',
'use_safe_embedding_lookup': False,
'partition_variables': True,
})
@test_util.run_deprecated_v1
def test_dense_features(self, use_safe_embedding_lookup, partition_variables):
# Inputs.
vocabulary_size = 4
sparse_input = sparse_tensor.SparseTensorValue(
# example 0, ids [2]
# example 1, ids [0, 1]
# example 2, ids []
# example 3, ids [1]
indices=((0, 0), (1, 0), (1, 4), (3, 0)),
values=(2, 0, 1, 1),
dense_shape=(4, 5))
# Embedding variable.
embedding_dimension = 2
embedding_values = (
(1., 2.), # id 0
(3., 5.), # id 1
(7., 11.), # id 2
(9., 13.) # id 3
)
def _initializer(shape, dtype, partition_info=None):
if partition_variables:
self.assertEqual([vocabulary_size, embedding_dimension],
partition_info.full_shape)
self.assertAllEqual((2, embedding_dimension), shape)
else:
self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
self.assertIsNone(partition_info)
self.assertEqual(dtypes.float32, dtype)
return embedding_values
# Expected lookup result, using combiner='mean'.
expected_lookups = (
# example 0, ids [2], embedding = [7, 11]
(7., 11.),
# example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
(2., 3.5),
# example 2, ids [], embedding = [0, 0]
(0., 0.),
# example 3, ids [1], embedding = [3, 5]
(3., 5.),
)
# Build columns.
categorical_column = fc.categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
partitioner = None
if partition_variables:
partitioner = partitioned_variables.fixed_size_partitioner(2, axis=0)
with variable_scope.variable_scope('vars', partitioner=partitioner):
embedding_column = fc.embedding_column(
categorical_column,
dimension=embedding_dimension,
initializer=_initializer,
use_safe_embedding_lookup=use_safe_embedding_lookup)
# Provide sparse input and get dense result.
l = df.DenseFeatures((embedding_column,))
dense_features = l({'aaa': sparse_input})
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
if partition_variables:
self.assertCountEqual(
('vars/dense_features/aaa_embedding/embedding_weights/part_0:0',
'vars/dense_features/aaa_embedding/embedding_weights/part_1:0'),
tuple([v.name for v in global_vars]))
else:
self.assertCountEqual(
('vars/dense_features/aaa_embedding/embedding_weights:0',),
tuple([v.name for v in global_vars]))
for v in global_vars:
self.assertIsInstance(v, variables_lib.Variable)
trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
if partition_variables:
self.assertCountEqual(
('vars/dense_features/aaa_embedding/embedding_weights/part_0:0',
'vars/dense_features/aaa_embedding/embedding_weights/part_1:0'),
tuple([v.name for v in trainable_vars]))
else:
self.assertCountEqual(
('vars/dense_features/aaa_embedding/embedding_weights:0',),
tuple([v.name for v in trainable_vars]))
self.evaluate(variables_lib.global_variables_initializer())
self.evaluate(lookup_ops.tables_initializer())
self.assertAllEqual(embedding_values, self.evaluate(trainable_vars[0]))
self.assertAllEqual(expected_lookups, self.evaluate(dense_features))
if use_safe_embedding_lookup:
self.assertIn('SparseFillEmptyRows',
[x.type for x in ops.get_default_graph().get_operations()])
else:
self.assertNotIn(
'SparseFillEmptyRows',
[x.type for x in ops.get_default_graph().get_operations()])
@test_util.run_deprecated_v1
def test_dense_features_not_trainable(self):
# Inputs.
vocabulary_size = 3
sparse_input = sparse_tensor.SparseTensorValue(
# example 0, ids [2]
# example 1, ids [0, 1]
# example 2, ids []
# example 3, ids [1]
indices=((0, 0), (1, 0), (1, 4), (3, 0)),
values=(2, 0, 1, 1),
dense_shape=(4, 5))
# Embedding variable.
embedding_dimension = 2
embedding_values = (
(1., 2.), # id 0
(3., 5.), # id 1
(7., 11.) # id 2
)
def _initializer(shape, dtype, partition_info=None):
self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
self.assertEqual(dtypes.float32, dtype)
self.assertIsNone(partition_info)
return embedding_values
# Expected lookup result, using combiner='mean'.
expected_lookups = (
# example 0, ids [2], embedding = [7, 11]
(7., 11.),
# example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
(2., 3.5),
# example 2, ids [], embedding = [0, 0]
(0., 0.),
# example 3, ids [1], embedding = [3, 5]
(3., 5.),
)
# Build columns.
categorical_column = fc.categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
embedding_column = fc.embedding_column(
categorical_column,
dimension=embedding_dimension,
initializer=_initializer,
trainable=False)
# Provide sparse input and get dense result.
dense_features = df.DenseFeatures((embedding_column,))({
'aaa': sparse_input
})
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertCountEqual(('dense_features/aaa_embedding/embedding_weights:0',),
tuple([v.name for v in global_vars]))
self.assertCountEqual([],
ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))
self.evaluate(variables_lib.global_variables_initializer())
self.evaluate(lookup_ops.tables_initializer())
self.assertAllEqual(embedding_values, self.evaluate(global_vars[0]))
self.assertAllEqual(expected_lookups, self.evaluate(dense_features))
@test_util.run_deprecated_v1
def test_input_layer(self):
# Inputs.
@ -7389,129 +7186,6 @@ class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase):
# = [3*1 + 5*2, 3*0 +5*0] = [13, 0]
self.assertAllClose([[94. + 13.], [29.]], self.evaluate(predictions))
def _test_dense_features(self, trainable=True):
# Inputs.
vocabulary_size = 3
sparse_input_a = sparse_tensor.SparseTensorValue(
# example 0, ids [2]
# example 1, ids [0, 1]
indices=((0, 0), (1, 0), (1, 4)),
values=(2, 0, 1),
dense_shape=(2, 5))
sparse_input_b = sparse_tensor.SparseTensorValue(
# example 0, ids [0]
# example 1, ids []
indices=((0, 0),),
values=(0,),
dense_shape=(2, 5))
sparse_input_c = sparse_tensor.SparseTensorValue(
# example 0, ids [2]
# example 1, ids [0, 1]
indices=((0, 1), (1, 1), (1, 3)),
values=(2, 0, 1),
dense_shape=(2, 5))
sparse_input_d = sparse_tensor.SparseTensorValue(
# example 0, ids [2]
# example 1, ids []
indices=((0, 1),),
values=(2,),
dense_shape=(2, 5))
# Embedding variable.
embedding_dimension = 2
embedding_values = (
(1., 2.), # id 0
(3., 5.), # id 1
(7., 11.) # id 2
)
def _initializer(shape, dtype, partition_info=None):
self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
self.assertEqual(dtypes.float32, dtype)
self.assertIsNone(partition_info)
return embedding_values
# Expected lookup result, using combiner='mean'.
expected_lookups = (
# example 0:
# A ids [2], embedding = [7, 11]
# B ids [0], embedding = [1, 2]
# C ids [2], embedding = [7, 11]
# D ids [2], embedding = [7, 11]
(7., 11., 1., 2., 7., 11., 7., 11.),
# example 1:
# A ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
# B ids [], embedding = [0, 0]
# C ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
# D ids [], embedding = [0, 0]
(2., 3.5, 0., 0., 2., 3.5, 0., 0.),
)
# Build columns.
categorical_column_a = fc.categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
categorical_column_b = fc.categorical_column_with_identity(
key='bbb', num_buckets=vocabulary_size)
categorical_column_c = fc.categorical_column_with_identity(
key='ccc', num_buckets=vocabulary_size)
categorical_column_d = fc.categorical_column_with_identity(
key='ddd', num_buckets=vocabulary_size)
embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2(
[categorical_column_a, categorical_column_b],
dimension=embedding_dimension,
initializer=_initializer,
trainable=trainable)
embedding_column_c, embedding_column_d = fc.shared_embedding_columns_v2(
[categorical_column_c, categorical_column_d],
dimension=embedding_dimension,
initializer=_initializer,
trainable=trainable)
features = {
'aaa': sparse_input_a,
'bbb': sparse_input_b,
'ccc': sparse_input_c,
'ddd': sparse_input_d
}
# Provide sparse input and get dense result.
dense_features = df.DenseFeatures(
feature_columns=(embedding_column_b, embedding_column_a,
embedding_column_c, embedding_column_d))(
features)
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertCountEqual(
['aaa_bbb_shared_embedding:0', 'ccc_ddd_shared_embedding:0'],
tuple([v.name for v in global_vars]))
for v in global_vars:
self.assertIsInstance(v, variables_lib.Variable)
trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
if trainable:
self.assertCountEqual(
['aaa_bbb_shared_embedding:0', 'ccc_ddd_shared_embedding:0'],
tuple([v.name for v in trainable_vars]))
else:
self.assertCountEqual([], tuple([v.name for v in trainable_vars]))
shared_embedding_vars = global_vars
self.evaluate(variables_lib.global_variables_initializer())
self.evaluate(lookup_ops.tables_initializer())
self.assertAllEqual(embedding_values,
self.evaluate(shared_embedding_vars[0]))
self.assertAllEqual(expected_lookups, self.evaluate(dense_features))
@test_util.run_deprecated_v1
def test_dense_features(self):
self._test_dense_features()
@test_util.run_deprecated_v1
def test_dense_features_no_trainable(self):
self._test_dense_features(trainable=False)
@test_util.run_deprecated_v1
def test_serialization(self):

View File

@ -23,12 +23,12 @@ import numpy as np
from tensorflow.python import keras
from tensorflow.python import tf2
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.feature_column import dense_features_v2
from tensorflow.python.feature_column import feature_column_lib as fc
from tensorflow.python.feature_column import feature_column_v2
from tensorflow.python.keras import keras_parameterized
from tensorflow.python.keras import metrics as metrics_module
from tensorflow.python.keras import testing_utils
from tensorflow.python.keras.feature_column import dense_features_v2
from tensorflow.python.keras.optimizer_v2 import gradient_descent
from tensorflow.python.keras.premade import linear
from tensorflow.python.keras.premade import wide_deep

View File

@ -24,7 +24,6 @@ from absl.testing import parameterized
import numpy as np
from tensorflow.python.client import session
from tensorflow.python.feature_column import dense_features
from tensorflow.python.feature_column import feature_column_v2 as fc
from tensorflow.python.feature_column import sequence_feature_column as sfc
from tensorflow.python.feature_column import serialization
@ -111,54 +110,6 @@ class ConcatenateContextInputTest(test.TestCase, parameterized.TestCase):
sfc.concatenate_context_input(context_input, seq_input)
@test_util.run_all_in_graph_and_eager_modes
class DenseFeaturesTest(test.TestCase):
"""Tests DenseFeatures with sequence feature columns."""
def test_embedding_column(self):
"""Tests that error is raised for sequence embedding column."""
vocabulary_size = 3
sparse_input = sparse_tensor.SparseTensorValue(
# example 0, ids [2]
# example 1, ids [0, 1]
indices=((0, 0), (1, 0), (1, 1)),
values=(2, 0, 1),
dense_shape=(2, 2))
categorical_column_a = sfc.sequence_categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
embedding_column_a = fc.embedding_column(
categorical_column_a, dimension=2)
input_layer = dense_features.DenseFeatures([embedding_column_a])
with self.assertRaisesRegexp(
ValueError,
r'In embedding_column: aaa_embedding\. categorical_column must not be '
r'of type SequenceCategoricalColumn\.'):
_ = input_layer({'aaa': sparse_input})
def test_indicator_column(self):
"""Tests that error is raised for sequence indicator column."""
vocabulary_size = 3
sparse_input = sparse_tensor.SparseTensorValue(
# example 0, ids [2]
# example 1, ids [0, 1]
indices=((0, 0), (1, 0), (1, 1)),
values=(2, 0, 1),
dense_shape=(2, 2))
categorical_column_a = sfc.sequence_categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
indicator_column_a = fc.indicator_column(categorical_column_a)
input_layer = dense_features.DenseFeatures([indicator_column_a])
with self.assertRaisesRegexp(
ValueError,
r'In indicator_column: aaa_indicator\. categorical_column must not be '
r'of type SequenceCategoricalColumn\.'):
_ = input_layer({'aaa': sparse_input})
def _assert_sparse_tensor_value(test_case, expected, actual):
_assert_sparse_tensor_indices_shape(test_case, expected, actual)

View File

@ -20,7 +20,6 @@ from __future__ import print_function
from absl.testing import parameterized
from tensorflow.python.feature_column import dense_features
from tensorflow.python.feature_column import feature_column_v2 as fc
from tensorflow.python.feature_column import serialization
from tensorflow.python.framework import test_util
@ -114,71 +113,6 @@ class FeatureColumnSerializationTest(test.TestCase):
self.assertIs(new_price.normalizer_fn, _custom_fn)
@test_util.run_all_in_graph_and_eager_modes
class DenseFeaturesSerializationTest(test.TestCase, parameterized.TestCase):
@parameterized.named_parameters(
('default', None, None),
('trainable', True, 'trainable'),
('not_trainable', False, 'frozen'))
def test_get_config(self, trainable, name):
cols = [fc.numeric_column('a'),
fc.embedding_column(fc.categorical_column_with_identity(
key='b', num_buckets=3), dimension=2)]
orig_layer = dense_features.DenseFeatures(
cols, trainable=trainable, name=name)
config = orig_layer.get_config()
self.assertEqual(config['name'], orig_layer.name)
self.assertEqual(config['trainable'], trainable)
self.assertLen(config['feature_columns'], 2)
self.assertEqual(
config['feature_columns'][0]['class_name'], 'NumericColumn')
self.assertEqual(config['feature_columns'][0]['config']['shape'], (1,))
self.assertEqual(
config['feature_columns'][1]['class_name'], 'EmbeddingColumn')
@parameterized.named_parameters(
('default', None, None),
('trainable', True, 'trainable'),
('not_trainable', False, 'frozen'))
def test_from_config(self, trainable, name):
cols = [fc.numeric_column('a'),
fc.embedding_column(fc.categorical_column_with_vocabulary_list(
'b', vocabulary_list=['1', '2', '3']), dimension=2),
fc.indicator_column(fc.categorical_column_with_hash_bucket(
key='c', hash_bucket_size=3))]
orig_layer = dense_features.DenseFeatures(
cols, trainable=trainable, name=name)
config = orig_layer.get_config()
new_layer = dense_features.DenseFeatures.from_config(config)
self.assertEqual(new_layer.name, orig_layer.name)
self.assertEqual(new_layer.trainable, trainable)
self.assertLen(new_layer._feature_columns, 3)
self.assertEqual(new_layer._feature_columns[0].name, 'a')
self.assertEqual(new_layer._feature_columns[1].initializer.mean, 0.0)
self.assertEqual(new_layer._feature_columns[1].categorical_column.name, 'b')
self.assertIsInstance(new_layer._feature_columns[2], fc.IndicatorColumn)
def test_crossed_column(self):
a = fc.categorical_column_with_vocabulary_list(
'a', vocabulary_list=['1', '2', '3'])
b = fc.categorical_column_with_vocabulary_list(
'b', vocabulary_list=['1', '2', '3'])
ab = fc.crossed_column([a, b], hash_bucket_size=2)
cols = [fc.indicator_column(ab)]
orig_layer = dense_features.DenseFeatures(cols)
config = orig_layer.get_config()
new_layer = dense_features.DenseFeatures.from_config(config)
self.assertLen(new_layer._feature_columns, 1)
self.assertEqual(new_layer._feature_columns[0].name, 'a_X_b_indicator')
@test_util.run_all_in_graph_and_eager_modes
class LinearModelLayerSerializationTest(test.TestCase, parameterized.TestCase):

View File

@ -12,11 +12,88 @@ exports_files(["LICENSE"])
py_library(
name = "feature_column",
srcs = ["__init__.py"],
deps = [
":dense_features",
":dense_features_v2",
":sequence_feature_column",
],
)
py_library(
name = "dense_features",
srcs = [
"dense_features.py",
],
deps = [
"//tensorflow/python:framework_ops",
"//tensorflow/python:tf_export",
"//tensorflow/python:util",
"//tensorflow/python/feature_column:feature_column_v2",
"//tensorflow/python/keras:backend",
],
)
py_library(
name = "dense_features_v2",
srcs = [
"dense_features_v2.py",
],
deps = [
":dense_features",
"//tensorflow/python:framework_ops",
"//tensorflow/python:tf_export",
"//tensorflow/python/feature_column:feature_column_v2",
],
)
tf_py_test(
name = "dense_features_test",
srcs = ["dense_features_test.py"],
tags = ["no_pip"],
deps = [
":dense_features",
"//tensorflow/python:array_ops",
"//tensorflow/python:client_testlib",
"//tensorflow/python:constant_op",
"//tensorflow/python:dtypes",
"//tensorflow/python:errors",
"//tensorflow/python:framework_ops",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:lookup_ops",
"//tensorflow/python:partitioned_variables",
"//tensorflow/python:session",
"//tensorflow/python:sparse_tensor",
"//tensorflow/python:variables",
"//tensorflow/python/eager:backprop",
"//tensorflow/python/eager:context",
"//tensorflow/python/feature_column:feature_column_v2",
],
)
tf_py_test(
name = "dense_features_v2_test",
srcs = ["dense_features_v2_test.py"],
tags = ["no_pip"],
deps = [
":dense_features_v2",
"//tensorflow/python:array_ops",
"//tensorflow/python:client_testlib",
"//tensorflow/python:constant_op",
"//tensorflow/python:dtypes",
"//tensorflow/python:errors",
"//tensorflow/python:framework_ops",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:lookup_ops",
"//tensorflow/python:session",
"//tensorflow/python:sparse_tensor",
"//tensorflow/python:variables",
"//tensorflow/python/eager:backprop",
"//tensorflow/python/eager:context",
"//tensorflow/python/feature_column:feature_column_v2",
],
)
py_library(
name = "sequence_feature_column",
srcs = ["sequence_feature_column.py"],
@ -59,6 +136,7 @@ py_test(
srcs_version = "PY2AND3",
tags = ["no_pip"],
deps = [
":dense_features",
":sequence_feature_column",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework_test_lib",

View File

@ -23,7 +23,6 @@ import json
from tensorflow.python.feature_column import feature_column_v2 as fc
from tensorflow.python.framework import ops
from tensorflow.python.keras import backend
from tensorflow.python.keras.layers import serialization as layer_serialization
from tensorflow.python.util import serialization
from tensorflow.python.util.tf_export import keras_export
@ -173,7 +172,3 @@ class DenseFeatures(fc._BaseFeaturesLayer): # pylint: disable=protected-access
cols_to_output_tensors[column] = processed_tensors
output_tensors.append(processed_tensors)
return self._verify_and_concat_tensors(output_tensors)
layer_serialization.inject_feature_column_v1_objects(
'DenseFeatures', DenseFeatures)

View File

@ -18,22 +18,25 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl.testing import parameterized
import numpy as np
from tensorflow.python.client import session
from tensorflow.python.eager import backprop
from tensorflow.python.eager import context
from tensorflow.python.feature_column import dense_features as df
from tensorflow.python.feature_column import feature_column_v2 as fc
from tensorflow.python.feature_column import sequence_feature_column as sfc
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import test_util
from tensorflow.python.keras.feature_column import dense_features as df
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import lookup_ops
from tensorflow.python.ops import partitioned_variables
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables as variables_lib
from tensorflow.python.platform import test
@ -676,5 +679,452 @@ class DenseFeaturesTest(test.TestCase):
sess.run(net, feed_dict={features['price']: np.array(1)})
class IndicatorColumnTest(test.TestCase):
@test_util.run_deprecated_v1
def test_dense_features(self):
animal = fc.indicator_column(
fc.categorical_column_with_identity('animal', num_buckets=4))
with ops.Graph().as_default():
features = {
'animal':
sparse_tensor.SparseTensor(
indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
}
net = df.DenseFeatures([animal])(features)
self.evaluate(variables_lib.global_variables_initializer())
self.evaluate(lookup_ops.tables_initializer())
self.assertAllClose([[0., 1., 1., 0.]], self.evaluate(net))
class EmbeddingColumnTest(test.TestCase, parameterized.TestCase):
@parameterized.named_parameters(
{
'testcase_name': 'use_safe_embedding_lookup',
'use_safe_embedding_lookup': True,
'partition_variables': False,
}, {
'testcase_name': 'dont_use_safe_embedding_lookup',
'use_safe_embedding_lookup': False,
'partition_variables': False,
}, {
'testcase_name': 'use_safe_embedding_lookup_partitioned',
'use_safe_embedding_lookup': True,
'partition_variables': True,
}, {
'testcase_name': 'dont_use_safe_embedding_lookup_partitioned',
'use_safe_embedding_lookup': False,
'partition_variables': True,
})
@test_util.run_deprecated_v1
def test_dense_features(self, use_safe_embedding_lookup, partition_variables):
# Inputs.
vocabulary_size = 4
sparse_input = sparse_tensor.SparseTensorValue(
# example 0, ids [2]
# example 1, ids [0, 1]
# example 2, ids []
# example 3, ids [1]
indices=((0, 0), (1, 0), (1, 4), (3, 0)),
values=(2, 0, 1, 1),
dense_shape=(4, 5))
# Embedding variable.
embedding_dimension = 2
embedding_values = (
(1., 2.), # id 0
(3., 5.), # id 1
(7., 11.), # id 2
(9., 13.) # id 3
)
def _initializer(shape, dtype, partition_info=None):
if partition_variables:
self.assertEqual([vocabulary_size, embedding_dimension],
partition_info.full_shape)
self.assertAllEqual((2, embedding_dimension), shape)
else:
self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
self.assertIsNone(partition_info)
self.assertEqual(dtypes.float32, dtype)
return embedding_values
# Expected lookup result, using combiner='mean'.
expected_lookups = (
# example 0, ids [2], embedding = [7, 11]
(7., 11.),
# example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
(2., 3.5),
# example 2, ids [], embedding = [0, 0]
(0., 0.),
# example 3, ids [1], embedding = [3, 5]
(3., 5.),
)
# Build columns.
categorical_column = fc.categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
partitioner = None
if partition_variables:
partitioner = partitioned_variables.fixed_size_partitioner(2, axis=0)
with variable_scope.variable_scope('vars', partitioner=partitioner):
embedding_column = fc.embedding_column(
categorical_column,
dimension=embedding_dimension,
initializer=_initializer,
use_safe_embedding_lookup=use_safe_embedding_lookup)
# Provide sparse input and get dense result.
l = df.DenseFeatures((embedding_column,))
dense_features = l({'aaa': sparse_input})
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
if partition_variables:
self.assertCountEqual(
('vars/dense_features/aaa_embedding/embedding_weights/part_0:0',
'vars/dense_features/aaa_embedding/embedding_weights/part_1:0'),
tuple([v.name for v in global_vars]))
else:
self.assertCountEqual(
('vars/dense_features/aaa_embedding/embedding_weights:0',),
tuple([v.name for v in global_vars]))
for v in global_vars:
self.assertIsInstance(v, variables_lib.Variable)
trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
if partition_variables:
self.assertCountEqual(
('vars/dense_features/aaa_embedding/embedding_weights/part_0:0',
'vars/dense_features/aaa_embedding/embedding_weights/part_1:0'),
tuple([v.name for v in trainable_vars]))
else:
self.assertCountEqual(
('vars/dense_features/aaa_embedding/embedding_weights:0',),
tuple([v.name for v in trainable_vars]))
self.evaluate(variables_lib.global_variables_initializer())
self.evaluate(lookup_ops.tables_initializer())
self.assertAllEqual(embedding_values, self.evaluate(trainable_vars[0]))
self.assertAllEqual(expected_lookups, self.evaluate(dense_features))
if use_safe_embedding_lookup:
self.assertIn('SparseFillEmptyRows',
[x.type for x in ops.get_default_graph().get_operations()])
else:
self.assertNotIn(
'SparseFillEmptyRows',
[x.type for x in ops.get_default_graph().get_operations()])
@test_util.run_deprecated_v1
def test_dense_features_not_trainable(self):
# Inputs.
vocabulary_size = 3
sparse_input = sparse_tensor.SparseTensorValue(
# example 0, ids [2]
# example 1, ids [0, 1]
# example 2, ids []
# example 3, ids [1]
indices=((0, 0), (1, 0), (1, 4), (3, 0)),
values=(2, 0, 1, 1),
dense_shape=(4, 5))
# Embedding variable.
embedding_dimension = 2
embedding_values = (
(1., 2.), # id 0
(3., 5.), # id 1
(7., 11.) # id 2
)
def _initializer(shape, dtype, partition_info=None):
self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
self.assertEqual(dtypes.float32, dtype)
self.assertIsNone(partition_info)
return embedding_values
# Expected lookup result, using combiner='mean'.
expected_lookups = (
# example 0, ids [2], embedding = [7, 11]
(7., 11.),
# example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
(2., 3.5),
# example 2, ids [], embedding = [0, 0]
(0., 0.),
# example 3, ids [1], embedding = [3, 5]
(3., 5.),
)
# Build columns.
categorical_column = fc.categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
embedding_column = fc.embedding_column(
categorical_column,
dimension=embedding_dimension,
initializer=_initializer,
trainable=False)
# Provide sparse input and get dense result.
dense_features = df.DenseFeatures((embedding_column,))({
'aaa': sparse_input
})
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertCountEqual(('dense_features/aaa_embedding/embedding_weights:0',),
tuple([v.name for v in global_vars]))
self.assertCountEqual([],
ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))
self.evaluate(variables_lib.global_variables_initializer())
self.evaluate(lookup_ops.tables_initializer())
self.assertAllEqual(embedding_values, self.evaluate(global_vars[0]))
self.assertAllEqual(expected_lookups, self.evaluate(dense_features))
class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase):
def _test_dense_features(self, trainable=True):
# Inputs.
vocabulary_size = 3
sparse_input_a = sparse_tensor.SparseTensorValue(
# example 0, ids [2]
# example 1, ids [0, 1]
indices=((0, 0), (1, 0), (1, 4)),
values=(2, 0, 1),
dense_shape=(2, 5))
sparse_input_b = sparse_tensor.SparseTensorValue(
# example 0, ids [0]
# example 1, ids []
indices=((0, 0),),
values=(0,),
dense_shape=(2, 5))
sparse_input_c = sparse_tensor.SparseTensorValue(
# example 0, ids [2]
# example 1, ids [0, 1]
indices=((0, 1), (1, 1), (1, 3)),
values=(2, 0, 1),
dense_shape=(2, 5))
sparse_input_d = sparse_tensor.SparseTensorValue(
# example 0, ids [2]
# example 1, ids []
indices=((0, 1),),
values=(2,),
dense_shape=(2, 5))
# Embedding variable.
embedding_dimension = 2
embedding_values = (
(1., 2.), # id 0
(3., 5.), # id 1
(7., 11.) # id 2
)
def _initializer(shape, dtype, partition_info=None):
self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
self.assertEqual(dtypes.float32, dtype)
self.assertIsNone(partition_info)
return embedding_values
# Expected lookup result, using combiner='mean'.
expected_lookups = (
# example 0:
# A ids [2], embedding = [7, 11]
# B ids [0], embedding = [1, 2]
# C ids [2], embedding = [7, 11]
# D ids [2], embedding = [7, 11]
(7., 11., 1., 2., 7., 11., 7., 11.),
# example 1:
# A ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
# B ids [], embedding = [0, 0]
# C ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
# D ids [], embedding = [0, 0]
(2., 3.5, 0., 0., 2., 3.5, 0., 0.),
)
# Build columns.
categorical_column_a = fc.categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
categorical_column_b = fc.categorical_column_with_identity(
key='bbb', num_buckets=vocabulary_size)
categorical_column_c = fc.categorical_column_with_identity(
key='ccc', num_buckets=vocabulary_size)
categorical_column_d = fc.categorical_column_with_identity(
key='ddd', num_buckets=vocabulary_size)
embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2(
[categorical_column_a, categorical_column_b],
dimension=embedding_dimension,
initializer=_initializer,
trainable=trainable)
embedding_column_c, embedding_column_d = fc.shared_embedding_columns_v2(
[categorical_column_c, categorical_column_d],
dimension=embedding_dimension,
initializer=_initializer,
trainable=trainable)
features = {
'aaa': sparse_input_a,
'bbb': sparse_input_b,
'ccc': sparse_input_c,
'ddd': sparse_input_d
}
# Provide sparse input and get dense result.
dense_features = df.DenseFeatures(
feature_columns=(embedding_column_b, embedding_column_a,
embedding_column_c, embedding_column_d))(
features)
# Assert expected embedding variable and lookups.
global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
self.assertCountEqual(
['aaa_bbb_shared_embedding:0', 'ccc_ddd_shared_embedding:0'],
tuple([v.name for v in global_vars]))
for v in global_vars:
self.assertIsInstance(v, variables_lib.Variable)
trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
if trainable:
self.assertCountEqual(
['aaa_bbb_shared_embedding:0', 'ccc_ddd_shared_embedding:0'],
tuple([v.name for v in trainable_vars]))
else:
self.assertCountEqual([], tuple([v.name for v in trainable_vars]))
shared_embedding_vars = global_vars
self.evaluate(variables_lib.global_variables_initializer())
self.evaluate(lookup_ops.tables_initializer())
self.assertAllEqual(embedding_values,
self.evaluate(shared_embedding_vars[0]))
self.assertAllEqual(expected_lookups, self.evaluate(dense_features))
@test_util.run_deprecated_v1
def test_dense_features(self):
self._test_dense_features()
@test_util.run_deprecated_v1
def test_dense_features_no_trainable(self):
self._test_dense_features(trainable=False)
@test_util.run_all_in_graph_and_eager_modes
class DenseFeaturesSerializationTest(test.TestCase, parameterized.TestCase):
@parameterized.named_parameters(
('default', None, None),
('trainable', True, 'trainable'),
('not_trainable', False, 'frozen'))
def test_get_config(self, trainable, name):
cols = [fc.numeric_column('a'),
fc.embedding_column(fc.categorical_column_with_identity(
key='b', num_buckets=3), dimension=2)]
orig_layer = df.DenseFeatures(
cols, trainable=trainable, name=name)
config = orig_layer.get_config()
self.assertEqual(config['name'], orig_layer.name)
self.assertEqual(config['trainable'], trainable)
self.assertLen(config['feature_columns'], 2)
self.assertEqual(
config['feature_columns'][0]['class_name'], 'NumericColumn')
self.assertEqual(config['feature_columns'][0]['config']['shape'], (1,))
self.assertEqual(
config['feature_columns'][1]['class_name'], 'EmbeddingColumn')
@parameterized.named_parameters(
('default', None, None),
('trainable', True, 'trainable'),
('not_trainable', False, 'frozen'))
def test_from_config(self, trainable, name):
cols = [fc.numeric_column('a'),
fc.embedding_column(fc.categorical_column_with_vocabulary_list(
'b', vocabulary_list=['1', '2', '3']), dimension=2),
fc.indicator_column(fc.categorical_column_with_hash_bucket(
key='c', hash_bucket_size=3))]
orig_layer = df.DenseFeatures(
cols, trainable=trainable, name=name)
config = orig_layer.get_config()
new_layer = df.DenseFeatures.from_config(config)
self.assertEqual(new_layer.name, orig_layer.name)
self.assertEqual(new_layer.trainable, trainable)
self.assertLen(new_layer._feature_columns, 3)
self.assertEqual(new_layer._feature_columns[0].name, 'a')
self.assertEqual(new_layer._feature_columns[1].initializer.mean, 0.0)
self.assertEqual(new_layer._feature_columns[1].categorical_column.name, 'b')
self.assertIsInstance(new_layer._feature_columns[2], fc.IndicatorColumn)
def test_crossed_column(self):
a = fc.categorical_column_with_vocabulary_list(
'a', vocabulary_list=['1', '2', '3'])
b = fc.categorical_column_with_vocabulary_list(
'b', vocabulary_list=['1', '2', '3'])
ab = fc.crossed_column([a, b], hash_bucket_size=2)
cols = [fc.indicator_column(ab)]
orig_layer = df.DenseFeatures(cols)
config = orig_layer.get_config()
new_layer = df.DenseFeatures.from_config(config)
self.assertLen(new_layer._feature_columns, 1)
self.assertEqual(new_layer._feature_columns[0].name, 'a_X_b_indicator')
@test_util.run_all_in_graph_and_eager_modes
class SequenceFeatureColumnsTest(test.TestCase):
"""Tests DenseFeatures with sequence feature columns."""
def test_embedding_column(self):
"""Tests that error is raised for sequence embedding column."""
vocabulary_size = 3
sparse_input = sparse_tensor.SparseTensorValue(
# example 0, ids [2]
# example 1, ids [0, 1]
indices=((0, 0), (1, 0), (1, 1)),
values=(2, 0, 1),
dense_shape=(2, 2))
categorical_column_a = sfc.sequence_categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
embedding_column_a = fc.embedding_column(
categorical_column_a, dimension=2)
input_layer = df.DenseFeatures([embedding_column_a])
with self.assertRaisesRegexp(
ValueError,
r'In embedding_column: aaa_embedding\. categorical_column must not be '
r'of type SequenceCategoricalColumn\.'):
_ = input_layer({'aaa': sparse_input})
def test_indicator_column(self):
"""Tests that error is raised for sequence indicator column."""
vocabulary_size = 3
sparse_input = sparse_tensor.SparseTensorValue(
# example 0, ids [2]
# example 1, ids [0, 1]
indices=((0, 0), (1, 0), (1, 1)),
values=(2, 0, 1),
dense_shape=(2, 2))
categorical_column_a = sfc.sequence_categorical_column_with_identity(
key='aaa', num_buckets=vocabulary_size)
indicator_column_a = fc.indicator_column(categorical_column_a)
input_layer = df.DenseFeatures([indicator_column_a])
with self.assertRaisesRegexp(
ValueError,
r'In indicator_column: aaa_indicator\. categorical_column must not be '
r'of type SequenceCategoricalColumn\.'):
_ = input_layer({'aaa': sparse_input})
if __name__ == '__main__':
test.main()

View File

@ -18,10 +18,9 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.feature_column import dense_features
from tensorflow.python.feature_column import feature_column_v2 as fc
from tensorflow.python.framework import ops
from tensorflow.python.keras.layers import serialization as layer_serialization
from tensorflow.python.keras.feature_column import dense_features
from tensorflow.python.util.tf_export import keras_export
@ -94,7 +93,3 @@ class DenseFeatures(dense_features.DenseFeatures):
# We would like to call Layer.build and not _DenseFeaturesHelper.build.
# pylint: disable=protected-access
super(fc._BaseFeaturesLayer, self).build(None) # pylint: disable=bad-super-call
layer_serialization.inject_feature_column_v2_objects(
'DenseFeatures', DenseFeatures)

View File

@ -23,7 +23,6 @@ import numpy as np
from tensorflow.python.client import session
from tensorflow.python.eager import backprop
from tensorflow.python.eager import context
from tensorflow.python.feature_column import dense_features_v2 as df
from tensorflow.python.feature_column import feature_column_v2 as fc
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
@ -31,6 +30,7 @@ from tensorflow.python.framework import errors
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import test_util
from tensorflow.python.keras.feature_column import dense_features_v2 as df
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import lookup_ops
from tensorflow.python.ops import variables as variables_lib

View File

@ -24,11 +24,11 @@ from google.protobuf import text_format
from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.feature_column import dense_features
from tensorflow.python.feature_column import feature_column_v2 as fc
from tensorflow.python.feature_column import sequence_feature_column as sfc
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import test_util
from tensorflow.python.keras.feature_column import dense_features
from tensorflow.python.keras.feature_column import sequence_feature_column as ksfc
from tensorflow.python.keras.layers import recurrent
from tensorflow.python.ops import init_ops_v2

View File

@ -64,23 +64,11 @@ ALL_V2_MODULES = (
recurrent_v2,
preprocessing_normalization
)
FEATURE_COLUMN_V1_OBJECTS = {}
FEATURE_COLUMN_V2_OBJECTS = {}
# ALL_OBJECTS is meant to be a global mutable. Hence we need to make it
# thread-local to avoid concurrent mutations.
LOCAL = threading.local()
def inject_feature_column_v1_objects(name, cls):
global FEATURE_COLUMN_V1_OBJECTS
FEATURE_COLUMN_V1_OBJECTS[name] = cls
def inject_feature_column_v2_objects(name, cls):
global FEATURE_COLUMN_V2_OBJECTS
FEATURE_COLUMN_V2_OBJECTS[name] = cls
def populate_deserializable_objects():
"""Populates dict ALL_OBJECTS with every built-in layer.
"""
@ -134,9 +122,11 @@ def populate_deserializable_objects():
LOCAL.ALL_OBJECTS['WideDeepModel'] = WideDeepModel
if tf2.enabled():
LOCAL.ALL_OBJECTS.update(FEATURE_COLUMN_V2_OBJECTS)
from tensorflow.python.keras.feature_column.dense_features_v2 import DenseFeatures # pylint: disable=g-import-not-at-top
LOCAL.ALL_OBJECTS['DenseFeatures'] = DenseFeatures
else:
LOCAL.ALL_OBJECTS.update(FEATURE_COLUMN_V1_OBJECTS)
from tensorflow.python.keras.feature_column.dense_features import DenseFeatures # pylint: disable=g-import-not-at-top
LOCAL.ALL_OBJECTS['DenseFeatures'] = DenseFeatures
# Merge layers, function versions.
LOCAL.ALL_OBJECTS['add'] = merge.add

View File

@ -39,7 +39,6 @@ from tensorflow.python.distribute import mirrored_strategy
from tensorflow.python.eager import context
from tensorflow.python.eager import def_function
from tensorflow.python.feature_column import feature_column_v2 as fc
from tensorflow.python.feature_column.dense_features import DenseFeatures
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
@ -48,6 +47,7 @@ from tensorflow.python.keras import combinations
from tensorflow.python.keras import keras_parameterized
from tensorflow.python.keras import regularizers
from tensorflow.python.keras import testing_utils
from tensorflow.python.keras.feature_column.dense_features import DenseFeatures
from tensorflow.python.keras.saving.saved_model import load as keras_load
from tensorflow.python.keras.saving.saved_model import save_impl as keras_save
from tensorflow.python.keras.utils import generic_utils

View File

@ -1,6 +1,6 @@
path: "tensorflow.keras.layers.DenseFeatures"
tf_class {
is_instance: "<class \'tensorflow.python.feature_column.dense_features.DenseFeatures\'>"
is_instance: "<class \'tensorflow.python.keras.feature_column.dense_features.DenseFeatures\'>"
is_instance: "<class \'tensorflow.python.feature_column.feature_column_v2._BaseFeaturesLayer\'>"
is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
is_instance: "<class \'tensorflow.python.module.module.Module\'>"

View File

@ -1,7 +1,7 @@
path: "tensorflow.keras.layers.DenseFeatures"
tf_class {
is_instance: "<class \'tensorflow.python.feature_column.dense_features_v2.DenseFeatures\'>"
is_instance: "<class \'tensorflow.python.feature_column.dense_features.DenseFeatures\'>"
is_instance: "<class \'tensorflow.python.keras.feature_column.dense_features_v2.DenseFeatures\'>"
is_instance: "<class \'tensorflow.python.keras.feature_column.dense_features.DenseFeatures\'>"
is_instance: "<class \'tensorflow.python.feature_column.feature_column_v2._BaseFeaturesLayer\'>"
is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
is_instance: "<class \'tensorflow.python.module.module.Module\'>"