diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py index 7db4f17c10d..a03e4da0fae 100644 --- a/tensorflow/python/feature_column/feature_column_v2.py +++ b/tensorflow/python/feature_column/feature_column_v2.py @@ -145,8 +145,6 @@ from tensorflow.python.framework import tensor_shape # TODO(b/118385027): Dependency on keras can be problematic if Keras moves out # of the main repo. from tensorflow.python.keras import initializers -from tensorflow.python.keras.engine import training as keras_training -from tensorflow.python.keras.engine.base_layer import Layer from tensorflow.python.keras.utils import generic_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops @@ -154,7 +152,6 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import embedding_ops from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import math_ops -from tensorflow.python.ops import nn_ops from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import string_ops @@ -383,265 +380,6 @@ class _StateManagerImplV2(_StateManagerImpl): return var -class _LinearModelLayer(Layer): - """Layer that contains logic for `LinearModel`.""" - - def __init__(self, - feature_columns, - units=1, - sparse_combiner='sum', - trainable=True, - name=None, - **kwargs): - super(_LinearModelLayer, self).__init__( - name=name, trainable=trainable, **kwargs) - - self._feature_columns = _normalize_feature_columns(feature_columns) - for column in self._feature_columns: - if not isinstance(column, (DenseColumn, CategoricalColumn)): - raise ValueError( - 'Items of feature_columns must be either a ' - 'DenseColumn or CategoricalColumn. Given: {}'.format(column)) - - self._units = units - self._sparse_combiner = sparse_combiner - - self._state_manager = _StateManagerImpl(self, self.trainable) - self.bias = None - - def build(self, _): - # We need variable scopes for now because we want the variable partitioning - # information to percolate down. We also use _pure_variable_scope's here - # since we want to open up a name_scope in the `call` method while creating - # the ops. - with variable_scope._pure_variable_scope(self.name): # pylint: disable=protected-access - for column in self._feature_columns: - with variable_scope._pure_variable_scope( # pylint: disable=protected-access - _sanitize_column_name_for_variable_scope(column.name)): - # Create the state for each feature column - column.create_state(self._state_manager) - - # Create a weight variable for each column. - if isinstance(column, CategoricalColumn): - first_dim = column.num_buckets - else: - first_dim = column.variable_shape.num_elements() - self._state_manager.create_variable( - column, - name='weights', - dtype=dtypes.float32, - shape=(first_dim, self._units), - initializer=initializers.zeros(), - trainable=self.trainable) - - # Create a bias variable. - self.bias = self.add_variable( - name='bias_weights', - dtype=dtypes.float32, - shape=[self._units], - initializer=initializers.zeros(), - trainable=self.trainable, - use_resource=True, - # TODO(rohanj): Get rid of this hack once we have a mechanism for - # specifying a default partitioner for an entire layer. In that case, - # the default getter for Layers should work. - getter=variable_scope.get_variable) - - super(_LinearModelLayer, self).build(None) - - def call(self, features): - if not isinstance(features, dict): - raise ValueError('We expected a dictionary here. Instead we got: {}' - .format(features)) - with ops.name_scope(self.name): - transformation_cache = FeatureTransformationCache(features) - weighted_sums = [] - for column in self._feature_columns: - with ops.name_scope( - _sanitize_column_name_for_variable_scope(column.name)): - # All the weights used in the linear model are owned by the state - # manager associated with this Linear Model. - weight_var = self._state_manager.get_variable(column, 'weights') - - weighted_sum = _create_weighted_sum( - column=column, - transformation_cache=transformation_cache, - state_manager=self._state_manager, - sparse_combiner=self._sparse_combiner, - weight_var=weight_var) - weighted_sums.append(weighted_sum) - - _verify_static_batch_size_equality(weighted_sums, self._feature_columns) - predictions_no_bias = math_ops.add_n( - weighted_sums, name='weighted_sum_no_bias') - predictions = nn_ops.bias_add( - predictions_no_bias, self.bias, name='weighted_sum') - return predictions - - def get_config(self): - # Import here to avoid circular imports. - from tensorflow.python.feature_column import serialization # pylint: disable=g-import-not-at-top - column_configs = serialization.serialize_feature_columns( - self._feature_columns) - config = { - 'feature_columns': column_configs, - 'units': self._units, - 'sparse_combiner': self._sparse_combiner - } - - base_config = super( # pylint: disable=bad-super-call - _LinearModelLayer, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config, custom_objects=None): - # Import here to avoid circular imports. - from tensorflow.python.feature_column import serialization # pylint: disable=g-import-not-at-top - config_cp = config.copy() - columns = serialization.deserialize_feature_columns( - config_cp['feature_columns'], custom_objects=custom_objects) - - del config_cp['feature_columns'] - return cls(feature_columns=columns, **config_cp) - - -# TODO(tanzheny): Cleanup it with respect to Premade model b/132690565. -class LinearModel(keras_training.Model): - """Produces a linear prediction `Tensor` based on given `feature_columns`. - - This layer generates a weighted sum based on output dimension `units`. - Weighted sum refers to logits in classification problems. It refers to the - prediction itself for linear regression problems. - - Note on supported columns: `LinearLayer` treats categorical columns as - `indicator_column`s. To be specific, assume the input as `SparseTensor` looks - like: - - ```python - shape = [2, 2] - { - [0, 0]: "a" - [1, 0]: "b" - [1, 1]: "c" - } - ``` - `linear_model` assigns weights for the presence of "a", "b", "c' implicitly, - just like `indicator_column`, while `input_layer` explicitly requires wrapping - each of categorical columns with an `embedding_column` or an - `indicator_column`. - - Example of usage: - - ```python - price = numeric_column('price') - price_buckets = bucketized_column(price, boundaries=[0., 10., 100., 1000.]) - keywords = categorical_column_with_hash_bucket("keywords", 10K) - keywords_price = crossed_column('keywords', price_buckets, ...) - columns = [price_buckets, keywords, keywords_price ...] - linear_model = LinearLayer(columns) - - features = tf.io.parse_example(..., features=make_parse_example_spec(columns)) - prediction = linear_model(features) - ``` - """ - - def __init__(self, - feature_columns, - units=1, - sparse_combiner='sum', - trainable=True, - name=None, - **kwargs): - """Constructs a LinearLayer. - - Args: - feature_columns: An iterable containing the FeatureColumns to use as - inputs to your model. All items should be instances of classes derived - from `_FeatureColumn`s. - units: An integer, dimensionality of the output space. Default value is 1. - sparse_combiner: A string specifying how to reduce if a categorical column - is multivalent. Except `numeric_column`, almost all columns passed to - `linear_model` are considered as categorical columns. It combines each - categorical column independently. Currently "mean", "sqrtn" and "sum" - are supported, with "sum" the default for linear model. "sqrtn" often - achieves good accuracy, in particular with bag-of-words columns. - * "sum": do not normalize features in the column - * "mean": do l1 normalization on features in the column - * "sqrtn": do l2 normalization on features in the column - For example, for two features represented as the categorical columns: - - ```python - # Feature 1 - - shape = [2, 2] - { - [0, 0]: "a" - [0, 1]: "b" - [1, 0]: "c" - } - - # Feature 2 - - shape = [2, 3] - { - [0, 0]: "d" - [1, 0]: "e" - [1, 1]: "f" - [1, 2]: "g" - } - ``` - - with `sparse_combiner` as "mean", the linear model outputs conceptually - are - ``` - y_0 = 1.0 / 2.0 * ( w_a + w_ b) + w_c + b_0 - y_1 = w_d + 1.0 / 3.0 * ( w_e + w_ f + w_g) + b_1 - ``` - where `y_i` is the output, `b_i` is the bias, and `w_x` is the weight - assigned to the presence of `x` in the input features. - trainable: If `True` also add the variable to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - name: Name to give to the Linear Model. All variables and ops created will - be scoped by this name. - **kwargs: Keyword arguments to construct a layer. - - Raises: - ValueError: if an item in `feature_columns` is neither a `DenseColumn` - nor `CategoricalColumn`. - """ - - super(LinearModel, self).__init__(name=name, **kwargs) - self.layer = _LinearModelLayer( - feature_columns, - units, - sparse_combiner, - trainable, - name=self.name, - **kwargs) - - def call(self, features): - """Returns a `Tensor` the represents the predictions of a linear model. - - Args: - features: A mapping from key to tensors. `_FeatureColumn`s look up via - these keys. For example `numeric_column('price')` will look at 'price' - key in this dict. Values are `Tensor` or `SparseTensor` depending on - corresponding `_FeatureColumn`. - - Returns: - A `Tensor` which represents predictions/logits of a linear model. Its - shape is (batch_size, units) and its dtype is `float32`. - - Raises: - ValueError: If features are not a dictionary. - """ - return self.layer(features) - - @property - def bias(self): - return self.layer.bias - - def _transform_features_v2(features, feature_columns, state_manager): """Returns transformed features based on features columns passed in. diff --git a/tensorflow/python/feature_column/feature_column_v2_test.py b/tensorflow/python/feature_column/feature_column_v2_test.py index 076515c84b8..91fb7eadb89 100644 --- a/tensorflow/python/feature_column/feature_column_v2_test.py +++ b/tensorflow/python/feature_column/feature_column_v2_test.py @@ -48,7 +48,6 @@ from tensorflow.python.ops import partitioned_variables from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables as variables_lib from tensorflow.python.platform import test -from tensorflow.python.training import rmsprop def _initialized_session(config=None): @@ -439,36 +438,6 @@ class NumericColumnTest(test.TestCase): 'aaa', shape=[1, 2], default_value=np.array([[3., 2.]])) self.assertEqual(a.default_value, ((3., 2.),)) - @test_util.run_deprecated_v1 - def test_linear_model(self): - price = fc.numeric_column('price') - with ops.Graph().as_default(): - features = {'price': [[1.], [5.]]} - model = fc.LinearModel([price]) - predictions = model(features) - price_var, bias = model.variables - with _initialized_session() as sess: - self.assertAllClose([0.], self.evaluate(bias)) - self.assertAllClose([[0.]], self.evaluate(price_var)) - self.assertAllClose([[0.], [0.]], self.evaluate(predictions)) - sess.run(price_var.assign([[10.]])) - self.assertAllClose([[10.], [50.]], self.evaluate(predictions)) - - @test_util.run_deprecated_v1 - def test_linear_model_sanitizes_scope_names(self): - price = fc.numeric_column('price > 100') - with ops.Graph().as_default(): - features = {'price > 100': [[1.], [5.]]} - model = fc.LinearModel([price]) - predictions = model(features) - price_var, bias = model.variables - with _initialized_session() as sess: - self.assertAllClose([0.], self.evaluate(bias)) - self.assertAllClose([[0.]], self.evaluate(price_var)) - self.assertAllClose([[0.], [0.]], self.evaluate(predictions)) - sess.run(price_var.assign([[10.]])) - self.assertAllClose([[10.], [50.]], self.evaluate(predictions)) - def test_old_linear_model(self): price = fc.numeric_column('price') with ops.Graph().as_default(): @@ -705,63 +674,6 @@ class BucketizedColumnTest(test.TestCase): self.assertAllEqual(a_bucketized_copy.variable_shape, (2, 3)) self.assertEqual(a_bucketized_copy.boundaries, (0, 1)) - def test_linear_model_one_input_value(self): - """Tests linear_model() for input with shape=[1].""" - price = fc.numeric_column('price', shape=[1]) - bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) - with ops.Graph().as_default(): - features = {'price': [[-1.], [1.], [5.], [6.]]} - model = fc.LinearModel([bucketized_price]) - predictions = model(features) - bucketized_price_var, bias = model.variables - with _initialized_session() as sess: - self.assertAllClose([0.], self.evaluate(bias)) - # One weight variable per bucket, all initialized to zero. - self.assertAllClose([[0.], [0.], [0.], [0.], [0.]], - self.evaluate(bucketized_price_var)) - self.assertAllClose([[0.], [0.], [0.], [0.]], - self.evaluate(predictions)) - sess.run( - bucketized_price_var.assign([[10.], [20.], [30.], [40.], [50.]])) - # price -1. is in the 0th bucket, whose weight is 10. - # price 1. is in the 1st bucket, whose weight is 20. - # price 5. is in the 3rd bucket, whose weight is 40. - # price 6. is in the 4th bucket, whose weight is 50. - self.assertAllClose([[10.], [20.], [40.], [50.]], - self.evaluate(predictions)) - sess.run(bias.assign([1.])) - self.assertAllClose([[11.], [21.], [41.], [51.]], - self.evaluate(predictions)) - - def test_linear_model_two_input_values(self): - """Tests linear_model() for input with shape=[2].""" - price = fc.numeric_column('price', shape=[2]) - bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) - with ops.Graph().as_default(): - features = {'price': [[-1., 1.], [5., 6.]]} - model = fc.LinearModel([bucketized_price]) - predictions = model(features) - bucketized_price_var, bias = model.variables - with _initialized_session() as sess: - self.assertAllClose([0.], self.evaluate(bias)) - # One weight per bucket per input column, all initialized to zero. - self.assertAllClose( - [[0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.]], - self.evaluate(bucketized_price_var)) - self.assertAllClose([[0.], [0.]], self.evaluate(predictions)) - sess.run( - bucketized_price_var.assign([[10.], [20.], [30.], [40.], [50.], - [60.], [70.], [80.], [90.], [100.]])) - # 1st example: - # price -1. is in the 0th bucket, whose weight is 10. - # price 1. is in the 6th bucket, whose weight is 70. - # 2nd example: - # price 5. is in the 3rd bucket, whose weight is 40. - # price 6. is in the 9th bucket, whose weight is 100. - self.assertAllClose([[80.], [140.]], self.evaluate(predictions)) - sess.run(bias.assign([1.])) - self.assertAllClose([[81.], [141.]], self.evaluate(predictions)) - def test_old_linear_model_one_input_value(self): """Tests linear_model() for input with shape=[1].""" price = fc.numeric_column('price', shape=[1]) @@ -1070,32 +982,6 @@ class HashedCategoricalColumnTest(test.TestCase): self.assertEqual( transformation_cache.get(hashed_sparse, None), id_weight_pair.id_tensor) - @test_util.run_deprecated_v1 - def test_linear_model(self): - wire_column = fc.categorical_column_with_hash_bucket('wire', 4) - self.assertEqual(4, wire_column.num_buckets) - with ops.Graph().as_default(): - model = fc.LinearModel((wire_column,)) - predictions = model({ - wire_column.name: - sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - }) - wire_var, bias = model.variables - - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) - - self.assertAllClose((0.,), self.evaluate(bias)) - self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), self.evaluate(wire_var)) - self.assertAllClose(((0.,), (0.,)), self.evaluate(predictions)) - self.evaluate(wire_var.assign(((1.,), (2.,), (3.,), (4.,)))) - # 'marlo' -> 3: wire_var[3] = 4 - # 'skywalker' -> 2, 'omar' -> 2: wire_var[2] + wire_var[2] = 3+3 = 6 - self.assertAllClose(((4.,), (6.,)), self.evaluate(predictions)) - def test_old_linear_model(self): wire_column = fc.categorical_column_with_hash_bucket('wire', 4) self.assertEqual(4, wire_column.num_buckets) @@ -1364,101 +1250,6 @@ class CrossedColumnTest(test.TestCase): self.assertAllEqual(expected_values, id_tensor_eval.values) self.assertAllEqual((2, 4), id_tensor_eval.dense_shape) - @test_util.run_deprecated_v1 - def test_linear_model(self): - """Tests linear_model. - - Uses data from test_get_sparse_tensors_simple. - """ - a = fc.numeric_column('a', dtype=dtypes.int32, shape=(2,)) - b = fc.bucketized_column(a, boundaries=(0, 1)) - crossed = fc.crossed_column([b, 'c'], hash_bucket_size=5, hash_key=5) - with ops.Graph().as_default(): - model = fc.LinearModel((crossed,)) - predictions = model({ - 'a': - constant_op.constant(((-1., .5), (.5, 1.))), - 'c': - sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=['cA', 'cB', 'cC'], - dense_shape=(2, 2)), - }) - crossed_var, bias = model.variables - with _initialized_session() as sess: - self.assertAllClose((0.,), self.evaluate(bias)) - self.assertAllClose(((0.,), (0.,), (0.,), (0.,), (0.,)), - self.evaluate(crossed_var)) - self.assertAllClose(((0.,), (0.,)), self.evaluate(predictions)) - sess.run(crossed_var.assign(((1.,), (2.,), (3.,), (4.,), (5.,)))) - # Expected ids after cross = (1, 0, 1, 3, 4, 2) - self.assertAllClose(((3.,), (14.,)), self.evaluate(predictions)) - sess.run(bias.assign((.1,))) - self.assertAllClose(((3.1,), (14.1,)), self.evaluate(predictions)) - - def test_linear_model_with_weights(self): - - class _TestColumnWithWeights(BaseFeatureColumnForTests, - fc.CategoricalColumn): - """Produces sparse IDs and sparse weights.""" - - @property - def _is_v2_column(self): - return True - - @property - def name(self): - return 'test_column' - - @property - def parse_example_spec(self): - return { - self.name: - parsing_ops.VarLenFeature(dtypes.int32), - '{}_weights'.format(self.name): - parsing_ops.VarLenFeature(dtypes.float32), - } - - @property - def num_buckets(self): - return 5 - - def transform_feature(self, transformation_cache, state_manager): - return (transformation_cache.get(self.name, state_manager), - transformation_cache.get('{}_weights'.format(self.name), - state_manager)) - - def get_sparse_tensors(self, transformation_cache, state_manager): - """Populates both id_tensor and weight_tensor.""" - ids_and_weights = transformation_cache.get(self, state_manager) - return fc.CategoricalColumn.IdWeightPair( - id_tensor=ids_and_weights[0], weight_tensor=ids_and_weights[1]) - - t = _TestColumnWithWeights() - crossed = fc.crossed_column([t, 'c'], hash_bucket_size=5, hash_key=5) - with ops.Graph().as_default(): - with self.assertRaisesRegexp( - ValueError, - 'crossed_column does not support weight_tensor.*{}'.format(t.name)): - model = fc.LinearModel((crossed,)) - model({ - t.name: - sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=[0, 1, 2], - dense_shape=(2, 2)), - '{}_weights'.format(t.name): - sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=[1., 10., 2.], - dense_shape=(2, 2)), - 'c': - sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=['cA', 'cB', 'cC'], - dense_shape=(2, 2)), - }) - def test_old_linear_model(self): """Tests linear_model. @@ -1643,668 +1434,6 @@ class CrossedColumnTest(test.TestCase): self.assertIs(b, new_crossed.keys[0]) -class LinearModelTest(test.TestCase): - - def test_raises_if_empty_feature_columns(self): - with self.assertRaisesRegexp(ValueError, - 'feature_columns must not be empty'): - fc.LinearModel(feature_columns=[]) - - def test_should_be_feature_column(self): - with self.assertRaisesRegexp(ValueError, 'must be a FeatureColumn'): - fc.LinearModel(feature_columns='NotSupported') - - def test_should_be_dense_or_categorical_column(self): - - class NotSupportedColumn(BaseFeatureColumnForTests): - - @property - def _is_v2_column(self): - return True - - @property - def name(self): - return 'NotSupportedColumn' - - def transform_feature(self, transformation_cache, state_manager): - pass - - @property - def parse_example_spec(self): - pass - - with self.assertRaisesRegexp( - ValueError, 'must be either a DenseColumn or CategoricalColumn'): - fc.LinearModel(feature_columns=[NotSupportedColumn()]) - - def test_does_not_support_dict_columns(self): - with self.assertRaisesRegexp( - ValueError, 'Expected feature_columns to be iterable, found dict.'): - fc.LinearModel(feature_columns={'a': fc.numeric_column('a')}) - - def test_raises_if_duplicate_name(self): - with self.assertRaisesRegexp( - ValueError, 'Duplicate feature column name found for columns'): - fc.LinearModel( - feature_columns=[fc.numeric_column('a'), - fc.numeric_column('a')]) - - def test_not_dict_input_features(self): - price = fc.numeric_column('price') - with ops.Graph().as_default(): - features = [[1.], [5.]] - model = fc.LinearModel([price]) - with self.assertRaisesRegexp(ValueError, 'We expected a dictionary here'): - model(features) - - def test_dense_bias(self): - price = fc.numeric_column('price') - with ops.Graph().as_default(): - features = {'price': [[1.], [5.]]} - model = fc.LinearModel([price]) - predictions = model(features) - price_var, bias = model.variables - with _initialized_session() as sess: - self.assertAllClose([0.], self.evaluate(bias)) - sess.run(price_var.assign([[10.]])) - sess.run(bias.assign([5.])) - self.assertAllClose([[15.], [55.]], self.evaluate(predictions)) - - def test_sparse_bias(self): - wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) - with ops.Graph().as_default(): - wire_tensor = sparse_tensor.SparseTensor( - values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] - indices=[[0, 0], [1, 0], [1, 1]], - dense_shape=[2, 2]) - features = {'wire_cast': wire_tensor} - model = fc.LinearModel([wire_cast]) - predictions = model(features) - wire_cast_var, bias = model.variables - with _initialized_session() as sess: - self.assertAllClose([0.], self.evaluate(bias)) - self.assertAllClose([[0.], [0.], [0.], [0.]], - self.evaluate(wire_cast_var)) - sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) - sess.run(bias.assign([5.])) - self.assertAllClose([[1005.], [10015.]], self.evaluate(predictions)) - - def test_dense_and_sparse_bias(self): - wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) - price = fc.numeric_column('price') - with ops.Graph().as_default(): - wire_tensor = sparse_tensor.SparseTensor( - values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] - indices=[[0, 0], [1, 0], [1, 1]], - dense_shape=[2, 2]) - features = {'wire_cast': wire_tensor, 'price': [[1.], [5.]]} - model = fc.LinearModel([wire_cast, price]) - predictions = model(features) - price_var, wire_cast_var, bias = model.variables - with _initialized_session() as sess: - sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) - sess.run(bias.assign([5.])) - sess.run(price_var.assign([[10.]])) - self.assertAllClose([[1015.], [10065.]], self.evaluate(predictions)) - - def test_dense_and_sparse_column(self): - """When the column is both dense and sparse, uses sparse tensors.""" - - class _DenseAndSparseColumn(BaseFeatureColumnForTests, fc.DenseColumn, - fc.CategoricalColumn): - - @property - def _is_v2_column(self): - return True - - @property - def name(self): - return 'dense_and_sparse_column' - - @property - def parse_example_spec(self): - return {self.name: parsing_ops.VarLenFeature(self.dtype)} - - def transform_feature(self, transformation_cache, state_manager): - return transformation_cache.get(self.name, state_manager) - - @property - def variable_shape(self): - raise ValueError('Should not use this method.') - - def get_dense_tensor(self, transformation_cache, state_manager): - raise ValueError('Should not use this method.') - - @property - def num_buckets(self): - return 4 - - def get_sparse_tensors(self, transformation_cache, state_manager): - sp_tensor = sparse_tensor.SparseTensor( - indices=[[0, 0], [1, 0], [1, 1]], - values=[2, 0, 3], - dense_shape=[2, 2]) - return fc.CategoricalColumn.IdWeightPair(sp_tensor, None) - - dense_and_sparse_column = _DenseAndSparseColumn() - with ops.Graph().as_default(): - sp_tensor = sparse_tensor.SparseTensor( - values=['omar', 'stringer', 'marlo'], - indices=[[0, 0], [1, 0], [1, 1]], - dense_shape=[2, 2]) - features = {dense_and_sparse_column.name: sp_tensor} - model = fc.LinearModel([dense_and_sparse_column]) - predictions = model(features) - dense_and_sparse_column_var, bias = model.variables - with _initialized_session() as sess: - sess.run( - dense_and_sparse_column_var.assign([[10.], [100.], [1000.], - [10000.]])) - sess.run(bias.assign([5.])) - self.assertAllClose([[1005.], [10015.]], self.evaluate(predictions)) - - def test_dense_multi_output(self): - price = fc.numeric_column('price') - with ops.Graph().as_default(): - features = {'price': [[1.], [5.]]} - model = fc.LinearModel([price], units=3) - predictions = model(features) - price_var, bias = model.variables - with _initialized_session() as sess: - self.assertAllClose(np.zeros((3,)), self.evaluate(bias)) - self.assertAllClose(np.zeros((1, 3)), self.evaluate(price_var)) - sess.run(price_var.assign([[10., 100., 1000.]])) - sess.run(bias.assign([5., 6., 7.])) - self.assertAllClose([[15., 106., 1007.], [55., 506., 5007.]], - self.evaluate(predictions)) - - def test_sparse_multi_output(self): - wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) - with ops.Graph().as_default(): - wire_tensor = sparse_tensor.SparseTensor( - values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] - indices=[[0, 0], [1, 0], [1, 1]], - dense_shape=[2, 2]) - features = {'wire_cast': wire_tensor} - model = fc.LinearModel([wire_cast], units=3) - predictions = model(features) - wire_cast_var, bias = model.variables - with _initialized_session() as sess: - self.assertAllClose(np.zeros((3,)), self.evaluate(bias)) - self.assertAllClose(np.zeros((4, 3)), self.evaluate(wire_cast_var)) - sess.run( - wire_cast_var.assign([[10., 11., 12.], [100., 110., 120.], - [1000., 1100., 1200.], - [10000., 11000., 12000.]])) - sess.run(bias.assign([5., 6., 7.])) - self.assertAllClose([[1005., 1106., 1207.], [10015., 11017., 12019.]], - self.evaluate(predictions)) - - def test_dense_multi_dimension(self): - price = fc.numeric_column('price', shape=2) - with ops.Graph().as_default(): - features = {'price': [[1., 2.], [5., 6.]]} - model = fc.LinearModel([price]) - predictions = model(features) - price_var, _ = model.variables - with _initialized_session() as sess: - self.assertAllClose([[0.], [0.]], self.evaluate(price_var)) - sess.run(price_var.assign([[10.], [100.]])) - self.assertAllClose([[210.], [650.]], self.evaluate(predictions)) - - def test_sparse_multi_rank(self): - wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) - with ops.Graph().as_default(): - wire_tensor = array_ops.sparse_placeholder(dtypes.string) - wire_value = sparse_tensor.SparseTensorValue( - values=['omar', 'stringer', 'marlo', 'omar'], # hashed = [2, 0, 3, 2] - indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 0, 1]], - dense_shape=[2, 2, 2]) - features = {'wire_cast': wire_tensor} - model = fc.LinearModel([wire_cast]) - predictions = model(features) - wire_cast_var, _ = model.variables - with _initialized_session() as sess: - self.assertAllClose(np.zeros((4, 1)), self.evaluate(wire_cast_var)) - self.assertAllClose( - np.zeros((2, 1)), - predictions.eval(feed_dict={wire_tensor: wire_value})) - sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) - self.assertAllClose( - [[1010.], [11000.]], - predictions.eval(feed_dict={wire_tensor: wire_value})) - - def test_sparse_combiner(self): - wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) - with ops.Graph().as_default(): - wire_tensor = sparse_tensor.SparseTensor( - values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] - indices=[[0, 0], [1, 0], [1, 1]], - dense_shape=[2, 2]) - features = {'wire_cast': wire_tensor} - model = fc.LinearModel([wire_cast], sparse_combiner='mean') - predictions = model(features) - wire_cast_var, bias = model.variables - with _initialized_session() as sess: - sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) - sess.run(bias.assign([5.])) - self.assertAllClose([[1005.], [5010.]], self.evaluate(predictions)) - - def test_sparse_combiner_sqrtn(self): - wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) - with ops.Graph().as_default(): - wire_tensor = sparse_tensor.SparseTensor( - values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] - indices=[[0, 0], [1, 0], [1, 1]], - dense_shape=[2, 2]) - features = {'wire_cast': wire_tensor} - model = fc.LinearModel([wire_cast], sparse_combiner='sqrtn') - predictions = model(features) - wire_cast_var, bias = model.variables - with _initialized_session() as sess: - self.evaluate(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) - self.evaluate(bias.assign([5.])) - self.assertAllClose([[1005.], [7083.139]], self.evaluate(predictions)) - - def test_sparse_combiner_with_negative_weights(self): - wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) - wire_cast_weights = fc.weighted_categorical_column(wire_cast, 'weights') - - with ops.Graph().as_default(): - wire_tensor = sparse_tensor.SparseTensor( - values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] - indices=[[0, 0], [1, 0], [1, 1]], - dense_shape=[2, 2]) - features = { - 'wire_cast': wire_tensor, - 'weights': constant_op.constant([[1., 1., -1.0]]) - } - model = fc.LinearModel([wire_cast_weights], sparse_combiner='sum') - predictions = model(features) - wire_cast_var, bias = model.variables - with _initialized_session() as sess: - sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) - sess.run(bias.assign([5.])) - self.assertAllClose([[1005.], [-9985.]], self.evaluate(predictions)) - - def test_dense_multi_dimension_multi_output(self): - price = fc.numeric_column('price', shape=2) - with ops.Graph().as_default(): - features = {'price': [[1., 2.], [5., 6.]]} - model = fc.LinearModel([price], units=3) - predictions = model(features) - price_var, bias = model.variables - with _initialized_session() as sess: - self.assertAllClose(np.zeros((3,)), self.evaluate(bias)) - self.assertAllClose(np.zeros((2, 3)), self.evaluate(price_var)) - sess.run(price_var.assign([[1., 2., 3.], [10., 100., 1000.]])) - sess.run(bias.assign([2., 3., 4.])) - self.assertAllClose([[23., 205., 2007.], [67., 613., 6019.]], - self.evaluate(predictions)) - - def test_raises_if_shape_mismatch(self): - price = fc.numeric_column('price', shape=2) - with ops.Graph().as_default(): - features = {'price': [[1.], [5.]]} - with self.assertRaisesRegexp( - Exception, - r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'): - model = fc.LinearModel([price]) - model(features) - - def test_dense_reshaping(self): - price = fc.numeric_column('price', shape=[1, 2]) - with ops.Graph().as_default(): - features = {'price': [[[1., 2.]], [[5., 6.]]]} - model = fc.LinearModel([price]) - predictions = model(features) - price_var, bias = model.variables - with _initialized_session() as sess: - self.assertAllClose([0.], self.evaluate(bias)) - self.assertAllClose([[0.], [0.]], self.evaluate(price_var)) - self.assertAllClose([[0.], [0.]], self.evaluate(predictions)) - sess.run(price_var.assign([[10.], [100.]])) - self.assertAllClose([[210.], [650.]], self.evaluate(predictions)) - - def test_dense_multi_column(self): - price1 = fc.numeric_column('price1', shape=2) - price2 = fc.numeric_column('price2') - with ops.Graph().as_default(): - features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]} - model = fc.LinearModel([price1, price2]) - predictions = model(features) - price1_var, price2_var, bias = model.variables - with _initialized_session() as sess: - self.assertAllClose([0.], self.evaluate(bias)) - self.assertAllClose([[0.], [0.]], self.evaluate(price1_var)) - self.assertAllClose([[0.]], self.evaluate(price2_var)) - self.assertAllClose([[0.], [0.]], self.evaluate(predictions)) - sess.run(price1_var.assign([[10.], [100.]])) - sess.run(price2_var.assign([[1000.]])) - sess.run(bias.assign([7.])) - self.assertAllClose([[3217.], [4657.]], self.evaluate(predictions)) - - def test_dense_trainable_default(self): - price = fc.numeric_column('price') - with ops.Graph().as_default() as g: - features = {'price': [[1.], [5.]]} - model = fc.LinearModel([price]) - model(features) - price_var, bias = model.variables - trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) - self.assertIn(bias, trainable_vars) - self.assertIn(price_var, trainable_vars) - - def test_sparse_trainable_default(self): - wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) - with ops.Graph().as_default() as g: - wire_tensor = sparse_tensor.SparseTensor( - values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) - features = {'wire_cast': wire_tensor} - model = fc.LinearModel([wire_cast]) - model(features) - trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) - wire_cast_var, bias = model.variables - self.assertIn(bias, trainable_vars) - self.assertIn(wire_cast_var, trainable_vars) - - def test_dense_trainable_false(self): - price = fc.numeric_column('price') - with ops.Graph().as_default() as g: - features = {'price': [[1.], [5.]]} - model = fc.LinearModel([price], trainable=False) - model(features) - trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) - self.assertEqual([], trainable_vars) - - def test_sparse_trainable_false(self): - wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) - with ops.Graph().as_default() as g: - wire_tensor = sparse_tensor.SparseTensor( - values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) - features = {'wire_cast': wire_tensor} - model = fc.LinearModel([wire_cast], trainable=False) - model(features) - trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) - self.assertEqual([], trainable_vars) - - def test_column_order(self): - price_a = fc.numeric_column('price_a') - price_b = fc.numeric_column('price_b') - wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) - with ops.Graph().as_default(): - features = { - 'price_a': [[1.]], - 'price_b': [[3.]], - 'wire_cast': - sparse_tensor.SparseTensor( - values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) - } - model = fc.LinearModel([price_a, wire_cast, price_b]) - model(features) - - my_vars = model.variables - self.assertIn('price_a', my_vars[0].name) - self.assertIn('price_b', my_vars[1].name) - self.assertIn('wire_cast', my_vars[2].name) - - with ops.Graph().as_default(): - features = { - 'price_a': [[1.]], - 'price_b': [[3.]], - 'wire_cast': - sparse_tensor.SparseTensor( - values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) - } - model = fc.LinearModel([wire_cast, price_b, price_a]) - model(features) - - my_vars = model.variables - self.assertIn('price_a', my_vars[0].name) - self.assertIn('price_b', my_vars[1].name) - self.assertIn('wire_cast', my_vars[2].name) - - def test_variable_names(self): - price1 = fc.numeric_column('price1') - dense_feature = fc.numeric_column('dense_feature') - dense_feature_bucketized = fc.bucketized_column( - dense_feature, boundaries=[0.]) - some_sparse_column = fc.categorical_column_with_hash_bucket( - 'sparse_feature', hash_bucket_size=5) - some_embedding_column = fc.embedding_column( - some_sparse_column, dimension=10) - all_cols = [price1, dense_feature_bucketized, some_embedding_column] - - with ops.Graph().as_default(): - model = fc.LinearModel(all_cols) - features = { - 'price1': [[3.], [4.]], - 'dense_feature': [[-1.], [4.]], - 'sparse_feature': [['a'], ['x']], - } - model(features) - for var in model.variables: - self.assertIsInstance(var, variables_lib.VariableV1) - variable_names = [var.name for var in model.variables] - self.assertCountEqual([ - 'linear_model/dense_feature_bucketized/weights:0', - 'linear_model/price1/weights:0', - 'linear_model/sparse_feature_embedding/embedding_weights:0', - 'linear_model/sparse_feature_embedding/weights:0', - 'linear_model/bias_weights:0', - ], variable_names) - - def test_fit_and_predict(self): - columns = [fc.numeric_column('a')] - - model = fc.LinearModel(columns) - model.compile( - optimizer=rmsprop.RMSPropOptimizer(1e-3), - loss='binary_crossentropy', - metrics=['accuracy']) - - x = {'a': np.random.random((10, 1))} - y = np.random.randint(0, 2, size=(10, 1)) - model.fit(x, y, epochs=1, batch_size=5) - model.fit(x, y, epochs=1, batch_size=5) - model.evaluate(x, y, batch_size=5) - model.predict(x, batch_size=5) - - def test_static_batch_size_mismatch(self): - price1 = fc.numeric_column('price1') - price2 = fc.numeric_column('price2') - with ops.Graph().as_default(): - features = { - 'price1': [[1.], [5.], [7.]], # batchsize = 3 - 'price2': [[3.], [4.]] # batchsize = 2 - } - with self.assertRaisesRegexp( - ValueError, - r'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string - model = fc.LinearModel([price1, price2]) - model(features) - - def test_subset_of_static_batch_size_mismatch(self): - price1 = fc.numeric_column('price1') - price2 = fc.numeric_column('price2') - price3 = fc.numeric_column('price3') - with ops.Graph().as_default(): - features = { - 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 3 - 'price2': [[3.], [4.]], # batchsize = 2 - 'price3': [[3.], [4.], [5.]] # batchsize = 3 - } - with self.assertRaisesRegexp( - ValueError, - r'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string - model = fc.LinearModel([price1, price2, price3]) - model(features) - - def test_runtime_batch_size_mismatch(self): - price1 = fc.numeric_column('price1') - price2 = fc.numeric_column('price2') - with ops.Graph().as_default(): - features = { - 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 3 - 'price2': [[3.], [4.]] # batchsize = 2 - } - model = fc.LinearModel([price1, price2]) - predictions = model(features) - with _initialized_session() as sess: - with self.assertRaisesRegexp(errors.OpError, - 'must have the same size and shape'): - sess.run( - predictions, feed_dict={features['price1']: [[1.], [5.], [7.]]}) - - def test_runtime_batch_size_matches(self): - price1 = fc.numeric_column('price1') - price2 = fc.numeric_column('price2') - with ops.Graph().as_default(): - features = { - 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 2 - 'price2': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 2 - } - model = fc.LinearModel([price1, price2]) - predictions = model(features) - with _initialized_session() as sess: - sess.run( - predictions, - feed_dict={ - features['price1']: [[1.], [5.]], - features['price2']: [[1.], [5.]], - }) - - @test_util.run_deprecated_v1 - def test_with_1d_sparse_tensor(self): - price = fc.numeric_column('price') - price_buckets = fc.bucketized_column( - price, boundaries=[ - 0., - 10., - 100., - ]) - body_style = fc.categorical_column_with_vocabulary_list( - 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) - - # Provides 1-dim tensor and dense tensor. - features = { - 'price': - constant_op.constant([ - -1., - 12., - ]), - 'body-style': - sparse_tensor.SparseTensor( - indices=((0,), (1,)), - values=('sedan', 'hardtop'), - dense_shape=(2,)), - } - self.assertEqual(1, features['price'].shape.ndims) - self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0]) - - model = fc.LinearModel([price_buckets, body_style]) - net = model(features) - with _initialized_session() as sess: - body_style_var, price_buckets_var, bias = model.variables - - sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) - sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) - sess.run(bias.assign([5.])) - - self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], - self.evaluate(net)) - - @test_util.run_deprecated_v1 - def test_with_1d_unknown_shape_sparse_tensor(self): - price = fc.numeric_column('price') - price_buckets = fc.bucketized_column( - price, boundaries=[ - 0., - 10., - 100., - ]) - body_style = fc.categorical_column_with_vocabulary_list( - 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) - country = fc.categorical_column_with_vocabulary_list( - 'country', vocabulary_list=['US', 'JP', 'CA']) - - # Provides 1-dim tensor and dense tensor. - features = { - 'price': array_ops.placeholder(dtypes.float32), - 'body-style': array_ops.sparse_placeholder(dtypes.string), - 'country': array_ops.placeholder(dtypes.string), - } - self.assertIsNone(features['price'].shape.ndims) - self.assertIsNone(features['body-style'].get_shape().ndims) - - price_data = np.array([-1., 12.]) - body_style_data = sparse_tensor.SparseTensorValue( - indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,)) - country_data = np.array(['US', 'CA']) - - model = fc.LinearModel([price_buckets, body_style, country]) - net = model(features) - body_style_var, _, price_buckets_var, bias = model.variables - with _initialized_session() as sess: - sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) - sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) - sess.run(bias.assign([5.])) - - self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], - sess.run( - net, - feed_dict={ - features['price']: price_data, - features['body-style']: body_style_data, - features['country']: country_data - })) - - @test_util.run_deprecated_v1 - def test_with_rank_0_feature(self): - price = fc.numeric_column('price') - features = { - 'price': constant_op.constant(0), - } - self.assertEqual(0, features['price'].shape.ndims) - - # Static rank 0 should fail - with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'): - model = fc.LinearModel([price]) - model(features) - - # Dynamic rank 0 should fail - features = { - 'price': array_ops.placeholder(dtypes.float32), - } - model = fc.LinearModel([price]) - net = model(features) - self.assertEqual(1, net.shape[1]) - with _initialized_session() as sess: - with self.assertRaisesOpError('Feature .* cannot have rank 0'): - sess.run(net, feed_dict={features['price']: np.array(1)}) - - def test_multiple_linear_models(self): - price = fc.numeric_column('price') - with ops.Graph().as_default(): - features1 = {'price': [[1.], [5.]]} - features2 = {'price': [[2.], [10.]]} - model1 = fc.LinearModel([price]) - model2 = fc.LinearModel([price]) - predictions1 = model1(features1) - predictions2 = model2(features2) - price_var1, bias1 = model1.variables - price_var2, bias2 = model2.variables - with _initialized_session() as sess: - self.assertAllClose([0.], self.evaluate(bias1)) - sess.run(price_var1.assign([[10.]])) - sess.run(bias1.assign([5.])) - self.assertAllClose([[15.], [55.]], self.evaluate(predictions1)) - self.assertAllClose([0.], self.evaluate(bias2)) - sess.run(price_var2.assign([[10.]])) - sess.run(bias2.assign([5.])) - self.assertAllClose([[25.], [105.]], self.evaluate(predictions2)) - - class OldLinearModelTest(test.TestCase): def test_raises_if_empty_feature_columns(self): @@ -4361,36 +3490,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): dense_shape=inputs.dense_shape), self.evaluate(id_weight_pair.id_tensor)) - @test_util.run_deprecated_v1 - def test_linear_model(self): - wire_column = fc.categorical_column_with_vocabulary_file( - key='wire', - vocabulary_file=self._wire_vocabulary_file_name, - vocabulary_size=self._wire_vocabulary_size, - num_oov_buckets=1) - self.assertEqual(4, wire_column.num_buckets) - with ops.Graph().as_default(): - model = fc.LinearModel((wire_column,)) - predictions = model({ - wire_column.name: - sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - }) - wire_var, bias = model.variables - - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) - - self.assertAllClose((0.,), self.evaluate(bias)) - self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), self.evaluate(wire_var)) - self.assertAllClose(((0.,), (0.,)), self.evaluate(predictions)) - self.evaluate(wire_var.assign(((1.,), (2.,), (3.,), (4.,)))) - # 'marlo' -> 2: wire_var[2] = 3 - # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5 - self.assertAllClose(((3.,), (5.,)), self.evaluate(predictions)) - def test_old_linear_model(self): wire_column = fc.categorical_column_with_vocabulary_file( key='wire', @@ -4827,35 +3926,6 @@ class VocabularyListCategoricalColumnTest(test.TestCase): dense_shape=inputs.dense_shape), self.evaluate(id_weight_pair.id_tensor)) - @test_util.run_deprecated_v1 - def test_linear_model(self): - wire_column = fc.categorical_column_with_vocabulary_list( - key='aaa', - vocabulary_list=('omar', 'stringer', 'marlo'), - num_oov_buckets=1) - self.assertEqual(4, wire_column.num_buckets) - with ops.Graph().as_default(): - model = fc.LinearModel((wire_column,)) - predictions = model({ - wire_column.name: - sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - }) - wire_var, bias = model.variables - - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) - - self.assertAllClose((0.,), self.evaluate(bias)) - self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), self.evaluate(wire_var)) - self.assertAllClose(((0.,), (0.,)), self.evaluate(predictions)) - self.evaluate(wire_var.assign(((1.,), (2.,), (3.,), (4.,)))) - # 'marlo' -> 2: wire_var[2] = 3 - # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5 - self.assertAllClose(((3.,), (5.,)), self.evaluate(predictions)) - def test_old_linear_model(self): wire_column = fc.categorical_column_with_vocabulary_list( key='aaa', @@ -5195,32 +4265,6 @@ class IdentityCategoricalColumnTest(test.TestCase): input_shape: (2, 2), })) - @test_util.run_deprecated_v1 - def test_linear_model(self): - column = fc.categorical_column_with_identity(key='aaa', num_buckets=3) - self.assertEqual(3, column.num_buckets) - with ops.Graph().as_default(): - model = fc.LinearModel((column,)) - predictions = model({ - column.name: - sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 2, 1), - dense_shape=(2, 2)) - }) - weight_var, bias = model.variables - - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) - - self.assertAllClose((0.,), self.evaluate(bias)) - self.assertAllClose(((0.,), (0.,), (0.,)), self.evaluate(weight_var)) - self.assertAllClose(((0.,), (0.,)), self.evaluate(predictions)) - self.evaluate(weight_var.assign(((1.,), (2.,), (3.,)))) - # weight_var[0] = 1 - # weight_var[2] + weight_var[1] = 3+2 = 5 - self.assertAllClose(((1.,), (5.,)), self.evaluate(predictions)) - def test_old_linear_model(self): column = fc.categorical_column_with_identity(key='aaa', num_buckets=3) self.assertEqual(3, column.num_buckets) @@ -5513,30 +4557,6 @@ class IndicatorColumnTest(test.TestCase): self.assertAllEqual([[0., 1., 1.]], self.evaluate(indicator_tensor)) - @test_util.run_deprecated_v1 - def test_linear_model(self): - animal = fc.indicator_column( - fc.categorical_column_with_identity('animal', num_buckets=4)) - with ops.Graph().as_default(): - features = { - 'animal': - sparse_tensor.SparseTensor( - indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2]) - } - - model = fc.LinearModel([animal]) - predictions = model(features) - weight_var, _ = model.variables - - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) - - # All should be zero-initialized. - self.assertAllClose([[0.], [0.], [0.], [0.]], self.evaluate(weight_var)) - self.assertAllClose([[0.]], self.evaluate(predictions)) - self.evaluate(weight_var.assign([[1.], [2.], [3.], [4.]])) - self.assertAllClose([[2. + 3.]], self.evaluate(predictions)) - def test_old_linear_model(self): animal = fc.indicator_column( fc.categorical_column_with_identity('animal', num_buckets=4)) @@ -6171,88 +5191,6 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): self.assertAllEqual(embedding_values, self.evaluate(global_vars[0])) self.assertAllEqual(expected_lookups, self.evaluate(embedding_lookup)) - @test_util.run_deprecated_v1 - def test_linear_model(self): - # Inputs. - batch_size = 4 - vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 4), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(batch_size, 5)) - - # Embedding variable. - embedding_dimension = 2 - embedding_shape = (vocabulary_size, embedding_dimension) - zeros_embedding_values = np.zeros(embedding_shape) - - def _initializer(shape, dtype, partition_info=None): - self.assertAllEqual(embedding_shape, shape) - self.assertEqual(dtypes.float32, dtype) - self.assertIsNone(partition_info) - return zeros_embedding_values - - # Build columns. - categorical_column = fc.categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - embedding_column = fc.embedding_column( - categorical_column, - dimension=embedding_dimension, - initializer=_initializer) - - with ops.Graph().as_default(): - model = fc.LinearModel((embedding_column,)) - predictions = model({categorical_column.name: sparse_input}) - expected_var_names = ( - 'linear_model/bias_weights:0', - 'linear_model/aaa_embedding/weights:0', - 'linear_model/aaa_embedding/embedding_weights:0', - ) - self.assertCountEqual( - expected_var_names, - [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) - trainable_vars = { - v.name: v - for v in ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) - } - self.assertCountEqual(expected_var_names, trainable_vars.keys()) - bias = trainable_vars['linear_model/bias_weights:0'] - embedding_weights = trainable_vars[ - 'linear_model/aaa_embedding/embedding_weights:0'] - linear_weights = trainable_vars['linear_model/aaa_embedding/weights:0'] - - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) - - # Predictions with all zero weights. - self.assertAllClose(np.zeros((1,)), self.evaluate(bias)) - self.assertAllClose(zeros_embedding_values, - self.evaluate(embedding_weights)) - self.assertAllClose( - np.zeros((embedding_dimension, 1)), self.evaluate(linear_weights)) - self.assertAllClose(np.zeros((batch_size, 1)), self.evaluate(predictions)) - - # Predictions with all non-zero weights. - self.evaluate( - embedding_weights.assign(( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.) # id 2 - ))) - self.evaluate(linear_weights.assign(((4.,), (6.,)))) - # example 0, ids [2], embedding[0] = [7, 11] - # example 1, ids [0, 1], embedding[1] = mean([1, 2] + [3, 5]) = [2, 3.5] - # example 2, ids [], embedding[2] = [0, 0] - # example 3, ids [1], embedding[3] = [3, 5] - # sum(embeddings * linear_weights) - # = [4*7 + 6*11, 4*2 + 6*3.5, 4*0 + 6*0, 4*3 + 6*5] = [94, 29, 0, 42] - self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), - self.evaluate(predictions)) - @test_util.run_deprecated_v1 def test_input_layer(self): # Inputs. @@ -7088,104 +6026,6 @@ class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase): with _initialized_session() as sess: sess.run([embedding_lookup_a, embedding_lookup_b], feed_dict=feed_dict) - @test_util.run_deprecated_v1 - def test_linear_model(self): - # Inputs. - batch_size = 2 - vocabulary_size = 3 - # -1 values are ignored. - input_a = np.array([ - [2, -1, -1], # example 0, ids [2] - [0, 1, -1] - ]) # example 1, ids [0, 1] - input_b = np.array([ - [0, -1, -1], # example 0, ids [0] - [-1, -1, -1] - ]) # example 1, ids [] - - # Embedding variable. - embedding_dimension = 2 - embedding_shape = (vocabulary_size, embedding_dimension) - zeros_embedding_values = np.zeros(embedding_shape) - - def _initializer(shape, dtype, partition_info=None): - self.assertAllEqual(embedding_shape, shape) - self.assertEqual(dtypes.float32, dtype) - self.assertIsNone(partition_info) - return zeros_embedding_values - - # Build columns. - categorical_column_a = fc.categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - categorical_column_b = fc.categorical_column_with_identity( - key='bbb', num_buckets=vocabulary_size) - embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2( - [categorical_column_a, categorical_column_b], - dimension=embedding_dimension, - initializer=_initializer) - - with ops.Graph().as_default(): - model = fc.LinearModel((embedding_column_a, embedding_column_b)) - predictions = model({ - categorical_column_a.name: input_a, - categorical_column_b.name: input_b - }) - - # Linear weights do not follow the column name. But this is a rare use - # case, and fixing it would add too much complexity to the code. - expected_var_names = ( - 'linear_model/bias_weights:0', - 'linear_model/aaa_shared_embedding/weights:0', - 'aaa_bbb_shared_embedding:0', - 'linear_model/bbb_shared_embedding/weights:0', - ) - self.assertCountEqual( - expected_var_names, - [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) - trainable_vars = { - v.name: v - for v in ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) - } - self.assertCountEqual(expected_var_names, trainable_vars.keys()) - bias = trainable_vars['linear_model/bias_weights:0'] - embedding_weights = trainable_vars['aaa_bbb_shared_embedding:0'] - linear_weights_a = trainable_vars[ - 'linear_model/aaa_shared_embedding/weights:0'] - linear_weights_b = trainable_vars[ - 'linear_model/bbb_shared_embedding/weights:0'] - - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) - - # Predictions with all zero weights. - self.assertAllClose(np.zeros((1,)), self.evaluate(bias)) - self.assertAllClose(zeros_embedding_values, - self.evaluate(embedding_weights)) - self.assertAllClose( - np.zeros((embedding_dimension, 1)), self.evaluate(linear_weights_a)) - self.assertAllClose( - np.zeros((embedding_dimension, 1)), self.evaluate(linear_weights_b)) - self.assertAllClose(np.zeros((batch_size, 1)), self.evaluate(predictions)) - - # Predictions with all non-zero weights. - self.evaluate( - embedding_weights.assign(( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.) # id 2 - ))) - self.evaluate(linear_weights_a.assign(((4.,), (6.,)))) - # example 0, ids [2], embedding[0] = [7, 11] - # example 1, ids [0, 1], embedding[1] = mean([1, 2] + [3, 5]) = [2, 3.5] - # sum(embeddings * linear_weights) - # = [4*7 + 6*11, 4*2 + 6*3.5] = [94, 29] - self.evaluate(linear_weights_b.assign(((3.,), (5.,)))) - # example 0, ids [0], embedding[0] = [1, 2] - # example 1, ids [], embedding[1] = 0, 0] - # sum(embeddings * linear_weights) - # = [3*1 + 5*2, 3*0 +5*0] = [13, 0] - self.assertAllClose([[94. + 13.], [29.]], self.evaluate(predictions)) - @test_util.run_deprecated_v1 def test_serialization(self): @@ -7424,115 +6264,6 @@ class WeightedCategoricalColumnTest(test.TestCase): values=np.array((.5, 1., .1), dtype=np.float32), dense_shape=(2, 2)), self.evaluate(weight_tensor)) - @test_util.run_deprecated_v1 - def test_linear_model(self): - column = fc.weighted_categorical_column( - categorical_column=fc.categorical_column_with_identity( - key='ids', num_buckets=3), - weight_feature_key='values') - with ops.Graph().as_default(): - model = fc.LinearModel((column,)) - predictions = model({ - 'ids': - sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 2, 1), - dense_shape=(2, 2)), - 'values': - sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(.5, 1., .1), - dense_shape=(2, 2)) - }) - weight_var, bias = model.variables - - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) - - self.assertAllClose((0.,), self.evaluate(bias)) - self.assertAllClose(((0.,), (0.,), (0.,)), self.evaluate(weight_var)) - self.assertAllClose(((0.,), (0.,)), self.evaluate(predictions)) - self.evaluate(weight_var.assign(((1.,), (2.,), (3.,)))) - # weight_var[0] * weights[0, 0] = 1 * .5 = .5 - # weight_var[2] * weights[1, 0] + weight_var[1] * weights[1, 1] - # = 3*1 + 2*.1 = 3+.2 = 3.2 - self.assertAllClose(((.5,), (3.2,)), self.evaluate(predictions)) - - def test_linear_model_mismatched_shape(self): - column = fc.weighted_categorical_column( - categorical_column=fc.categorical_column_with_identity( - key='ids', num_buckets=3), - weight_feature_key='values') - with ops.Graph().as_default(): - with self.assertRaisesRegexp(ValueError, - r'Dimensions.*are not compatible'): - model = fc.LinearModel((column,)) - model({ - 'ids': - sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 2, 1), - dense_shape=(2, 2)), - 'values': - sparse_tensor.SparseTensorValue( - indices=((0, 0), (0, 1), (1, 0), (1, 1)), - values=(.5, 11., 1., .1), - dense_shape=(2, 2)) - }) - - def test_linear_model_mismatched_dense_values(self): - column = fc.weighted_categorical_column( - categorical_column=fc.categorical_column_with_identity( - key='ids', num_buckets=3), - weight_feature_key='values') - with ops.Graph().as_default(): - model = fc.LinearModel((column,), sparse_combiner='mean') - predictions = model({ - 'ids': - sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 2, 1), - dense_shape=(2, 2)), - 'values': ((.5,), (1.,)) - }) - # Disabling the constant folding optimizer here since it changes the - # error message differently on CPU and GPU. - config = config_pb2.ConfigProto() - config.graph_options.rewrite_options.constant_folding = ( - rewriter_config_pb2.RewriterConfig.OFF) - with _initialized_session(config): - with self.assertRaisesRegexp(errors.OpError, 'Incompatible shapes'): - self.evaluate(predictions) - - def test_linear_model_mismatched_dense_shape(self): - column = fc.weighted_categorical_column( - categorical_column=fc.categorical_column_with_identity( - key='ids', num_buckets=3), - weight_feature_key='values') - with ops.Graph().as_default(): - model = fc.LinearModel((column,)) - predictions = model({ - 'ids': - sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 2, 1), - dense_shape=(2, 2)), - 'values': ((.5,), (1.,), (.1,)) - }) - weight_var, bias = model.variables - - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) - - self.assertAllClose((0.,), self.evaluate(bias)) - self.assertAllClose(((0.,), (0.,), (0.,)), self.evaluate(weight_var)) - self.assertAllClose(((0.,), (0.,)), self.evaluate(predictions)) - self.evaluate(weight_var.assign(((1.,), (2.,), (3.,)))) - # weight_var[0] * weights[0, 0] = 1 * .5 = .5 - # weight_var[2] * weights[1, 0] + weight_var[1] * weights[1, 1] - # = 3*1 + 2*.1 = 3+.2 = 3.2 - self.assertAllClose(((.5,), (3.2,)), self.evaluate(predictions)) - def test_old_linear_model(self): column = fc.weighted_categorical_column( categorical_column=fc.categorical_column_with_identity( diff --git a/tensorflow/python/feature_column/serialization_test.py b/tensorflow/python/feature_column/serialization_test.py index 881ca0cca5e..69b954022af 100644 --- a/tensorflow/python/feature_column/serialization_test.py +++ b/tensorflow/python/feature_column/serialization_test.py @@ -18,11 +18,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from absl.testing import parameterized from tensorflow.python.feature_column import feature_column_v2 as fc from tensorflow.python.feature_column import serialization -from tensorflow.python.framework import test_util from tensorflow.python.platform import test @@ -113,58 +111,5 @@ class FeatureColumnSerializationTest(test.TestCase): self.assertIs(new_price.normalizer_fn, _custom_fn) -@test_util.run_all_in_graph_and_eager_modes -class LinearModelLayerSerializationTest(test.TestCase, parameterized.TestCase): - - @parameterized.named_parameters( - ('default', 1, 'sum', None, None), - ('trainable', 6, 'mean', True, 'trainable'), - ('not_trainable', 10, 'sum', False, 'frozen')) - def test_get_config(self, units, sparse_combiner, trainable, name): - cols = [fc.numeric_column('a'), - fc.categorical_column_with_identity(key='b', num_buckets=3)] - layer = fc._LinearModelLayer( - cols, units=units, sparse_combiner=sparse_combiner, - trainable=trainable, name=name) - config = layer.get_config() - - self.assertEqual(config['name'], layer.name) - self.assertEqual(config['trainable'], trainable) - self.assertEqual(config['units'], units) - self.assertEqual(config['sparse_combiner'], sparse_combiner) - self.assertLen(config['feature_columns'], 2) - self.assertEqual( - config['feature_columns'][0]['class_name'], 'NumericColumn') - self.assertEqual( - config['feature_columns'][1]['class_name'], 'IdentityCategoricalColumn') - - @parameterized.named_parameters( - ('default', 1, 'sum', None, None), - ('trainable', 6, 'mean', True, 'trainable'), - ('not_trainable', 10, 'sum', False, 'frozen')) - def test_from_config(self, units, sparse_combiner, trainable, name): - cols = [fc.numeric_column('a'), - fc.categorical_column_with_vocabulary_list( - 'b', vocabulary_list=('1', '2', '3')), - fc.categorical_column_with_hash_bucket( - key='c', hash_bucket_size=3)] - orig_layer = fc._LinearModelLayer( - cols, units=units, sparse_combiner=sparse_combiner, - trainable=trainable, name=name) - config = orig_layer.get_config() - - new_layer = fc._LinearModelLayer.from_config(config) - - self.assertEqual(new_layer.name, orig_layer.name) - self.assertEqual(new_layer._units, units) - self.assertEqual(new_layer._sparse_combiner, sparse_combiner) - self.assertEqual(new_layer.trainable, trainable) - self.assertLen(new_layer._feature_columns, 3) - self.assertEqual(new_layer._feature_columns[0].name, 'a') - self.assertEqual( - new_layer._feature_columns[1].vocabulary_list, ('1', '2', '3')) - self.assertEqual(new_layer._feature_columns[2].num_buckets, 3) - - if __name__ == '__main__': test.main()