From 9aa0dcbf282f119ac5f53bbb71af40f432bc3be9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 12 Jul 2017 12:18:38 -0700 Subject: [PATCH] Add shape check for MakeQuantileSummariesOp. PiperOrigin-RevId: 161698801 --- .../boosted_trees/kernels/quantile_ops.cc | 5 + .../contrib/boosted_trees/ops/quantile_ops.cc | 14 +- .../python/kernel_tests/quantile_ops_test.py | 144 ++++++++++-------- 3 files changed, 97 insertions(+), 66 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/kernels/quantile_ops.cc b/tensorflow/contrib/boosted_trees/kernels/quantile_ops.cc index 546f6a30b89..ca24582d41a 100644 --- a/tensorflow/contrib/boosted_trees/kernels/quantile_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/quantile_ops.cc @@ -382,6 +382,11 @@ class MakeQuantileSummariesOp : public OpKernel { sparse_float_feature_values_list[sparse_index].flat(); const auto sparse_indices = sparse_float_feature_indices_list[sparse_index].matrix(); + const auto dense_shape = + sparse_float_feature_shapes_list[sparse_index].flat(); + OP_REQUIRES(context, batch_size == dense_shape(0), + errors::InvalidArgument( + "Sparse column shape doesn't match the batch size.")); QuantileStream stream(epsilon_, batch_size + 1); // Run quantile summary generation. 
const int64 num_sparse_rows = diff --git a/tensorflow/contrib/boosted_trees/ops/quantile_ops.cc b/tensorflow/contrib/boosted_trees/ops/quantile_ops.cc index c778a02fb6c..f03c50dcc5e 100644 --- a/tensorflow/contrib/boosted_trees/ops/quantile_ops.cc +++ b/tensorflow/contrib/boosted_trees/ops/quantile_ops.cc @@ -20,6 +20,7 @@ namespace tensorflow { namespace gtflow { using shape_inference::InferenceContext; +using shape_inference::DimensionHandle; using shape_inference::ShapeHandle; REGISTER_RESOURCE_HANDLE_OP(QuantileStreamResource); @@ -172,7 +173,17 @@ REGISTER_OP("MakeQuantileSummaries") int num_sparse_features; TF_RETURN_IF_ERROR( c->GetAttr("num_sparse_features", &num_sparse_features)); + ShapeHandle example_weights_shape; + int example_weights_index = num_dense_features + num_sparse_features * 3; + TF_RETURN_IF_ERROR(c->WithRank(c->input(example_weights_index), 2, + &example_weights_shape)); for (int i = 0; i < num_dense_features; ++i) { + ShapeHandle dense_feature_shape; + DimensionHandle unused_dim; + TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 2, &dense_feature_shape)); + TF_RETURN_IF_ERROR(c->Merge(c->Dim(dense_feature_shape, 0), + c->Dim(example_weights_shape, 0), + &unused_dim)); c->set_output(i, c->Scalar()); } for (int i = 0; i < num_sparse_features; ++i) { @@ -193,7 +204,8 @@ sparse_float_feature_values: List of rank 1 tensors containing the sparse float feature values. sparse_float_feature_shapes: List of rank 1 tensors containing the shape of the float feature. -example_weights: Rank 1 tensor containing the example weight tensor. +example_weights: Rank 2 (N, 1) tensor of per-example weights. Should match + dense and sparse features shape. dense_summaries: A list of serialized QuantileSummaryState for dense columns. sparse_summaries: A list of serialized QuantileSummaryState for sparse columns. 
)doc"); diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/quantile_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/quantile_ops_test.py index dc4495fa1f9..9ed8b3404f8 100644 --- a/tensorflow/contrib/boosted_trees/python/kernel_tests/quantile_ops_test.py +++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/quantile_ops_test.py @@ -26,9 +26,12 @@ import numpy as np from tensorflow.contrib.boosted_trees.proto.quantiles_pb2 import QuantileConfig from tensorflow.contrib.boosted_trees.python.ops import quantile_ops +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops import resources from tensorflow.python.platform import googletest from tensorflow.python.training import saver @@ -56,12 +59,15 @@ class QuantileBucketsOpTest(test_util.TensorFlowTestCase): | 5 | 1 | 5 | 6 """ - dense_float_tensor_0 = np.array([1, 2, 3, 4, 4, 5]) - sparse_indices_0 = np.array( - [[1, 0], [2, 0], [3, 0], [4, 0], [5, 0]], dtype=np.int64) - sparse_values_0 = np.array([2, 3, 4, 5, 6]) - sparse_shape_0 = np.array([6, 1]) - example_weights = np.array([10, 1, 1, 1, 1, 1]) + dense_float_tensor_0 = constant_op.constant( + [1, 2, 3, 4, 4, 5], dtype=dtypes.float32) + sparse_indices_0 = constant_op.constant( + [[1, 0], [2, 0], [3, 0], [4, 0], [5, 0]], dtype=dtypes.int64) + sparse_values_0 = constant_op.constant( + [2, 3, 4, 5, 6], dtype=dtypes.float32) + sparse_shape_0 = constant_op.constant([6, 1], dtype=dtypes.int64) + example_weights = constant_op.constant( + [10, 1, 1, 1, 1, 1], dtype=dtypes.float32) with self.test_session(): config = self._gen_config(0.33, 3) @@ -78,40 +84,38 @@ class QuantileBucketsOpTest(test_util.TensorFlowTestCase): def testStreamingQuantileBuckets(self): """Sets up the 
quantile summary op test as follows. - Create a batch of 6 examples having a dense and sparse features. - The data looks like this - | Instance | instance weights | Dense 0 - | 0 | 10 | 1 - | 1 | 1 | 2 - | 2 | 1 | 3 - | 3 | 1 | 4 - | 4 | 1 | 4 - | 5 | 1 | 5 + 100 batches of data are added to the accumulator. The batches are in form: + [0 1 .. 99] + [100 101 .. 199] + ... + [9900 9901 .. 9999] + All the batches have 1 for all the example weights. """ - dense_float_tensor_0 = np.array([1, 2, 3, 4, 4, 5]) - example_weights = np.array([10, 1, 1, 1, 1, 1]) - with self.test_session() as sess: accumulator = quantile_ops.QuantileAccumulator( - init_stamp_token=0, num_quantiles=3, epsilon=0.33, name="q1") - + init_stamp_token=0, num_quantiles=3, epsilon=0.01, name="q1") resources.initialize_resources(resources.shared_resources()).run() + weight_placeholder = array_ops.placeholder(dtypes.float32) + dense_placeholder = array_ops.placeholder(dtypes.float32) + update = accumulator.add_summary( + stamp_token=0, + column=dense_placeholder, + example_weights=weight_placeholder) + with self.test_session() as sess: + for i in range(100): + dense_float = np.linspace( + i * 100, (i + 1) * 100 - 1, num=100).reshape(-1, 1) + sess.run(update, { + dense_placeholder: dense_float, + weight_placeholder: np.ones(shape=(100, 1), dtype=np.float32) + }) - are_ready_noflush, _, = (accumulator.get_buckets(stamp_token=0)) - - update = accumulator.add_summary( - stamp_token=0, - column=dense_float_tensor_0, - example_weights=example_weights) - with ops.control_dependencies([are_ready_noflush, update]): - reset = accumulator.flush(stamp_token=0, next_stamp_token=1) - with ops.control_dependencies([reset]): - are_ready_flush, buckets = (accumulator.get_buckets(stamp_token=1)) - buckets, are_ready_noflush, are_ready_flush = (sess.run( - [buckets, are_ready_noflush, are_ready_flush])) - self.assertEqual(False, are_ready_noflush) + with self.test_session() as sess: + 
sess.run(accumulator.flush(stamp_token=0, next_stamp_token=1)) + are_ready_flush, buckets = (accumulator.get_buckets(stamp_token=1)) + buckets, are_ready_flush = (sess.run([buckets, are_ready_flush])) self.assertEqual(True, are_ready_flush) - self.assertAllEqual([1, 3, 5], buckets) + self.assertAllEqual([0, 3335., 6671., 9999.], buckets) def testSaveRestoreBeforeFlush(self): save_dir = os.path.join(self.get_temp_dir(), "save_restore") @@ -124,11 +128,13 @@ class QuantileBucketsOpTest(test_util.TensorFlowTestCase): save = saver.Saver() resources.initialize_resources(resources.shared_resources()).run() - sparse_indices_0 = np.array( - [[1, 0], [2, 0], [3, 0], [4, 0], [5, 0]], dtype=np.int64) - sparse_values_0 = [2.0, 3.0, 4.0, 5.0, 6.0] - sparse_shape_0 = np.array([6, 1]) - example_weights = np.array([10, 1, 1, 1, 1, 1]) + sparse_indices_0 = constant_op.constant( + [[1, 0], [2, 0], [3, 0], [4, 0], [5, 0]], dtype=dtypes.int64) + sparse_values_0 = constant_op.constant( + [2.0, 3.0, 4.0, 5.0, 6.0], dtype=dtypes.float32) + sparse_shape_0 = constant_op.constant([6, 1], dtype=dtypes.int64) + example_weights = constant_op.constant( + [10, 1, 1, 1, 1, 1], dtype=dtypes.float32, shape=[6, 1]) update = accumulator.add_summary( stamp_token=0, column=sparse_tensor.SparseTensor(sparse_indices_0, sparse_values_0, @@ -173,8 +179,10 @@ class QuantileBucketsOpTest(test_util.TensorFlowTestCase): save = saver.Saver() resources.initialize_resources(resources.shared_resources()).run() - example_weights = np.array([10, 1, 1, 1, 1, 1]) - dense_float_tensor_0 = np.array([1, 2, 3, 4, 4, 5]) + example_weights = constant_op.constant( + [10, 1, 1, 1, 1, 1], dtype=dtypes.float32, shape=[6, 1]) + dense_float_tensor_0 = constant_op.constant( + [1, 2, 3, 4, 4, 5], dtype=dtypes.float32, shape=[6, 1]) update = accumulator.add_summary( stamp_token=0, column=dense_float_tensor_0, @@ -206,9 +214,11 @@ class QuantileBucketsOpTest(test_util.TensorFlowTestCase): Creates array dividing range [0, 1] to 1<<16 
elements equally spaced with weight of 1.0. """ - dense_float_tensor_0 = np.array([(1.0 * i) / math.pow( - 2.0, 16) for i in range(0, int(math.pow(2, 16)) + 1)]) - example_weights = np.array([1] * (int(math.pow(2, 16)) + 1)) + dense_float_tensor_0 = constant_op.constant( + [(1.0 * i) / math.pow(2.0, 16) + for i in range(0, int(math.pow(2, 16)) + 1)]) + example_weights = constant_op.constant( + [1] * (int(math.pow(2, 16)) + 1), dtype=dtypes.float32) config = self._gen_config(0.1, 10) with self.test_session(): @@ -228,10 +238,12 @@ class QuantileBucketsOpTest(test_util.TensorFlowTestCase): Creates array dividing range [0, 1] to 1<<16 elements equally spaced with weight same as the value. """ - dense_float_tensor_0 = np.array([(1.0 * i) / math.pow( - 2.0, 16) for i in range(0, int(math.pow(2, 16)) + 1)]) - example_weights = np.array([(1.0 * i) / math.pow(2.0, 16) - for i in range(0, int(math.pow(2, 16)) + 1)]) + dense_float_tensor_0 = constant_op.constant( + [(1.0 * i) / math.pow(2.0, 16) + for i in range(0, int(math.pow(2, 16)) + 1)]) + example_weights = constant_op.constant( + [(1.0 * i) / math.pow(2.0, 16) + for i in range(0, int(math.pow(2, 16)) + 1)]) config = self._gen_config(0.1, 10) @@ -267,28 +279,30 @@ class QuantilesOpTest(test_util.TensorFlowTestCase): Sparse 2: (-inf, 100] """ super(QuantilesOpTest, self).setUp() - self._dense_float_tensor_0 = np.array([[-0.1], [0.4], [3.2], [190]]) - self._dense_float_tensor_1 = np.array([[-1], [-15], [18], [1000]]) - + self._dense_float_tensor_0 = constant_op.constant( + [[-0.1], [0.4], [3.2], [190]], dtype=dtypes.float32) + self._dense_float_tensor_1 = constant_op.constant( + [[-1], [-15], [18], [1000]], dtype=dtypes.float32) # Sparse feature 0 - self._sparse_indices_0 = np.array([[0, 0], [1, 0], [2, 0], [3, 0]]) - self._sparse_values_0 = np.array([-2, 5.5, 16, 17.5]) - self._sparse_shape_0 = np.array([4, 1]) + self._sparse_indices_0 = constant_op.constant([[0, 0], [1, 0], [2, 0], + [3, 0]]) + self._sparse_values_0 = 
constant_op.constant([-2, 5.5, 16, 17.5]) + self._sparse_shape_0 = constant_op.constant([4, 1]) # Sprase feature 1 - self._sparse_indices_1 = np.array([[0, 0], [2, 0], [3, 0]]) - self._sparse_values_1 = np.array([0.1, 3, -3]) - self._sparse_shape_1 = np.array([4, 1]) + self._sparse_indices_1 = constant_op.constant([[0, 0], [2, 0], [3, 0]]) + self._sparse_values_1 = constant_op.constant([0.1, 3, -3]) + self._sparse_shape_1 = constant_op.constant([4, 1]) # Sprase feature 2 - self._sparse_indices_2 = np.array([[1, 0], [3, 0]]) - self._sparse_values_2 = np.array([2, 4]) - self._sparse_shape_2 = np.array([4, 1]) + self._sparse_indices_2 = constant_op.constant([[1, 0], [3, 0]]) + self._sparse_values_2 = constant_op.constant([2, 4], dtype=dtypes.float32) + self._sparse_shape_2 = constant_op.constant([4, 1]) # Quantiles - self._dense_thresholds_0 = np.array([0.4, 5, 190]) - self._dense_thresholds_1 = np.array([-9, 15, 1000]) + self._dense_thresholds_0 = [0.4, 5, 190] + self._dense_thresholds_1 = [-9, 15, 1000] - self._sparse_thresholds_0 = np.array([5, 16, 100]) - self._sparse_thresholds_1 = np.array([2, 5]) - self._sparse_thresholds_2 = np.array([100]) + self._sparse_thresholds_0 = [5, 16, 100] + self._sparse_thresholds_1 = [2, 5] + self._sparse_thresholds_2 = [100] def testDenseFeaturesOnly(self): with self.test_session():