Automated g4 rollback of changelist 166204326

PiperOrigin-RevId: 166234847
2017-08-23 12:05:25 -07:00 · 2017-08-23 12:05:25 -07:00 · e2a4c81247
commit e2a4c81247
parent 0bfcea4b70
3 changed files with 30 additions and 59 deletions
--- a/tensorflow/contrib/boosted_trees/estimator_batch/trainer_hooks.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/trainer_hooks.py
@@ -114,7 +114,8 @@ class FeatureImportanceSummarySaver(session_run_hook.SessionRunHook):
      summary_writer = SummaryWriterCache.get(output_dir)
      usage_count_summary = Summary(value=[
          Summary.Value(
-              tag="feature_importance/usage_counts", simple_value=usage_count)
+              tag="feature_importance/usage_counts",
+              simple_value=usage_count)
      ])
      usage_fraction_summary = Summary(value=[
          Summary.Value(
@@ -123,14 +124,14 @@ class FeatureImportanceSummarySaver(session_run_hook.SessionRunHook):
      ])
      summary_writer.add_summary(usage_count_summary, global_step)
      summary_writer.add_summary(usage_fraction_summary, global_step)
-      gains_summary = Summary(value=[
-          Summary.Value(tag="feature_importance/gains", simple_value=gain)
-      ])
-      gains_fraction_summary = Summary(value=[
-          Summary.Value(
+      gains_summary = Summary(
+          value=[Summary.Value(
+              tag="feature_importance/gains",
+              simple_value=gain)])
+      gains_fraction_summary = Summary(
+          value=[Summary.Value(
              tag="feature_importance/gains_fraction",
-              simple_value=gain * gain_norm)
-      ])
+              simple_value=gain * gain_norm)])
      summary_writer.add_summary(gains_summary, global_step)
      summary_writer.add_summary(gains_fraction_summary, global_step)

@@ -143,7 +144,8 @@ class FeedFnHook(session_run_hook.SessionRunHook):

  def before_run(self, run_context):
    del run_context  # unused by FeedFnHook.
-    return session_run_hook.SessionRunArgs(fetches=None, feed_dict=self.feed_fn)
+    return session_run_hook.SessionRunArgs(
+        fetches=None, feed_dict=self.feed_fn)


 class StopAfterNTrees(session_run_hook.SessionRunHook):
@@ -168,10 +170,8 @@ class StopAfterNTrees(session_run_hook.SessionRunHook):
    num_finalized_trees = run_values.results["num_finalized_trees"]
    assert num_attempted_trees is not None
    assert num_finalized_trees is not None
-    # Stop when the required number of finalized trees is reached, or when we
-    # try enough times to build a tree but keep failing.
    if (num_finalized_trees >= self._num_trees or
-        num_attempted_trees > 2 * self._num_trees):
+        num_attempted_trees > self._num_trees):
      logging.info("Requesting stop since we have reached %d trees.",
                   num_finalized_trees)
      run_context.request_stop()
--- a/tensorflow/contrib/boosted_trees/kernels/training_ops.cc
+++ b/tensorflow/contrib/boosted_trees/kernels/training_ops.cc
@@ -29,7 +29,6 @@ using boosted_trees::trees::TreeNode;
 using boosted_trees::trees::TreeNodeMetadata;
 using boosted_trees::utils::DropoutUtils;
 using boosted_trees::learner::LearningRateConfig;
-using boosted_trees::trees::Leaf;

 namespace {

@@ -45,11 +44,6 @@ struct SplitCandidate {
  learner::SplitInfo split_info;
 };

-// Checks that the leaf is not empty.
-bool IsLeafWellFormed(const Leaf& leaf) {
-  return leaf.has_sparse_vector() || leaf.has_vector();
-}
-
 // Helper method to update the best split per partition given
 // a current candidate.
 void UpdateBestSplit(
@@ -71,14 +65,6 @@ void UpdateBestSplit(
    return;
  }

-  // If the current node is pure, one of the leafs will be empty, so the split
-  // is meaningless and we should not split.
-  if (!(IsLeafWellFormed(split->split_info.right_child()) &&
-        IsLeafWellFormed(split->split_info.left_child()))) {
-    VLOG(1) << "Split does not actually split anything";
-    return;
-  }
-
  // Take the split if we don't have a candidate yet.
  auto best_split_it = best_splits->find(partition_id);
  if (best_split_it == best_splits->end()) {
--- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py
+++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py
@@ -617,8 +617,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
      learner_config.constraints.min_node_weight = 0
      features = {}
      batch_size = 3
-      features["dense_float"] = array_ops.constant(
-          [0.3, 1.5, 1.1], dtype=dtypes.float32)
+      features["dense_float"] = array_ops.ones([batch_size, 1], dtypes.float32)

      gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel(
          is_chief=True,
@@ -687,25 +686,19 @@ class GbdtTest(test_util.TensorFlowTestCase):
      stamp_token, serialized = model_ops.tree_ensemble_serialize(
          ensemble_handle)
      output.ParseFromString(serialized.eval())
-      self.assertEqual(len(output.trees), 1)
-      # We got 3 nodes: one parent and 2 leafs.
-      self.assertEqual(len(output.trees[0].nodes), 3)
+      self.assertEquals(len(output.trees), 1)
      self.assertAllClose(output.tree_weights, [1])
      self.assertEquals(stamp_token.eval(), 2)

-      # Leafs should have a dense vector of size 5.
-      expected_leaf_1 = [
-          -3.44797062874, -3.44291138649, 13.8490362167, -3.45003509521,
-          -3.45079040527
+      # Leaf should have a dense vector of size 5.
+      expected = [
+          -1.29972088337, -1.38769364357, 3.42812108994, 1.02690505981,
+          -1.75405228138
      ]
-      expected_leaf_2 = [
-          -1.25471234322, -1.31451582909, 1.52006089687, 2.38751673698,
-          -1.32643198967
-      ]
-      self.assertAllClose(expected_leaf_1,
-                          output.trees[0].nodes[1].leaf.vector.value)
-      self.assertAllClose(expected_leaf_2,
-                          output.trees[0].nodes[2].leaf.vector.value)
+      for i in range(learner_config.num_classes):
+        self.assertAlmostEqual(expected[i],
+                               output.trees[0].nodes[1].leaf.vector.value[i],
+                               places=2)

  def testTrainFnMulticlassDiagonalHessian(self):
    """Tests the GBDT train for multiclass diagonal hessian."""
@@ -726,8 +719,7 @@ class GbdtTest(test_util.TensorFlowTestCase):
      learner_config.constraints.min_node_weight = 0
      batch_size = 3
      features = {}
-      features["dense_float"] = array_ops.constant(
-          [0.3, 1.5, 1.1], dtype=dtypes.float32)
+      features["dense_float"] = array_ops.ones([batch_size, 1], dtypes.float32)

      gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel(
          is_chief=True,
@@ -797,24 +789,17 @@ class GbdtTest(test_util.TensorFlowTestCase):
          ensemble_handle)
      output.ParseFromString(serialized.eval())
      self.assertEqual(len(output.trees), 1)
-      # We got 3 nodes: one parent and 2 leafs.
-      self.assertEqual(len(output.trees[0].nodes), 3)
      self.assertAllClose(output.tree_weights, [1])
      self.assertEqual(stamp_token.eval(), 2)

-      # Leafs should have a dense vector of size 5.
-      expected_leaf_1 = [
-          -1.03539931774, -1.01067817211, 17.2976284027, -1.13134455681,
-          -4.50228404999
+      # Leaf should have a dense vector of size 5.
+      expected = [
+          -1.26767396927, -1.13043296337, 4.58542203903, 1.81428349018,
+          -2.43038392067
      ]
-      expected_leaf_2 = [
-          -1.29245102406, -1.13761842251, 2.20420837402, 3.10524249077,
-          -1.62693488598
-      ]
-      self.assertAllClose(expected_leaf_1,
-                          output.trees[0].nodes[1].leaf.vector.value)
-      self.assertAllClose(expected_leaf_2,
-                          output.trees[0].nodes[2].leaf.vector.value)
+      for i in range(learner_config.num_classes):
+        self.assertAlmostEqual(expected[i],
+                               output.trees[0].nodes[1].leaf.vector.value[i])

  def testTrainFnMulticlassTreePerClass(self):
    """Tests the GBDT train for multiclass tree per class strategy."""