Fix label_smoothing for sigmoid_cross_entropy_loss, and fix the test for softmax_cross_entropy_loss with label_smoothing.

The sigmoid cross entropy with label smoothing was broken, and, worse, the tests for both it and the softmax cross entropy with label smoothing were broken. I've fixed both issues here and added comments walking through the two examples in the tests so that broken tests are not inadvertently checked in again.

Change: 127100213
parent a9e8768274
commit cb8cdf73c3
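For orientation (not part of the commit), here is a minimal NumPy sketch of the two label-smoothing rules the diff below documents: sigmoid (multi-label) targets are smoothed towards 1/2, softmax one-hot targets towards 1/num_classes. The helper names are illustrative only.

  import numpy as np

  def smooth_sigmoid_labels(labels, label_smoothing):
    # Smooth independent (multi-label) targets towards 1/2:
    #   new_labels = labels * (1 - L) + 0.5 * L
    return labels * (1 - label_smoothing) + 0.5 * label_smoothing

  def smooth_softmax_labels(onehot_labels, label_smoothing):
    # Smooth one-hot targets towards 1/num_classes:
    #   new_labels = labels * (1 - L) + L / num_classes
    num_classes = onehot_labels.shape[1]
    return onehot_labels * (1 - label_smoothing) + label_smoothing / num_classes

  print(smooth_sigmoid_labels(np.array([[1.0, 0.0, 1.0]]), 0.1))  # [[0.95 0.05 0.95]]
  print(smooth_softmax_labels(np.array([[1.0, 0.0, 0.0]]), 0.1))  # [[0.9333... 0.0333... 0.0333...]]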
@@ -282,6 +282,15 @@ def sigmoid_cross_entropy(logits, multi_class_labels, weight=1.0,
                            label_smoothing=0, scope=None):
   """Creates a cross-entropy loss using tf.nn.sigmoid_cross_entropy_with_logits.
 
+  `weight` acts as a coefficient for the loss. If a scalar is provided,
+  then the loss is simply scaled by the given value. If `weight` is a
+  tensor of size [`batch_size`], then the loss weights apply to each
+  corresponding sample.
+
+  If `label_smoothing` is nonzero, smooth the labels towards 1/2:
+      new_multiclass_labels = multiclass_labels * (1 - label_smoothing)
+                              + 0.5 * label_smoothing
+
   Args:
     logits: [batch_size, num_classes] logits outputs of the network .
     multi_class_labels: [batch_size, num_classes] target labels in (0, 1).
@@ -292,19 +301,38 @@ def sigmoid_cross_entropy(logits, multi_class_labels, weight=1.0,
   Returns:
     A scalar `Tensor` representing the loss value.
 
   Raises:
     ValueError: If the shape of `predictions` doesn't match that of `targets` or
       if the shape of `weight` is invalid or if `weight` is None.
   """
   with ops.op_scope([logits, multi_class_labels],
                     scope, "sigmoid_cross_entropy_loss"):
-    return _cross_entropy(logits, multi_class_labels, weight,
-                          label_smoothing,
-                          activation_fn=nn.sigmoid_cross_entropy_with_logits)
+    logits.get_shape().assert_is_compatible_with(multi_class_labels.get_shape())
+
+    multi_class_labels = math_ops.cast(multi_class_labels, logits.dtype)
+
+    if label_smoothing > 0:
+      multi_class_labels = (multi_class_labels * (1 - label_smoothing) +
+                            0.5 * label_smoothing)
+
+    losses = nn.sigmoid_cross_entropy_with_logits(logits, multi_class_labels,
+                                                  name="xentropy")
+    return _compute_weighted_loss(losses, weight)
 
 
 def softmax_cross_entropy(logits, onehot_labels, weight=1.0,
                           label_smoothing=0, scope=None):
   """Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits.
+
+  It can scale the loss by weight factor, and smooth the labels.
+  `weight` acts as a coefficient for the loss. If a scalar is provided,
+  then the loss is simply scaled by the given value. If `weight` is a
+  tensor of size [`batch_size`], then the loss weights apply to each
+  corresponding sample.
+
+  If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:
+      new_onehot_labels = onehot_labels * (1 - label_smoothing)
+                          + label_smoothing / num_classes
+
   Args:
     logits: [batch_size, num_classes] logits outputs of the network .
@@ -314,35 +342,6 @@ def softmax_cross_entropy(logits, onehot_labels, weight=1.0,
     label_smoothing: If greater than 0 then smooth the labels.
     scope: the scope for the operations performed in computing the loss.
 
   Returns:
     A scalar `Tensor` representing the loss value.
-  """
-  with ops.op_scope([logits, onehot_labels],
-                    scope, "softmax_cross_entropy_loss"):
-    return _cross_entropy(logits, onehot_labels, weight,
-                          label_smoothing,
-                          activation_fn=nn.softmax_cross_entropy_with_logits)
-
-
-def _cross_entropy(logits, onehot_labels, weight, label_smoothing,
-                   activation_fn):
-  """Adds a CrossEntropyLoss to the losses collection.
-
-  `weight` acts as a coefficient for the loss. If a scalar is provided,
-  then the loss is simply scaled by the given value. If `weight` is a
-  tensor of size [`batch_size`], then the loss weights apply to each
-  corresponding sample.
-
-  Args:
-    logits: [batch_size, num_classes] logits outputs of the network .
-    onehot_labels: [batch_size, num_classes] target one_hot_encoded labels.
-    weight: Coefficients for the loss. If the activation is SIGMOID, then the
-      weight shape must be one of [1], [batch_size] or logits.shape().
-      Otherwise, the weight shape must be either [1] or [batch_size].
-    label_smoothing: If greater than 0 then smooth the labels.
-    activation_fn: The activation function to use. The method must take three
-      arguments, the logits, the labels, and an operation name.
-
-  Returns:
-    A scalar `Tensor` representing the loss value.
 
@@ -350,20 +349,21 @@ def _cross_entropy(logits, onehot_labels, weight, label_smoothing,
     ValueError: If the shape of `predictions` doesn't match that of `targets` or
       if the shape of `weight` is invalid or if `weight` is None.
   """
-  logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())
   if weight is None:
     raise ValueError("`weight` cannot be None")
+  with ops.op_scope([logits, onehot_labels],
+                    scope, "softmax_cross_entropy_loss"):
+    logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())
 
-  onehot_labels = math_ops.cast(onehot_labels, logits.dtype)
+    onehot_labels = math_ops.cast(onehot_labels, logits.dtype)
 
-  if label_smoothing > 0:
-    num_classes = onehot_labels.get_shape()[1].value
-    smooth_positives = 1.0 - label_smoothing
-    smooth_negatives = label_smoothing / num_classes
-    onehot_labels = onehot_labels * smooth_positives + smooth_negatives
+    if label_smoothing > 0:
+      num_classes = math_ops.to_float(array_ops.shape(onehot_labels)[1])
+      smooth_positives = 1.0 - label_smoothing
+      smooth_negatives = label_smoothing / num_classes
+      onehot_labels = onehot_labels * smooth_positives + smooth_negatives
 
-  losses = activation_fn(logits, onehot_labels, name="xentropy")
-  return _compute_weighted_loss(losses, weight)
+    losses = nn.softmax_cross_entropy_with_logits(logits, onehot_labels,
+                                                  name="xentropy")
+    return _compute_weighted_loss(losses, weight)
 
 
 def log_loss(predictions, targets, weight=1.0, epsilon=1e-7, scope=None):
@@ -215,13 +215,23 @@ class SoftmaxCrossEntropyLossTest(tf.test.TestCase):
 
   def testSoftmaxLabelSmoothing(self):
     with self.test_session():
+      # Softmax Cross Entropy Loss is:
+      #   -\sum_i p_i \log q_i
+      # where for a softmax activation
+      #   \log q_i = x_i - \log \sum_j \exp x_j
+      #            = x_i - x_max - \log \sum_j \exp (x_j - x_max)
+      # For our activations, [100, -100, -100] the log partition function becomes
+      #   \log ( exp(0) + exp(-200) + exp(-200) ) = 0
+      # so our log softmaxes become: [0, -200, -200]
+      # so our cross entropy loss is:
+      #   -(1 - L + L/n) * 0 + 400 * L/n = 400 L/n
       logits = tf.constant([[100.0, -100.0, -100.0]])
       labels = tf.constant([[1, 0, 0]])
       label_smoothing = 0.1
-      loss = tf.contrib.losses.sigmoid_cross_entropy(
+      loss = tf.contrib.losses.softmax_cross_entropy(
          logits, labels, label_smoothing=label_smoothing)
-      self.assertEquals(loss.op.name, 'sigmoid_cross_entropy_loss/value')
-      expected_value = 400.0 * label_smoothing / 9.0
+      self.assertEquals(loss.op.name, 'softmax_cross_entropy_loss/value')
+      expected_value = 400.0 * label_smoothing / 3.0
       self.assertAlmostEqual(loss.eval(), expected_value, 3)
 
 
@@ -283,14 +293,39 @@ class SigmoidCrossEntropyLossTest(tf.test.TestCase):
 
   def testSigmoidLabelSmoothingCorrect(self):
     with self.test_session():
       logits = tf.constant([[100.0, -100.0, -100.0]])
-      labels = tf.constant([[1, 0, 0]])
+      labels = tf.constant([[1, 0, 1]])
+      # Sigmoid cross entropy loss is:
+      #   max(x,0) - x*z + log(1 + exp(-abs(x)))
+      # The new labels are:
+      #   z' = z * (1 - L) + 0.5 L
+      #   1 -> 1 - 0.5 L
+      #   0 -> 0.5 L
+      # here we expect:
+      #   1/3 * (100 - 100 * (1 - 0.5 L)  + 0
+      #          + 0  + 100 * (0.5 L)     + 0
+      #          + 0  + 100 * (1 - 0.5 L) + 0)
+      #   = 1/3 * (100 + 50 L)
       label_smoothing = 0.1
       loss = tf.contrib.losses.sigmoid_cross_entropy(
           logits, labels, label_smoothing=label_smoothing)
       self.assertEquals(loss.op.name, 'sigmoid_cross_entropy_loss/value')
-      expected_value = 400.0 * label_smoothing / 9.0
+      expected_value = (100.0 + 50.0 * label_smoothing) / 3.0
       self.assertAlmostEqual(loss.eval(), expected_value, 3)
 
+  def testSigmoidLabelSmoothingEqualsSoftmaxTwoLabel(self):
+    with self.test_session():
+      label_smoothing = 0.1
+      sigmoid_logits = tf.constant([[100.0, -100.0, -100.0]])
+      sigmoid_labels = tf.constant([[1, 0, 1]])
+      sigmoid_loss = tf.contrib.losses.sigmoid_cross_entropy(
+          sigmoid_logits, sigmoid_labels, label_smoothing=label_smoothing)
+
+      softmax_logits = tf.constant([[0.0, 100.0], [100.0, 0.0], [100.0, 0.0]])
+      softmax_labels = tf.constant([[0, 1], [1, 0], [0, 1]])
+      softmax_loss = tf.contrib.losses.softmax_cross_entropy(
+          softmax_logits, softmax_labels, label_smoothing=label_smoothing)
+      self.assertAlmostEqual(sigmoid_loss.eval(), softmax_loss.eval(), 3)
+
 
 class LogLossTest(tf.test.TestCase):
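As a quick sanity check on the arithmetic in the test comments above (again not part of the commit), the two expected values can be reproduced with plain NumPy: 400 * L / 3 for the smoothed softmax test and (100 + 50 * L) / 3 for the smoothed sigmoid test.

  import numpy as np

  def softmax_xent(logits, labels):
    # Cross entropy against a softmax of the logits, averaged over the batch.
    shifted = logits - logits.max(axis=1, keepdims=True)
    log_softmax = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    return float(np.mean(-(labels * log_softmax).sum(axis=1)))

  def sigmoid_xent(logits, labels):
    # Elementwise sigmoid cross entropy, averaged over all elements:
    #   max(x, 0) - x * z + log(1 + exp(-|x|))
    per_elem = np.maximum(logits, 0) - logits * labels + np.log1p(np.exp(-np.abs(logits)))
    return float(np.mean(per_elem))

  L = 0.1
  logits = np.array([[100.0, -100.0, -100.0]])

  # testSoftmaxLabelSmoothing: one-hot [1, 0, 0] smoothed towards 1/num_classes.
  onehot = np.array([[1.0, 0.0, 0.0]])
  smoothed = onehot * (1 - L) + L / 3
  print(softmax_xent(logits, smoothed), 400.0 * L / 3)          # both ~13.333

  # testSigmoidLabelSmoothingCorrect: labels [1, 0, 1] smoothed towards 1/2.
  labels = np.array([[1.0, 0.0, 1.0]])
  smoothed = labels * (1 - L) + 0.5 * L
  print(sigmoid_xent(logits, smoothed), (100.0 + 50.0 * L) / 3)  # both ~35.0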