From 8dc98f73247b12062b7983d2235a0407e9aaf195 Mon Sep 17 00:00:00 2001 From: James Bernardi <33673759+j-bernardi@users.noreply.github.com> Date: Fri, 27 Nov 2020 00:20:33 +0000 Subject: [PATCH] Making label smoothing documentation more helpful At present, label_smoothing documentation is vague for both Categorical and BinaryCrossentropy losses. The CategoricalCrossentropy class' documentation is currently confusing, as smoothing is implemented in a non-intuitive way. It also appears to be referring to only 2 classes (as if it were BinaryCrossentropy). The new documentation states its functionality more generally, with an example. --- tensorflow/python/keras/losses.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/keras/losses.py b/tensorflow/python/keras/losses.py index d739c16f116..24d77ffd254 100644 --- a/tensorflow/python/keras/losses.py +++ b/tensorflow/python/keras/losses.py @@ -640,9 +640,9 @@ class CategoricalCrossentropy(LossFunctionWrapper): default, we assume that `y_pred` encodes a probability distribution. **Note - Using from_logits=True is more numerically stable.** label_smoothing: Float in [0, 1]. When > 0, label values are smoothed, - meaning the confidence on label values are relaxed. e.g. - `label_smoothing=0.2` means that we will use a value of `0.1` for label - `0` and `0.9` for label `1`" + meaning the confidence on label values is relaxed. For example, if + `0.1`, use `0.1 / num_classes` for non-target labels and + `0.9 + 0.1 / num_classes` for target labels. reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss. Default value is `AUTO`. `AUTO` indicates that the reduction option will be determined by the usage context. For almost all cases @@ -1518,7 +1518,9 @@ def categorical_crossentropy(y_true, y_pred: Tensor of predicted targets. from_logits: Whether `y_pred` is expected to be a logits tensor. By default, we assume that `y_pred` encodes a probability distribution. 
- label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. + label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For + example, if `0.1`, use `0.1 / num_classes` for non-target labels + and `0.9 + 0.1 / num_classes` for target labels. Returns: Categorical crossentropy loss value. @@ -1589,7 +1591,9 @@ def binary_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0): y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. from_logits: Whether `y_pred` is expected to be a logits tensor. By default, we assume that `y_pred` encodes a probability distribution. - label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. + label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by + squeezing them towards 0.5. That is, using `1. - 0.5 * label_smoothing` + for the target class and `0.5 * label_smoothing` for the non-target class. Returns: Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.