diff --git a/tensorflow/contrib/metrics/BUILD b/tensorflow/contrib/metrics/BUILD index 89e84ca535f..e20e494b9eb 100644 --- a/tensorflow/contrib/metrics/BUILD +++ b/tensorflow/contrib/metrics/BUILD @@ -33,19 +33,6 @@ py_test( ], ) -py_test( - name = "confusion_matrix_ops_test", - size = "medium", - srcs = ["python/kernel_tests/confusion_matrix_ops_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":metrics_py", - "//tensorflow:tensorflow_py", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:platform_test", - ], -) - py_test( name = "histogram_ops_test", size = "medium", diff --git a/tensorflow/contrib/metrics/__init__.py b/tensorflow/contrib/metrics/__init__.py index 3ad53655bc1..aaa1b62d5f7 100644 --- a/tensorflow/contrib/metrics/__init__.py +++ b/tensorflow/contrib/metrics/__init__.py @@ -133,7 +133,6 @@ labels and predictions tensors and results in a weighted average of the metric. @@auc_using_histogram @@accuracy -@@confusion_matrix @@aggregate_metrics @@aggregate_metric_map diff --git a/tensorflow/contrib/metrics/python/ops/confusion_matrix_ops.py b/tensorflow/contrib/metrics/python/ops/confusion_matrix_ops.py index dd57f0478be..81bbe935e74 100644 --- a/tensorflow/contrib/metrics/python/ops/confusion_matrix_ops.py +++ b/tensorflow/contrib/metrics/python/ops/confusion_matrix_ops.py @@ -18,93 +18,13 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.framework import tensor_util from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import sparse_tensor -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import sparse_ops +from tensorflow.python.ops import confusion_matrix as cm -def confusion_matrix(predictions, labels, num_classes=None, dtype=dtypes.int32, +def confusion_matrix(labels, predictions, num_classes=None, dtype=dtypes.int32, name=None, weights=None): - """Computes the confusion matrix from predictions and labels. - - Calculate the Confusion Matrix for a pair of prediction and - label 1-D int arrays. - - The matrix rows represent the prediction labels and the columns - represents the real labels. The confusion matrix is always a 2-D array - of shape `[n, n]`, where `n` is the number of valid labels for a given - classification task. Both prediction and labels must be 1-D arrays of - the same shape in order for this function to work. - - If `num_classes` is None, then `num_classes` will be set to the one plus - the maximum value in either predictions or labels. - Class labels are expected to start at 0. E.g., if `num_classes` was - three, then the possible labels would be `[0, 1, 2]`. - - If `weights` is not `None`, then each prediction contributes its - corresponding weight to the total value of the confusion matrix cell. - - For example: - - ```python - tf.contrib.metrics.confusion_matrix([1, 2, 4], [2, 2, 4]) ==> - [[0 0 0 0 0] - [0 0 1 0 0] - [0 0 1 0 0] - [0 0 0 0 0] - [0 0 0 0 1]] - ``` - - Note that the possible labels are assumed to be `[0, 1, 2, 3, 4]`, - resulting in a 5x5 confusion matrix. - - Args: - predictions: A 1-D array representing the predictions for a given - classification. - labels: A 1-D representing the real labels for the classification task. - num_classes: The possible number of labels the classification task can - have. If this value is not provided, it will be calculated - using both predictions and labels array. 
- dtype: Data type of the confusion matrix. - name: Scope name. - weights: An optional `Tensor` whose shape matches `predictions`. - - Returns: - A k X k matrix representing the confusion matrix, where k is the number of - possible labels in the classification task. - - Raises: - ValueError: If both predictions and labels are not 1-D vectors and have - mismatched shapes, or if `weights` is not `None` and its shape doesn't - match `predictions`. - """ - with ops.name_scope(name, 'confusion_matrix', - [predictions, labels, num_classes]) as name: - predictions, labels = tensor_util.remove_squeezable_dimensions( - ops.convert_to_tensor( - predictions, name='predictions'), - ops.convert_to_tensor(labels, name='labels')) - predictions = math_ops.cast(predictions, dtypes.int64) - labels = math_ops.cast(labels, dtypes.int64) - - if num_classes is None: - num_classes = math_ops.maximum(math_ops.reduce_max(predictions), - math_ops.reduce_max(labels)) + 1 - - if weights is not None: - predictions.get_shape().assert_is_compatible_with(weights.get_shape()) - weights = math_ops.cast(weights, dtype) - - shape = array_ops.pack([num_classes, num_classes]) - indices = array_ops.transpose(array_ops.pack([predictions, labels])) - values = (array_ops.ones_like(predictions, dtype) - if weights is None else weights) - cm_sparse = sparse_tensor.SparseTensor( - indices=indices, values=values, shape=math_ops.to_int64(shape)) - zero_matrix = array_ops.zeros(math_ops.to_int32(shape), dtype) - - return sparse_ops.sparse_add(zero_matrix, cm_sparse) + """Deprecated. Use tf.confusion_matrix instead.""" + return cm.confusion_matrix(labels=labels, predictions=predictions, + num_classes=num_classes, dtype=dtype, name=name, + weights=weights) diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index 172f6976cc9..d3f7c9018fe 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -25,7 +25,6 @@ from __future__ import print_function from tensorflow.contrib.framework import deprecated from tensorflow.contrib.framework import tensor_util from tensorflow.contrib.framework.python.ops import variables as contrib_variables -from tensorflow.contrib.metrics.python.ops import confusion_matrix_ops from tensorflow.contrib.metrics.python.ops import set_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -34,6 +33,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import metrics from tensorflow.python.ops import nn from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import state_ops @@ -178,16 +178,10 @@ def streaming_true_positives(predictions, labels, weights=None, either `metrics_collections` or `updates_collections` are not a list or tuple. 
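The shim above keeps `tf.contrib.metrics.confusion_matrix` importable while delegating to the relocated core op, and it also flips the signature to labels-first. A minimal usage sketch, assuming the TF 0.12-era graph/session workflow used by the tests later in this diff (the tensor values are illustrative, taken from the removed docstring example):

```python
import tensorflow as tf

labels = tf.constant([2, 2, 4])
predictions = tf.constant([1, 2, 4])

# Core op (labels first); a 5x5 int32 matrix here because the largest
# class id seen is 4.
cm = tf.confusion_matrix(labels, predictions)

# Deprecated contrib wrapper: after this change it also takes labels first
# and simply forwards to the core implementation.
cm_compat = tf.contrib.metrics.confusion_matrix(labels, predictions)

with tf.Session() as sess:
  print(sess.run(cm))
```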
""" - with variable_scope.variable_scope( - name, 'true_positives', (predictions, labels, weights)): - - predictions = ops.convert_to_tensor(predictions) - labels = ops.convert_to_tensor(labels) - predictions.get_shape().assert_is_compatible_with(labels.get_shape()) - is_true_positive = math_ops.logical_and(math_ops.equal(labels, 1), - math_ops.equal(predictions, 1)) - return _count_condition(is_true_positive, weights, metrics_collections, - updates_collections) + return metrics.true_positives( + predictions=predictions, labels=labels, weights=weights, + metrics_collections=metrics_collections, + updates_collections=updates_collections, name=name) def streaming_true_negatives(predictions, labels, weights=None, @@ -262,16 +256,10 @@ def streaming_false_positives(predictions, labels, weights=None, either `metrics_collections` or `updates_collections` are not a list or tuple. """ - with variable_scope.variable_scope( - name, 'false_positives', (predictions, labels, weights)): - - predictions = ops.convert_to_tensor(predictions) - labels = ops.convert_to_tensor(labels) - predictions.get_shape().assert_is_compatible_with(labels.get_shape()) - is_false_positive = math_ops.logical_and(math_ops.equal(labels, 0), - math_ops.equal(predictions, 1)) - return _count_condition(is_false_positive, weights, metrics_collections, - updates_collections) + return metrics.false_positives( + predictions=predictions, labels=labels, weights=weights, + metrics_collections=metrics_collections, + updates_collections=updates_collections, name=name) def streaming_false_negatives(predictions, labels, weights=None, @@ -303,16 +291,10 @@ def streaming_false_negatives(predictions, labels, weights=None, or if either `metrics_collections` or `updates_collections` are not a list or tuple. """ - with variable_scope.variable_scope( - name, 'false_negatives', (predictions, labels, weights)): - - predictions = ops.convert_to_tensor(predictions) - labels = ops.convert_to_tensor(labels) - predictions.get_shape().assert_is_compatible_with(labels.get_shape()) - is_false_negative = math_ops.logical_and(math_ops.equal(labels, 1), - math_ops.equal(predictions, 0)) - return _count_condition(is_false_negative, weights, metrics_collections, - updates_collections) + return metrics.false_negatives( + predictions=predictions, labels=labels, weights=weights, + metrics_collections=metrics_collections, + updates_collections=updates_collections, name=name) def _broadcast_weights(weights, values): @@ -376,33 +358,9 @@ def streaming_mean(values, weights=None, metrics_collections=None, or if either `metrics_collections` or `updates_collections` are not a list or tuple. 
""" - with variable_scope.variable_scope(name, 'mean', (values, weights)): - values = math_ops.to_float(values) - - total = _create_local('total', shape=[]) - count = _create_local('count', shape=[]) - - if weights is not None: - weights = math_ops.to_float(weights) - values = math_ops.mul(values, weights) - num_values = math_ops.reduce_sum(_broadcast_weights(weights, values)) - else: - num_values = math_ops.to_float(array_ops.size(values)) - - total_compute_op = state_ops.assign_add(total, math_ops.reduce_sum(values)) - count_compute_op = state_ops.assign_add(count, num_values) - - mean = _safe_div(total, count, 'value') - with ops.control_dependencies([total_compute_op, count_compute_op]): - update_op = _safe_div(total, count, 'update_op') - - if metrics_collections: - ops.add_to_collections(metrics_collections, mean) - - if updates_collections: - ops.add_to_collections(updates_collections, update_op) - - return mean, update_op + return metrics.mean( + values=values, weights=weights, metrics_collections=metrics_collections, + updates_collections=updates_collections, name=name) def streaming_mean_tensor(values, weights=None, metrics_collections=None, @@ -445,36 +403,9 @@ def streaming_mean_tensor(values, weights=None, metrics_collections=None, or if either `metrics_collections` or `updates_collections` are not a list or tuple. """ - with variable_scope.variable_scope(name, 'mean', (values, weights)): - total = _create_local('total_tensor', shape=values.get_shape()) - count = _create_local('count_tensor', shape=values.get_shape()) - - num_values = array_ops.ones_like(values) - if weights is not None: - weights = math_ops.to_float(weights) - values = math_ops.mul(values, weights) - num_values = math_ops.mul(num_values, weights) - - total_compute_op = state_ops.assign_add(total, values) - count_compute_op = state_ops.assign_add(count, num_values) - - def compute_mean(total, count, name): - non_zero_count = math_ops.maximum(count, - array_ops.ones_like(count), - name=name) - return math_ops.truediv(total, non_zero_count, name=name) - - mean = compute_mean(total, count, 'value') - with ops.control_dependencies([total_compute_op, count_compute_op]): - update_op = compute_mean(total, count, 'update_op') - - if metrics_collections: - ops.add_to_collections(metrics_collections, mean) - - if updates_collections: - ops.add_to_collections(updates_collections, update_op) - - return mean, update_op + return metrics.mean_tensor( + values=values, weights=weights, metrics_collections=metrics_collections, + updates_collections=updates_collections, name=name) def streaming_accuracy(predictions, labels, weights=None, @@ -520,14 +451,10 @@ def streaming_accuracy(predictions, labels, weights=None, either `metrics_collections` or `updates_collections` are not a list or tuple. 
""" - predictions, labels, weights = _remove_squeezable_dimensions( - predictions, labels, weights=weights) - predictions.get_shape().assert_is_compatible_with(labels.get_shape()) - if labels.dtype != predictions.dtype: - predictions = math_ops.cast(predictions, labels.dtype) - is_correct = math_ops.to_float(math_ops.equal(predictions, labels)) - return streaming_mean(is_correct, weights, metrics_collections, - updates_collections, name or 'accuracy') + return metrics.accuracy( + predictions=predictions, labels=labels, weights=weights, + metrics_collections=metrics_collections, + updates_collections=updates_collections, name=name) def streaming_precision(predictions, labels, weights=None, @@ -572,39 +499,10 @@ def streaming_precision(predictions, labels, weights=None, either `metrics_collections` or `updates_collections` are not a list or tuple. """ - with variable_scope.variable_scope( - name, 'precision', (predictions, labels, weights)): - - predictions, labels, weights = _remove_squeezable_dimensions( - predictions, labels, weights) - predictions.get_shape().assert_is_compatible_with(labels.get_shape()) - - true_positives, true_positives_update_op = streaming_true_positives( - predictions, labels, weights, metrics_collections=None, - updates_collections=None, name=None) - false_positives, false_positives_update_op = streaming_false_positives( - predictions, labels, weights, metrics_collections=None, - updates_collections=None, name=None) - - def compute_precision(name): - return array_ops.where( - math_ops.greater(true_positives + false_positives, 0), - math_ops.div(true_positives, true_positives + false_positives), - 0, - name) - - precision = compute_precision('value') - with ops.control_dependencies([true_positives_update_op, - false_positives_update_op]): - update_op = compute_precision('update_op') - - if metrics_collections: - ops.add_to_collections(metrics_collections, precision) - - if updates_collections: - ops.add_to_collections(updates_collections, update_op) - - return precision, update_op + return metrics.precision( + predictions=predictions, labels=labels, weights=weights, + metrics_collections=metrics_collections, + updates_collections=updates_collections, name=name) def streaming_recall(predictions, labels, weights=None, @@ -647,38 +545,10 @@ def streaming_recall(predictions, labels, weights=None, either `metrics_collections` or `updates_collections` are not a list or tuple. 
""" - with variable_scope.variable_scope( - name, 'recall', (predictions, labels, weights)): - predictions, labels, weights = _remove_squeezable_dimensions( - predictions, labels, weights) - predictions.get_shape().assert_is_compatible_with(labels.get_shape()) - - true_positives, true_positives_update_op = streaming_true_positives( - predictions, labels, weights, metrics_collections=None, - updates_collections=None, name=None) - false_negatives, false_negatives_update_op = streaming_false_negatives( - predictions, labels, weights, metrics_collections=None, - updates_collections=None, name=None) - - def compute_recall(true_positives, false_negatives, name): - return array_ops.where( - math_ops.greater(true_positives + false_negatives, 0), - math_ops.div(true_positives, true_positives + false_negatives), - 0, - name) - - recall = compute_recall(true_positives, false_negatives, 'value') - with ops.control_dependencies([true_positives_update_op, - false_negatives_update_op]): - update_op = compute_recall(true_positives, false_negatives, 'update_op') - - if metrics_collections: - ops.add_to_collections(metrics_collections, recall) - - if updates_collections: - ops.add_to_collections(updates_collections, update_op) - - return recall, update_op + return metrics.recall( + predictions=predictions, labels=labels, weights=weights, + metrics_collections=metrics_collections, + updates_collections=updates_collections, name=name) def _streaming_confusion_matrix_at_thresholds( @@ -903,50 +773,10 @@ def streaming_auc(predictions, labels, weights=None, num_thresholds=200, either `metrics_collections` or `updates_collections` are not a list or tuple. """ - with variable_scope.variable_scope( - name, 'auc', (predictions, labels, weights)): - if curve != 'ROC' and curve != 'PR': - raise ValueError('curve must be either ROC or PR, %s unknown' % - (curve)) - kepsilon = 1e-7 # to account for floating point imprecisions - thresholds = [(i + 1) * 1.0 / (num_thresholds - 1) - for i in range(num_thresholds-2)] - thresholds = [0.0 - kepsilon] + thresholds + [1.0 + kepsilon] - - values, update_ops = _streaming_confusion_matrix_at_thresholds( - predictions, labels, thresholds, weights) - - # Add epsilons to avoid dividing by 0. - epsilon = 1.0e-6 - def compute_auc(tp, fn, tn, fp, name): - """Computes the roc-auc or pr-auc based on confusion counts.""" - recall = math_ops.div(tp + epsilon, tp + fn + epsilon) - if curve == 'ROC': - fp_rate = math_ops.div(fp, fp + tn + epsilon) - x = fp_rate - y = recall - else: # curve == 'PR'. 
- precision = math_ops.div(tp + epsilon, tp + fp + epsilon) - x = recall - y = precision - return math_ops.reduce_sum(math_ops.mul( - x[:num_thresholds - 1] - x[1:], - (y[:num_thresholds - 1] + y[1:]) / 2.), name=name) - - # sum up the areas of all the trapeziums - auc = compute_auc( - values['tp'], values['fn'], values['tn'], values['fp'], 'value') - update_op = compute_auc( - update_ops['tp'], update_ops['fn'], update_ops['tn'], update_ops['fp'], - 'update_op') - - if metrics_collections: - ops.add_to_collections(metrics_collections, auc) - - if updates_collections: - ops.add_to_collections(updates_collections, update_op) - - return auc, update_op + return metrics.auc( + predictions=predictions, labels=labels, weights=weights, + metrics_collections=metrics_collections, num_thresholds=num_thresholds, + curve=curve, updates_collections=updates_collections, name=name) def streaming_specificity_at_sensitivity( @@ -998,60 +828,11 @@ def streaming_specificity_at_sensitivity( `sensitivity` is not between 0 and 1, or if either `metrics_collections` or `updates_collections` are not a list or tuple. """ - if sensitivity < 0 or sensitivity > 1: - raise ValueError('`sensitivity` must be in the range [0, 1].') - - with variable_scope.variable_scope(name, 'specificity_at_sensitivity', - (predictions, labels, weights)): - kepsilon = 1e-7 # to account for floating point imprecisions - thresholds = [(i + 1) * 1.0 / (num_thresholds - 1) - for i in range(num_thresholds-2)] - thresholds = [0.0 - kepsilon] + thresholds + [1.0 - kepsilon] - - values, update_ops = _streaming_confusion_matrix_at_thresholds( - predictions, labels, thresholds, weights) - tp = values['tp'] - fn = values['fn'] - tn = values['tn'] - fp = values['fp'] - - def compute_specificity_at_sensitivity(name): - """Computes the specificity at the given sensitivity. - - Args: - name: The name of the operation. - - Returns: - The specificity using the aggregated values. - """ - sensitivities = math_ops.div(tp, tp + fn + kepsilon) - - # We'll need to use this trick until tf.argmax allows us to specify - # whether we should use the first or last index in case of ties. 
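The trapezoid-summation AUC above is replaced by a call to `metrics.auc`, which exposes the same `num_thresholds` and `curve` ('ROC' or 'PR') knobs that the wrapper forwards. A hedged sketch of the core call (the scores and labels below are made up for illustration):

```python
import tensorflow as tf
from tensorflow.python.ops import metrics

labels = tf.constant([0, 0, 1, 1])
predictions = tf.constant([0.1, 0.4, 0.35, 0.8])  # scores in [0, 1]
auc, update_op = metrics.auc(labels=labels, predictions=predictions,
                             num_thresholds=200, curve='ROC')

with tf.Session() as sess:
  sess.run(tf.local_variables_initializer())
  sess.run(update_op)
  print(sess.run(auc))
```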
- min_val = math_ops.reduce_min(math_ops.abs(sensitivities - sensitivity)) - indices_at_minval = math_ops.equal( - math_ops.abs(sensitivities - sensitivity), min_val) - indices_at_minval = math_ops.to_int64(indices_at_minval) - indices_at_minval = math_ops.cumsum(indices_at_minval) - tf_index = math_ops.argmax(indices_at_minval, 0) - tf_index = math_ops.cast(tf_index, dtypes.int32) - - # Now, we have the implicit threshold, so compute the specificity: - return math_ops.div(tn[tf_index], - tn[tf_index] + fp[tf_index] + kepsilon, - name) - - specificity = compute_specificity_at_sensitivity('value') - with ops.control_dependencies(update_ops.values()): - update_op = compute_specificity_at_sensitivity('update_op') - - if metrics_collections: - ops.add_to_collections(metrics_collections, specificity) - - if updates_collections: - ops.add_to_collections(updates_collections, update_op) - - return specificity, update_op + return metrics.specificity_at_sensitivity( + sensitivity=sensitivity, num_thresholds=num_thresholds, + predictions=predictions, labels=labels, weights=weights, + metrics_collections=metrics_collections, + updates_collections=updates_collections, name=name) def streaming_sensitivity_at_specificity( @@ -1103,44 +884,11 @@ def streaming_sensitivity_at_specificity( `specificity` is not between 0 and 1, or if either `metrics_collections` or `updates_collections` are not a list or tuple. """ - if specificity < 0 or specificity > 1: - raise ValueError('`specificity` must be in the range [0, 1].') - - with variable_scope.variable_scope(name, 'sensitivity_at_specificity', - (predictions, labels, weights)): - kepsilon = 1e-7 # to account for floating point imprecisions - thresholds = [(i + 1) * 1.0 / (num_thresholds - 1) - for i in range(num_thresholds-2)] - thresholds = [0.0 - kepsilon] + thresholds + [1.0 + kepsilon] - - values, update_ops = _streaming_confusion_matrix_at_thresholds( - predictions, labels, thresholds, weights) - tp = values['tp'] - fn = values['fn'] - tn = values['tn'] - fp = values['fp'] - - def compute_sensitivity_at_specificity(name): - specificities = math_ops.div(tn, tn + fp + kepsilon) - tf_index = math_ops.argmin(math_ops.abs(specificities - specificity), 0) - tf_index = math_ops.cast(tf_index, dtypes.int32) - - # Now, we have the implicit threshold, so compute the sensitivity: - return math_ops.div(tp[tf_index], - tp[tf_index] + fn[tf_index] + kepsilon, - name) - - sensitivity = compute_sensitivity_at_specificity('value') - with ops.control_dependencies(update_ops.values()): - update_op = compute_sensitivity_at_specificity('update_op') - - if metrics_collections: - ops.add_to_collections(metrics_collections, sensitivity) - - if updates_collections: - ops.add_to_collections(updates_collections, update_op) - - return sensitivity, update_op + return metrics.sensitivity_at_specificity( + specificity=specificity, num_thresholds=num_thresholds, + predictions=predictions, labels=labels, weights=weights, + metrics_collections=metrics_collections, + updates_collections=updates_collections, name=name) def streaming_precision_at_thresholds(predictions, labels, thresholds, @@ -1187,29 +935,11 @@ def streaming_precision_at_thresholds(predictions, labels, thresholds, either `metrics_collections` or `updates_collections` are not a list or tuple. 
""" - with variable_scope.variable_scope(name, 'precision_at_thresholds', - (predictions, labels, weights)): - values, update_ops = _streaming_confusion_matrix_at_thresholds( - predictions, labels, thresholds, weights, includes=('tp', 'fp')) - tp = values['tp'] - fp = values['fp'] - - # Avoid division by zero. - epsilon = 1e-7 - def compute_precision(name): - return math_ops.div(tp, epsilon + tp + fp, name='precision_' + name) - - precision = compute_precision('value') - with ops.control_dependencies(update_ops.values()): - update_op = compute_precision('update_op') - - if metrics_collections: - ops.add_to_collections(metrics_collections, precision) - - if updates_collections: - ops.add_to_collections(updates_collections, update_op) - - return precision, update_op + return metrics.precision_at_thresholds( + thresholds=thresholds, + predictions=predictions, labels=labels, weights=weights, + metrics_collections=metrics_collections, + updates_collections=updates_collections, name=name) def streaming_recall_at_thresholds(predictions, labels, thresholds, @@ -1253,29 +983,11 @@ def streaming_recall_at_thresholds(predictions, labels, thresholds, either `metrics_collections` or `updates_collections` are not a list or tuple. """ - with variable_scope.variable_scope(name, 'recall_at_thresholds', - (predictions, labels, weights)): - values, update_ops = _streaming_confusion_matrix_at_thresholds( - predictions, labels, thresholds, weights, includes=('tp', 'fn')) - tp = values['tp'] - fn = values['fn'] - - # Avoid division by zero. - epsilon = 1e-7 - def compute_recall(name): - return math_ops.div(tp, epsilon + tp + fn, name='recall_' + name) - - recall = compute_recall('value') - with ops.control_dependencies(update_ops.values()): - update_op = compute_recall('update_op') - - if metrics_collections: - ops.add_to_collections(metrics_collections, recall) - - if updates_collections: - ops.add_to_collections(updates_collections, update_op) - - return recall, update_op + return metrics.recall_at_thresholds( + thresholds=thresholds, + predictions=predictions, labels=labels, weights=weights, + metrics_collections=metrics_collections, + updates_collections=updates_collections, name=name) def _at_k_name(name, k=None, class_id=None): @@ -1413,25 +1125,11 @@ def streaming_sparse_recall_at_k(predictions, `predictions`, or if either `metrics_collections` or `updates_collections` are not a list or tuple. 
""" - default_name = _at_k_name('recall', k, class_id=class_id) - with ops.name_scope(name, default_name, (predictions, labels)) as scope: - _, top_k_idx = nn.top_k(predictions, k) - top_k_idx = math_ops.to_int64(top_k_idx) - tp, tp_update = _streaming_sparse_true_positive_at_k( - predictions_idx=top_k_idx, labels=labels, k=k, class_id=class_id, - weights=weights) - fn, fn_update = _streaming_sparse_false_negative_at_k( - predictions_idx=top_k_idx, labels=labels, k=k, class_id=class_id, - weights=weights) - - metric = math_ops.div(tp, math_ops.add(tp, fn), name=scope) - update = math_ops.div( - tp_update, math_ops.add(tp_update, fn_update), name='update') - if metrics_collections: - ops.add_to_collections(metrics_collections, metric) - if updates_collections: - ops.add_to_collections(updates_collections, update) - return metric, update + return metrics.recall_at_k( + k=k, class_id=class_id, + predictions=predictions, labels=labels, weights=weights, + metrics_collections=metrics_collections, + updates_collections=updates_collections, name=name) def _streaming_sparse_precision_at_k(top_k_idx, @@ -1575,19 +1273,11 @@ def streaming_sparse_precision_at_k(predictions, `predictions`, or if either `metrics_collections` or `updates_collections` are not a list or tuple. """ - default_name = _at_k_name('precision', k, class_id=class_id) - with ops.name_scope(name, default_name, - (predictions, labels, weights)) as scope: - _, top_k_idx = nn.top_k(predictions, k) - return _streaming_sparse_precision_at_k( - top_k_idx=top_k_idx, - labels=labels, - k=k, - class_id=class_id, - weights=weights, - metrics_collections=metrics_collections, - updates_collections=updates_collections, - name=scope) + return metrics.sparse_precision_at_k( + k=k, class_id=class_id, + predictions=predictions, labels=labels, weights=weights, + metrics_collections=metrics_collections, + updates_collections=updates_collections, name=name) # TODO(ptucker): Validate range of values in labels? @@ -1918,50 +1608,10 @@ def streaming_sparse_average_precision_at_k(predictions, update: `Operation` that increments variables appropriately, and whose value matches `metric`. """ - default_name = _at_k_name('average_precision', k) - with ops.name_scope(name, default_name, (predictions, labels)) as scope: - # Calculate per-example average precision, and apply weights. - average_precision = sparse_average_precision_at_k( - predictions=predictions, labels=labels, k=k) - if weights is not None: - weights = math_ops.to_double(weights) - average_precision = math_ops.mul(average_precision, weights) - - # Create accumulation variables and update ops for max average precision and - # total average precision. - with ops.name_scope(None, 'max', (average_precision,)) as max_scope: - # `max` is the max possible precision. Since max for any row is 1.0: - # - For the unweighted case, this is just the number of rows. - # - For the weighted case, it's the sum of the weights broadcast across - # `average_precision` rows. - max_var = contrib_variables.local_variable( - array_ops.zeros([], dtype=dtypes.float64), name=max_scope) - if weights is None: - batch_max = math_ops.to_double( - array_ops.size(average_precision, name='batch_max')) - else: - # TODO(ptucker): More efficient way to broadcast? 
- broadcast_weights = math_ops.mul( - weights, array_ops.ones_like(average_precision), - name='broadcast_weights') - batch_max = math_ops.reduce_sum(broadcast_weights, name='batch_max') - max_update = state_ops.assign_add(max_var, batch_max, name='update') - with ops.name_scope(None, 'total', (average_precision,)) as total_scope: - total_var = contrib_variables.local_variable( - array_ops.zeros([], dtype=dtypes.float64), name=total_scope) - batch_total = math_ops.reduce_sum(average_precision, name='batch_total') - total_update = state_ops.assign_add(total_var, batch_total, name='update') - - # Divide total by max to get mean, for both vars and the update ops. - mean_average_precision = _safe_scalar_div(total_var, max_var, name='mean') - update = _safe_scalar_div(total_update, max_update, name=scope) - - if metrics_collections: - ops.add_to_collections(metrics_collections, mean_average_precision) - if updates_collections: - ops.add_to_collections(updates_collections, update) - - return mean_average_precision, update + return metrics.sparse_average_precision_at_k( + k=k, predictions=predictions, labels=labels, weights=weights, + metrics_collections=metrics_collections, + updates_collections=updates_collections, name=name) def _select_class_id(ids, selected_id): @@ -2329,12 +1979,10 @@ def streaming_mean_absolute_error(predictions, labels, weights=None, either `metrics_collections` or `updates_collections` are not a list or tuple. """ - predictions, labels, weights = _remove_squeezable_dimensions( - predictions, labels, weights) - predictions.get_shape().assert_is_compatible_with(labels.get_shape()) - absolute_errors = math_ops.abs(predictions - labels) - return streaming_mean(absolute_errors, weights, metrics_collections, - updates_collections, name or 'mean_absolute_error') + return metrics.mean_absolute_error( + predictions=predictions, labels=labels, weights=weights, + metrics_collections=metrics_collections, + updates_collections=updates_collections, name=name) def streaming_mean_relative_error(predictions, labels, normalizer, weights=None, @@ -2382,19 +2030,10 @@ def streaming_mean_relative_error(predictions, labels, normalizer, weights=None, either `metrics_collections` or `updates_collections` are not a list or tuple. """ - predictions, labels, weights = _remove_squeezable_dimensions( - predictions, labels, weights) - predictions.get_shape().assert_is_compatible_with(labels.get_shape()) - - predictions, normalizer = tensor_util.remove_squeezable_dimensions( - predictions, normalizer) - predictions.get_shape().assert_is_compatible_with(normalizer.get_shape()) - relative_errors = array_ops.where( - math_ops.equal(normalizer, 0.0), - array_ops.zeros_like(labels), - math_ops.div(math_ops.abs(labels - predictions), normalizer)) - return streaming_mean(relative_errors, weights, metrics_collections, - updates_collections, name or 'mean_relative_error') + return metrics.mean_relative_error( + normalizer=normalizer, predictions=predictions, labels=labels, + weights=weights, metrics_collections=metrics_collections, + updates_collections=updates_collections, name=name) def streaming_mean_squared_error(predictions, labels, weights=None, @@ -2441,12 +2080,10 @@ def streaming_mean_squared_error(predictions, labels, weights=None, either `metrics_collections` or `updates_collections` are not a list or tuple. 
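The regression-style metrics converted in this stretch (`mean_absolute_error`, `mean_relative_error`, and, just below, `mean_squared_error` / `root_mean_squared_error`) all become one-line forwards to their `tf.metrics` counterparts. A small sketch of the squared-error pair (data is illustrative):

```python
import tensorflow as tf
from tensorflow.python.ops import metrics

labels = tf.constant([1.0, 2.0, 3.0])
predictions = tf.constant([1.5, 2.0, 2.0])

mse, mse_update = metrics.mean_squared_error(
    labels=labels, predictions=predictions)
rmse, rmse_update = metrics.root_mean_squared_error(
    labels=labels, predictions=predictions)

with tf.Session() as sess:
  sess.run(tf.local_variables_initializer())
  sess.run([mse_update, rmse_update])
  print(sess.run([mse, rmse]))  # ~0.417 and its square root
```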
""" - predictions, labels, weights = _remove_squeezable_dimensions( - predictions, labels, weights) - predictions.get_shape().assert_is_compatible_with(labels.get_shape()) - squared_error = math_ops.square(labels - predictions) - return streaming_mean(squared_error, weights, metrics_collections, - updates_collections, name or 'mean_squared_error') + return metrics.mean_squared_error( + predictions=predictions, labels=labels, weights=weights, + metrics_collections=metrics_collections, + updates_collections=updates_collections, name=name) def streaming_root_mean_squared_error(predictions, labels, weights=None, @@ -2493,24 +2130,10 @@ def streaming_root_mean_squared_error(predictions, labels, weights=None, either `metrics_collections` or `updates_collections` are not a list or tuple. """ - predictions, labels, weights = _remove_squeezable_dimensions( - predictions, labels, weights) - predictions.get_shape().assert_is_compatible_with(labels.get_shape()) - value_tensor, update_op = streaming_mean_squared_error( - predictions, labels, weights, None, None, - name or 'root_mean_squared_error') - - root_mean_squared_error = math_ops.sqrt(value_tensor) - with ops.control_dependencies([update_op]): - update_op = math_ops.sqrt(update_op) - - if metrics_collections: - ops.add_to_collections(metrics_collections, root_mean_squared_error) - - if updates_collections: - ops.add_to_collections(updates_collections, update_op) - - return root_mean_squared_error, update_op + return metrics.root_mean_squared_error( + predictions=predictions, labels=labels, weights=weights, + metrics_collections=metrics_collections, + updates_collections=updates_collections, name=name) def streaming_covariance(predictions, @@ -2825,12 +2448,10 @@ def streaming_percentage_less(values, threshold, weights=None, or if either `metrics_collections` or `updates_collections` are not a list or tuple. """ - is_below_threshold = math_ops.to_float(math_ops.less(values, threshold)) - return streaming_mean(is_below_threshold, - weights, - metrics_collections, - updates_collections, - name or 'percentage_below_threshold') + return metrics.percentage_below( + values=values, threshold=threshold, weights=weights, + metrics_collections=metrics_collections, + updates_collections=updates_collections, name=name) def streaming_mean_iou(predictions, @@ -2881,65 +2502,10 @@ def streaming_mean_iou(predictions, either `metrics_collections` or `updates_collections` are not a list or tuple. """ - with variable_scope.variable_scope( - name, 'mean_iou', (predictions, labels, weights)): - # Check if shape is compatible. - predictions.get_shape().assert_is_compatible_with(labels.get_shape()) - - # Local variable to accumulate the predictions in the confusion matrix. - cm_dtype = dtypes.int64 if weights is not None else dtypes.float64 - total_cm = _create_local('total_confusion_matrix', - shape=[num_classes, num_classes], dtype=cm_dtype) - - # Cast the type to int64 required by confusion_matrix_ops. - predictions = math_ops.to_int64(predictions) - labels = math_ops.to_int64(labels) - num_classes = math_ops.to_int64(num_classes) - - # Flatten the input if its rank > 1. 
- predictions_rank = predictions.get_shape().ndims - if predictions_rank > 1: - predictions = array_ops.reshape(predictions, [-1]) - - labels_rank = labels.get_shape().ndims - if labels_rank > 1: - labels = array_ops.reshape(labels, [-1]) - - if weights is not None: - weights_rank = weights.get_shape().ndims - if weights_rank > 1: - weights = array_ops.reshape(weights, [-1]) - - # Accumulate the prediction to current confusion matrix. - current_cm = confusion_matrix_ops.confusion_matrix( - predictions, labels, num_classes, weights=weights, dtype=cm_dtype) - update_op = state_ops.assign_add(total_cm, current_cm) - - def compute_mean_iou(name): - """Compute the mean intersection-over-union via the confusion matrix.""" - sum_over_row = math_ops.to_float(math_ops.reduce_sum(total_cm, 0)) - sum_over_col = math_ops.to_float(math_ops.reduce_sum(total_cm, 1)) - cm_diag = math_ops.to_float(array_ops.diag_part(total_cm)) - denominator = sum_over_row + sum_over_col - cm_diag - - # If the value of the denominator is 0, set it to 1 to avoid - # zero division. - denominator = array_ops.where( - math_ops.greater(denominator, 0), - denominator, - array_ops.ones_like(denominator)) - iou = math_ops.div(cm_diag, denominator) - return math_ops.reduce_mean(iou, name=name) - - mean_iou = compute_mean_iou('mean_iou') - - if metrics_collections: - ops.add_to_collections(metrics_collections, mean_iou) - - if updates_collections: - ops.add_to_collections(updates_collections, update_op) - - return mean_iou, update_op + return metrics.mean_iou( + num_classes=num_classes, predictions=predictions, labels=labels, + weights=weights, metrics_collections=metrics_collections, + updates_collections=updates_collections, name=name) def _next_array_size(required_size, growth_factor=1.5): diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index a97898d1f5e..0d305352077 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -39,6 +39,7 @@ py_library( ":platform", ":platform_test", ":summary", + ":metrics", ":layers", ":training", ":ops", @@ -1312,6 +1313,39 @@ py_library( ], ) +py_library( + name = "confusion_matrix", + srcs = ["ops/confusion_matrix.py"], + srcs_version = "PY2AND3", + deps = [ + ":array_ops", + ":control_flow_ops", + ":framework", + ":math_ops", + ":sparse_ops", + ], +) + +py_library( + name = "metrics", + srcs = ["ops/metrics.py"], + srcs_version = "PY2AND3", + deps = [ + ":array_ops", + ":check_ops", + ":confusion_matrix", + ":control_flow_ops", + ":framework", + ":math_ops", + ":nn", + ":sets", + ":sparse_ops", + ":state_ops", + ":variable_scope", + ":variables", + ], +) + py_library( name = "special_math_ops", srcs = ["ops/special_math_ops.py"], @@ -1334,6 +1368,7 @@ py_library( ":array_ops", ":check_ops", ":clip_ops", + ":confusion_matrix", ":control_flow_ops", ":data_flow_grad", ":data_flow_ops", diff --git a/tensorflow/python/__init__.py b/tensorflow/python/__init__.py index e323c9b6a4d..8f94fb4c9dd 100644 --- a/tensorflow/python/__init__.py +++ b/tensorflow/python/__init__.py @@ -83,6 +83,7 @@ from tensorflow.python.ops.standard_ops import * # Bring in subpackages. 
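The confusion-matrix-based `streaming_mean_iou` shown above now defers to `metrics.mean_iou`, with `num_classes` passed through unchanged. A minimal sketch under the same TF 0.12-era session workflow as the earlier examples (label and prediction values are illustrative):

```python
import tensorflow as tf
from tensorflow.python.ops import metrics

labels = tf.constant([0, 1, 2, 2])
predictions = tf.constant([0, 1, 1, 2])
miou, update_op = metrics.mean_iou(labels=labels, predictions=predictions,
                                   num_classes=3)

with tf.Session() as sess:
  sess.run(tf.local_variables_initializer())
  sess.run(update_op)       # accumulates the confusion matrix
  print(sess.run(miou))     # mean IoU over the 3 classes
```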
from tensorflow.python.layers import layers +from tensorflow.python.ops import metrics from tensorflow.python.ops import nn from tensorflow.python.ops import resources from tensorflow.python.ops import sdca_ops as sdca @@ -118,6 +119,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import framework_lib from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops +from tensorflow.python.ops import confusion_matrix as confusion_matrix_m from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import functional_ops from tensorflow.python.ops import histogram_ops @@ -220,6 +222,7 @@ _allowed_symbols.extend([ 'image', 'logging', 'losses', + 'metrics', 'newaxis', 'nn', 'python_io', @@ -246,10 +249,10 @@ _allowed_symbols.extend([ # referenced in the whitelist. remove_undocumented(__name__, _allowed_symbols, [framework_lib, array_ops, client_lib, check_ops, - compat, constant_op, control_flow_ops, functional_ops, - histogram_ops, io_ops, losses, math_ops, nn, - resource_loader, resources, sets, script_ops, session_ops, - sparse_ops, state_ops, string_ops, summary, + compat, constant_op, control_flow_ops, confusion_matrix_m, + functional_ops, histogram_ops, io_ops, losses, math_ops, + metrics, nn, resource_loader, resources, sets, script_ops, + session_ops, sparse_ops, state_ops, string_ops, summary, tensor_array_ops, train, layers]) # Special dunders that we choose to export: diff --git a/tensorflow/python/framework/gen_docs_combined.py b/tensorflow/python/framework/gen_docs_combined.py index c2d97b3496d..3ede5403e1f 100644 --- a/tensorflow/python/framework/gen_docs_combined.py +++ b/tensorflow/python/framework/gen_docs_combined.py @@ -260,6 +260,7 @@ EXCLUDE = frozenset(["tf.contrib.learn.monitors.NanLossDuringTrainingError", "tf.contrib.framework.get_global_step", "tf.contrib.learn.NanLossDuringTrainingError", "tf.contrib.layers.stack", + "tf.confusion_matrix", "tf.nn.rnn_cell.RNNCell", "tf.nn.rnn_cell.BasicRNNCell", "tf.nn.rnn_cell.BasicLSTMCell", diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 3185b1fd064..e825e593250 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -1385,6 +1385,21 @@ tf_py_test( additional_deps = ["//tensorflow:tensorflow_py"], ) +tf_py_test( + name = "metrics_test", + size = "small", + srcs = ["metrics_test.py"], + additional_deps = ["//tensorflow:tensorflow_py"], + shard_count = 3, +) + +tf_py_test( + name = "confusion_matrix_test", + size = "small", + srcs = ["confusion_matrix_test.py"], + additional_deps = ["//tensorflow:tensorflow_py"], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/metrics/python/kernel_tests/confusion_matrix_ops_test.py b/tensorflow/python/kernel_tests/confusion_matrix_test.py similarity index 88% rename from tensorflow/contrib/metrics/python/kernel_tests/confusion_matrix_ops_test.py rename to tensorflow/python/kernel_tests/confusion_matrix_test.py index a81ef6f9a2a..ff1231de42a 100644 --- a/tensorflow/contrib/metrics/python/kernel_tests/confusion_matrix_ops_test.py +++ b/tensorflow/python/kernel_tests/confusion_matrix_test.py @@ -28,8 +28,8 @@ class ConfusionMatrixTest(tf.test.TestCase): def _testConfMatrix(self, predictions, labels, truth, weights=None): with self.test_session(): dtype = predictions.dtype - ans = tf.contrib.metrics.confusion_matrix( - predictions, labels, dtype=dtype, weights=weights) + ans = tf.confusion_matrix( + 
labels, predictions, dtype=dtype, weights=weights) tf_ans = ans.eval() self.assertAllClose(tf_ans, truth, atol=1e-10) self.assertEqual(tf_ans.dtype, dtype) @@ -69,8 +69,8 @@ class ConfusionMatrixTest(tf.test.TestCase): lab = tf.concat(0, [tf.zeros([20], dtype=tf_dtype), tf.ones([20], dtype=tf_dtype)]) - cm = tf.contrib.metrics.confusion_matrix( - data, lab, dtype=tf_dtype, num_classes=2) + cm = tf.confusion_matrix( + lab, data, dtype=tf_dtype, num_classes=2) d, l, cm_out = sess.run([data, lab, cm], {m_neg: 0.0, m_pos: 1.0, @@ -157,28 +157,28 @@ class ConfusionMatrixTest(tf.test.TestCase): predictions = np.asarray([[1, 2, 3]]) labels = np.asarray([1, 2, 3]) self.assertRaisesRegexp(ValueError, "an not squeeze dim", - tf.contrib.metrics.confusion_matrix, predictions, - labels) + tf.confusion_matrix, + predictions, labels) predictions = np.asarray([1, 2, 3]) labels = np.asarray([[1, 2, 3]]) self.assertRaisesRegexp(ValueError, "an not squeeze dim", - tf.contrib.metrics.confusion_matrix, predictions, - labels) + tf.confusion_matrix, + predictions, labels) def testInputDifferentSize(self): predictions = np.asarray([1, 2, 3]) labels = np.asarray([1, 2]) self.assertRaisesRegexp(ValueError, "must be equal", - tf.contrib.metrics.confusion_matrix, predictions, - labels) + tf.confusion_matrix, + predictions, labels) def testOutputIsInt32(self): predictions = np.arange(2) labels = np.arange(2) with self.test_session(): - cm = tf.contrib.metrics.confusion_matrix( - predictions, labels, dtype=dtypes.int32) + cm = tf.confusion_matrix( + labels, predictions, dtype=dtypes.int32) tf_cm = cm.eval() self.assertEqual(tf_cm.dtype, np.int32) @@ -186,8 +186,8 @@ class ConfusionMatrixTest(tf.test.TestCase): predictions = np.arange(2) labels = np.arange(2) with self.test_session(): - cm = tf.contrib.metrics.confusion_matrix( - predictions, labels, dtype=dtypes.int64) + cm = tf.confusion_matrix( + labels, predictions, dtype=dtypes.int64) tf_cm = cm.eval() self.assertEqual(tf_cm.dtype, np.int64) diff --git a/tensorflow/python/kernel_tests/metrics_test.py b/tensorflow/python/kernel_tests/metrics_test.py new file mode 100644 index 00000000000..28b1811805b --- /dev/null +++ b/tensorflow/python/kernel_tests/metrics_test.py @@ -0,0 +1,3360 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for metrics.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin +import tensorflow as tf +from tensorflow.python.ops import metrics + +NAN = float('nan') + + +def _enqueue_vector(sess, queue, values, shape=None): + if not shape: + shape = (1, len(values)) + dtype = queue.dtypes[0] + sess.run(queue.enqueue(tf.constant(values, dtype=dtype, shape=shape))) + + +def _binary_2d_label_to_sparse_value(labels): + """Convert dense 2D binary indicator tensor to sparse tensor. + + Only 1 values in `labels` are included in result. + + Args: + labels: Dense 2D binary indicator tensor. + + Returns: + `SparseTensorValue` whose values are indices along the last dimension of + `labels`. + """ + indices = [] + values = [] + batch = 0 + for row in labels: + label = 0 + xi = 0 + for x in row: + if x == 1: + indices.append([batch, xi]) + values.append(label) + xi += 1 + else: + assert x == 0 + label += 1 + batch += 1 + shape = [len(labels), len(labels[0])] + return tf.SparseTensorValue( + np.array(indices, np.int64), + np.array(values, np.int64), + np.array(shape, np.int64)) + + +def _binary_2d_label_to_sparse(labels): + """Convert dense 2D binary indicator tensor to sparse tensor. + + Only 1 values in `labels` are included in result. + + Args: + labels: Dense 2D binary indicator tensor. + + Returns: + `SparseTensor` whose values are indices along the last dimension of + `labels`. + """ + return tf.SparseTensor.from_value(_binary_2d_label_to_sparse_value(labels)) + + +def _binary_3d_label_to_sparse_value(labels): + """Convert dense 3D binary indicator tensor to sparse tensor. + + Only 1 values in `labels` are included in result. + + Args: + labels: Dense 2D binary indicator tensor. + + Returns: + `SparseTensorValue` whose values are indices along the last dimension of + `labels`. + """ + indices = [] + values = [] + for d0, labels_d0 in enumerate(labels): + for d1, labels_d1 in enumerate(labels_d0): + d2 = 0 + for class_id, label in enumerate(labels_d1): + if label == 1: + values.append(class_id) + indices.append([d0, d1, d2]) + d2 += 1 + else: + assert label == 0 + shape = [len(labels), len(labels[0]), len(labels[0][0])] + return tf.SparseTensorValue( + np.array(indices, np.int64), + np.array(values, np.int64), + np.array(shape, np.int64)) + + +def _binary_3d_label_to_sparse(labels): + """Convert dense 3D binary indicator tensor to sparse tensor. + + Only 1 values in `labels` are included in result. + + Args: + labels: Dense 2D binary indicator tensor. + + Returns: + `SparseTensor` whose values are indices along the last dimension of + `labels`. + """ + return tf.SparseTensor.from_value(_binary_3d_label_to_sparse_value(labels)) + + +def _assert_nan(test_case, actual): + test_case.assertTrue(math.isnan(actual), 'Expected NAN, got %s.' 
% actual) + + +def _assert_local_variables(test_case, expected): + test_case.assertEquals( + set(expected), set(v.name for v in tf.local_variables())) + + +class MeanTest(tf.test.TestCase): + + def setUp(self): + tf.reset_default_graph() + + def testVars(self): + metrics.mean(tf.ones([4, 3])) + _assert_local_variables(self, ('mean/count:0', 'mean/total:0')) + + def testMetricsCollection(self): + my_collection_name = '__metrics__' + mean, _ = metrics.mean( + tf.ones([4, 3]), + metrics_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [mean]) + + def testUpdatesCollection(self): + my_collection_name = '__updates__' + _, update_op = metrics.mean( + tf.ones([4, 3]), + updates_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [update_op]) + + def testBasic(self): + with self.test_session() as sess: + values_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 2)) + _enqueue_vector(sess, values_queue, [0, 1]) + _enqueue_vector(sess, values_queue, [-4.2, 9.1]) + _enqueue_vector(sess, values_queue, [6.5, 0]) + _enqueue_vector(sess, values_queue, [-3.2, 4.0]) + values = values_queue.dequeue() + + mean, update_op = metrics.mean(values) + + sess.run(tf.local_variables_initializer()) + for _ in range(4): + sess.run(update_op) + self.assertAlmostEqual(1.65, sess.run(mean), 5) + + def testUpdateOpsReturnsCurrentValue(self): + with self.test_session() as sess: + values_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 2)) + _enqueue_vector(sess, values_queue, [0, 1]) + _enqueue_vector(sess, values_queue, [-4.2, 9.1]) + _enqueue_vector(sess, values_queue, [6.5, 0]) + _enqueue_vector(sess, values_queue, [-3.2, 4.0]) + values = values_queue.dequeue() + + mean, update_op = metrics.mean(values) + + sess.run(tf.local_variables_initializer()) + + self.assertAlmostEqual(0.5, sess.run(update_op), 5) + self.assertAlmostEqual(1.475, sess.run(update_op), 5) + self.assertAlmostEqual(12.4/6.0, sess.run(update_op), 5) + self.assertAlmostEqual(1.65, sess.run(update_op), 5) + + self.assertAlmostEqual(1.65, sess.run(mean), 5) + + def test1dWeightedValues(self): + with self.test_session() as sess: + # Create the queue that populates the values. + values_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 2)) + _enqueue_vector(sess, values_queue, [0, 1]) + _enqueue_vector(sess, values_queue, [-4.2, 9.1]) + _enqueue_vector(sess, values_queue, [6.5, 0]) + _enqueue_vector(sess, values_queue, [-3.2, 4.0]) + values = values_queue.dequeue() + + # Create the queue that populates the weighted labels. + weights_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 1)) + _enqueue_vector(sess, weights_queue, [1]) + _enqueue_vector(sess, weights_queue, [0]) + _enqueue_vector(sess, weights_queue, [0]) + _enqueue_vector(sess, weights_queue, [1]) + weights = weights_queue.dequeue() + + mean, update_op = metrics.mean(values, weights) + + tf.local_variables_initializer().run() + for _ in range(4): + update_op.eval() + self.assertAlmostEqual((0 + 1 - 3.2 + 4.0) / 4.0, mean.eval(), 5) + + def test1dWeightedValues_placeholders(self): + with self.test_session() as sess: + # Create the queue that populates the values. + feed_values = ( + (0, 1), + (-4.2, 9.1), + (6.5, 0), + (-3.2, 4.0) + ) + values = tf.placeholder(dtype=tf.float32) + + # Create the queue that populates the weighted labels. 
+ weights_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 1)) + _enqueue_vector(sess, weights_queue, [1]) + _enqueue_vector(sess, weights_queue, [0]) + _enqueue_vector(sess, weights_queue, [0]) + _enqueue_vector(sess, weights_queue, [1]) + weights = weights_queue.dequeue() + + mean, update_op = metrics.mean(values, weights) + + tf.local_variables_initializer().run() + for i in range(4): + update_op.eval(feed_dict={values: feed_values[i]}) + self.assertAlmostEqual((0 + 1 - 3.2 + 4.0) / 4.0, mean.eval(), 5) + + def test2dWeightedValues(self): + with self.test_session() as sess: + # Create the queue that populates the values. + values_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 2)) + _enqueue_vector(sess, values_queue, [0, 1]) + _enqueue_vector(sess, values_queue, [-4.2, 9.1]) + _enqueue_vector(sess, values_queue, [6.5, 0]) + _enqueue_vector(sess, values_queue, [-3.2, 4.0]) + values = values_queue.dequeue() + + # Create the queue that populates the weighted labels. + weights_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 2)) + _enqueue_vector(sess, weights_queue, [1, 1]) + _enqueue_vector(sess, weights_queue, [1, 0]) + _enqueue_vector(sess, weights_queue, [0, 1]) + _enqueue_vector(sess, weights_queue, [0, 0]) + weights = weights_queue.dequeue() + + mean, update_op = metrics.mean(values, weights) + + tf.local_variables_initializer().run() + for _ in range(4): + update_op.eval() + self.assertAlmostEqual((0 + 1 - 4.2 + 0) / 4.0, mean.eval(), 5) + + def test2dWeightedValues_placeholders(self): + with self.test_session() as sess: + # Create the queue that populates the values. + feed_values = ( + (0, 1), + (-4.2, 9.1), + (6.5, 0), + (-3.2, 4.0) + ) + values = tf.placeholder(dtype=tf.float32) + + # Create the queue that populates the weighted labels. 
+ weights_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 2)) + _enqueue_vector(sess, weights_queue, [1, 1]) + _enqueue_vector(sess, weights_queue, [1, 0]) + _enqueue_vector(sess, weights_queue, [0, 1]) + _enqueue_vector(sess, weights_queue, [0, 0]) + weights = weights_queue.dequeue() + + mean, update_op = metrics.mean(values, weights) + + tf.local_variables_initializer().run() + for i in range(4): + update_op.eval(feed_dict={values: feed_values[i]}) + self.assertAlmostEqual((0 + 1 - 4.2 + 0) / 4.0, mean.eval(), 5) + + +class StreamingMeanTensorTest(tf.test.TestCase): + + def setUp(self): + tf.reset_default_graph() + + def testVars(self): + metrics.mean_tensor(tf.ones([4, 3])) + _assert_local_variables(self, ( + 'mean/total_tensor:0', 'mean/count_tensor:0')) + + def testMetricsCollection(self): + my_collection_name = '__metrics__' + mean, _ = metrics.mean_tensor( + tf.ones([4, 3]), + metrics_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [mean]) + + def testUpdatesCollection(self): + my_collection_name = '__updates__' + _, update_op = metrics.mean_tensor( + tf.ones([4, 3]), + updates_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [update_op]) + + def testBasic(self): + with self.test_session() as sess: + values_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 2)) + _enqueue_vector(sess, values_queue, [0, 1]) + _enqueue_vector(sess, values_queue, [-4.2, 9.1]) + _enqueue_vector(sess, values_queue, [6.5, 0]) + _enqueue_vector(sess, values_queue, [-3.2, 4.0]) + values = values_queue.dequeue() + + mean, update_op = metrics.mean_tensor(values) + + sess.run(tf.local_variables_initializer()) + for _ in range(4): + sess.run(update_op) + self.assertAllClose([[-0.9/4., 3.525]], sess.run(mean)) + + def testMultiDimensional(self): + with self.test_session() as sess: + values_queue = tf.FIFOQueue(2, dtypes=tf.float32, shapes=(2, 2, 2)) + _enqueue_vector(sess, + values_queue, + [[[1, 2], [1, 2]], [[1, 2], [1, 2]]], + shape=(2, 2, 2)) + _enqueue_vector(sess, + values_queue, + [[[1, 2], [1, 2]], [[3, 4], [9, 10]]], + shape=(2, 2, 2)) + values = values_queue.dequeue() + + mean, update_op = metrics.mean_tensor(values) + + sess.run(tf.local_variables_initializer()) + for _ in range(2): + sess.run(update_op) + self.assertAllClose([[[1, 2], [1, 2]], [[2, 3], [5, 6]]], + sess.run(mean)) + + def testUpdateOpsReturnsCurrentValue(self): + with self.test_session() as sess: + values_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 2)) + _enqueue_vector(sess, values_queue, [0, 1]) + _enqueue_vector(sess, values_queue, [-4.2, 9.1]) + _enqueue_vector(sess, values_queue, [6.5, 0]) + _enqueue_vector(sess, values_queue, [-3.2, 4.0]) + values = values_queue.dequeue() + + mean, update_op = metrics.mean_tensor(values) + + sess.run(tf.local_variables_initializer()) + + self.assertAllClose([[0, 1]], sess.run(update_op), 5) + self.assertAllClose([[-2.1, 5.05]], sess.run(update_op), 5) + self.assertAllClose([[2.3/3., 10.1/3.]], sess.run(update_op), 5) + self.assertAllClose([[-0.9/4., 3.525]], sess.run(update_op), 5) + + self.assertAllClose([[-0.9/4., 3.525]], sess.run(mean), 5) + + def testWeighted1d(self): + with self.test_session() as sess: + # Create the queue that populates the values. 
+ values_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 2)) + _enqueue_vector(sess, values_queue, [0, 1]) + _enqueue_vector(sess, values_queue, [-4.2, 9.1]) + _enqueue_vector(sess, values_queue, [6.5, 0]) + _enqueue_vector(sess, values_queue, [-3.2, 4.0]) + values = values_queue.dequeue() + + # Create the queue that populates the weights. + weights_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 1)) + _enqueue_vector(sess, weights_queue, [[1]]) + _enqueue_vector(sess, weights_queue, [[0]]) + _enqueue_vector(sess, weights_queue, [[1]]) + _enqueue_vector(sess, weights_queue, [[0]]) + weights = weights_queue.dequeue() + + mean, update_op = metrics.mean_tensor(values, weights) + + sess.run(tf.local_variables_initializer()) + for _ in range(4): + sess.run(update_op) + self.assertAllClose([[3.25, 0.5]], sess.run(mean), 5) + + def testWeighted2d_1(self): + with self.test_session() as sess: + # Create the queue that populates the values. + values_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 2)) + _enqueue_vector(sess, values_queue, [0, 1]) + _enqueue_vector(sess, values_queue, [-4.2, 9.1]) + _enqueue_vector(sess, values_queue, [6.5, 0]) + _enqueue_vector(sess, values_queue, [-3.2, 4.0]) + values = values_queue.dequeue() + + # Create the queue that populates the weights. + weights_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 2)) + _enqueue_vector(sess, weights_queue, [1, 1]) + _enqueue_vector(sess, weights_queue, [1, 0]) + _enqueue_vector(sess, weights_queue, [0, 1]) + _enqueue_vector(sess, weights_queue, [0, 0]) + weights = weights_queue.dequeue() + + mean, update_op = metrics.mean_tensor(values, weights) + + sess.run(tf.local_variables_initializer()) + for _ in range(4): + sess.run(update_op) + self.assertAllClose([[-2.1, 0.5]], sess.run(mean), 5) + + def testWeighted2d_2(self): + with self.test_session() as sess: + # Create the queue that populates the values. + values_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 2)) + _enqueue_vector(sess, values_queue, [0, 1]) + _enqueue_vector(sess, values_queue, [-4.2, 9.1]) + _enqueue_vector(sess, values_queue, [6.5, 0]) + _enqueue_vector(sess, values_queue, [-3.2, 4.0]) + values = values_queue.dequeue() + + # Create the queue that populates the weights. 
+ weights_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 2)) + _enqueue_vector(sess, weights_queue, [0, 1]) + _enqueue_vector(sess, weights_queue, [0, 0]) + _enqueue_vector(sess, weights_queue, [0, 1]) + _enqueue_vector(sess, weights_queue, [0, 0]) + weights = weights_queue.dequeue() + + mean, update_op = metrics.mean_tensor(values, weights) + + sess.run(tf.local_variables_initializer()) + for _ in range(4): + sess.run(update_op) + self.assertAllClose([[0, 0.5]], sess.run(mean), 5) + + +class AccuracyTest(tf.test.TestCase): + + def setUp(self): + tf.reset_default_graph() + + def testVars(self): + metrics.accuracy( + predictions=tf.ones((10, 1)), labels=tf.ones((10, 1)), + name='my_accuracy') + _assert_local_variables(self, ( + 'my_accuracy/count:0', 'my_accuracy/total:0')) + + def testMetricsCollection(self): + my_collection_name = '__metrics__' + mean, _ = metrics.accuracy( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + metrics_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [mean]) + + def testUpdatesCollection(self): + my_collection_name = '__updates__' + _, update_op = metrics.accuracy( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + updates_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [update_op]) + + def testPredictionsAndLabelsOfDifferentSizeRaisesValueError(self): + predictions = tf.ones((10, 3)) + labels = tf.ones((10, 4)) + with self.assertRaises(ValueError): + metrics.accuracy(labels, predictions) + + def testPredictionsAndWeightsOfDifferentSizeRaisesValueError(self): + predictions = tf.ones((10, 3)) + labels = tf.ones((10, 3)) + weights = tf.ones((9, 3)) + with self.assertRaises(ValueError): + metrics.accuracy(labels, predictions, weights) + + def testValueTensorIsIdempotent(self): + predictions = tf.random_uniform((10, 3), maxval=3, dtype=tf.int64, seed=1) + labels = tf.random_uniform((10, 3), maxval=3, dtype=tf.int64, seed=1) + accuracy, update_op = metrics.accuracy( + labels, predictions) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + + # Run several updates. + for _ in range(10): + sess.run(update_op) + + # Then verify idempotency. + initial_accuracy = accuracy.eval() + for _ in range(10): + self.assertEqual(initial_accuracy, accuracy.eval()) + + def testMultipleUpdates(self): + with self.test_session() as sess: + # Create the queue that populates the predictions. + preds_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 1)) + _enqueue_vector(sess, preds_queue, [0]) + _enqueue_vector(sess, preds_queue, [1]) + _enqueue_vector(sess, preds_queue, [2]) + _enqueue_vector(sess, preds_queue, [1]) + predictions = preds_queue.dequeue() + + # Create the queue that populates the labels. 
+      labels_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 1))
+      _enqueue_vector(sess, labels_queue, [0])
+      _enqueue_vector(sess, labels_queue, [1])
+      _enqueue_vector(sess, labels_queue, [1])
+      _enqueue_vector(sess, labels_queue, [2])
+      labels = labels_queue.dequeue()
+
+      accuracy, update_op = metrics.accuracy(
+          labels, predictions)
+
+      sess.run(tf.local_variables_initializer())
+      for _ in xrange(3):
+        sess.run(update_op)
+      self.assertEqual(0.5, sess.run(update_op))
+      self.assertEqual(0.5, accuracy.eval())
+
+  def testEffectivelyEquivalentSizes(self):
+    predictions = tf.ones((40, 1))
+    labels = tf.ones((40,))
+    with self.test_session() as sess:
+      accuracy, update_op = metrics.accuracy(
+          labels, predictions)
+
+      sess.run(tf.local_variables_initializer())
+      self.assertEqual(1.0, update_op.eval())
+      self.assertEqual(1.0, accuracy.eval())
+
+  def testEffectivelyEquivalentSizesWithStaticShapedWeight(self):
+    predictions = tf.convert_to_tensor([1, 1, 1])  # shape (3,)
+    labels = tf.expand_dims(tf.convert_to_tensor([1, 0, 0]), 1)  # shape (3, 1)
+    weights = tf.expand_dims(tf.convert_to_tensor([100, 1, 1]), 1)  # shape (3, 1)
+
+    with self.test_session() as sess:
+      accuracy, update_op = metrics.accuracy(
+          labels, predictions, weights)
+
+      sess.run(tf.local_variables_initializer())
+      # If the metric does not flatten the weights, accuracy would be
+      # 0.33333334 due to an intended broadcast of the weights. Due to
+      # flattening, it will be higher than .95.
+      self.assertGreater(update_op.eval(), .95)
+      self.assertGreater(accuracy.eval(), .95)
+
+  def testEffectivelyEquivalentSizesWithDynamicallyShapedWeight(self):
+    predictions = tf.convert_to_tensor([1, 1, 1])  # shape (3,)
+    labels = tf.expand_dims(tf.convert_to_tensor([1, 0, 0]), 1)  # shape (3, 1)
+
+    weights = [[100], [1], [1]]  # shape (3, 1)
+    weights_placeholder = tf.placeholder(dtype=tf.int32, name='weights')
+    feed_dict = {weights_placeholder: weights}
+
+    with self.test_session() as sess:
+      accuracy, update_op = metrics.accuracy(
+          labels, predictions, weights_placeholder)
+
+      sess.run(tf.local_variables_initializer())
+      # If the metric does not flatten the weights, accuracy would be
+      # 0.33333334 due to an intended broadcast of the weights. Due to
+      # flattening, it will be higher than .95.
+      self.assertGreater(update_op.eval(feed_dict=feed_dict), .95)
+      self.assertGreater(accuracy.eval(feed_dict=feed_dict), .95)
+
+  def testMultipleUpdatesWithWeightedValues(self):
+    with self.test_session() as sess:
+      # Create the queue that populates the predictions.
+      preds_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 1))
+      _enqueue_vector(sess, preds_queue, [0])
+      _enqueue_vector(sess, preds_queue, [1])
+      _enqueue_vector(sess, preds_queue, [2])
+      _enqueue_vector(sess, preds_queue, [1])
+      predictions = preds_queue.dequeue()
+
+      # Create the queue that populates the labels.
+      labels_queue = tf.FIFOQueue(4, dtypes=tf.float32, shapes=(1, 1))
+      _enqueue_vector(sess, labels_queue, [0])
+      _enqueue_vector(sess, labels_queue, [1])
+      _enqueue_vector(sess, labels_queue, [1])
+      _enqueue_vector(sess, labels_queue, [2])
+      labels = labels_queue.dequeue()
+
+      # Create the queue that populates the weights.
+ weights_queue = tf.FIFOQueue(4, dtypes=tf.int64, shapes=(1, 1)) + _enqueue_vector(sess, weights_queue, [1]) + _enqueue_vector(sess, weights_queue, [1]) + _enqueue_vector(sess, weights_queue, [0]) + _enqueue_vector(sess, weights_queue, [0]) + weights = weights_queue.dequeue() + + accuracy, update_op = metrics.accuracy( + labels, predictions, weights) + + sess.run(tf.local_variables_initializer()) + for _ in xrange(3): + sess.run(update_op) + self.assertEqual(1.0, sess.run(update_op)) + self.assertEqual(1.0, accuracy.eval()) + + +class PrecisionTest(tf.test.TestCase): + + def setUp(self): + np.random.seed(1) + tf.reset_default_graph() + + def testVars(self): + metrics.precision( + predictions=tf.ones((10, 1)), labels=tf.ones((10, 1))) + _assert_local_variables(self, ( + 'precision/false_positives/count:0', + 'precision/true_positives/count:0' + )) + + def testMetricsCollection(self): + my_collection_name = '__metrics__' + mean, _ = metrics.precision( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + metrics_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [mean]) + + def testUpdatesCollection(self): + my_collection_name = '__updates__' + _, update_op = metrics.precision( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + updates_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [update_op]) + + def testValueTensorIsIdempotent(self): + predictions = tf.random_uniform((10, 3), maxval=1, dtype=tf.int64, seed=1) + labels = tf.random_uniform((10, 3), maxval=1, dtype=tf.int64, seed=1) + precision, update_op = metrics.precision( + labels, predictions) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + + # Run several updates. + for _ in range(10): + sess.run(update_op) + + # Then verify idempotency. 
+ initial_precision = precision.eval() + for _ in range(10): + self.assertEqual(initial_precision, precision.eval()) + + def testAllCorrect(self): + inputs = np.random.randint(0, 2, size=(100, 1)) + + predictions = tf.constant(inputs) + labels = tf.constant(inputs) + precision, update_op = metrics.precision( + labels, predictions) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + self.assertAlmostEqual(1, sess.run(update_op)) + self.assertAlmostEqual(1, precision.eval()) + + def testSomeCorrect(self): + predictions = tf.constant([1, 0, 1, 0], shape=(1, 4)) + labels = tf.constant([0, 1, 1, 0], shape=(1, 4)) + precision, update_op = metrics.precision( + labels, predictions) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + self.assertAlmostEqual(0.5, update_op.eval()) + self.assertAlmostEqual(0.5, precision.eval()) + + def testWeighted1d(self): + predictions = tf.constant([[1, 0, 1, 0], [1, 0, 1, 0]]) + labels = tf.constant([[0, 1, 1, 0], [1, 0, 0, 1]]) + precision, update_op = metrics.precision( + labels, predictions, weights=tf.constant([[2], [5]])) + + with self.test_session(): + tf.local_variables_initializer().run() + weighted_tp = 2.0 + 5.0 + weighted_positives = (2.0 + 2.0) + (5.0 + 5.0) + expected_precision = weighted_tp / weighted_positives + self.assertAlmostEqual(expected_precision, update_op.eval()) + self.assertAlmostEqual(expected_precision, precision.eval()) + + def testWeighted1d_placeholders(self): + predictions = tf.placeholder(dtype=tf.float32) + labels = tf.placeholder(dtype=tf.float32) + feed_dict = { + predictions: ((1, 0, 1, 0), (1, 0, 1, 0)), + labels: ((0, 1, 1, 0), (1, 0, 0, 1)) + } + precision, update_op = metrics.precision( + labels, predictions, weights=tf.constant([[2], [5]])) + + with self.test_session(): + tf.local_variables_initializer().run() + weighted_tp = 2.0 + 5.0 + weighted_positives = (2.0 + 2.0) + (5.0 + 5.0) + expected_precision = weighted_tp / weighted_positives + self.assertAlmostEqual( + expected_precision, update_op.eval(feed_dict=feed_dict)) + self.assertAlmostEqual( + expected_precision, precision.eval(feed_dict=feed_dict)) + + def testWeighted2d(self): + predictions = tf.constant([[1, 0, 1, 0], [1, 0, 1, 0]]) + labels = tf.constant([[0, 1, 1, 0], [1, 0, 0, 1]]) + precision, update_op = metrics.precision( + labels, predictions, weights=tf.constant([[1, 2, 3, 4], [4, 3, 2, 1]])) + + with self.test_session(): + tf.local_variables_initializer().run() + weighted_tp = 3.0 + 4.0 + weighted_positives = (1.0 + 3.0) + (4.0 + 2.0) + expected_precision = weighted_tp / weighted_positives + self.assertAlmostEqual(expected_precision, update_op.eval()) + self.assertAlmostEqual(expected_precision, precision.eval()) + + def testWeighted2d_placeholders(self): + predictions = tf.placeholder(dtype=tf.float32) + labels = tf.placeholder(dtype=tf.float32) + feed_dict = { + predictions: ((1, 0, 1, 0), (1, 0, 1, 0)), + labels: ((0, 1, 1, 0), (1, 0, 0, 1)) + } + precision, update_op = metrics.precision( + labels, predictions, weights=tf.constant([[1, 2, 3, 4], [4, 3, 2, 1]])) + + with self.test_session(): + tf.local_variables_initializer().run() + weighted_tp = 3.0 + 4.0 + weighted_positives = (1.0 + 3.0) + (4.0 + 2.0) + expected_precision = weighted_tp / weighted_positives + self.assertAlmostEqual( + expected_precision, update_op.eval(feed_dict=feed_dict)) + self.assertAlmostEqual( + expected_precision, precision.eval(feed_dict=feed_dict)) + + def testAllIncorrect(self): + inputs = 
np.random.randint(0, 2, size=(100, 1)) + + predictions = tf.constant(inputs) + labels = tf.constant(1 - inputs) + precision, update_op = metrics.precision( + labels, predictions) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + sess.run(update_op) + self.assertAlmostEqual(0, precision.eval()) + + def testZeroTrueAndFalsePositivesGivesZeroPrecision(self): + predictions = tf.constant([0, 0, 0, 0]) + labels = tf.constant([0, 0, 0, 0]) + precision, update_op = metrics.precision( + labels, predictions) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + sess.run(update_op) + self.assertEqual(0.0, precision.eval()) + + +class StreamingRecallTest(tf.test.TestCase): + + def setUp(self): + np.random.seed(1) + tf.reset_default_graph() + + def testVars(self): + metrics.recall( + predictions=tf.ones((10, 1)), labels=tf.ones((10, 1))) + _assert_local_variables(self, ( + 'recall/false_negatives/count:0', + 'recall/true_positives/count:0' + )) + + def testMetricsCollection(self): + my_collection_name = '__metrics__' + mean, _ = metrics.recall( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + metrics_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [mean]) + + def testUpdatesCollection(self): + my_collection_name = '__updates__' + _, update_op = metrics.recall( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + updates_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [update_op]) + + def testValueTensorIsIdempotent(self): + predictions = tf.random_uniform((10, 3), maxval=1, dtype=tf.int64, seed=1) + labels = tf.random_uniform((10, 3), maxval=1, dtype=tf.int64, seed=1) + recall, update_op = metrics.recall( + labels, predictions) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + + # Run several updates. + for _ in range(10): + sess.run(update_op) + + # Then verify idempotency. 
+ initial_recall = recall.eval() + for _ in range(10): + self.assertEqual(initial_recall, recall.eval()) + + def testAllCorrect(self): + np_inputs = np.random.randint(0, 2, size=(100, 1)) + + predictions = tf.constant(np_inputs) + labels = tf.constant(np_inputs) + recall, update_op = metrics.recall(labels, predictions) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + sess.run(update_op) + self.assertEqual(1, recall.eval()) + + def testSomeCorrect(self): + predictions = tf.constant([1, 0, 1, 0], shape=(1, 4)) + labels = tf.constant([0, 1, 1, 0], shape=(1, 4)) + recall, update_op = metrics.recall(labels, predictions) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + self.assertAlmostEqual(0.5, update_op.eval()) + self.assertAlmostEqual(0.5, recall.eval()) + + def testWeighted1d(self): + predictions = tf.constant([[1, 0, 1, 0], [0, 1, 0, 1]]) + labels = tf.constant([[0, 1, 1, 0], [1, 0, 0, 1]]) + weights = tf.constant([[2], [5]]) + recall, update_op = metrics.recall( + labels, predictions, weights=weights) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + weighted_tp = 2.0 + 5.0 + weighted_t = (2.0 + 2.0) + (5.0 + 5.0) + expected_precision = weighted_tp / weighted_t + self.assertAlmostEqual(expected_precision, update_op.eval()) + self.assertAlmostEqual(expected_precision, recall.eval()) + + def testWeighted2d(self): + predictions = tf.constant([[1, 0, 1, 0], [0, 1, 0, 1]]) + labels = tf.constant([[0, 1, 1, 0], [1, 0, 0, 1]]) + weights = tf.constant([[1, 2, 3, 4], [4, 3, 2, 1]]) + recall, update_op = metrics.recall( + labels, predictions, weights=weights) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + weighted_tp = 3.0 + 1.0 + weighted_t = (2.0 + 3.0) + (4.0 + 1.0) + expected_precision = weighted_tp / weighted_t + self.assertAlmostEqual(expected_precision, update_op.eval()) + self.assertAlmostEqual(expected_precision, recall.eval()) + + def testAllIncorrect(self): + np_inputs = np.random.randint(0, 2, size=(100, 1)) + + predictions = tf.constant(np_inputs) + labels = tf.constant(1 - np_inputs) + recall, update_op = metrics.recall(labels, predictions) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + sess.run(update_op) + self.assertEqual(0, recall.eval()) + + def testZeroTruePositivesAndFalseNegativesGivesZeroRecall(self): + predictions = tf.zeros((1, 4)) + labels = tf.zeros((1, 4)) + recall, update_op = metrics.recall(labels, predictions) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + sess.run(update_op) + self.assertEqual(0, recall.eval()) + + +class StreamingAUCTest(tf.test.TestCase): + + def setUp(self): + np.random.seed(1) + tf.reset_default_graph() + + def testVars(self): + metrics.auc( + predictions=tf.ones((10, 1)), labels=tf.ones((10, 1))) + _assert_local_variables(self, ( + 'auc/true_positives:0', + 'auc/false_negatives:0', + 'auc/false_positives:0', + 'auc/true_negatives:0' + )) + + def testMetricsCollection(self): + my_collection_name = '__metrics__' + mean, _ = metrics.auc( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + metrics_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [mean]) + + def testUpdatesCollection(self): + my_collection_name = '__updates__' + _, update_op = metrics.auc( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + updates_collections=[my_collection_name]) + 
self.assertListEqual(tf.get_collection(my_collection_name), [update_op]) + + def testValueTensorIsIdempotent(self): + predictions = tf.random_uniform((10, 3), maxval=1, dtype=tf.float32, seed=1) + labels = tf.random_uniform((10, 3), maxval=1, dtype=tf.int64, seed=1) + auc, update_op = metrics.auc( + labels, predictions) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + + # Run several updates. + for _ in range(10): + sess.run(update_op) + + # Then verify idempotency. + initial_auc = auc.eval() + for _ in range(10): + self.assertAlmostEqual(initial_auc, auc.eval(), 5) + + def testAllCorrect(self): + self.allCorrectAsExpected('ROC') + + def allCorrectAsExpected(self, curve): + inputs = np.random.randint(0, 2, size=(100, 1)) + + with self.test_session() as sess: + predictions = tf.constant(inputs, dtype=tf.float32) + labels = tf.constant(inputs) + auc, update_op = metrics.auc(labels, predictions, curve=curve) + + sess.run(tf.local_variables_initializer()) + self.assertEqual(1, sess.run(update_op)) + + self.assertEqual(1, auc.eval()) + + def testSomeCorrect(self): + with self.test_session() as sess: + predictions = tf.constant([1, 0, 1, 0], shape=(1, 4), dtype=tf.float32) + labels = tf.constant([0, 1, 1, 0], shape=(1, 4)) + auc, update_op = metrics.auc(labels, predictions) + + sess.run(tf.local_variables_initializer()) + self.assertAlmostEqual(0.5, sess.run(update_op)) + + self.assertAlmostEqual(0.5, auc.eval()) + + def testWeighted1d(self): + with self.test_session() as sess: + predictions = tf.constant([1, 0, 1, 0], shape=(1, 4), dtype=tf.float32) + labels = tf.constant([0, 1, 1, 0], shape=(1, 4)) + weights = tf.constant([2], shape=(1, 1)) + auc, update_op = metrics.auc(labels, + predictions, weights=weights) + + sess.run(tf.local_variables_initializer()) + self.assertAlmostEqual(0.5, sess.run(update_op), 5) + + self.assertAlmostEqual(0.5, auc.eval(), 5) + + def testWeighted2d(self): + with self.test_session() as sess: + predictions = tf.constant([1, 0, 1, 0], shape=(1, 4), dtype=tf.float32) + labels = tf.constant([0, 1, 1, 0], shape=(1, 4)) + weights = tf.constant([1, 2, 3, 4], shape=(1, 4)) + auc, update_op = metrics.auc(labels, + predictions, weights=weights) + + sess.run(tf.local_variables_initializer()) + self.assertAlmostEqual(0.7, sess.run(update_op), 5) + + self.assertAlmostEqual(0.7, auc.eval(), 5) + + def testAUCPRSpecialCase(self): + with self.test_session() as sess: + predictions = tf.constant([0.1, 0.4, 0.35, 0.8], + shape=(1, 4), dtype=tf.float32) + labels = tf.constant([0, 0, 1, 1], shape=(1, 4)) + auc, update_op = metrics.auc(labels, predictions, curve='PR') + + sess.run(tf.local_variables_initializer()) + self.assertAlmostEqual(0.79166, sess.run(update_op), delta=1e-3) + + self.assertAlmostEqual(0.79166, auc.eval(), delta=1e-3) + + def testAnotherAUCPRSpecialCase(self): + with self.test_session() as sess: + predictions = tf.constant([0.1, 0.4, 0.35, 0.8, 0.1, 0.135, 0.81], + shape=(1, 7), dtype=tf.float32) + labels = tf.constant([0, 0, 1, 0, 1, 0, 1], shape=(1, 7)) + auc, update_op = metrics.auc(labels, predictions, curve='PR') + + sess.run(tf.local_variables_initializer()) + self.assertAlmostEqual(0.610317, sess.run(update_op), delta=1e-3) + + self.assertAlmostEqual(0.610317, auc.eval(), delta=1e-3) + + def testThirdAUCPRSpecialCase(self): + with self.test_session() as sess: + predictions = tf.constant([0.0, 0.1, 0.2, 0.33, 0.3, 0.4, 0.5], + shape=(1, 7), dtype=tf.float32) + labels = tf.constant([0, 0, 0, 0, 1, 1, 1], shape=(1, 7)) + auc, 
update_op = metrics.auc(labels, predictions, curve='PR')
+
+      sess.run(tf.local_variables_initializer())
+      self.assertAlmostEqual(0.90277, sess.run(update_op), delta=1e-3)
+
+      self.assertAlmostEqual(0.90277, auc.eval(), delta=1e-3)
+
+  def testAllIncorrect(self):
+    inputs = np.random.randint(0, 2, size=(100, 1))
+
+    with self.test_session() as sess:
+      predictions = tf.constant(inputs, dtype=tf.float32)
+      labels = tf.constant(1 - inputs, dtype=tf.float32)
+      auc, update_op = metrics.auc(labels, predictions)
+
+      sess.run(tf.local_variables_initializer())
+      self.assertAlmostEqual(0, sess.run(update_op))
+
+      self.assertAlmostEqual(0, auc.eval())
+
+  def testZeroTruePositivesAndFalseNegativesGivesOneAUC(self):
+    with self.test_session() as sess:
+      predictions = tf.zeros([4], dtype=tf.float32)
+      labels = tf.zeros([4])
+      auc, update_op = metrics.auc(labels, predictions)
+
+      sess.run(tf.local_variables_initializer())
+      self.assertAlmostEqual(1, sess.run(update_op), 6)
+
+      self.assertAlmostEqual(1, auc.eval(), 6)
+
+  def testRecallOneAndPrecisionOneGivesOnePRAUC(self):
+    with self.test_session() as sess:
+      predictions = tf.ones([4], dtype=tf.float32)
+      labels = tf.ones([4])
+      auc, update_op = metrics.auc(labels,
+                                   predictions,
+                                   curve='PR')
+
+      sess.run(tf.local_variables_initializer())
+      self.assertAlmostEqual(1, sess.run(update_op), 6)
+
+      self.assertAlmostEqual(1, auc.eval(), 6)
+
+  def np_auc(self, predictions, labels, weights):
+    """Computes the AUC explicitly using NumPy.
+
+    Args:
+      predictions: an ndarray with shape [N].
+      labels: an ndarray with shape [N].
+      weights: an ndarray with shape [N].
+
+    Returns:
+      The area under the ROC curve.
+    """
+    if weights is None:
+      weights = np.ones(np.size(predictions))
+    is_positive = labels > 0
+    num_positives = np.sum(weights[is_positive])
+    num_negatives = np.sum(weights[~is_positive])
+
+    # Sort descending:
+    inds = np.argsort(-predictions)
+
+    sorted_labels = labels[inds]
+    sorted_weights = weights[inds]
+    is_positive = sorted_labels > 0
+
+    tp = np.cumsum(sorted_weights * is_positive) / num_positives
+    return np.sum((sorted_weights * tp)[~is_positive]) / num_negatives
+
+  def testWithMultipleUpdates(self):
+    num_samples = 1000
+    batch_size = 10
+    num_batches = int(num_samples / batch_size)
+
+    # Create the labels and data.
+ labels = np.random.randint(0, 2, size=num_samples) + noise = np.random.normal(0.0, scale=0.2, size=num_samples) + predictions = 0.4 + 0.2 * labels + noise + predictions[predictions > 1] = 1 + predictions[predictions < 0] = 0 + + def _enqueue_as_batches(x, enqueue_ops): + x_batches = x.astype(np.float32).reshape((num_batches, batch_size)) + x_queue = tf.FIFOQueue(num_batches, dtypes=tf.float32, + shapes=(batch_size,)) + for i in range(num_batches): + enqueue_ops[i].append(x_queue.enqueue(x_batches[i, :])) + return x_queue.dequeue() + + for weights in (None, + np.ones(num_samples), + np.random.exponential(scale=1.0, size=num_samples)): + expected_auc = self.np_auc(predictions, labels, weights) + + with self.test_session() as sess: + enqueue_ops = [[] for i in range(num_batches)] + tf_predictions = _enqueue_as_batches(predictions, enqueue_ops) + tf_labels = _enqueue_as_batches(labels, enqueue_ops) + tf_weights = (_enqueue_as_batches(weights, enqueue_ops) + if weights is not None else None) + + for i in range(num_batches): + sess.run(enqueue_ops[i]) + + auc, update_op = metrics.auc( + tf_labels, tf_predictions, curve='ROC', num_thresholds=500, + weights=tf_weights) + + sess.run(tf.local_variables_initializer()) + for i in range(num_batches): + sess.run(update_op) + + # Since this is only approximate, we can't expect a 6 digits match. + # Although with higher number of samples/thresholds we should see the + # accuracy improving + self.assertAlmostEqual(expected_auc, auc.eval(), 2) + + +class SpecificityAtSensitivityTest(tf.test.TestCase): + + def setUp(self): + np.random.seed(1) + tf.reset_default_graph() + + def testVars(self): + metrics.specificity_at_sensitivity( + predictions=tf.ones((10, 1)), labels=tf.ones((10, 1)), sensitivity=0.7) + _assert_local_variables(self, ( + 'specificity_at_sensitivity/true_positives:0', + 'specificity_at_sensitivity/false_negatives:0', + 'specificity_at_sensitivity/false_positives:0', + 'specificity_at_sensitivity/true_negatives:0' + )) + + def testMetricsCollection(self): + my_collection_name = '__metrics__' + mean, _ = metrics.specificity_at_sensitivity( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + sensitivity=0.7, + metrics_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [mean]) + + def testUpdatesCollection(self): + my_collection_name = '__updates__' + _, update_op = metrics.specificity_at_sensitivity( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + sensitivity=0.7, + updates_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [update_op]) + + def testValueTensorIsIdempotent(self): + predictions = tf.random_uniform((10, 3), maxval=1, dtype=tf.float32, seed=1) + labels = tf.random_uniform((10, 3), maxval=2, dtype=tf.int64, seed=1) + specificity, update_op = metrics.specificity_at_sensitivity( + labels, predictions, sensitivity=0.7) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + + # Run several updates. + for _ in range(10): + sess.run(update_op) + + # Then verify idempotency. 
+ initial_specificity = specificity.eval() + for _ in range(10): + self.assertAlmostEqual(initial_specificity, specificity.eval(), 5) + + def testAllCorrect(self): + inputs = np.random.randint(0, 2, size=(100, 1)) + + predictions = tf.constant(inputs, dtype=tf.float32) + labels = tf.constant(inputs) + specificity, update_op = metrics.specificity_at_sensitivity( + labels, predictions, sensitivity=0.7) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + self.assertEqual(1, sess.run(update_op)) + self.assertEqual(1, specificity.eval()) + + def testSomeCorrectHighSensitivity(self): + predictions_values = [0.1, 0.2, 0.4, 0.3, 0.0, + 0.1, 0.45, 0.5, 0.8, 0.9] + labels_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + + predictions = tf.constant(predictions_values, dtype=tf.float32) + labels = tf.constant(labels_values) + specificity, update_op = metrics.specificity_at_sensitivity( + labels, predictions, sensitivity=0.8) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + self.assertAlmostEqual(1.0, sess.run(update_op)) + self.assertAlmostEqual(1.0, specificity.eval()) + + def testSomeCorrectLowSensitivity(self): + predictions_values = [0.1, 0.2, 0.4, 0.3, 0.0, + 0.1, 0.2, 0.2, 0.26, 0.26] + labels_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + + predictions = tf.constant(predictions_values, dtype=tf.float32) + labels = tf.constant(labels_values) + specificity, update_op = metrics.specificity_at_sensitivity( + labels, predictions, sensitivity=0.4) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + + self.assertAlmostEqual(0.6, sess.run(update_op)) + self.assertAlmostEqual(0.6, specificity.eval()) + + def testWeighted1d(self): + predictions_values = [0.1, 0.2, 0.4, 0.3, 0.0, + 0.1, 0.2, 0.2, 0.26, 0.26] + labels_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + weights_values = [3] + + predictions = tf.constant(predictions_values, dtype=tf.float32) + labels = tf.constant(labels_values) + weights = tf.constant(weights_values) + specificity, update_op = metrics.specificity_at_sensitivity( + labels, predictions, weights=weights, sensitivity=0.4) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + + self.assertAlmostEqual(0.6, sess.run(update_op)) + self.assertAlmostEqual(0.6, specificity.eval()) + + def testWeighted2d(self): + predictions_values = [0.1, 0.2, 0.4, 0.3, 0.0, + 0.1, 0.2, 0.2, 0.26, 0.26] + labels_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + weights_values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + + predictions = tf.constant(predictions_values, dtype=tf.float32) + labels = tf.constant(labels_values) + weights = tf.constant(weights_values) + specificity, update_op = metrics.specificity_at_sensitivity( + labels, predictions, weights=weights, sensitivity=0.4) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + + self.assertAlmostEqual(8.0 / 15.0, sess.run(update_op)) + self.assertAlmostEqual(8.0 / 15.0, specificity.eval()) + + +class StreamingSensitivityAtSpecificityTest(tf.test.TestCase): + + def setUp(self): + np.random.seed(1) + tf.reset_default_graph() + + def testVars(self): + metrics.sensitivity_at_specificity( + predictions=tf.ones((10, 1)), labels=tf.ones((10, 1)), specificity=0.7) + _assert_local_variables(self, ( + 'sensitivity_at_specificity/true_positives:0', + 'sensitivity_at_specificity/false_negatives:0', + 'sensitivity_at_specificity/false_positives:0', + 'sensitivity_at_specificity/true_negatives:0' + )) + + def testMetricsCollection(self): 
+ my_collection_name = '__metrics__' + mean, _ = metrics.sensitivity_at_specificity( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + specificity=0.7, + metrics_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [mean]) + + def testUpdatesCollection(self): + my_collection_name = '__updates__' + _, update_op = metrics.sensitivity_at_specificity( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + specificity=0.7, + updates_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [update_op]) + + def testValueTensorIsIdempotent(self): + predictions = tf.random_uniform((10, 3), maxval=1, dtype=tf.float32, seed=1) + labels = tf.random_uniform((10, 3), maxval=2, dtype=tf.int64, seed=1) + sensitivity, update_op = metrics.sensitivity_at_specificity( + labels, predictions, specificity=0.7) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + + # Run several updates. + for _ in range(10): + sess.run(update_op) + + # Then verify idempotency. + initial_sensitivity = sensitivity.eval() + for _ in range(10): + self.assertAlmostEqual(initial_sensitivity, sensitivity.eval(), 5) + + def testAllCorrect(self): + inputs = np.random.randint(0, 2, size=(100, 1)) + + predictions = tf.constant(inputs, dtype=tf.float32) + labels = tf.constant(inputs) + specificity, update_op = metrics.sensitivity_at_specificity( + labels, predictions, specificity=0.7) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + self.assertEqual(1, sess.run(update_op)) + self.assertEqual(1, specificity.eval()) + + def testSomeCorrectHighSpecificity(self): + predictions_values = [0.0, 0.1, 0.2, 0.3, 0.4, + 0.1, 0.45, 0.5, 0.8, 0.9] + labels_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + + predictions = tf.constant(predictions_values, dtype=tf.float32) + labels = tf.constant(labels_values) + specificity, update_op = metrics.sensitivity_at_specificity( + labels, predictions, specificity=0.8) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + self.assertAlmostEqual(0.8, sess.run(update_op)) + self.assertAlmostEqual(0.8, specificity.eval()) + + def testSomeCorrectLowSpecificity(self): + predictions_values = [0.0, 0.1, 0.2, 0.3, 0.4, + 0.01, 0.02, 0.25, 0.26, 0.26] + labels_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + + predictions = tf.constant(predictions_values, dtype=tf.float32) + labels = tf.constant(labels_values) + specificity, update_op = metrics.sensitivity_at_specificity( + labels, predictions, specificity=0.4) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + self.assertAlmostEqual(0.6, sess.run(update_op)) + self.assertAlmostEqual(0.6, specificity.eval()) + + def testWeighted(self): + predictions_values = [0.0, 0.1, 0.2, 0.3, 0.4, + 0.01, 0.02, 0.25, 0.26, 0.26] + labels_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + weights_values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + + predictions = tf.constant(predictions_values, dtype=tf.float32) + labels = tf.constant(labels_values) + weights = tf.constant(weights_values) + specificity, update_op = metrics.sensitivity_at_specificity( + labels, predictions, weights=weights, specificity=0.4) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + self.assertAlmostEqual(0.675, sess.run(update_op)) + self.assertAlmostEqual(0.675, specificity.eval()) + + +# TODO(nsilberman): Break this up into two sets of tests. 
+class StreamingPrecisionRecallThresholdsTest(tf.test.TestCase): + + def setUp(self): + np.random.seed(1) + tf.reset_default_graph() + + def testVars(self): + metrics.precision_at_thresholds( + predictions=tf.ones((10, 1)), labels=tf.ones((10, 1)), + thresholds=[0, 0.5, 1.0]) + _assert_local_variables(self, ( + 'precision_at_thresholds/true_positives:0', + 'precision_at_thresholds/false_positives:0', + )) + + def testMetricsCollection(self): + my_collection_name = '__metrics__' + prec, _ = metrics.precision_at_thresholds( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + thresholds=[0, 0.5, 1.0], + metrics_collections=[my_collection_name]) + rec, _ = metrics.recall_at_thresholds( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + thresholds=[0, 0.5, 1.0], + metrics_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [prec, rec]) + + def testUpdatesCollection(self): + my_collection_name = '__updates__' + _, precision_op = metrics.precision_at_thresholds( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + thresholds=[0, 0.5, 1.0], + updates_collections=[my_collection_name]) + _, recall_op = metrics.recall_at_thresholds( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + thresholds=[0, 0.5, 1.0], + updates_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), + [precision_op, recall_op]) + + def testValueTensorIsIdempotent(self): + predictions = tf.random_uniform((10, 3), maxval=1, dtype=tf.float32, seed=1) + labels = tf.random_uniform((10, 3), maxval=1, dtype=tf.int64, seed=1) + thresholds = [0, 0.5, 1.0] + prec, prec_op = metrics.precision_at_thresholds( + labels, predictions, thresholds) + rec, rec_op = metrics.recall_at_thresholds( + labels, predictions, thresholds) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + + # Run several updates, then verify idempotency. + sess.run([prec_op, rec_op]) + initial_prec = prec.eval() + initial_rec = rec.eval() + for _ in range(10): + sess.run([prec_op, rec_op]) + self.assertAllClose(initial_prec, prec.eval()) + self.assertAllClose(initial_rec, rec.eval()) + + # TODO(nsilberman): fix tests (passing but incorrect). 
+ def testAllCorrect(self): + inputs = np.random.randint(0, 2, size=(100, 1)) + + with self.test_session() as sess: + predictions = tf.constant(inputs, dtype=tf.float32) + labels = tf.constant(inputs) + thresholds = [0.5] + prec, prec_op = metrics.precision_at_thresholds( + labels, predictions, thresholds) + rec, rec_op = metrics.recall_at_thresholds( + labels, predictions, thresholds) + + sess.run(tf.local_variables_initializer()) + sess.run([prec_op, rec_op]) + + self.assertEqual(1, prec.eval()) + self.assertEqual(1, rec.eval()) + + def testSomeCorrect(self): + with self.test_session() as sess: + predictions = tf.constant([1, 0, 1, 0], shape=(1, 4), dtype=tf.float32) + labels = tf.constant([0, 1, 1, 0], shape=(1, 4)) + thresholds = [0.5] + prec, prec_op = metrics.precision_at_thresholds( + labels, predictions, thresholds) + rec, rec_op = metrics.recall_at_thresholds( + labels, predictions, thresholds) + + sess.run(tf.local_variables_initializer()) + sess.run([prec_op, rec_op]) + + self.assertAlmostEqual(0.5, prec.eval()) + self.assertAlmostEqual(0.5, rec.eval()) + + def testAllIncorrect(self): + inputs = np.random.randint(0, 2, size=(100, 1)) + + with self.test_session() as sess: + predictions = tf.constant(inputs, dtype=tf.float32) + labels = tf.constant(1 - inputs, dtype=tf.float32) + thresholds = [0.5] + prec, prec_op = metrics.precision_at_thresholds( + labels, predictions, thresholds) + rec, rec_op = metrics.recall_at_thresholds( + labels, predictions, thresholds) + + sess.run(tf.local_variables_initializer()) + sess.run([prec_op, rec_op]) + + self.assertAlmostEqual(0, prec.eval()) + self.assertAlmostEqual(0, rec.eval()) + + def testWeights1d(self): + with self.test_session() as sess: + predictions = tf.constant([[1, 0], [1, 0]], shape=(2, 2), + dtype=tf.float32) + labels = tf.constant([[0, 1], [1, 0]], shape=(2, 2)) + weights = tf.constant([[0], [1]], shape=(2, 1), dtype=tf.float32) + thresholds = [0.5, 1.1] + prec, prec_op = metrics.precision_at_thresholds( + labels, predictions, thresholds, weights=weights) + rec, rec_op = metrics.recall_at_thresholds( + labels, predictions, thresholds, weights=weights) + + [prec_low, prec_high] = tf.split(0, 2, prec) + prec_low = tf.reshape(prec_low, shape=()) + prec_high = tf.reshape(prec_high, shape=()) + [rec_low, rec_high] = tf.split(0, 2, rec) + rec_low = tf.reshape(rec_low, shape=()) + rec_high = tf.reshape(rec_high, shape=()) + + sess.run(tf.local_variables_initializer()) + sess.run([prec_op, rec_op]) + + self.assertAlmostEqual(1.0, prec_low.eval(), places=5) + self.assertAlmostEqual(0.0, prec_high.eval(), places=5) + self.assertAlmostEqual(1.0, rec_low.eval(), places=5) + self.assertAlmostEqual(0.0, rec_high.eval(), places=5) + + def testWeights2d(self): + with self.test_session() as sess: + predictions = tf.constant([[1, 0], [1, 0]], shape=(2, 2), + dtype=tf.float32) + labels = tf.constant([[0, 1], [1, 0]], shape=(2, 2)) + weights = tf.constant([[0, 0], [1, 1]], shape=(2, 2), dtype=tf.float32) + thresholds = [0.5, 1.1] + prec, prec_op = metrics.precision_at_thresholds( + labels, predictions, thresholds, weights=weights) + rec, rec_op = metrics.recall_at_thresholds( + labels, predictions, thresholds, weights=weights) + + [prec_low, prec_high] = tf.split(0, 2, prec) + prec_low = tf.reshape(prec_low, shape=()) + prec_high = tf.reshape(prec_high, shape=()) + [rec_low, rec_high] = tf.split(0, 2, rec) + rec_low = tf.reshape(rec_low, shape=()) + rec_high = tf.reshape(rec_high, shape=()) + + sess.run(tf.local_variables_initializer()) + 
sess.run([prec_op, rec_op]) + + self.assertAlmostEqual(1.0, prec_low.eval(), places=5) + self.assertAlmostEqual(0.0, prec_high.eval(), places=5) + self.assertAlmostEqual(1.0, rec_low.eval(), places=5) + self.assertAlmostEqual(0.0, rec_high.eval(), places=5) + + def testExtremeThresholds(self): + with self.test_session() as sess: + predictions = tf.constant([1, 0, 1, 0], shape=(1, 4), dtype=tf.float32) + labels = tf.constant([0, 1, 1, 1], shape=(1, 4)) + thresholds = [-1.0, 2.0] # lower/higher than any values + prec, prec_op = metrics.precision_at_thresholds( + labels, predictions, thresholds) + rec, rec_op = metrics.recall_at_thresholds( + labels, predictions, thresholds) + + [prec_low, prec_high] = tf.split(0, 2, prec) + [rec_low, rec_high] = tf.split(0, 2, rec) + + sess.run(tf.local_variables_initializer()) + sess.run([prec_op, rec_op]) + + self.assertAlmostEqual(0.75, prec_low.eval()) + self.assertAlmostEqual(0.0, prec_high.eval()) + self.assertAlmostEqual(1.0, rec_low.eval()) + self.assertAlmostEqual(0.0, rec_high.eval()) + + def testZeroLabelsPredictions(self): + with self.test_session() as sess: + predictions = tf.zeros([4], dtype=tf.float32) + labels = tf.zeros([4]) + thresholds = [0.5] + prec, prec_op = metrics.precision_at_thresholds( + labels, predictions, thresholds) + rec, rec_op = metrics.recall_at_thresholds( + labels, predictions, thresholds) + + sess.run(tf.local_variables_initializer()) + sess.run([prec_op, rec_op]) + + self.assertAlmostEqual(0, prec.eval(), 6) + self.assertAlmostEqual(0, rec.eval(), 6) + + def testWithMultipleUpdates(self): + num_samples = 1000 + batch_size = 10 + num_batches = int(num_samples / batch_size) + + # Create the labels and data. + labels = np.random.randint(0, 2, size=(num_samples, 1)) + noise = np.random.normal(0.0, scale=0.2, size=(num_samples, 1)) + predictions = 0.4 + 0.2 * labels + noise + predictions[predictions > 1] = 1 + predictions[predictions < 0] = 0 + thresholds = [0.3] + + tp = 0 + fp = 0 + fn = 0 + tn = 0 + for i in range(num_samples): + if predictions[i] > thresholds[0]: + if labels[i] == 1: + tp += 1 + else: + fp += 1 + else: + if labels[i] == 1: + fn += 1 + else: + tn += 1 + epsilon = 1e-7 + expected_prec = tp / (epsilon + tp + fp) + expected_rec = tp / (epsilon + tp + fn) + + labels = labels.astype(np.float32) + predictions = predictions.astype(np.float32) + + with self.test_session() as sess: + # Reshape the data so its easy to queue up: + predictions_batches = predictions.reshape((batch_size, num_batches)) + labels_batches = labels.reshape((batch_size, num_batches)) + + # Enqueue the data: + predictions_queue = tf.FIFOQueue(num_batches, dtypes=tf.float32, + shapes=(batch_size,)) + labels_queue = tf.FIFOQueue(num_batches, dtypes=tf.float32, + shapes=(batch_size,)) + + for i in range(int(num_batches)): + tf_prediction = tf.constant(predictions_batches[:, i]) + tf_label = tf.constant(labels_batches[:, i]) + sess.run([predictions_queue.enqueue(tf_prediction), + labels_queue.enqueue(tf_label)]) + + tf_predictions = predictions_queue.dequeue() + tf_labels = labels_queue.dequeue() + + prec, prec_op = metrics.precision_at_thresholds( + tf_labels, tf_predictions, thresholds) + rec, rec_op = metrics.recall_at_thresholds( + tf_labels, tf_predictions, thresholds) + + sess.run(tf.local_variables_initializer()) + for _ in range(int(num_samples / batch_size)): + sess.run([prec_op, rec_op]) + # Since this is only approximate, we can't expect a 6 digits match. 
+ # Although with higher number of samples/thresholds we should see the + # accuracy improving + self.assertAlmostEqual(expected_prec, prec.eval(), 2) + self.assertAlmostEqual(expected_rec, rec.eval(), 2) + + +class StreamingSparsePrecisionTest(tf.test.TestCase): + + def _test_streaming_sparse_precision_at_k(self, + predictions, + labels, + k, + expected, + class_id=None, + weights=None): + with tf.Graph().as_default() as g, self.test_session(g): + if weights is not None: + weights = tf.constant(weights, tf.float32) + metric, update = metrics.sparse_precision_at_k( + predictions=tf.constant(predictions, tf.float32), labels=labels, + k=k, class_id=class_id, weights=weights) + + # Fails without initialized vars. + self.assertRaises(tf.OpError, metric.eval) + self.assertRaises(tf.OpError, update.eval) + tf.initialize_variables(tf.local_variables()).run() + + # Run per-step op and assert expected values. + if math.isnan(expected): + _assert_nan(self, update.eval()) + _assert_nan(self, metric.eval()) + else: + self.assertEqual(expected, update.eval()) + self.assertEqual(expected, metric.eval()) + + def _test_streaming_sparse_average_precision_at_k( + self, predictions, labels, k, expected, weights=None): + with tf.Graph().as_default() as g, self.test_session(g): + if weights is not None: + weights = tf.constant(weights, tf.float32) + predictions = tf.constant(predictions, tf.float32) + metric, update = metrics.sparse_average_precision_at_k( + labels, predictions, k, weights=weights) + + # Fails without initialized vars. + self.assertRaises(tf.OpError, metric.eval) + self.assertRaises(tf.OpError, update.eval) + local_variables = tf.local_variables() + tf.initialize_variables(local_variables).run() + + # Run per-step op and assert expected values. + if math.isnan(expected): + _assert_nan(self, update.eval()) + _assert_nan(self, metric.eval()) + else: + self.assertAlmostEqual(expected, update.eval()) + self.assertAlmostEqual(expected, metric.eval()) + + def test_average_precision(self): + # Example 1. + # Matches example here: + # fastml.com/what-you-wanted-to-know-about-mean-average-precision + labels_ex1 = (0, 1, 2, 3, 4) + labels = np.array([labels_ex1], dtype=np.int64) + predictions_ex1 = (0.2, 0.1, 0.0, 0.4, 0.0, 0.5, 0.3) + predictions = (predictions_ex1,) + precision_ex1 = ( + 0.0 / 1, + 1.0 / 2, + 1.0 / 3, + 2.0 / 4 + ) + avg_precision_ex1 = ( + 0.0 / 1, + precision_ex1[1] / 2, + precision_ex1[1] / 3, + (precision_ex1[1] + precision_ex1[3]) / 4 + ) + for i in xrange(4): + k = i + 1 + self._test_streaming_sparse_precision_at_k( + predictions, labels, k, expected=precision_ex1[i]) + self._test_streaming_sparse_average_precision_at_k( + predictions, labels, k, expected=avg_precision_ex1[i]) + + # Example 2. + labels_ex2 = (0, 2, 4, 5, 6) + labels = np.array([labels_ex2], dtype=np.int64) + predictions_ex2 = (0.3, 0.5, 0.0, 0.4, 0.0, 0.1, 0.2) + predictions = (predictions_ex2,) + precision_ex2 = ( + 0.0 / 1, + 0.0 / 2, + 1.0 / 3, + 2.0 / 4 + ) + avg_precision_ex2 = ( + 0.0 / 1, + 0.0 / 2, + precision_ex2[2] / 3, + (precision_ex2[2] + precision_ex2[3]) / 4 + ) + for i in xrange(4): + k = i + 1 + self._test_streaming_sparse_precision_at_k( + predictions, labels, k, expected=precision_ex2[i]) + self._test_streaming_sparse_average_precision_at_k( + predictions, labels, k, expected=avg_precision_ex2[i]) + + # Both examples, we expect both precision and average precision to be the + # average of the 2 examples. 
+ labels = np.array([labels_ex1, labels_ex2], dtype=np.int64) + predictions = (predictions_ex1, predictions_ex2) + streaming_precision = [ + (ex1 + ex2) / 2 + for ex1, ex2 in zip(precision_ex1, precision_ex2)] + streaming_average_precision = [ + (ex1 + ex2) / 2 + for ex1, ex2 in zip(avg_precision_ex1, avg_precision_ex2)] + for i in xrange(4): + k = i + 1 + self._test_streaming_sparse_precision_at_k( + predictions, labels, k, expected=streaming_precision[i]) + self._test_streaming_sparse_average_precision_at_k( + predictions, labels, k, expected=streaming_average_precision[i]) + + # Weighted examples, we expect streaming average precision to be the + # weighted average of the 2 examples. + weights = (0.3, 0.6) + streaming_average_precision = [ + (weights[0] * ex1 + weights[1] * ex2) / (weights[0] + weights[1]) + for ex1, ex2 in zip(avg_precision_ex1, avg_precision_ex2)] + for i in xrange(4): + k = i + 1 + self._test_streaming_sparse_average_precision_at_k( + predictions, labels, k, expected=streaming_average_precision[i], + weights=weights) + + def test_average_precision_some_labels_out_of_range(self): + """Tests that labels outside the [0, n_classes) range are ignored.""" + labels_ex1 = (-1, 0, 1, 2, 3, 4, 7) + labels = np.array([labels_ex1], dtype=np.int64) + predictions_ex1 = (0.2, 0.1, 0.0, 0.4, 0.0, 0.5, 0.3) + predictions = (predictions_ex1,) + precision_ex1 = ( + 0.0 / 1, + 1.0 / 2, + 1.0 / 3, + 2.0 / 4 + ) + avg_precision_ex1 = ( + 0.0 / 1, + precision_ex1[1] / 2, + precision_ex1[1] / 3, + (precision_ex1[1] + precision_ex1[3]) / 4 + ) + for i in xrange(4): + k = i + 1 + self._test_streaming_sparse_precision_at_k( + predictions, labels, k, expected=precision_ex1[i]) + self._test_streaming_sparse_average_precision_at_k( + predictions, labels, k, expected=avg_precision_ex1[i]) + + def test_one_label_at_k1_nan(self): + predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]] + sparse_labels = _binary_2d_label_to_sparse_value( + [[0, 0, 0, 1], [0, 0, 1, 0]]) + dense_labels = np.array([[3], [2]], dtype=np.int64) + + for labels in (sparse_labels, dense_labels): + # Classes 0,1,2 have 0 predictions, classes -1 and 4 are out of range. + for class_id in (-1, 0, 1, 2, 4): + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=1, expected=NAN, class_id=class_id) + + def test_one_label_at_k1(self): + predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]] + sparse_labels = _binary_2d_label_to_sparse_value( + [[0, 0, 0, 1], [0, 0, 1, 0]]) + dense_labels = np.array([[3], [2]], dtype=np.int64) + + for labels in (sparse_labels, dense_labels): + # Class 3: 1 label, 2 predictions, 1 correct. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=1, expected=1.0 / 2, class_id=3) + + # All classes: 2 labels, 2 predictions, 1 correct. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=1, expected=1.0 / 2) + + def test_three_labels_at_k5_no_predictions(self): + predictions = [ + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] + ] + sparse_labels = _binary_2d_label_to_sparse_value([ + [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] + ]) + dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64) + + for labels in (sparse_labels, dense_labels): + # Classes 1,3,8 have 0 predictions, classes -1 and 10 are out of range. 
+ for class_id in (-1, 1, 3, 8, 10): + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=NAN, class_id=class_id) + + def test_three_labels_at_k5_no_labels(self): + predictions = [ + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] + ] + sparse_labels = _binary_2d_label_to_sparse_value([ + [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] + ]) + dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64) + + for labels in (sparse_labels, dense_labels): + # Classes 0,4,6,9: 0 labels, >=1 prediction. + for class_id in (0, 4, 6, 9): + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=0.0, class_id=class_id) + + def test_three_labels_at_k5(self): + predictions = [ + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] + ] + sparse_labels = _binary_2d_label_to_sparse_value([ + [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] + ]) + dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64) + + for labels in (sparse_labels, dense_labels): + # Class 2: 2 labels, 2 correct predictions. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=2.0 / 2, + class_id=2) + + # Class 5: 1 label, 1 correct prediction. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=1.0 / 1, class_id=5) + + # Class 7: 1 label, 1 incorrect prediction. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=0.0 / 1, class_id=7) + + # All classes: 10 predictions, 3 correct. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=3.0 / 10) + + def test_three_labels_at_k5_some_out_of_range(self): + """Tests that labels outside the [0, n_classes) range are ignored.""" + predictions = [ + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] + ] + sp_labels = tf.SparseTensorValue( + indices=[[0, 0], [0, 1], [0, 2], [0, 3], + [1, 0], [1, 1], [1, 2], [1, 3]], + # values -1 and 10 are outside the [0, n_classes) range and are ignored. + values=np.array([2, 7, -1, 8, + 1, 2, 5, 10], np.int64), + shape=[2, 4]) + + # Class 2: 2 labels, 2 correct predictions. + self._test_streaming_sparse_precision_at_k( + predictions, sp_labels, k=5, expected=2.0 / 2, class_id=2) + + # Class 5: 1 label, 1 correct prediction. + self._test_streaming_sparse_precision_at_k( + predictions, sp_labels, k=5, expected=1.0 / 1, class_id=5) + + # Class 7: 1 label, 1 incorrect prediction. + self._test_streaming_sparse_precision_at_k( + predictions, sp_labels, k=5, expected=0.0 / 1, class_id=7) + + # All classes: 10 predictions, 3 correct. + self._test_streaming_sparse_precision_at_k( + predictions, sp_labels, k=5, expected=3.0 / 10) + + def test_3d_nan(self): + predictions = [[ + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] + ], [ + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] + ]] + labels = _binary_3d_label_to_sparse_value([[ + [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] + ], [ + [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] + ]]) + + # Classes 1,3,8 have 0 predictions, classes -1 and 10 are out of range. 
+ for class_id in (-1, 1, 3, 8, 10): + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=NAN, class_id=class_id) + + def test_3d_no_labels(self): + predictions = [[ + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] + ], [ + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] + ]] + labels = _binary_3d_label_to_sparse_value([[ + [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] + ], [ + [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] + ]]) + + # Classes 0,4,6,9: 0 labels, >=1 prediction. + for class_id in (0, 4, 6, 9): + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=0.0, class_id=class_id) + + def test_3d(self): + predictions = [[ + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] + ], [ + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] + ]] + labels = _binary_3d_label_to_sparse_value([[ + [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] + ], [ + [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] + ]]) + + # Class 2: 4 predictions, all correct. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=4.0 / 4, class_id=2) + + # Class 5: 2 predictions, both correct. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=2.0 / 2, class_id=5) + + # Class 7: 2 predictions, 1 correct. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=1.0 / 2, class_id=7) + + # All classes: 20 predictions, 7 correct. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=7.0 / 20) + + def test_3d_ignore_some(self): + predictions = [[ + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] + ], [ + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] + ]] + labels = _binary_3d_label_to_sparse_value([[ + [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] + ], [ + [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] + ]]) + + # Class 2: 2 predictions, both correct. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=2.0 / 2.0, class_id=2, + weights=[[1], [0]]) + + # Class 2: 2 predictions, both correct. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=2.0 / 2.0, class_id=2, + weights=[[0], [1]]) + + # Class 7: 1 incorrect prediction. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=0.0 / 1.0, class_id=7, + weights=[[1], [0]]) + + # Class 7: 1 correct prediction. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=1.0 / 1.0, class_id=7, + weights=[[0], [1]]) + + # Class 7: no predictions. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=NAN, class_id=7, + weights=[[1, 0], [0, 1]]) + + # Class 7: 2 predictions, 1 correct. 
+ self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=1.0 / 2.0, class_id=7, + weights=[[0, 1], [1, 0]]) + + def test_sparse_tensor_value(self): + predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]] + labels = [[0, 0, 0, 1], [0, 0, 1, 0]] + expected_precision = 0.5 + with self.test_session(): + _, precision = metrics.sparse_precision_at_k( + predictions=tf.constant(predictions, tf.float32), + labels=_binary_2d_label_to_sparse_value(labels), k=1) + + tf.initialize_variables(tf.local_variables()).run() + + self.assertEqual(expected_precision, precision.eval()) + + +class RecallAtkTest(tf.test.TestCase): + + def _test_streaming_sparse_recall_at_k(self, + predictions, + labels, + k, + expected, + class_id=None, + weights=None): + with tf.Graph().as_default() as g, self.test_session(g): + if weights is not None: + weights = tf.constant(weights, tf.float32) + metric, update = metrics.recall_at_k( + predictions=tf.constant(predictions, tf.float32), + labels=labels, k=k, class_id=class_id, weights=weights) + + # Fails without initialized vars. + self.assertRaises(tf.OpError, metric.eval) + self.assertRaises(tf.OpError, update.eval) + tf.initialize_variables(tf.local_variables()).run() + + # Run per-step op and assert expected values. + if math.isnan(expected): + _assert_nan(self, update.eval()) + _assert_nan(self, metric.eval()) + else: + self.assertEqual(expected, update.eval()) + self.assertEqual(expected, metric.eval()) + + def test_one_label_at_k1_nan(self): + predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]] + sparse_labels = _binary_2d_label_to_sparse_value( + [[0, 0, 0, 1], [0, 0, 1, 0]]) + dense_labels = np.array([[3], [2]], dtype=np.int64) + + # Classes 0,1 have 0 labels, 0 predictions, classes -1 and 4 are out of + # range. + for labels in (sparse_labels, dense_labels): + for class_id in (-1, 0, 1, 4): + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=NAN, + class_id=class_id) + + def test_one_label_at_k1_no_predictions(self): + predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]] + sparse_labels = _binary_2d_label_to_sparse_value( + [[0, 0, 0, 1], [0, 0, 1, 0]]) + dense_labels = np.array([[3], [2]], dtype=np.int64) + + for labels in (sparse_labels, dense_labels): + # Class 2: 0 predictions. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=0.0, + class_id=2) + + def test_one_label_at_k1(self): + predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]] + sparse_labels = _binary_2d_label_to_sparse_value( + [[0, 0, 0, 1], [0, 0, 1, 0]]) + dense_labels = np.array([[3], [2]], dtype=np.int64) + + for labels in (sparse_labels, dense_labels): + # Class 3: 1 label, 2 predictions, 1 correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 1, + class_id=3) + + # All classes: 2 labels, 2 predictions, 1 correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 2) + + def test_one_label_at_k1_weighted(self): + predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]] + sparse_labels = _binary_2d_label_to_sparse_value( + [[0, 0, 0, 1], [0, 0, 1, 0]]) + dense_labels = np.array([[3], [2]], dtype=np.int64) + + for labels in (sparse_labels, dense_labels): + # Class 3: 1 label, 2 predictions, 1 correct. 
+ self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=NAN, class_id=3, weights=(0.0,)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 1, class_id=3, + weights=(1.0,)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 1, class_id=3, + weights=(2.0,)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=NAN, class_id=3, + weights=(0.0, 0.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=NAN, class_id=3, + weights=(0.0, 1.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 1, class_id=3, + weights=(1.0, 0.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 1, class_id=3, + weights=(1.0, 1.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=2.0 / 2, class_id=3, + weights=(2.0, 3.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=3.0 / 3, class_id=3, + weights=(3.0, 2.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=0.3 / 0.3, class_id=3, + weights=(0.3, 0.6)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=0.6 / 0.6, class_id=3, + weights=(0.6, 0.3)) + + # All classes: 2 labels, 2 predictions, 1 correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=NAN, weights=(0.0,)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 2, weights=(1.0,)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 2, weights=(2.0,)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 1, weights=(1.0, 0.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=0.0 / 1, weights=(0.0, 1.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 2, weights=(1.0, 1.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=2.0 / 5, weights=(2.0, 3.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=3.0 / 5, weights=(3.0, 2.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=0.3 / 0.9, weights=(0.3, 0.6)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=0.6 / 0.9, weights=(0.6, 0.3)) + + def test_three_labels_at_k5_nan(self): + predictions = [ + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6]] + sparse_labels = _binary_2d_label_to_sparse_value([ + [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0]]) + dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64) + + for labels in (sparse_labels, dense_labels): + # Classes 0,3,4,6,9 have 0 labels, class 10 is out of range. + for class_id in (0, 3, 4, 6, 9, 10): + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=NAN, class_id=class_id) + + def test_three_labels_at_k5_no_predictions(self): + predictions = [ + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6]] + sparse_labels = _binary_2d_label_to_sparse_value([ + [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0]]) + dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64) + + for labels in (sparse_labels, dense_labels): + # Class 8: 1 label, no predictions. 
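The weighted recall cases reduce to weighted true-positive and false-negative counts over the labelled classes, so with per-example weights (2.0, 3.0) the all-classes expectation is 2.0/5 and the class-3 expectation is 2.0/2. A small NumPy sketch of that arithmetic (an illustrative helper assuming one weight per example, not the streaming op):

```python
import numpy as np

def recall_at_k(predictions, labels, k, weights=None, class_id=None):
  """Plain-NumPy sketch of weighted recall@k (not the TF streaming op)."""
  top_k = np.argsort(-np.asarray(predictions), axis=1)[:, :k]
  if weights is None:
    weights = np.ones(len(labels))
  tp = fn = 0.0
  for row_top_k, row_labels, w in zip(top_k, labels, weights):
    wanted = row_labels if class_id is None else row_labels & {class_id}
    for c in wanted:
      if c in row_top_k:
        tp += w  # the labelled class was retrieved in the top k
      else:
        fn += w  # the labelled class was missed
  return tp / (tp + fn) if tp + fn else float('nan')

predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]]
labels = [{3}, {2}]
print(recall_at_k(predictions, labels, k=1, weights=(2.0, 3.0)))              # 2/5 = 0.4
print(recall_at_k(predictions, labels, k=1, weights=(2.0, 3.0), class_id=3))  # 2/2 = 1.0
```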
+ self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=0.0 / 1, class_id=8) + + def test_three_labels_at_k5(self): + predictions = [ + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6]] + sparse_labels = _binary_2d_label_to_sparse_value([ + [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0]]) + dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64) + + for labels in (sparse_labels, dense_labels): + # Class 2: 2 labels, both correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=2.0 / 2, class_id=2) + + # Class 5: 1 label, incorrect. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=1.0 / 1, class_id=5) + + # Class 7: 1 label, incorrect. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=0.0 / 1, class_id=7) + + # All classes: 6 labels, 3 correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=3.0 / 6) + + def test_three_labels_at_k5_some_out_of_range(self): + """Tests that labels outside the [0, n_classes) count in denominator.""" + predictions = [ + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6]] + sp_labels = tf.SparseTensorValue( + indices=[[0, 0], [0, 1], [0, 2], [0, 3], + [1, 0], [1, 1], [1, 2], [1, 3]], + # values -1 and 10 are outside the [0, n_classes) range. + values=np.array([2, 7, -1, 8, + 1, 2, 5, 10], np.int64), + shape=[2, 4]) + + # Class 2: 2 labels, both correct. + self._test_streaming_sparse_recall_at_k( + predictions=predictions, labels=sp_labels, k=5, expected=2.0 / 2, + class_id=2) + + # Class 5: 1 label, incorrect. + self._test_streaming_sparse_recall_at_k( + predictions=predictions, labels=sp_labels, k=5, expected=1.0 / 1, + class_id=5) + + # Class 7: 1 label, incorrect. + self._test_streaming_sparse_recall_at_k( + predictions=predictions, labels=sp_labels, k=5, expected=0.0 / 1, + class_id=7) + + # All classes: 8 labels, 3 correct. + self._test_streaming_sparse_recall_at_k( + predictions=predictions, labels=sp_labels, k=5, expected=3.0 / 8) + + def test_3d_nan(self): + predictions = [[ + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] + ], [ + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] + ]] + sparse_labels = _binary_3d_label_to_sparse_value([[ + [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] + ], [ + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 1, 1, 0] + ]]) + dense_labels = np.array([[ + [2, 7, 8], + [1, 2, 5] + ], [ + [1, 2, 5], + [2, 7, 8], + ]], dtype=np.int64) + + for labels in (sparse_labels, dense_labels): + # Classes 0,3,4,6,9 have 0 labels, class 10 is out of range. 
+ for class_id in (0, 3, 4, 6, 9, 10): + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=NAN, class_id=class_id) + + def test_3d_no_predictions(self): + predictions = [[ + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] + ], [ + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] + ]] + sparse_labels = _binary_3d_label_to_sparse_value([[ + [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] + ], [ + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 1, 1, 0] + ]]) + dense_labels = np.array([[ + [2, 7, 8], + [1, 2, 5] + ], [ + [1, 2, 5], + [2, 7, 8], + ]], dtype=np.int64) + + for labels in (sparse_labels, dense_labels): + # Classes 1,8 have 0 predictions, >=1 label. + for class_id in (1, 8): + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=0.0, class_id=class_id) + + def test_3d(self): + predictions = [[ + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] + ], [ + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] + ]] + labels = _binary_3d_label_to_sparse_value([[ + [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] + ], [ + [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] + ]]) + + # Class 2: 4 labels, all correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=4.0 / 4, class_id=2) + + # Class 5: 2 labels, both correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=2.0 / 2, class_id=5) + + # Class 7: 2 labels, 1 incorrect. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=1.0 / 2, class_id=7) + + # All classes: 12 labels, 7 correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=7.0 / 12) + + def test_3d_ignore_all(self): + predictions = [[ + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] + ], [ + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] + ]] + labels = _binary_3d_label_to_sparse_value([[ + [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] + ], [ + [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] + ]]) + + for class_id in xrange(10): + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=NAN, class_id=class_id, + weights=[[0], [0]]) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=NAN, class_id=class_id, + weights=[[0, 0], [0, 0]]) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=NAN, weights=[[0], [0]]) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=NAN, weights=[[0, 0], [0, 0]]) + + def test_3d_ignore_some(self): + predictions = [[ + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] + ], [ + [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], + [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] + ]] + labels = _binary_3d_label_to_sparse_value([[ + [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] + ], [ + [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] + ]]) + + # Class 2: 2 labels, both correct. 
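In these 3-D cases the weights carry either one entry per example (e.g. [[1], [0]]) or one per (example, step) pair (e.g. [[1, 0], [1, 0]]), and masked positions drop out of both numerator and denominator. A flattened hand-check of the class-7 recall with step weights [[1, 0], [1, 0]] (the top-5 sets and label sets below are written out from the tensors above; this is only a sketch of the bookkeeping):

```python
# Treat each (example, step) pair as one row with its own weight.
top5 = [{9, 4, 6, 2, 0}, {5, 7, 2, 9, 6},   # example 1, steps 1-2
        {5, 7, 2, 9, 6}, {9, 4, 6, 2, 0}]   # example 2, steps 1-2
labels = [{2, 7, 8}, {1, 2, 5}, {1, 2, 5, 7}, {2, 8}]
weights = [1, 0, 1, 0]                      # [[1, 0], [1, 0]] flattened

tp = sum(w for t, l, w in zip(top5, labels, weights) if 7 in l and 7 in t)
fn = sum(w for t, l, w in zip(top5, labels, weights) if 7 in l and 7 not in t)
print(float(tp) / (tp + fn))                # class 7: 1 correct of 2 weighted labels -> 0.5
```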
+ self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=2.0 / 2.0, class_id=2, + weights=[[1], [0]]) + + # Class 2: 2 labels, both correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=2.0 / 2.0, class_id=2, + weights=[[0], [1]]) + + # Class 7: 1 label, correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=1.0 / 1.0, class_id=7, + weights=[[0], [1]]) + + # Class 7: 1 label, incorrect. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=0.0 / 1.0, class_id=7, + weights=[[1], [0]]) + + # Class 7: 2 labels, 1 correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=1.0 / 2.0, class_id=7, + weights=[[1, 0], [1, 0]]) + + # Class 7: No labels. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=NAN, class_id=7, + weights=[[0, 1], [0, 1]]) + + def test_sparse_tensor_value(self): + predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]] + labels = [[0, 0, 1, 0], [0, 0, 0, 1]] + expected_recall = 0.5 + with self.test_session(): + _, recall = metrics.recall_at_k( + predictions=tf.constant(predictions, tf.float32), + labels=_binary_2d_label_to_sparse_value(labels), k=1) + + tf.initialize_variables(tf.local_variables()).run() + + self.assertEqual(expected_recall, recall.eval()) + + +class MeanAbsoluteErrorTest(tf.test.TestCase): + + def setUp(self): + tf.reset_default_graph() + + def testVars(self): + metrics.mean_absolute_error( + predictions=tf.ones((10, 1)), labels=tf.ones((10, 1))) + _assert_local_variables(self, ( + 'mean_absolute_error/count:0', + 'mean_absolute_error/total:0' + )) + + def testMetricsCollection(self): + my_collection_name = '__metrics__' + mean, _ = metrics.mean_absolute_error( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + metrics_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [mean]) + + def testUpdatesCollection(self): + my_collection_name = '__updates__' + _, update_op = metrics.mean_absolute_error( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + updates_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [update_op]) + + def testValueTensorIsIdempotent(self): + predictions = tf.random_normal((10, 3), seed=1) + labels = tf.random_normal((10, 3), seed=2) + error, update_op = metrics.mean_absolute_error( + labels, predictions) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + + # Run several updates. + for _ in range(10): + sess.run(update_op) + + # Then verify idempotency. 
+ initial_error = error.eval() + for _ in range(10): + self.assertEqual(initial_error, error.eval()) + + def testSingleUpdateWithErrorAndWeights(self): + predictions = tf.constant([2, 4, 6, 8], shape=(1, 4), dtype=tf.float32) + labels = tf.constant([1, 3, 2, 3], shape=(1, 4), dtype=tf.float32) + weights = tf.constant([0, 1, 0, 1], shape=(1, 4)) + + error, update_op = metrics.mean_absolute_error( + labels, predictions, weights) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + self.assertEqual(3, sess.run(update_op)) + self.assertEqual(3, error.eval()) + + +class MeanRelativeErrorTest(tf.test.TestCase): + + def setUp(self): + tf.reset_default_graph() + + def testVars(self): + metrics.mean_relative_error( + predictions=tf.ones((10, 1)), labels=tf.ones((10, 1)), + normalizer=tf.ones((10, 1))) + _assert_local_variables(self, ( + 'mean_relative_error/count:0', + 'mean_relative_error/total:0' + )) + + def testMetricsCollection(self): + my_collection_name = '__metrics__' + mean, _ = metrics.mean_relative_error( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + normalizer=tf.ones((10, 1)), + metrics_collections=[my_collection_name]) + self.assertListEqual( + tf.get_collection(my_collection_name), [mean]) + + def testUpdatesCollection(self): + my_collection_name = '__updates__' + _, update_op = metrics.mean_relative_error( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + normalizer=tf.ones((10, 1)), + updates_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [update_op]) + + def testValueTensorIsIdempotent(self): + predictions = tf.random_normal((10, 3), seed=1) + labels = tf.random_normal((10, 3), seed=2) + normalizer = tf.random_normal((10, 3), seed=3) + error, update_op = metrics.mean_relative_error( + labels, predictions, normalizer) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + + # Run several updates. + for _ in range(10): + sess.run(update_op) + + # Then verify idempotency. 
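For reference, the weighted mean_absolute_error expectation of 3 above follows directly from the streaming total/count form; a plain-NumPy check with the same values (a sketch of the arithmetic, not the op):

```python
import numpy as np

predictions = np.array([2., 4., 6., 8.])
labels = np.array([1., 3., 2., 3.])
weights = np.array([0., 1., 0., 1.])

# update_op style: total += sum(w * |p - l|); count += sum(w); metric = total / count.
total = np.sum(weights * np.abs(predictions - labels))  # 0 + 1 + 0 + 5 = 6
count = np.sum(weights)                                 # 2
print(total / count)                                    # 3.0
```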
+ initial_error = error.eval() + for _ in range(10): + self.assertEqual(initial_error, error.eval()) + + def testSingleUpdateNormalizedByLabels(self): + np_predictions = np.asarray([2, 4, 6, 8], dtype=np.float32) + np_labels = np.asarray([1, 3, 2, 3], dtype=np.float32) + expected_error = np.mean( + np.divide(np.absolute(np_predictions - np_labels), + np_labels)) + + predictions = tf.constant(np_predictions, shape=(1, 4), dtype=tf.float32) + labels = tf.constant(np_labels, shape=(1, 4)) + + error, update_op = metrics.mean_relative_error( + labels, predictions, normalizer=labels) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + self.assertEqual(expected_error, sess.run(update_op)) + self.assertEqual(expected_error, error.eval()) + + def testSingleUpdateNormalizedByZeros(self): + np_predictions = np.asarray([2, 4, 6, 8], dtype=np.float32) + + predictions = tf.constant(np_predictions, shape=(1, 4), dtype=tf.float32) + labels = tf.constant([1, 3, 2, 3], shape=(1, 4), dtype=tf.float32) + + error, update_op = metrics.mean_relative_error( + labels, predictions, normalizer=tf.zeros_like(labels)) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + self.assertEqual(0.0, sess.run(update_op)) + self.assertEqual(0.0, error.eval()) + + +class MeanSquaredErrorTest(tf.test.TestCase): + + def setUp(self): + tf.reset_default_graph() + + def testVars(self): + metrics.mean_squared_error( + predictions=tf.ones((10, 1)), labels=tf.ones((10, 1))) + _assert_local_variables(self, ( + 'mean_squared_error/count:0', + 'mean_squared_error/total:0' + )) + + def testMetricsCollection(self): + my_collection_name = '__metrics__' + mean, _ = metrics.mean_squared_error( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + metrics_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [mean]) + + def testUpdatesCollection(self): + my_collection_name = '__updates__' + _, update_op = metrics.mean_squared_error( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + updates_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [update_op]) + + def testValueTensorIsIdempotent(self): + predictions = tf.random_normal((10, 3), seed=1) + labels = tf.random_normal((10, 3), seed=2) + error, update_op = metrics.mean_squared_error( + labels, predictions) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + + # Run several updates. + for _ in range(10): + sess.run(update_op) + + # Then verify idempotency. 
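The single-update mean_relative_error expectation above is just the average of |p - l| / normalizer, which with the labels as normalizer works out to 1.25, while a zero normalizer falls back to 0 through the op's safe division. A quick NumPy check of the normalized-by-labels case:

```python
import numpy as np

predictions = np.array([2., 4., 6., 8.])
labels = np.array([1., 3., 2., 3.])

# normalizer = labels: mean(1, 1/3, 2, 5/3) = 1.25
print(np.mean(np.abs(predictions - labels) / labels))
```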
+ initial_error = error.eval() + for _ in range(10): + self.assertEqual(initial_error, error.eval()) + + def testSingleUpdateZeroError(self): + predictions = tf.zeros((1, 3), dtype=tf.float32) + labels = tf.zeros((1, 3), dtype=tf.float32) + + error, update_op = metrics.mean_squared_error( + labels, predictions) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + self.assertEqual(0, sess.run(update_op)) + self.assertEqual(0, error.eval()) + + def testSingleUpdateWithError(self): + predictions = tf.constant([2, 4, 6], shape=(1, 3), dtype=tf.float32) + labels = tf.constant([1, 3, 2], shape=(1, 3), dtype=tf.float32) + + error, update_op = metrics.mean_squared_error( + labels, predictions) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + self.assertEqual(6, sess.run(update_op)) + self.assertEqual(6, error.eval()) + + def testSingleUpdateWithErrorAndWeights(self): + predictions = tf.constant([2, 4, 6, 8], shape=(1, 4), dtype=tf.float32) + labels = tf.constant([1, 3, 2, 3], shape=(1, 4), dtype=tf.float32) + weights = tf.constant([0, 1, 0, 1], shape=(1, 4)) + + error, update_op = metrics.mean_squared_error( + labels, predictions, weights) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + self.assertEqual(13, sess.run(update_op)) + self.assertEqual(13, error.eval()) + + def testMultipleBatchesOfSizeOne(self): + with self.test_session() as sess: + # Create the queue that populates the predictions. + preds_queue = tf.FIFOQueue(2, dtypes=tf.float32, shapes=(1, 3)) + _enqueue_vector(sess, preds_queue, [10, 8, 6]) + _enqueue_vector(sess, preds_queue, [-4, 3, -1]) + predictions = preds_queue.dequeue() + + # Create the queue that populates the labels. + labels_queue = tf.FIFOQueue(2, dtypes=tf.float32, shapes=(1, 3)) + _enqueue_vector(sess, labels_queue, [1, 3, 2]) + _enqueue_vector(sess, labels_queue, [2, 4, 6]) + labels = labels_queue.dequeue() + + error, update_op = metrics.mean_squared_error( + labels, predictions) + + sess.run(tf.local_variables_initializer()) + sess.run(update_op) + self.assertAlmostEqual(208.0 / 6, sess.run(update_op), 5) + + self.assertAlmostEqual(208.0 / 6, error.eval(), 5) + + def testMetricsComputedConcurrently(self): + with self.test_session() as sess: + # Create the queue that populates one set of predictions. + preds_queue0 = tf.FIFOQueue(2, dtypes=tf.float32, shapes=(1, 3)) + _enqueue_vector(sess, preds_queue0, [10, 8, 6]) + _enqueue_vector(sess, preds_queue0, [-4, 3, -1]) + predictions0 = preds_queue0.dequeue() + + # Create the queue that populates one set of predictions. + preds_queue1 = tf.FIFOQueue(2, dtypes=tf.float32, shapes=(1, 3)) + _enqueue_vector(sess, preds_queue1, [0, 1, 1]) + _enqueue_vector(sess, preds_queue1, [1, 1, 0]) + predictions1 = preds_queue1.dequeue() + + # Create the queue that populates one set of labels. + labels_queue0 = tf.FIFOQueue(2, dtypes=tf.float32, shapes=(1, 3)) + _enqueue_vector(sess, labels_queue0, [1, 3, 2]) + _enqueue_vector(sess, labels_queue0, [2, 4, 6]) + labels0 = labels_queue0.dequeue() + + # Create the queue that populates another set of labels. 
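The queue-based mean_squared_error case above accumulates two size-one batches into the same total/count locals, which is where the expected 208/6 comes from; a NumPy check of that running sum (a sketch of the bookkeeping only):

```python
import numpy as np

# The two size-one batches fed through the FIFO queues above.
batches = [
    (np.array([10., 8., 6.]), np.array([1., 3., 2.])),
    (np.array([-4., 3., -1.]), np.array([2., 4., 6.])),
]

total = count = 0.0
for preds, labels in batches:
  total += np.sum((preds - labels) ** 2)  # 122, then 122 + 86 = 208
  count += preds.size                     # 3, then 6
print(total / count)                      # 208/6 ~= 34.667
```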
+ labels_queue1 = tf.FIFOQueue(2, dtypes=tf.float32, shapes=(1, 3)) + _enqueue_vector(sess, labels_queue1, [-5, -3, -1]) + _enqueue_vector(sess, labels_queue1, [5, 4, 3]) + labels1 = labels_queue1.dequeue() + + mse0, update_op0 = metrics.mean_squared_error( + labels0, predictions0, name='msd0') + mse1, update_op1 = metrics.mean_squared_error( + labels1, predictions1, name='msd1') + + sess.run(tf.local_variables_initializer()) + sess.run([update_op0, update_op1]) + sess.run([update_op0, update_op1]) + + mse0, mse1 = sess.run([mse0, mse1]) + self.assertAlmostEqual(208.0 / 6, mse0, 5) + self.assertAlmostEqual(79.0 / 6, mse1, 5) + + def testMultipleMetricsOnMultipleBatchesOfSizeOne(self): + with self.test_session() as sess: + # Create the queue that populates the predictions. + preds_queue = tf.FIFOQueue(2, dtypes=tf.float32, shapes=(1, 3)) + _enqueue_vector(sess, preds_queue, [10, 8, 6]) + _enqueue_vector(sess, preds_queue, [-4, 3, -1]) + predictions = preds_queue.dequeue() + + # Create the queue that populates the labels. + labels_queue = tf.FIFOQueue(2, dtypes=tf.float32, shapes=(1, 3)) + _enqueue_vector(sess, labels_queue, [1, 3, 2]) + _enqueue_vector(sess, labels_queue, [2, 4, 6]) + labels = labels_queue.dequeue() + + mae, ma_update_op = metrics.mean_absolute_error( + labels, predictions) + mse, ms_update_op = metrics.mean_squared_error( + labels, predictions) + + sess.run(tf.local_variables_initializer()) + sess.run([ma_update_op, ms_update_op]) + sess.run([ma_update_op, ms_update_op]) + + self.assertAlmostEqual(32.0 / 6, mae.eval(), 5) + self.assertAlmostEqual(208.0 / 6, mse.eval(), 5) + + +class RootMeanSquaredErrorTest(tf.test.TestCase): + + def setUp(self): + tf.reset_default_graph() + + def testVars(self): + metrics.root_mean_squared_error( + predictions=tf.ones((10, 1)), labels=tf.ones((10, 1))) + _assert_local_variables(self, ( + 'root_mean_squared_error/count:0', + 'root_mean_squared_error/total:0' + )) + + def testMetricsCollection(self): + my_collection_name = '__metrics__' + mean, _ = metrics.root_mean_squared_error( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + metrics_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [mean]) + + def testUpdatesCollection(self): + my_collection_name = '__updates__' + _, update_op = metrics.root_mean_squared_error( + predictions=tf.ones((10, 1)), + labels=tf.ones((10, 1)), + updates_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [update_op]) + + def testValueTensorIsIdempotent(self): + predictions = tf.random_normal((10, 3), seed=1) + labels = tf.random_normal((10, 3), seed=2) + error, update_op = metrics.root_mean_squared_error( + labels, predictions) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + + # Run several updates. + for _ in range(10): + sess.run(update_op) + + # Then verify idempotency. 
+ initial_error = error.eval() + for _ in range(10): + self.assertEqual(initial_error, error.eval()) + + def testSingleUpdateZeroError(self): + with self.test_session() as sess: + predictions = tf.constant(0.0, shape=(1, 3), dtype=tf.float32) + labels = tf.constant(0.0, shape=(1, 3), dtype=tf.float32) + + rmse, update_op = metrics.root_mean_squared_error( + labels, predictions) + + sess.run(tf.local_variables_initializer()) + self.assertEqual(0, sess.run(update_op)) + + self.assertEqual(0, rmse.eval()) + + def testSingleUpdateWithError(self): + with self.test_session() as sess: + predictions = tf.constant([2, 4, 6], shape=(1, 3), dtype=tf.float32) + labels = tf.constant([1, 3, 2], shape=(1, 3), dtype=tf.float32) + + rmse, update_op = metrics.root_mean_squared_error( + labels, predictions) + + sess.run(tf.local_variables_initializer()) + self.assertAlmostEqual(math.sqrt(6), update_op.eval(), 5) + self.assertAlmostEqual(math.sqrt(6), rmse.eval(), 5) + + def testSingleUpdateWithErrorAndWeights(self): + with self.test_session() as sess: + predictions = tf.constant([2, 4, 6, 8], shape=(1, 4), dtype=tf.float32) + labels = tf.constant([1, 3, 2, 3], shape=(1, 4), dtype=tf.float32) + weights = tf.constant([0, 1, 0, 1], shape=(1, 4)) + + rmse, update_op = metrics.root_mean_squared_error( + labels, predictions, weights) + + sess.run(tf.local_variables_initializer()) + self.assertAlmostEqual(math.sqrt(13), sess.run(update_op)) + + self.assertAlmostEqual(math.sqrt(13), rmse.eval(), 5) + + +def _reweight(predictions, labels, weights): + return (np.concatenate([[p] * int(w) for p, w in zip(predictions, weights)]), + np.concatenate([[l] * int(w) for l, w in zip(labels, weights)])) + + +class MeanCosineDistanceTest(tf.test.TestCase): + + def setUp(self): + tf.reset_default_graph() + + def testVars(self): + metrics.mean_cosine_distance( + predictions=tf.ones((10, 3)), labels=tf.ones((10, 3)), dim=1) + _assert_local_variables(self, ( + 'mean_cosine_distance/count:0', + 'mean_cosine_distance/total:0', + )) + + def testMetricsCollection(self): + my_collection_name = '__metrics__' + mean, _ = metrics.mean_cosine_distance( + predictions=tf.ones((10, 3)), + labels=tf.ones((10, 3)), + dim=1, + metrics_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [mean]) + + def testUpdatesCollection(self): + my_collection_name = '__updates__' + _, update_op = metrics.mean_cosine_distance( + predictions=tf.ones((10, 3)), + labels=tf.ones((10, 3)), + dim=1, + updates_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [update_op]) + + def testValueTensorIsIdempotent(self): + predictions = tf.random_normal((10, 3), seed=1) + labels = tf.random_normal((10, 3), seed=2) + error, update_op = metrics.mean_cosine_distance( + labels, predictions, dim=1) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + + # Run several updates. + for _ in range(10): + sess.run(update_op) + + # Then verify idempotency. 
+ initial_error = error.eval() + for _ in range(10): + self.assertEqual(initial_error, error.eval()) + + def testSingleUpdateZeroError(self): + np_labels = np.matrix(('1 0 0;' + '0 0 1;' + '0 1 0')) + + predictions = tf.constant(np_labels, shape=(1, 3, 3), dtype=tf.float32) + labels = tf.constant(np_labels, shape=(1, 3, 3), dtype=tf.float32) + + error, update_op = metrics.mean_cosine_distance( + labels, predictions, dim=2) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + self.assertEqual(0, sess.run(update_op)) + self.assertEqual(0, error.eval()) + + def testSingleUpdateWithError1(self): + np_labels = np.matrix(('1 0 0;' + '0 0 1;' + '0 1 0')) + np_predictions = np.matrix(('1 0 0;' + '0 0 -1;' + '1 0 0')) + + predictions = tf.constant(np_predictions, shape=(3, 1, 3), dtype=tf.float32) + labels = tf.constant(np_labels, shape=(3, 1, 3), dtype=tf.float32) + + error, update_op = metrics.mean_cosine_distance( + labels, predictions, dim=2) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + self.assertAlmostEqual(1, sess.run(update_op), 5) + self.assertAlmostEqual(1, error.eval(), 5) + + def testSingleUpdateWithError2(self): + np_predictions = np.matrix(( + '0.819031913261206 0.567041924552012 0.087465312324590;' + '-0.665139432070255 -0.739487441769973 -0.103671883216994;' + '0.707106781186548 -0.707106781186548 0')) + np_labels = np.matrix(( + '0.819031913261206 0.567041924552012 0.087465312324590;' + '0.665139432070255 0.739487441769973 0.103671883216994;' + '0.707106781186548 0.707106781186548 0')) + + predictions = tf.constant(np_predictions, shape=(3, 1, 3), dtype=tf.float32) + labels = tf.constant(np_labels, shape=(3, 1, 3), dtype=tf.float32) + error, update_op = metrics.mean_cosine_distance( + labels, predictions, dim=2) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + self.assertAlmostEqual(1.0, sess.run(update_op), 5) + self.assertAlmostEqual(1.0, error.eval(), 5) + + def testSingleUpdateWithErrorAndWeights1(self): + np_predictions = np.matrix(('1 0 0;' + '0 0 -1;' + '1 0 0')) + np_labels = np.matrix(('1 0 0;' + '0 0 1;' + '0 1 0')) + + predictions = tf.constant(np_predictions, shape=(3, 1, 3), dtype=tf.float32) + labels = tf.constant(np_labels, shape=(3, 1, 3), dtype=tf.float32) + weights = tf.constant([1, 0, 0], shape=(3, 1, 1), dtype=tf.float32) + + error, update_op = metrics.mean_cosine_distance( + labels, predictions, dim=2, weights=weights) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + self.assertEqual(0, sess.run(update_op)) + self.assertEqual(0, error.eval()) + + def testSingleUpdateWithErrorAndWeights2(self): + np_predictions = np.matrix(('1 0 0;' + '0 0 -1;' + '1 0 0')) + np_labels = np.matrix(('1 0 0;' + '0 0 1;' + '0 1 0')) + + predictions = tf.constant(np_predictions, shape=(3, 1, 3), dtype=tf.float32) + labels = tf.constant(np_labels, shape=(3, 1, 3), dtype=tf.float32) + weights = tf.constant([0, 1, 1], shape=(3, 1, 1), dtype=tf.float32) + + error, update_op = metrics.mean_cosine_distance( + labels, predictions, dim=2, weights=weights) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + self.assertEqual(1.5, update_op.eval()) + self.assertEqual(1.5, error.eval()) + + +class PcntBelowThreshTest(tf.test.TestCase): + + def setUp(self): + tf.reset_default_graph() + + def testVars(self): + metrics.percentage_below(values=tf.ones((10,)), threshold=2) + _assert_local_variables(self, ( + 
'percentage_below_threshold/count:0', + 'percentage_below_threshold/total:0', + )) + + def testMetricsCollection(self): + my_collection_name = '__metrics__' + mean, _ = metrics.percentage_below( + values=tf.ones((10,)), + threshold=2, + metrics_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [mean]) + + def testUpdatesCollection(self): + my_collection_name = '__updates__' + _, update_op = metrics.percentage_below( + values=tf.ones((10,)), + threshold=2, + updates_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [update_op]) + + def testOneUpdate(self): + with self.test_session() as sess: + values = tf.constant([2, 4, 6, 8], shape=(1, 4), dtype=tf.float32) + + pcnt0, update_op0 = metrics.percentage_below( + values, 100, name='high') + pcnt1, update_op1 = metrics.percentage_below( + values, 7, name='medium') + pcnt2, update_op2 = metrics.percentage_below( + values, 1, name='low') + + sess.run(tf.local_variables_initializer()) + sess.run([update_op0, update_op1, update_op2]) + + pcnt0, pcnt1, pcnt2 = sess.run([pcnt0, pcnt1, pcnt2]) + self.assertAlmostEqual(1.0, pcnt0, 5) + self.assertAlmostEqual(0.75, pcnt1, 5) + self.assertAlmostEqual(0.0, pcnt2, 5) + + def testSomePresentOneUpdate(self): + with self.test_session() as sess: + values = tf.constant([2, 4, 6, 8], shape=(1, 4), dtype=tf.float32) + weights = tf.constant([1, 0, 0, 1], shape=(1, 4), dtype=tf.float32) + + pcnt0, update_op0 = metrics.percentage_below( + values, 100, weights=weights, name='high') + pcnt1, update_op1 = metrics.percentage_below( + values, 7, weights=weights, name='medium') + pcnt2, update_op2 = metrics.percentage_below( + values, 1, weights=weights, name='low') + + sess.run(tf.local_variables_initializer()) + self.assertListEqual([1.0, 0.5, 0.0], + sess.run([update_op0, update_op1, update_op2])) + + pcnt0, pcnt1, pcnt2 = sess.run([pcnt0, pcnt1, pcnt2]) + self.assertAlmostEqual(1.0, pcnt0, 5) + self.assertAlmostEqual(0.5, pcnt1, 5) + self.assertAlmostEqual(0.0, pcnt2, 5) + + +class MeanIOUTest(tf.test.TestCase): + + def setUp(self): + np.random.seed(1) + tf.reset_default_graph() + + def testVars(self): + metrics.mean_iou( + predictions=tf.ones([10, 1]), labels=tf.ones([10, 1]), num_classes=2) + _assert_local_variables(self, ('mean_iou/total_confusion_matrix:0',)) + + def testMetricsCollections(self): + my_collection_name = '__metrics__' + mean_iou, _ = metrics.mean_iou( + predictions=tf.ones([10, 1]), + labels=tf.ones([10, 1]), + num_classes=2, + metrics_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [mean_iou]) + + def testUpdatesCollection(self): + my_collection_name = '__updates__' + _, update_op = metrics.mean_iou( + predictions=tf.ones([10, 1]), + labels=tf.ones([10, 1]), + num_classes=2, + updates_collections=[my_collection_name]) + self.assertListEqual(tf.get_collection(my_collection_name), [update_op]) + + def testPredictionsAndLabelsOfDifferentSizeRaisesValueError(self): + predictions = tf.ones([10, 3]) + labels = tf.ones([10, 4]) + with self.assertRaises(ValueError): + metrics.mean_iou( + labels, predictions, num_classes=2) + + def testLabelsAndWeightsOfDifferentSizeRaisesValueError(self): + predictions = tf.ones([10]) + labels = tf.ones([10]) + weights = tf.zeros([9]) + with self.assertRaises(ValueError): + metrics.mean_iou( + labels, predictions, num_classes=2, weights=weights) + + def testValueTensorIsIdempotent(self): + num_classes = 3 + predictions = 
tf.random_uniform([10], maxval=num_classes, + dtype=tf.int64, seed=1) + labels = tf.random_uniform([10], maxval=num_classes, + dtype=tf.int64, seed=1) + miou, update_op = metrics.mean_iou( + labels, predictions, num_classes=num_classes) + + with self.test_session() as sess: + sess.run(tf.local_variables_initializer()) + + # Run several updates. + for _ in range(10): + sess.run(update_op) + + # Then verify idempotency. + initial_miou = miou.eval() + for _ in range(10): + self.assertEqual(initial_miou, miou.eval()) + + def testMultipleUpdates(self): + num_classes = 3 + with self.test_session() as sess: + # Create the queue that populates the predictions. + preds_queue = tf.FIFOQueue(5, dtypes=tf.int32, shapes=(1, 1)) + _enqueue_vector(sess, preds_queue, [0]) + _enqueue_vector(sess, preds_queue, [1]) + _enqueue_vector(sess, preds_queue, [2]) + _enqueue_vector(sess, preds_queue, [1]) + _enqueue_vector(sess, preds_queue, [0]) + predictions = preds_queue.dequeue() + + # Create the queue that populates the labels. + labels_queue = tf.FIFOQueue(5, dtypes=tf.int32, shapes=(1, 1)) + _enqueue_vector(sess, labels_queue, [0]) + _enqueue_vector(sess, labels_queue, [1]) + _enqueue_vector(sess, labels_queue, [1]) + _enqueue_vector(sess, labels_queue, [2]) + _enqueue_vector(sess, labels_queue, [1]) + labels = labels_queue.dequeue() + + miou, update_op = metrics.mean_iou( + labels, predictions, num_classes) + + sess.run(tf.local_variables_initializer()) + for _ in range(5): + sess.run(update_op) + desired_output = np.mean([1.0/2.0, 1.0/4.0, 0.]) + self.assertEqual(desired_output, miou.eval()) + + def testMultipleUpdatesWithWeights(self): + num_classes = 2 + with self.test_session() as sess: + # Create the queue that populates the predictions. + preds_queue = tf.FIFOQueue(6, dtypes=tf.int32, shapes=(1, 1)) + _enqueue_vector(sess, preds_queue, [0]) + _enqueue_vector(sess, preds_queue, [1]) + _enqueue_vector(sess, preds_queue, [0]) + _enqueue_vector(sess, preds_queue, [1]) + _enqueue_vector(sess, preds_queue, [0]) + _enqueue_vector(sess, preds_queue, [1]) + predictions = preds_queue.dequeue() + + # Create the queue that populates the labels. + labels_queue = tf.FIFOQueue(6, dtypes=tf.int32, shapes=(1, 1)) + _enqueue_vector(sess, labels_queue, [0]) + _enqueue_vector(sess, labels_queue, [1]) + _enqueue_vector(sess, labels_queue, [1]) + _enqueue_vector(sess, labels_queue, [0]) + _enqueue_vector(sess, labels_queue, [0]) + _enqueue_vector(sess, labels_queue, [1]) + labels = labels_queue.dequeue() + + # Create the queue that populates the weights. + weights_queue = tf.FIFOQueue(6, dtypes=tf.float32, shapes=(1, 1)) + _enqueue_vector(sess, weights_queue, [1.0]) + _enqueue_vector(sess, weights_queue, [1.0]) + _enqueue_vector(sess, weights_queue, [1.0]) + _enqueue_vector(sess, weights_queue, [0.0]) + _enqueue_vector(sess, weights_queue, [1.0]) + _enqueue_vector(sess, weights_queue, [0.0]) + weights = weights_queue.dequeue() + + miou, update_op = metrics.mean_iou( + labels, predictions, num_classes, weights=weights) + + sess.run(tf.local_variables_initializer()) + for _ in range(6): + sess.run(update_op) + desired_output = np.mean([2.0/3.0, 1.0/2.0]) + self.assertAlmostEqual(desired_output, miou.eval()) + + def testMultipleUpdatesWithMissingClass(self): + # Test the case where there are no predicions and labels for + # one class, and thus there is one row and one column with + # zero entries in the confusion matrix. + num_classes = 3 + with self.test_session() as sess: + # Create the queue that populates the predictions. 
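The mean_iou expectations above come straight from the accumulated confusion matrix: per class, IoU = true positives / (row sum + column sum - true positives), averaged over all classes, with zero-denominator classes contributing 0 as in these tests. A NumPy sketch using the values from testMultipleUpdates (illustrative helper, not the streaming op):

```python
import numpy as np

def mean_iou_from_pairs(labels, predictions, num_classes):
  """Plain-NumPy sketch of the mean-IoU arithmetic."""
  cm = np.zeros((num_classes, num_classes))
  for l, p in zip(labels, predictions):
    cm[l, p] += 1  # accumulated confusion matrix (rows: labels, cols: predictions)
  tp = np.diag(cm)
  denom = cm.sum(axis=0) + cm.sum(axis=1) - tp  # per-class union
  iou = np.where(denom > 0, tp / np.maximum(denom, 1), 0.0)
  return iou.mean()

# Expected mean([1/2, 1/4, 0]) = 0.25 for the five (label, prediction) pairs above.
print(mean_iou_from_pairs([0, 1, 1, 2, 1], [0, 1, 2, 1, 0], num_classes=3))
```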
+ # There is no prediction for class 2. + preds_queue = tf.FIFOQueue(5, dtypes=tf.int32, shapes=(1, 1)) + _enqueue_vector(sess, preds_queue, [0]) + _enqueue_vector(sess, preds_queue, [1]) + _enqueue_vector(sess, preds_queue, [1]) + _enqueue_vector(sess, preds_queue, [1]) + _enqueue_vector(sess, preds_queue, [0]) + predictions = preds_queue.dequeue() + + # Create the queue that populates the labels. + # There is label for class 2. + labels_queue = tf.FIFOQueue(5, dtypes=tf.int32, shapes=(1, 1)) + _enqueue_vector(sess, labels_queue, [0]) + _enqueue_vector(sess, labels_queue, [1]) + _enqueue_vector(sess, labels_queue, [1]) + _enqueue_vector(sess, labels_queue, [0]) + _enqueue_vector(sess, labels_queue, [1]) + labels = labels_queue.dequeue() + + miou, update_op = metrics.mean_iou( + labels, predictions, num_classes) + + sess.run(tf.local_variables_initializer()) + for _ in range(5): + sess.run(update_op) + desired_output = np.mean([1.0/3.0, 2.0/4.0, 0.]) + self.assertAlmostEqual(desired_output, miou.eval()) + + def testUpdateOpEvalIsAccumulatedConfusionMatrix(self): + predictions = tf.concat(0, + [tf.constant(0, shape=[5]), + tf.constant(1, shape=[5])]) + labels = tf.concat(0, + [tf.constant(0, shape=[3]), + tf.constant(1, shape=[7])]) + num_classes = 2 + with self.test_session() as sess: + miou, update_op = metrics.mean_iou( + labels, predictions, num_classes) + sess.run(tf.local_variables_initializer()) + confusion_matrix = update_op.eval() + self.assertAllEqual([[3, 2], [0, 5]], confusion_matrix) + desired_miou = np.mean([3./5., 5./7.]) + self.assertAlmostEqual(desired_miou, miou.eval()) + + def testAllCorrect(self): + predictions = tf.zeros([40]) + labels = tf.zeros([40]) + num_classes = 1 + with self.test_session() as sess: + miou, update_op = metrics.mean_iou( + labels, predictions, num_classes) + sess.run(tf.local_variables_initializer()) + self.assertEqual(40, update_op.eval()[0]) + self.assertEqual(1.0, miou.eval()) + + def testAllWrong(self): + predictions = tf.zeros([40]) + labels = tf.ones([40]) + num_classes = 2 + with self.test_session() as sess: + miou, update_op = metrics.mean_iou( + labels, predictions, num_classes) + sess.run(tf.local_variables_initializer()) + self.assertAllEqual([[0, 40], [0, 0]], update_op.eval()) + self.assertEqual(0., miou.eval()) + + def testResultsWithSomeMissing(self): + predictions = tf.concat(0, [tf.constant(0, shape=[5]), + tf.constant(1, shape=[5])]) + labels = tf.concat(0, [tf.constant(0, shape=[3]), + tf.constant(1, shape=[7])]) + num_classes = 2 + weights = tf.concat(0, [tf.constant(0, shape=[1]), + tf.constant(1, shape=[8]), + tf.constant(0, shape=[1])]) + with self.test_session() as sess: + miou, update_op = metrics.mean_iou( + labels, predictions, num_classes, weights=weights) + sess.run(tf.local_variables_initializer()) + self.assertAllEqual([[2, 2], [0, 4]], update_op.eval()) + desired_miou = np.mean([2./4., 4./6.]) + self.assertAlmostEqual(desired_miou, miou.eval()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/python/ops/confusion_matrix.py b/tensorflow/python/ops/confusion_matrix.py new file mode 100644 index 00000000000..0b9e79c640b --- /dev/null +++ b/tensorflow/python/ops/confusion_matrix.py @@ -0,0 +1,163 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Confusion matrix related utilities. + + +@@remove_squeezable_dimensions +@@confusion_matrix +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import sparse_ops + + +def remove_squeezable_dimensions(labels, predictions, name=None): + """Squeeze last dim if ranks of `predictions` and `labels` differ by 1. + + This will use static shape if available. Otherwise, it will add graph + operations, which could result in a performance hit. + + Args: + labels: Label values, a `Tensor` whose dimensions match `predictions`. + predictions: Predicted values, a `Tensor` of arbitrary dimensions. + name: Name of the op. + + Returns: + Tuple of `labels` and `predictions`, possibly with last dim squeezed. + """ + with ops.name_scope(name, 'remove_squeezable_dimensions', + [labels, predictions]): + predictions = ops.convert_to_tensor(predictions) + labels = ops.convert_to_tensor(labels) + predictions_shape = predictions.get_shape() + predictions_rank = predictions_shape.ndims + labels_shape = labels.get_shape() + labels_rank = labels_shape.ndims + if (labels_rank is not None) and (predictions_rank is not None): + # Use static rank. + rank_diff = predictions_rank - labels_rank + if rank_diff == -1: + labels = array_ops.squeeze(labels, [-1]) + elif rank_diff == 1: + predictions = array_ops.squeeze(predictions, [-1]) + return labels, predictions + + # Use dynamic rank. + rank_diff = array_ops.rank(predictions) - array_ops.rank(labels) + if (predictions_rank is None) or ( + predictions_shape.dims[-1].is_compatible_with(1)): + predictions = control_flow_ops.cond( + math_ops.equal(1, rank_diff), + lambda: array_ops.squeeze(predictions, [-1]), + lambda: predictions) + if (labels_rank is None) or ( + labels_shape.dims[-1].is_compatible_with(1)): + labels = control_flow_ops.cond( + math_ops.equal(-1, rank_diff), + lambda: array_ops.squeeze(labels, [-1]), + lambda: labels) + return labels, predictions + + +def confusion_matrix(labels, predictions, num_classes=None, dtype=dtypes.int32, + name=None, weights=None): + """Computes the confusion matrix from predictions and labels. + + Calculate the Confusion Matrix for a pair of prediction and + label 1-D int arrays. + + The matrix rows represent the prediction labels and the columns + represents the real labels. The confusion matrix is always a 2-D array + of shape `[n, n]`, where `n` is the number of valid labels for a given + classification task. Both prediction and labels must be 1-D arrays of + the same shape in order for this function to work. + + If `num_classes` is None, then `num_classes` will be set to the one plus + the maximum value in either predictions or labels. 
+ Class labels are expected to start at 0. E.g., if `num_classes` was + three, then the possible labels would be `[0, 1, 2]`. + + If `weights` is not `None`, then each prediction contributes its + corresponding weight to the total value of the confusion matrix cell. + + For example: + + ```python + tf.contrib.metrics.confusion_matrix([1, 2, 4], [2, 2, 4]) ==> + [[0 0 0 0 0] + [0 0 1 0 0] + [0 0 1 0 0] + [0 0 0 0 0] + [0 0 0 0 1]] + ``` + + Note that the possible labels are assumed to be `[0, 1, 2, 3, 4]`, + resulting in a 5x5 confusion matrix. + + Args: + labels: A 1-D representing the real labels for the classification task. + predictions: A 1-D array representing the predictions for a given + classification. + num_classes: The possible number of labels the classification task can + have. If this value is not provided, it will be calculated + using both predictions and labels array. + dtype: Data type of the confusion matrix. + name: Scope name. + weights: An optional `Tensor` whose shape matches `predictions`. + + Returns: + A k X k matrix representing the confusion matrix, where k is the number of + possible labels in the classification task. + + Raises: + ValueError: If both predictions and labels are not 1-D vectors and have + mismatched shapes, or if `weights` is not `None` and its shape doesn't + match `predictions`. + """ + with ops.name_scope(name, 'confusion_matrix', + [predictions, labels, num_classes]) as name: + labels, predictions = remove_squeezable_dimensions( + ops.convert_to_tensor(labels, name='labels'), + ops.convert_to_tensor( + predictions, name='predictions')) + predictions = math_ops.cast(predictions, dtypes.int64) + labels = math_ops.cast(labels, dtypes.int64) + + if num_classes is None: + num_classes = math_ops.maximum(math_ops.reduce_max(predictions), + math_ops.reduce_max(labels)) + 1 + + if weights is not None: + predictions.get_shape().assert_is_compatible_with(weights.get_shape()) + weights = math_ops.cast(weights, dtype) + + shape = array_ops.pack([num_classes, num_classes]) + indices = array_ops.transpose(array_ops.pack([predictions, labels])) + values = (array_ops.ones_like(predictions, dtype) + if weights is None else weights) + cm_sparse = sparse_tensor.SparseTensor( + indices=indices, values=values, shape=math_ops.to_int64(shape)) + zero_matrix = array_ops.zeros(math_ops.to_int32(shape), dtype) + + return sparse_ops.sparse_add(zero_matrix, cm_sparse) diff --git a/tensorflow/python/ops/metrics.py b/tensorflow/python/ops/metrics.py new file mode 100644 index 00000000000..1394e4b7612 --- /dev/null +++ b/tensorflow/python/ops/metrics.py @@ -0,0 +1,2588 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Evaluation-related metrics. 
+ +@@accuracy +@@auc +@@mean +@@mean_absolute_error +@@mean_cosine_distance +@mean_iou +@@mean_relative_error +@@mean_squared_error +@@mean_tensor +@@percentage_below +@@precision +@@precision_at_thresholds +@@recall +@@recall_at_k +@@recall_at_thresholds +@@root_mean_squared_error +@@sensitivity_at_specificity +@@sparse_average_precision_at_k +@@sparse_precision_at_k +@@specificity_at_sensitivity + +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import confusion_matrix +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import sets +from tensorflow.python.ops import sparse_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables + + +def _local_variable(initial_value, validate_shape=True, name=None): + """Create variable and add it to `GraphKeys.LOCAL_VARIABLES` collection. + + Args: + initial_value: See variables.Variable.__init__. + validate_shape: See variables.Variable.__init__. + name: See variables.Variable.__init__. + Returns: + New variable. + """ + return variables.Variable( + initial_value, trainable=False, + collections=[ops.GraphKeys.LOCAL_VARIABLES], + validate_shape=validate_shape, name=name) + + +def _remove_squeezable_dimensions(labels, predictions, weights): + """Internal version of _remove_squeezable_dimensions which handles weights. + + Squeezes `predictions` and `labels` if their rank differs by 1. + Squeezes `weights` if its rank is 1 more than the new rank of `predictions` + + This will use static shape if available. Otherwise, it will add graph + operations, which could result in a performance hit. + + Args: + labels: Label values, a `Tensor` whose dimensions match `predictions`. + predictions: Predicted values, a `Tensor` of arbitrary dimensions. + weights: Optional weight `Tensor`. It will be squeezed if its rank is 1 + more than the new rank of `predictions` + + Returns: + Tuple of `predictions`, `labels` and `weights`, possibly with the last + dimension squeezed. + """ + labels, predictions = confusion_matrix.remove_squeezable_dimensions( + labels, predictions) + predictions.get_shape().assert_is_compatible_with(labels.get_shape()) + + if weights is not None: + weights = ops.convert_to_tensor(weights) + predictions_shape = predictions.get_shape() + predictions_rank = predictions_shape.ndims + weights_shape = weights.get_shape() + weights_rank = weights_shape.ndims + + if (predictions_rank is not None) and (weights_rank is not None): + # Use static rank. + if weights_rank - predictions_rank == 1: + weights = array_ops.squeeze(weights, [-1]) + elif (weights_rank is None) or ( + weights_shape.dims[-1].is_compatible_with(1)): + # Use dynamic rank + weights = control_flow_ops.cond( + math_ops.equal(array_ops.rank(weights), + math_ops.add(array_ops.rank(predictions), 1)), + lambda: array_ops.squeeze(weights, [-1]), + lambda: weights) + return labels, predictions, weights + + +def _create_local(name, shape, collections=None, validate_shape=True, + dtype=dtypes.float32): + """Creates a new local variable. 
+ + Args: + name: The name of the new or existing variable. + shape: Shape of the new or existing variable. + collections: A list of collection names to which the Variable will be added. + validate_shape: Whether to validate the shape of the variable. + dtype: Data type of the variables. + + Returns: + The created variable. + """ + # Make sure local variables are added to tf.GraphKeys.LOCAL_VARIABLES + collections = list(collections or []) + collections += [ops.GraphKeys.LOCAL_VARIABLES] + return variables.Variable( + initial_value=array_ops.zeros(shape, dtype=dtype), + name=name, + trainable=False, + collections=collections, + validate_shape=validate_shape) + + +def _broadcast_weights(weights, values): + """Broadcast `weights` to the same shape as `values`. + + This returns a version of `weights` following the same broadcast rules as + `mul(weights, values)`. When computing a weighted average, use this function + to broadcast `weights` before summing them; e.g., + `reduce_sum(w * v) / reduce_sum(_broadcast_weights(w, v))`. + + Args: + weights: `Tensor` whose shape is broadcastable to `values`. + values: `Tensor` of any shape. + + Returns: + `weights` broadcast to `values` shape. + """ + weights_shape = weights.get_shape() + values_shape = values.get_shape() + if (weights_shape.is_fully_defined() and + values_shape.is_fully_defined() and + weights_shape.is_compatible_with(values_shape)): + return weights + return math_ops.mul( + weights, array_ops.ones_like(values), name='broadcast_weights') + + +def _safe_div(numerator, denominator, name): + """Divides two values, returning 0 if the denominator is <= 0. + + Args: + numerator: A real `Tensor`. + denominator: A real `Tensor`, with dtype matching `numerator`. + name: Name for the returned op. + + Returns: + 0 if `denominator` <= 0, else `numerator` / `denominator` + """ + return math_ops.select( + math_ops.greater(denominator, 0), + math_ops.truediv(numerator, denominator), + 0, + name=name) + + +def _safe_scalar_div(numerator, denominator, name): + """Divides two values, returning 0 if the denominator is 0. + + Args: + numerator: A scalar `float64` `Tensor`. + denominator: A scalar `float64` `Tensor`. + name: Name for the returned op. + + Returns: + 0 if `denominator` == 0, else `numerator` / `denominator` + """ + numerator.get_shape().with_rank_at_most(1) + denominator.get_shape().with_rank_at_most(1) + return control_flow_ops.cond( + math_ops.equal( + array_ops.constant(0.0, dtype=dtypes.float64), denominator), + lambda: array_ops.constant(0.0, dtype=dtypes.float64), + lambda: math_ops.div(numerator, denominator), + name=name) + + +def mean(values, weights=None, metrics_collections=None, + updates_collections=None, name=None): + """Computes the (weighted) mean of the given values. + + The `mean` function creates two local variables, `total` and `count` + that are used to compute the average of `values`. This average is ultimately + returned as `mean` which is an idempotent operation that simply divides + `total` by `count`. + + For estimation of the metric over a stream of data, the function creates an + `update_op` operation that updates these variables and returns the `mean`. + `update_op` increments `total` with the reduced sum of the product of `values` + and `weights`, and it increments `count` with the reduced sum of `weights`. + + If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. + + Args: + values: A `Tensor` of arbitrary dimensions. 
+ weights: An optional `Tensor` whose shape is broadcastable to `values`. + metrics_collections: An optional list of collections that `mean` + should be added to. + updates_collections: An optional list of collections that `update_op` + should be added to. + name: An optional variable_scope name. + + Returns: + mean: A `Tensor` representing the current mean, the value of `total` divided + by `count`. + update_op: An operation that increments the `total` and `count` variables + appropriately and whose value matches `mean_value`. + + Raises: + ValueError: If `weights` is not `None` and its shape doesn't match `values`, + or if either `metrics_collections` or `updates_collections` are not a list + or tuple. + """ + with variable_scope.variable_scope(name, 'mean', (values, weights)): + values = math_ops.to_float(values) + + total = _create_local('total', shape=[]) + count = _create_local('count', shape=[]) + + if weights is not None: + weights = math_ops.to_float(weights) + values = math_ops.mul(values, weights) + num_values = math_ops.reduce_sum(_broadcast_weights(weights, values)) + else: + num_values = math_ops.to_float(array_ops.size(values)) + + total_compute_op = state_ops.assign_add(total, math_ops.reduce_sum(values)) + count_compute_op = state_ops.assign_add(count, num_values) + + mean_t = _safe_div(total, count, 'value') + with ops.control_dependencies([total_compute_op, count_compute_op]): + update_op = _safe_div(total, count, 'update_op') + + if metrics_collections: + ops.add_to_collections(metrics_collections, mean_t) + + if updates_collections: + ops.add_to_collections(updates_collections, update_op) + + return mean_t, update_op + + +def accuracy(labels, predictions, weights=None, metrics_collections=None, + updates_collections=None, name=None): + """Calculates how often `predictions` matches `labels`. + + The `accuracy` function creates two local variables, `total` and + `count` that are used to compute the frequency with which `predictions` + matches `labels`. This frequency is ultimately returned as `accuracy`: an + idempotent operation that simply divides `total` by `count`. + + For estimation of the metric over a stream of data, the function creates an + `update_op` operation that updates these variables and returns the `accuracy`. + Internally, an `is_correct` operation computes a `Tensor` with elements 1.0 + where the corresponding elements of `predictions` and `labels` match and 0.0 + otherwise. Then `update_op` increments `total` with the reduced sum of the + product of `weights` and `is_correct`, and it increments `count` with the + reduced sum of `weights`. + + If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. + + Args: + labels: The ground truth values, a `Tensor` whose shape matches + `predictions`. + predictions: The predicted values, a `Tensor` of any shape. + weights: An optional `Tensor` whose shape is broadcastable to `predictions`. + metrics_collections: An optional list of collections that `accuracy` should + be added to. + updates_collections: An optional list of collections that `update_op` should + be added to. + name: An optional variable_scope name. + + Returns: + accuracy: A `Tensor` representing the accuracy, the value of `total` divided + by `count`. + update_op: An operation that increments the `total` and `count` variables + appropriately and whose value matches `accuracy`. 
+
+  Raises:
+    ValueError: If `predictions` and `labels` have mismatched shapes, or if
+      `weights` is not `None` and its shape doesn't match `predictions`, or if
+      either `metrics_collections` or `updates_collections` are not a list or
+      tuple.
+  """
+  labels, predictions, weights = _remove_squeezable_dimensions(
+      labels, predictions, weights=weights)
+  predictions.get_shape().assert_is_compatible_with(labels.get_shape())
+  if labels.dtype != predictions.dtype:
+    predictions = math_ops.cast(predictions, labels.dtype)
+  is_correct = math_ops.to_float(math_ops.equal(predictions, labels))
+  return mean(is_correct, weights, metrics_collections,
+              updates_collections, name or 'accuracy')
+
+
+def _confusion_matrix_at_thresholds(
+    labels, predictions, thresholds, weights=None, includes=None):
+  """Computes true_positives, false_negatives, true_negatives, false_positives.
+
+  This function creates up to four local variables, `true_positives`,
+  `true_negatives`, `false_positives` and `false_negatives`.
+  `true_positive[i]` is defined as the total weight of values in `predictions`
+  above `thresholds[i]` whose corresponding entry in `labels` is `True`.
+  `false_negatives[i]` is defined as the total weight of values in `predictions`
+  at most `thresholds[i]` whose corresponding entry in `labels` is `True`.
+  `true_negatives[i]` is defined as the total weight of values in `predictions`
+  at most `thresholds[i]` whose corresponding entry in `labels` is `False`.
+  `false_positives[i]` is defined as the total weight of values in `predictions`
+  above `thresholds[i]` whose corresponding entry in `labels` is `False`.
+
+  For estimation of these metrics over a stream of data, for each metric the
+  function respectively creates an `update_op` operation that updates the
+  variable and returns its value.
+
+  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
+
+  Args:
+    labels: A `Tensor` whose shape matches `predictions`. `labels` will be cast
+      to `bool`.
+    predictions: A floating point `Tensor` of arbitrary shape and whose values
+      are in the range `[0, 1]`.
+    thresholds: A python list or tuple of float thresholds in `[0, 1]`.
+    weights: An optional `Tensor` whose shape is broadcastable to `predictions`.
+    includes: Tuple of keys to return, from 'tp', 'fn', 'tn', 'fp'. If `None`,
+      default to all four.
+
+  Returns:
+    values: Dict of variables of shape `[len(thresholds)]`. Keys are from
+      `includes`.
+    update_ops: Dict of operations that increments the `values`. Keys are from
+      `includes`.
+
+  Raises:
+    ValueError: If `predictions` and `labels` have mismatched shapes, or if
+      `weights` is not `None` and its shape doesn't match `predictions`, or if
+      `includes` contains invalid keys.
+  """
+  all_includes = ('tp', 'fn', 'tn', 'fp')
+  if includes is None:
+    includes = all_includes
+  else:
+    for include in includes:
+      if include not in all_includes:
+        raise ValueError('Invalid key: %s.' % include)
+
+  labels, predictions, weights = _remove_squeezable_dimensions(
+      labels, predictions, weights)
+  predictions.get_shape().assert_is_compatible_with(labels.get_shape())
+
+  num_thresholds = len(thresholds)
+
+  # Reshape predictions and labels.
+  predictions_2d = array_ops.reshape(predictions, [-1, 1])
+  labels_2d = array_ops.reshape(
+      math_ops.cast(labels, dtype=dtypes.bool), [1, -1])
+
+  # Use static shape if known.
+  num_predictions = predictions_2d.get_shape().as_list()[0]
+
+  # Otherwise use dynamic shape.
+  if num_predictions is None:
+    num_predictions = array_ops.shape(predictions_2d)[0]
+  thresh_tiled = array_ops.tile(
+      array_ops.expand_dims(array_ops.constant(thresholds), [1]),
+      array_ops.pack([1, num_predictions]))
+
+  # Tile the predictions after thresholding them across different thresholds.
+  pred_is_pos = math_ops.greater(
+      array_ops.tile(array_ops.transpose(predictions_2d), [num_thresholds, 1]),
+      thresh_tiled)
+  if ('fn' in includes) or ('tn' in includes):
+    pred_is_neg = math_ops.logical_not(pred_is_pos)
+
+  # Tile labels by number of thresholds
+  label_is_pos = array_ops.tile(labels_2d, [num_thresholds, 1])
+  if ('fp' in includes) or ('tn' in includes):
+    label_is_neg = math_ops.logical_not(label_is_pos)
+
+  if weights is not None:
+    weights = math_ops.to_float(weights)
+    weights_tiled = array_ops.tile(array_ops.reshape(_broadcast_weights(
+        weights, predictions), [1, -1]), [num_thresholds, 1])
+    thresh_tiled.get_shape().assert_is_compatible_with(
+        weights_tiled.get_shape())
+  else:
+    weights_tiled = None
+
+  values = {}
+  update_ops = {}
+
+  if 'tp' in includes:
+    true_p = _create_local('true_positives', shape=[num_thresholds])
+    is_true_positive = math_ops.to_float(
+        math_ops.logical_and(label_is_pos, pred_is_pos))
+    if weights_tiled is not None:
+      is_true_positive *= weights_tiled
+    update_ops['tp'] = state_ops.assign_add(
+        true_p, math_ops.reduce_sum(is_true_positive, 1))
+    values['tp'] = true_p
+
+  if 'fn' in includes:
+    false_n = _create_local('false_negatives', shape=[num_thresholds])
+    is_false_negative = math_ops.to_float(
+        math_ops.logical_and(label_is_pos, pred_is_neg))
+    if weights_tiled is not None:
+      is_false_negative *= weights_tiled
+    update_ops['fn'] = state_ops.assign_add(
+        false_n, math_ops.reduce_sum(is_false_negative, 1))
+    values['fn'] = false_n
+
+  if 'tn' in includes:
+    true_n = _create_local('true_negatives', shape=[num_thresholds])
+    is_true_negative = math_ops.to_float(
+        math_ops.logical_and(label_is_neg, pred_is_neg))
+    if weights_tiled is not None:
+      is_true_negative *= weights_tiled
+    update_ops['tn'] = state_ops.assign_add(
+        true_n, math_ops.reduce_sum(is_true_negative, 1))
+    values['tn'] = true_n
+
+  if 'fp' in includes:
+    false_p = _create_local('false_positives', shape=[num_thresholds])
+    is_false_positive = math_ops.to_float(
+        math_ops.logical_and(label_is_neg, pred_is_pos))
+    if weights_tiled is not None:
+      is_false_positive *= weights_tiled
+    update_ops['fp'] = state_ops.assign_add(
+        false_p, math_ops.reduce_sum(is_false_positive, 1))
+    values['fp'] = false_p
+
+  return values, update_ops
+
+
+def auc(labels, predictions, weights=None, num_thresholds=200,
+        metrics_collections=None, updates_collections=None,
+        curve='ROC', name=None):
+  """Computes the approximate AUC via a Riemann sum.
+
+  The `auc` function creates four local variables, `true_positives`,
+  `true_negatives`, `false_positives` and `false_negatives` that are used to
+  compute the AUC. To discretize the AUC curve, a linearly spaced set of
+  thresholds is used to compute pairs of recall and precision values. The area
+  under the ROC-curve is therefore computed using the height of the recall
+  values by the false positive rate, while the area under the PR-curve is
+  computed using the height of the precision values by the recall.
+
+  This value is ultimately returned as `auc`, an idempotent operation that
+  computes the area under a discretized curve of precision versus recall values
+  (computed using the aforementioned variables).
The `num_thresholds` variable + controls the degree of discretization with larger numbers of thresholds more + closely approximating the true AUC. The quality of the approximation may vary + dramatically depending on `num_thresholds`. + + For best results, `predictions` should be distributed approximately uniformly + in the range [0, 1] and not peaked around 0 or 1. The quality of the AUC + approximation may be poor if this is not the case. + + For estimation of the metric over a stream of data, the function creates an + `update_op` operation that updates these variables and returns the `auc`. + + If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. + + Args: + labels: A `bool` `Tensor` whose shape matches `predictions`. + predictions: A floating point `Tensor` of arbitrary shape and whose values + are in the range `[0, 1]`. + weights: An optional `Tensor` whose shape is broadcastable to `predictions`. + num_thresholds: The number of thresholds to use when discretizing the roc + curve. + metrics_collections: An optional list of collections that `auc` should be + added to. + updates_collections: An optional list of collections that `update_op` should + be added to. + curve: Specifies the name of the curve to be computed, 'ROC' [default] or + 'PR' for the Precision-Recall-curve. + name: An optional variable_scope name. + + Returns: + auc: A scalar `Tensor` representing the current area-under-curve. + update_op: An operation that increments the `true_positives`, + `true_negatives`, `false_positives` and `false_negatives` variables + appropriately and whose value matches `auc`. + + Raises: + ValueError: If `predictions` and `labels` have mismatched shapes, or if + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or + tuple. + """ + with variable_scope.variable_scope( + name, 'auc', (labels, predictions, weights)): + if curve != 'ROC' and curve != 'PR': + raise ValueError('curve must be either ROC or PR, %s unknown' % + (curve)) + kepsilon = 1e-7 # to account for floating point imprecisions + thresholds = [(i + 1) * 1.0 / (num_thresholds - 1) + for i in range(num_thresholds-2)] + thresholds = [0.0 - kepsilon] + thresholds + [1.0 + kepsilon] + + values, update_ops = _confusion_matrix_at_thresholds( + labels, predictions, thresholds, weights) + + # Add epsilons to avoid dividing by 0. + epsilon = 1.0e-6 + def compute_auc(tp, fn, tn, fp, name): + """Computes the roc-auc or pr-auc based on confusion counts.""" + rec = math_ops.div(tp + epsilon, tp + fn + epsilon) + if curve == 'ROC': + fp_rate = math_ops.div(fp, fp + tn + epsilon) + x = fp_rate + y = rec + else: # curve == 'PR'. 
+        prec = math_ops.div(tp + epsilon, tp + fp + epsilon)
+        x = rec
+        y = prec
+      return math_ops.reduce_sum(math_ops.mul(
+          x[:num_thresholds - 1] - x[1:],
+          (y[:num_thresholds - 1] + y[1:]) / 2.), name=name)
+
+    # sum up the areas of all the trapeziums
+    auc_value = compute_auc(
+        values['tp'], values['fn'], values['tn'], values['fp'], 'value')
+    update_op = compute_auc(
+        update_ops['tp'], update_ops['fn'], update_ops['tn'], update_ops['fp'],
+        'update_op')
+
+    if metrics_collections:
+      ops.add_to_collections(metrics_collections, auc_value)
+
+    if updates_collections:
+      ops.add_to_collections(updates_collections, update_op)
+
+    return auc_value, update_op
+
+
+def mean_absolute_error(labels, predictions, weights=None,
+                        metrics_collections=None,
+                        updates_collections=None,
+                        name=None):
+  """Computes the mean absolute error between the labels and predictions.
+
+  The `mean_absolute_error` function creates two local variables,
+  `total` and `count` that are used to compute the mean absolute error. This
+  average is weighted by `weights`, and it is ultimately returned as
+  `mean_absolute_error`: an idempotent operation that simply divides `total` by
+  `count`.
+
+  For estimation of the metric over a stream of data, the function creates an
+  `update_op` operation that updates these variables and returns the
+  `mean_absolute_error`. Internally, an `absolute_errors` operation computes the
+  absolute value of the differences between `predictions` and `labels`. Then
+  `update_op` increments `total` with the reduced sum of the product of
+  `weights` and `absolute_errors`, and it increments `count` with the reduced
+  sum of `weights`.
+
+  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
+
+  Args:
+    labels: A `Tensor` of the same shape as `predictions`.
+    predictions: A `Tensor` of arbitrary shape.
+    weights: An optional `Tensor` whose shape is broadcastable to `predictions`.
+    metrics_collections: An optional list of collections that
+      `mean_absolute_error` should be added to.
+    updates_collections: An optional list of collections that `update_op` should
+      be added to.
+    name: An optional variable_scope name.
+
+  Returns:
+    mean_absolute_error: A `Tensor` representing the current mean, the value of
+      `total` divided by `count`.
+    update_op: An operation that increments the `total` and `count` variables
+      appropriately and whose value matches `mean_absolute_error`.
+
+  Raises:
+    ValueError: If `predictions` and `labels` have mismatched shapes, or if
+      `weights` is not `None` and its shape doesn't match `predictions`, or if
+      either `metrics_collections` or `updates_collections` are not a list or
+      tuple.
+  """
+  labels, predictions, weights = _remove_squeezable_dimensions(
+      labels, predictions, weights)
+  predictions.get_shape().assert_is_compatible_with(labels.get_shape())
+  absolute_errors = math_ops.abs(predictions - labels)
+  return mean(absolute_errors, weights, metrics_collections,
+              updates_collections, name or 'mean_absolute_error')
+
+
+def mean_cosine_distance(labels, predictions, dim, weights=None,
+                         metrics_collections=None,
+                         updates_collections=None,
+                         name=None):
+  """Computes the cosine distance between the labels and predictions.
+
+  The `mean_cosine_distance` function creates two local variables,
+  `total` and `count` that are used to compute the average cosine distance
+  between `predictions` and `labels`.
This average is weighted by `weights`, + and it is ultimately returned as `mean_distance`, which is an idempotent + operation that simply divides `total` by `count`. + + For estimation of the metric over a stream of data, the function creates an + `update_op` operation that updates these variables and returns the + `mean_distance`. + + If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. + + Args: + labels: A `Tensor` of arbitrary shape. + predictions: A `Tensor` of the same shape as `labels`. + dim: The dimension along which the cosine distance is computed. + weights: An optional `Tensor` whose shape is broadcastable to `predictions`, + and whose dimension `dim` is 1. + metrics_collections: An optional list of collections that the metric + value variable should be added to. + updates_collections: An optional list of collections that the metric update + ops should be added to. + name: An optional variable_scope name. + + Returns: + mean_distance: A `Tensor` representing the current mean, the value of + `total` divided by `count`. + update_op: An operation that increments the `total` and `count` variables + appropriately. + + Raises: + ValueError: If `predictions` and `labels` have mismatched shapes, or if + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or + tuple. + """ + labels, predictions, weights = _remove_squeezable_dimensions( + labels, predictions, weights) + predictions.get_shape().assert_is_compatible_with(labels.get_shape()) + radial_diffs = math_ops.mul(predictions, labels) + radial_diffs = math_ops.reduce_sum(radial_diffs, + reduction_indices=[dim,], + keep_dims=True) + mean_distance, update_op = mean(radial_diffs, weights, + None, + None, + name or 'mean_cosine_distance') + mean_distance = math_ops.sub(1.0, mean_distance) + update_op = math_ops.sub(1.0, update_op) + + if metrics_collections: + ops.add_to_collections(metrics_collections, mean_distance) + + if updates_collections: + ops.add_to_collections(updates_collections, update_op) + + return mean_distance, update_op + + +def mean_iou(labels, + predictions, + num_classes, + weights=None, + metrics_collections=None, + updates_collections=None, + name=None): + """Calculate per-step mean Intersection-Over-Union (mIOU). + + Mean Intersection-Over-Union is a common evaluation metric for + semantic image segmentation, which first computes the IOU for each + semantic class and then computes the average over classes. + IOU is defined as follows: + IOU = true_positive / (true_positive + false_positive + false_negative). + The predictions are accumulated in a confusion matrix, weighted by `weights`, + and mIOU is then calculated from it. + + For estimation of the metric over a stream of data, the function creates an + `update_op` operation that updates these variables and returns the `mean_iou`. + + If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. + + Args: + labels: A `Tensor` of ground truth labels with shape [batch size] and of + type `int32` or `int64`. The tensor will be flattened, if its rank > 1. + predictions: A `Tensor` of prediction results for semantic labels, whose + shape is [batch size] and type `int32` or `int64`. The tensor will be + flattened, if its rank > 1. + num_classes: The possible number of labels the prediction task can + have. This value must be provided, since a confusion matrix of + dimension = [num_classes, num_classes] will be allocated. 
+ weights: An optional `Tensor` whose shape is broadcastable to `predictions`. + metrics_collections: An optional list of collections that `mean_iou` + should be added to. + updates_collections: An optional list of collections `update_op` should be + added to. + name: An optional variable_scope name. + + Returns: + mean_iou: A `Tensor` representing the mean intersection-over-union. + update_op: An operation that increments the confusion matrix. + + Raises: + ValueError: If `predictions` and `labels` have mismatched shapes, or if + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or + tuple. + """ + with variable_scope.variable_scope( + name, 'mean_iou', (predictions, labels, weights)): + # Check if shape is compatible. + predictions.get_shape().assert_is_compatible_with(labels.get_shape()) + + # Local variable to accumulate the predictions in the confusion matrix. + cm_dtype = dtypes.int64 if weights is not None else dtypes.float64 + total_cm = _create_local('total_confusion_matrix', + shape=[num_classes, num_classes], dtype=cm_dtype) + + # Cast the type to int64 required by confusion_matrix_ops. + predictions = math_ops.to_int64(predictions) + labels = math_ops.to_int64(labels) + num_classes = math_ops.to_int64(num_classes) + + # Flatten the input if its rank > 1. + predictions_rank = predictions.get_shape().ndims + if predictions_rank > 1: + predictions = array_ops.reshape(predictions, [-1]) + + labels_rank = labels.get_shape().ndims + if labels_rank > 1: + labels = array_ops.reshape(labels, [-1]) + + if weights is not None: + weights_rank = weights.get_shape().ndims + if weights_rank > 1: + weights = array_ops.reshape(weights, [-1]) + + # Accumulate the prediction to current confusion matrix. + current_cm = confusion_matrix.confusion_matrix( + labels, predictions, num_classes, weights=weights, dtype=cm_dtype) + update_op = state_ops.assign_add(total_cm, current_cm) + + def compute_mean_iou(name): + """Compute the mean intersection-over-union via the confusion matrix.""" + sum_over_row = math_ops.to_float(math_ops.reduce_sum(total_cm, 0)) + sum_over_col = math_ops.to_float(math_ops.reduce_sum(total_cm, 1)) + cm_diag = math_ops.to_float(array_ops.diag_part(total_cm)) + denominator = sum_over_row + sum_over_col - cm_diag + + # If the value of the denominator is 0, set it to 1 to avoid + # zero division. + denominator = math_ops.select( + math_ops.greater(denominator, 0), + denominator, + array_ops.ones_like(denominator)) + iou = math_ops.div(cm_diag, denominator) + return math_ops.reduce_mean(iou, name=name) + + mean_iou_v = compute_mean_iou('mean_iou') + + if metrics_collections: + ops.add_to_collections(metrics_collections, mean_iou_v) + + if updates_collections: + ops.add_to_collections(updates_collections, update_op) + + return mean_iou_v, update_op + + +def mean_relative_error(labels, predictions, normalizer, weights=None, + metrics_collections=None, + updates_collections=None, + name=None): + """Computes the mean relative error by normalizing with the given values. + + The `mean_relative_error` function creates two local variables, + `total` and `count` that are used to compute the mean relative absolute error. + This average is weighted by `weights`, and it is ultimately returned as + `mean_relative_error`: an idempotent operation that simply divides `total` by + `count`. 
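+
+  For example, a minimal sketch with illustrative values (the
+  `tf.metrics.mean_relative_error` entry point is assumed):
+
+  ```python
+  labels = tf.constant([1., 2., 4.])
+  predictions = tf.constant([1.5, 2., 3.])
+  # Relative errors are [0.5, 0., 0.25], so after one update the unweighted
+  # metric evaluates to 0.25.
+  error, update_op = tf.metrics.mean_relative_error(
+      labels, predictions, normalizer=labels)
+  ```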
+
+  For estimation of the metric over a stream of data, the function creates an
+  `update_op` operation that updates these variables and returns the
+  `mean_relative_error`. Internally, a `relative_errors` operation divides the
+  absolute value of the differences between `predictions` and `labels` by the
+  `normalizer`. Then `update_op` increments `total` with the reduced sum of the
+  product of `weights` and `relative_errors`, and it increments `count` with the
+  reduced sum of `weights`.
+
+  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
+
+  Args:
+    labels: A `Tensor` of the same shape as `predictions`.
+    predictions: A `Tensor` of arbitrary shape.
+    normalizer: A `Tensor` of the same shape as `predictions`.
+    weights: An optional `Tensor` whose shape is broadcastable to `predictions`.
+    metrics_collections: An optional list of collections that
+      `mean_relative_error` should be added to.
+    updates_collections: An optional list of collections that `update_op` should
+      be added to.
+    name: An optional variable_scope name.
+
+  Returns:
+    mean_relative_error: A `Tensor` representing the current mean, the value of
+      `total` divided by `count`.
+    update_op: An operation that increments the `total` and `count` variables
+      appropriately and whose value matches `mean_relative_error`.
+
+  Raises:
+    ValueError: If `predictions` and `labels` have mismatched shapes, or if
+      `weights` is not `None` and its shape doesn't match `predictions`, or if
+      either `metrics_collections` or `updates_collections` are not a list or
+      tuple.
+  """
+  labels, predictions, weights = _remove_squeezable_dimensions(
+      labels, predictions, weights)
+  predictions.get_shape().assert_is_compatible_with(labels.get_shape())
+
+  predictions, normalizer = confusion_matrix.remove_squeezable_dimensions(
+      predictions, normalizer)
+  predictions.get_shape().assert_is_compatible_with(normalizer.get_shape())
+  relative_errors = math_ops.select(
+      math_ops.equal(normalizer, 0.0),
+      array_ops.zeros_like(labels),
+      math_ops.div(math_ops.abs(labels - predictions), normalizer))
+  return mean(relative_errors, weights, metrics_collections,
+              updates_collections, name or 'mean_relative_error')
+
+
+def mean_squared_error(labels, predictions, weights=None,
+                       metrics_collections=None,
+                       updates_collections=None,
+                       name=None):
+  """Computes the mean squared error between the labels and predictions.
+
+  The `mean_squared_error` function creates two local variables,
+  `total` and `count` that are used to compute the mean squared error.
+  This average is weighted by `weights`, and it is ultimately returned as
+  `mean_squared_error`: an idempotent operation that simply divides `total` by
+  `count`.
+
+  For estimation of the metric over a stream of data, the function creates an
+  `update_op` operation that updates these variables and returns the
+  `mean_squared_error`. Internally, a `squared_error` operation computes the
+  element-wise square of the difference between `predictions` and `labels`. Then
+  `update_op` increments `total` with the reduced sum of the product of
+  `weights` and `squared_error`, and it increments `count` with the reduced sum
+  of `weights`.
+
+  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
+
+  Args:
+    labels: A `Tensor` of the same shape as `predictions`.
+    predictions: A `Tensor` of arbitrary shape.
+    weights: An optional `Tensor` whose shape is broadcastable to `predictions`.
+ metrics_collections: An optional list of collections that + `mean_squared_error` should be added to. + updates_collections: An optional list of collections that `update_op` should + be added to. + name: An optional variable_scope name. + + Returns: + mean_squared_error: A `Tensor` representing the current mean, the value of + `total` divided by `count`. + update_op: An operation that increments the `total` and `count` variables + appropriately and whose value matches `mean_squared_error`. + + Raises: + ValueError: If `predictions` and `labels` have mismatched shapes, or if + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or + tuple. + """ + labels, predictions, weights = _remove_squeezable_dimensions( + labels, predictions, weights) + predictions.get_shape().assert_is_compatible_with(labels.get_shape()) + squared_error = math_ops.square(labels - predictions) + return mean(squared_error, weights, metrics_collections, + updates_collections, name or 'mean_squared_error') + + +def mean_tensor(values, weights=None, metrics_collections=None, + updates_collections=None, name=None): + """Computes the element-wise (weighted) mean of the given tensors. + + In contrast to the `mean` function which returns a scalar with the + mean, this function returns an average tensor with the same shape as the + input tensors. + + The `mean_tensor` function creates two local variables, + `total_tensor` and `count_tensor` that are used to compute the average of + `values`. This average is ultimately returned as `mean` which is an idempotent + operation that simply divides `total` by `count`. + + For estimation of the metric over a stream of data, the function creates an + `update_op` operation that updates these variables and returns the `mean`. + `update_op` increments `total` with the reduced sum of the product of `values` + and `weights`, and it increments `count` with the reduced sum of `weights`. + + If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. + + Args: + values: A `Tensor` of arbitrary dimensions. + weights: An optional `Tensor` whose shape is broadcastable to `values`. + metrics_collections: An optional list of collections that `mean` + should be added to. + updates_collections: An optional list of collections that `update_op` + should be added to. + name: An optional variable_scope name. + + Returns: + mean: A float `Tensor` representing the current mean, the value of `total` + divided by `count`. + update_op: An operation that increments the `total` and `count` variables + appropriately and whose value matches `mean_value`. + + Raises: + ValueError: If `weights` is not `None` and its shape doesn't match `values`, + or if either `metrics_collections` or `updates_collections` are not a list + or tuple. 
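+
+  For example, a minimal streaming sketch (illustrative values; assumes the
+  `tf.metrics.mean_tensor` entry point and a session-based workflow with
+  local-variable initialization):
+
+  ```python
+  values = tf.constant([[1., 2.], [3., 4.]])
+  mean_t, update_op = tf.metrics.mean_tensor(values)
+  with tf.Session() as sess:
+    sess.run(tf.local_variables_initializer())
+    sess.run(update_op)
+    print(sess.run(mean_t))  # ==> [[1. 2.] [3. 4.]] after a single batch
+  ```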
+ """ + with variable_scope.variable_scope(name, 'mean', (values, weights)): + total = _create_local('total_tensor', shape=values.get_shape()) + count = _create_local('count_tensor', shape=values.get_shape()) + + num_values = array_ops.ones_like(values) + if weights is not None: + weights = math_ops.to_float(weights) + values = math_ops.mul(values, weights) + num_values = math_ops.mul(num_values, weights) + + total_compute_op = state_ops.assign_add(total, values) + count_compute_op = state_ops.assign_add(count, num_values) + + def compute_mean(total, count, name): + non_zero_count = math_ops.maximum(count, + array_ops.ones_like(count), + name=name) + return math_ops.truediv(total, non_zero_count, name=name) + + mean_t = compute_mean(total, count, 'value') + with ops.control_dependencies([total_compute_op, count_compute_op]): + update_op = compute_mean(total, count, 'update_op') + + if metrics_collections: + ops.add_to_collections(metrics_collections, mean_t) + + if updates_collections: + ops.add_to_collections(updates_collections, update_op) + + return mean_t, update_op + + +def percentage_below(values, threshold, weights=None, + metrics_collections=None, + updates_collections=None, + name=None): + """Computes the percentage of values less than the given threshold. + + The `percentage_below` function creates two local variables, + `total` and `count` that are used to compute the percentage of `values` that + fall below `threshold`. This rate is weighted by `weights`, and it is + ultimately returned as `percentage` which is an idempotent operation that + simply divides `total` by `count`. + + For estimation of the metric over a stream of data, the function creates an + `update_op` operation that updates these variables and returns the + `percentage`. + + If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. + + Args: + values: A numeric `Tensor` of arbitrary size. + threshold: A scalar threshold. + weights: An optional `Tensor` whose shape is broadcastable to `values`. + metrics_collections: An optional list of collections that the metric + value variable should be added to. + updates_collections: An optional list of collections that the metric update + ops should be added to. + name: An optional variable_scope name. + + Returns: + percentage: A `Tensor` representing the current mean, the value of `total` + divided by `count`. + update_op: An operation that increments the `total` and `count` variables + appropriately. + + Raises: + ValueError: If `weights` is not `None` and its shape doesn't match `values`, + or if either `metrics_collections` or `updates_collections` are not a list + or tuple. + """ + is_below_threshold = math_ops.to_float(math_ops.less(values, threshold)) + return mean(is_below_threshold, + weights, + metrics_collections, + updates_collections, + name or 'percentage_below_threshold') + + +def _count_condition(values, weights=None, metrics_collections=None, + updates_collections=None): + """Sums the weights of cases where the given values are True. + + If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. + + Args: + values: A `bool` `Tensor` of arbitrary size. + weights: An optional `Tensor` whose shape is broadcastable to `values`. + metrics_collections: An optional list of collections that the metric + value variable should be added to. + updates_collections: An optional list of collections that the metric update + ops should be added to. 
+ + Returns: + value_tensor: A `Tensor` representing the current value of the metric. + update_op: An operation that accumulates the error from a batch of data. + + Raises: + ValueError: If `weights` is not `None` and its shape doesn't match `values`, + or if either `metrics_collections` or `updates_collections` are not a list + or tuple. + """ + check_ops.assert_type(values, dtypes.bool) + count = _create_local('count', shape=[]) + + values = math_ops.to_float(values) + if weights is not None: + weights = math_ops.to_float(weights) + values = math_ops.mul(values, weights) + + value_tensor = array_ops.identity(count) + update_op = state_ops.assign_add(count, math_ops.reduce_sum(values)) + + if metrics_collections: + ops.add_to_collections(metrics_collections, value_tensor) + + if updates_collections: + ops.add_to_collections(updates_collections, update_op) + + return value_tensor, update_op + + +def true_positives(labels, predictions, weights=None, + metrics_collections=None, + updates_collections=None, + name=None): + """Sum the weights of true_positives. + + If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. + + Args: + labels: The ground truth values, a `bool` `Tensor` whose dimensions must + match `predictions`. + predictions: The predicted values, a `bool` `Tensor` of arbitrary + dimensions. + weights: An optional `Tensor` whose shape is broadcastable to `predictions`. + metrics_collections: An optional list of collections that the metric + value variable should be added to. + updates_collections: An optional list of collections that the metric update + ops should be added to. + name: An optional variable_scope name. + + Returns: + value_tensor: A `Tensor` representing the current value of the metric. + update_op: An operation that accumulates the error from a batch of data. + + Raises: + ValueError: If `predictions` and `labels` have mismatched shapes, or if + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or + tuple. + """ + with variable_scope.variable_scope( + name, 'true_positives', (predictions, labels, weights)): + + predictions = ops.convert_to_tensor(predictions) + labels = ops.convert_to_tensor(labels) + predictions.get_shape().assert_is_compatible_with(labels.get_shape()) + is_true_positive = math_ops.logical_and(math_ops.equal(labels, 1), + math_ops.equal(predictions, 1)) + return _count_condition(is_true_positive, weights, metrics_collections, + updates_collections) + + +def false_positives(labels, predictions, weights=None, + metrics_collections=None, + updates_collections=None, + name=None): + """Sum the weights of false positives. + + If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. + + Args: + labels: The ground truth values, a `bool` `Tensor` whose dimensions must + match `predictions`. + predictions: The predicted values, a `bool` `Tensor` of arbitrary + dimensions. + weights: An optional `Tensor` whose shape is broadcastable to `predictions`. + metrics_collections: An optional list of collections that the metric + value variable should be added to. + updates_collections: An optional list of collections that the metric update + ops should be added to. + name: An optional variable_scope name. + + Returns: + value_tensor: A `Tensor` representing the current value of the metric. + update_op: An operation that accumulates the error from a batch of data. 
+ + Raises: + ValueError: If `predictions` and `labels` have mismatched shapes, or if + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or + tuple. + """ + with variable_scope.variable_scope( + name, 'false_positives', (predictions, labels, weights)): + + predictions = ops.convert_to_tensor(predictions) + labels = ops.convert_to_tensor(labels) + predictions.get_shape().assert_is_compatible_with(labels.get_shape()) + is_false_positive = math_ops.logical_and(math_ops.equal(labels, 0), + math_ops.equal(predictions, 1)) + return _count_condition(is_false_positive, weights, metrics_collections, + updates_collections) + + +def precision(labels, predictions, weights=None, + metrics_collections=None, updates_collections=None, + name=None): + """Computes the precision of the predictions with respect to the labels. + + The `precision` function creates two local variables, + `true_positives` and `false_positives`, that are used to compute the + precision. This value is ultimately returned as `precision`, an idempotent + operation that simply divides `true_positives` by the sum of `true_positives` + and `false_positives`. + + For estimation of the metric over a stream of data, the function creates an + `update_op` operation that updates these variables and returns the + `precision`. `update_op` weights each prediction by the corresponding value in + `weights`. + + If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. + + Args: + labels: The ground truth values, a `bool` `Tensor` whose dimensions must + match `predictions`. + predictions: The predicted values, a `bool` `Tensor` of arbitrary shape. + weights: An optional `Tensor` whose shape is broadcastable to `predictions`. + metrics_collections: An optional list of collections that `precision` should + be added to. + updates_collections: An optional list of collections that `update_op` should + be added to. + name: An optional variable_scope name. + + Returns: + precision: Scalar float `Tensor` with the value of `true_positives` + divided by the sum of `true_positives` and `false_positives`. + update_op: `Operation` that increments `true_positives` and + `false_positives` variables appropriately and whose value matches + `precision`. + + Raises: + ValueError: If `predictions` and `labels` have mismatched shapes, or if + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or + tuple. 
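+
+  For example, a minimal streaming sketch (illustrative 0/1 values standing in
+  for `bool` labels and predictions; the `tf.metrics.precision` entry point and
+  a session-based workflow are assumed):
+
+  ```python
+  labels = tf.constant([1, 0, 1, 1])
+  predictions = tf.constant([1, 1, 0, 1])
+  prec, update_op = tf.metrics.precision(labels, predictions)
+  with tf.Session() as sess:
+    sess.run(tf.local_variables_initializer())
+    sess.run(update_op)
+    print(sess.run(prec))  # ==> 0.6666667, i.e. 2 TP / (2 TP + 1 FP)
+  ```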
+ """ + with variable_scope.variable_scope( + name, 'precision', (predictions, labels, weights)): + + labels, predictions, weights = _remove_squeezable_dimensions( + labels, predictions, weights) + predictions.get_shape().assert_is_compatible_with(labels.get_shape()) + + true_p, true_positives_update_op = true_positives( + labels, predictions, weights, metrics_collections=None, + updates_collections=None, name=None) + false_p, false_positives_update_op = false_positives( + labels, predictions, weights, metrics_collections=None, + updates_collections=None, name=None) + + def compute_precision(name): + return math_ops.select( + math_ops.greater(true_p + false_p, 0), + math_ops.div(true_p, true_p + false_p), + 0, + name) + + p = compute_precision('value') + with ops.control_dependencies([true_positives_update_op, + false_positives_update_op]): + update_op = compute_precision('update_op') + + if metrics_collections: + ops.add_to_collections(metrics_collections, p) + + if updates_collections: + ops.add_to_collections(updates_collections, update_op) + + return p, update_op + + +def precision_at_thresholds(labels, predictions, thresholds, + weights=None, + metrics_collections=None, + updates_collections=None, name=None): + """Computes precision values for different `thresholds` on `predictions`. + + The `precision_at_thresholds` function creates four local variables, + `true_positives`, `true_negatives`, `false_positives` and `false_negatives` + for various values of thresholds. `precision[i]` is defined as the total + weight of values in `predictions` above `thresholds[i]` whose corresponding + entry in `labels` is `True`, divided by the total weight of values in + `predictions` above `thresholds[i]` (`true_positives[i] / (true_positives[i] + + false_positives[i])`). + + For estimation of the metric over a stream of data, the function creates an + `update_op` operation that updates these variables and returns the + `precision`. + + If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. + + Args: + labels: A `bool` `Tensor` whose shape matches `predictions`. + predictions: A floating point `Tensor` of arbitrary shape and whose values + are in the range `[0, 1]`. + thresholds: A python list or tuple of float thresholds in `[0, 1]`. + weights: An optional `Tensor` whose shape is broadcastable to `predictions`. + metrics_collections: An optional list of collections that `auc` should be + added to. + updates_collections: An optional list of collections that `update_op` should + be added to. + name: An optional variable_scope name. + + Returns: + precision: A float `Tensor` of shape `[len(thresholds)]`. + update_op: An operation that increments the `true_positives`, + `true_negatives`, `false_positives` and `false_negatives` variables that + are used in the computation of `precision`. + + Raises: + ValueError: If `predictions` and `labels` have mismatched shapes, or if + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or + tuple. + """ + with variable_scope.variable_scope(name, 'precision_at_thresholds', + (predictions, labels, weights)): + values, update_ops = _confusion_matrix_at_thresholds( + labels, predictions, thresholds, weights, includes=('tp', 'fp')) + tp = values['tp'] + fp = values['fp'] + + # Avoid division by zero. 
+    epsilon = 1e-7
+    def compute_precision(name):
+      return math_ops.div(tp, epsilon + tp + fp, name='precision_' + name)
+
+    prec = compute_precision('value')
+    with ops.control_dependencies(update_ops.values()):
+      update_op = compute_precision('update_op')
+
+    if metrics_collections:
+      ops.add_to_collections(metrics_collections, prec)
+
+    if updates_collections:
+      ops.add_to_collections(updates_collections, update_op)
+
+    return prec, update_op
+
+
+def false_negatives(labels, predictions, weights=None,
+                    metrics_collections=None,
+                    updates_collections=None,
+                    name=None):
+  """Computes the total number of false negatives.
+
+  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
+
+  Args:
+    labels: The ground truth values, a `bool` `Tensor` whose dimensions must
+      match `predictions`.
+    predictions: The predicted values, a `bool` `Tensor` of arbitrary
+      dimensions.
+    weights: An optional `Tensor` whose shape is broadcastable to `predictions`.
+    metrics_collections: An optional list of collections that the metric
+      value variable should be added to.
+    updates_collections: An optional list of collections that the metric update
+      ops should be added to.
+    name: An optional variable_scope name.
+
+  Returns:
+    value_tensor: A `Tensor` representing the current value of the metric.
+    update_op: An operation that accumulates the error from a batch of data.
+
+  Raises:
+    ValueError: If `weights` is not `None` and its shape doesn't match `values`,
+      or if either `metrics_collections` or `updates_collections` are not a list
+      or tuple.
+  """
+  with variable_scope.variable_scope(
+      name, 'false_negatives', (predictions, labels, weights)):
+
+    predictions = ops.convert_to_tensor(predictions)
+    labels = ops.convert_to_tensor(labels)
+    predictions.get_shape().assert_is_compatible_with(labels.get_shape())
+    is_false_negative = math_ops.logical_and(math_ops.equal(labels, 1),
+                                             math_ops.equal(predictions, 0))
+    return _count_condition(is_false_negative, weights, metrics_collections,
+                            updates_collections)
+
+
+def recall(labels, predictions, weights=None,
+           metrics_collections=None, updates_collections=None,
+           name=None):
+  """Computes the recall of the predictions with respect to the labels.
+
+  The `recall` function creates two local variables, `true_positives`
+  and `false_negatives`, that are used to compute the recall. This value is
+  ultimately returned as `recall`, an idempotent operation that simply divides
+  `true_positives` by the sum of `true_positives` and `false_negatives`.
+
+  For estimation of the metric over a stream of data, the function creates an
+  `update_op` that updates these variables and returns the `recall`. `update_op`
+  weights each prediction by the corresponding value in `weights`.
+
+  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
+
+  Args:
+    labels: The ground truth values, a `bool` `Tensor` whose dimensions must
+      match `predictions`.
+    predictions: The predicted values, a `bool` `Tensor` of arbitrary shape.
+    weights: An optional `Tensor` whose shape is broadcastable to `predictions`.
+    metrics_collections: An optional list of collections that `recall` should
+      be added to.
+    updates_collections: An optional list of collections that `update_op` should
+      be added to.
+    name: An optional variable_scope name.
+
+  Returns:
+    recall: Scalar float `Tensor` with the value of `true_positives` divided
+      by the sum of `true_positives` and `false_negatives`.
+ update_op: `Operation` that increments `true_positives` and + `false_negatives` variables appropriately and whose value matches + `recall`. + + Raises: + ValueError: If `predictions` and `labels` have mismatched shapes, or if + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or + tuple. + """ + with variable_scope.variable_scope( + name, 'recall', (predictions, labels, weights)): + labels, predictions, weights = _remove_squeezable_dimensions( + labels, predictions, weights) + predictions.get_shape().assert_is_compatible_with(labels.get_shape()) + + true_p, true_positives_update_op = true_positives( + labels, predictions, weights, metrics_collections=None, + updates_collections=None, name=None) + false_n, false_negatives_update_op = false_negatives( + labels, predictions, weights, metrics_collections=None, + updates_collections=None, name=None) + + def compute_recall(true_p, false_n, name): + return math_ops.select( + math_ops.greater(true_p + false_n, 0), + math_ops.div(true_p, true_p + false_n), + 0, + name) + + rec = compute_recall(true_p, false_n, 'value') + with ops.control_dependencies([true_positives_update_op, + false_negatives_update_op]): + update_op = compute_recall(true_p, false_n, 'update_op') + + if metrics_collections: + ops.add_to_collections(metrics_collections, rec) + + if updates_collections: + ops.add_to_collections(updates_collections, update_op) + + return rec, update_op + + +def _at_k_name(name, k=None, class_id=None): + if k is not None: + name = '%s_at_%d' % (name, k) + else: + name = '%s_at_k' % (name) + if class_id is not None: + name = '%s_class%d' % (name, class_id) + return name + + +def _select_class_id(ids, selected_id): + """Filter all but `selected_id` out of `ids`. + + Args: + ids: `int64` `Tensor` or `SparseTensor` of IDs. + selected_id: Int id to select. + + Returns: + `SparseTensor` of same dimensions as `ids`. This contains only the entries + equal to `selected_id`. + """ + if isinstance( + ids, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)): + return sparse_ops.sparse_retain( + ids, math_ops.equal(ids.values, selected_id)) + + # TODO(ptucker): Make this more efficient, maybe add a sparse version of + # tf.equal and tf.reduce_any? + + # Shape of filled IDs is the same as `ids` with the last dim collapsed to 1. + ids_shape = array_ops.shape(ids, out_type=dtypes.int64) + ids_last_dim = array_ops.size(ids_shape) - 1 + filled_selected_id_shape = math_ops.reduced_shape( + ids_shape, array_ops.reshape(ids_last_dim, [1])) + + # Intersect `ids` with the selected ID. + filled_selected_id = array_ops.fill( + filled_selected_id_shape, math_ops.to_int64(selected_id)) + result = sets.set_intersection(filled_selected_id, ids) + return sparse_tensor.SparseTensor( + indices=result.indices, values=result.values, shape=ids_shape) + + +def _maybe_select_class_id(labels, predictions_idx, selected_id=None): + """If class ID is specified, filter all other classes. + + Args: + labels: `int64` `Tensor` or `SparseTensor` with shape + [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of + target classes for the associated prediction. Commonly, N=1 and `labels` + has shape [batch_size, num_labels]. [D1, ... DN] must match + `predictions_idx`. + predictions_idx: `int64` `Tensor` of class IDs, with shape [D1, ... DN, k] + where N >= 1. Commonly, N=1 and `predictions_idx` has shape + [batch size, k]. + selected_id: Int id to select. 
+
+  Returns:
+    Tuple of `labels` and `predictions_idx`, possibly with classes removed.
+  """
+  if selected_id is None:
+    return labels, predictions_idx
+  return (_select_class_id(labels, selected_id),
+          _select_class_id(predictions_idx, selected_id))
+
+
+def _sparse_true_positive_at_k(labels,
+                               predictions_idx,
+                               class_id=None,
+                               weights=None,
+                               name=None):
+  """Calculates true positives for recall@k and precision@k.
+
+  If `class_id` is specified, calculate binary true positives for `class_id`
+  only.
+  If `class_id` is not specified, calculate metrics for `k` predicted vs
+  `n` label classes, where `n` is the 2nd dimension of `labels`.
+
+  Args:
+    labels: `int64` `Tensor` or `SparseTensor` with shape
+      [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
+      target classes for the associated prediction. Commonly, N=1 and `labels`
+      has shape [batch_size, num_labels]. [D1, ... DN] must match
+      `predictions_idx`.
+    predictions_idx: 1-D or higher `int64` `Tensor` with last dimension `k`,
+      top `k` predicted classes. For rank `n`, the first `n-1` dimensions must
+      match `labels`.
+    class_id: Class for which we want binary metrics.
+    weights: `Tensor` whose shape is broadcastable to the first [D1, ... DN]
+      dimensions of `predictions_idx` and `labels`.
+    name: Name of operation.
+
+  Returns:
+    A [D1, ... DN] `Tensor` of true positive counts.
+  """
+  with ops.name_scope(name, 'true_positives', (predictions_idx, labels)):
+    labels, predictions_idx = _maybe_select_class_id(
+        labels, predictions_idx, class_id)
+    tp = sets.set_size(sets.set_intersection(predictions_idx, labels))
+    tp = math_ops.to_double(tp)
+    if weights is not None:
+      weights = math_ops.to_double(weights)
+      tp = math_ops.mul(tp, weights)
+    return tp
+
+
+def _streaming_sparse_true_positive_at_k(labels,
+                                         predictions_idx,
+                                         k=None,
+                                         class_id=None,
+                                         weights=None,
+                                         name=None):
+  """Calculates weighted per step true positives for recall@k and precision@k.
+
+  If `class_id` is specified, calculate binary true positives for `class_id`
+  only.
+  If `class_id` is not specified, calculate metrics for `k` predicted vs
+  `n` label classes, where `n` is the 2nd dimension of `labels`.
+
+  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
+
+  Args:
+    labels: `int64` `Tensor` or `SparseTensor` with shape
+      [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
+      target classes for the associated prediction. Commonly, N=1 and `labels`
+      has shape [batch_size, num_labels]. [D1, ... DN] must match
+      `predictions_idx`.
+    predictions_idx: 1-D or higher `int64` `Tensor` with last dimension `k`,
+      top `k` predicted classes. For rank `n`, the first `n-1` dimensions must
+      match `labels`.
+    k: Integer, k for @k metric. This is only used for default op name.
+    class_id: Class for which we want binary metrics.
+    weights: `Tensor` whose shape is broadcastable to the first [D1, ... DN]
+      dimensions of `predictions_idx` and `labels`.
+    name: Name of new variable, and namespace for other dependent ops.
+
+  Returns:
+    A tuple of `Variable` and update `Operation`.
+
+  Raises:
+    ValueError: If `weights` is not `None` and has an incompatible shape.
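+
+  For example, a minimal sketch with illustrative dense `int64` inputs:
+
+  ```python
+  labels = tf.constant([[0, 1]], dtype=tf.int64)            # label classes
+  predictions_idx = tf.constant([[1, 2]], dtype=tf.int64)   # top-2 predictions
+  # The row-wise set intersection has size 1, so the update op adds 1.0.
+  var, update = _streaming_sparse_true_positive_at_k(
+      labels=labels, predictions_idx=predictions_idx, k=2)
+  ```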
+ """ + default_name = _at_k_name('true_positive', k, class_id=class_id) + with ops.name_scope(name, default_name, (predictions_idx, labels)) as scope: + tp = _sparse_true_positive_at_k( + predictions_idx=predictions_idx, labels=labels, class_id=class_id, + weights=weights) + batch_total_tp = math_ops.to_double(math_ops.reduce_sum(tp)) + + var = _local_variable(array_ops.zeros([], dtype=dtypes.float64), name=scope) + return var, state_ops.assign_add(var, batch_total_tp, name='update') + + +def _sparse_false_negative_at_k(labels, + predictions_idx, + class_id=None, + weights=None): + """Calculates false negatives for recall@k. + + If `class_id` is specified, calculate binary true positives for `class_id` + only. + If `class_id` is not specified, calculate metrics for `k` predicted vs + `n` label classes, where `n` is the 2nd dimension of `labels_sparse`. + + Args: + labels: `int64` `Tensor` or `SparseTensor` with shape + [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of + target classes for the associated prediction. Commonly, N=1 and `labels` + has shape [batch_size, num_labels]. [D1, ... DN] must match + `predictions_idx`. + predictions_idx: 1-D or higher `int64` `Tensor` with last dimension `k`, + top `k` predicted classes. For rank `n`, the first `n-1` dimensions must + match `labels`. + class_id: Class for which we want binary metrics. + weights: `Tensor` whose shape is broadcastable to the first [D1, ... DN] + dimensions of `predictions_idx` and `labels`. + + Returns: + A [D1, ... DN] `Tensor` of false negative counts. + """ + with ops.name_scope(None, 'false_negatives', (predictions_idx, labels)): + labels, predictions_idx = _maybe_select_class_id(labels, + predictions_idx, + class_id) + fn = sets.set_size(sets.set_difference(predictions_idx, + labels, + aminusb=False)) + fn = math_ops.to_double(fn) + if weights is not None: + weights = math_ops.to_double(weights) + fn = math_ops.mul(fn, weights) + return fn + + +def _streaming_sparse_false_negative_at_k(labels, + predictions_idx, + k, + class_id=None, + weights=None, + name=None): + """Calculates weighted per step false negatives for recall@k. + + If `class_id` is specified, calculate binary true positives for `class_id` + only. + If `class_id` is not specified, calculate metrics for `k` predicted vs + `n` label classes, where `n` is the 2nd dimension of `labels`. + + If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. + + Args: + labels: `int64` `Tensor` or `SparseTensor` with shape + [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of + target classes for the associated prediction. Commonly, N=1 and `labels` + has shape [batch_size, num_labels]. [D1, ... DN] must match + `predictions_idx`. + predictions_idx: 1-D or higher `int64` `Tensor` with last dimension `k`, + top `k` predicted classes. For rank `n`, the first `n-1` dimensions must + match `labels`. + k: Integer, k for @k metric. This is only used for default op name. + class_id: Class for which we want binary metrics. + weights: `Tensor` whose shape is broadcastable to the first [D1, ... DN] + dimensions of `predictions_idx` and `labels`. + name: Name of new variable, and namespace for other dependent ops. + + Returns: + A tuple of `Variable` and update `Operation`. + + Raises: + ValueError: If `weights` is not `None` and has an incomptable shape. 
+ """ + default_name = _at_k_name('false_negative', k, class_id=class_id) + with ops.name_scope(name, default_name, (predictions_idx, labels)) as scope: + fn = _sparse_false_negative_at_k( + predictions_idx=predictions_idx, labels=labels, class_id=class_id, + weights=weights) + batch_total_fn = math_ops.to_double(math_ops.reduce_sum(fn)) + + var = _local_variable(array_ops.zeros([], dtype=dtypes.float64), name=scope) + return var, state_ops.assign_add(var, batch_total_fn, name='update') + + +def recall_at_k(labels, + predictions, + k, + class_id=None, + weights=None, + metrics_collections=None, + updates_collections=None, + name=None): + """Computes recall@k of the predictions with respect to sparse labels. + + If `class_id` is specified, we calculate recall by considering only the + entries in the batch for which `class_id` is in the label, and computing + the fraction of them for which `class_id` is in the top-k `predictions`. + If `class_id` is not specified, we'll calculate recall as how often on + average a class among the labels of a batch entry is in the top-k + `predictions`. + + `sparse_recall_at_k` creates two local variables, + `true_positive_at_` and `false_negative_at_`, that are used to compute + the recall_at_k frequency. This frequency is ultimately returned as + `recall_at_`: an idempotent operation that simply divides + `true_positive_at_` by total (`true_positive_at_` + + `false_negative_at_`). + + For estimation of the metric over a stream of data, the function creates an + `update_op` operation that updates these variables and returns the + `recall_at_`. Internally, a `top_k` operation computes a `Tensor` + indicating the top `k` `predictions`. Set operations applied to `top_k` and + `labels` calculate the true positives and false negatives weighted by + `weights`. Then `update_op` increments `true_positive_at_` and + `false_negative_at_` using these values. + + If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. + + Args: + labels: `int64` `Tensor` or `SparseTensor` with shape + [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of + target classes for the associated prediction. Commonly, N=1 and `labels` + has shape [batch_size, num_labels]. [D1, ... DN] must match `predictions`. + Values should be in range [0, num_classes), where num_classes is the last + dimension of `predictions`. Values outside this range always count + towards `false_negative_at_`. + predictions: Float `Tensor` with shape [D1, ... DN, num_classes] where + N >= 1. Commonly, N=1 and predictions has shape [batch size, num_classes]. + The final dimension contains the logit values for each class. [D1, ... DN] + must match `labels`. + k: Integer, k for @k metric. + class_id: Integer class ID for which we want binary metrics. This should be + in range [0, num_classes), where num_classes is the last dimension of + `predictions`. If class_id is outside this range, the method returns NAN. + weights: An optional `Tensor` whose shape is broadcastable to the first + [D1, ... DN] dimensions of `predictions` and `labels`. + metrics_collections: An optional list of collections that values should + be added to. + updates_collections: An optional list of collections that updates should + be added to. + name: Name of new update operation, and namespace for other dependent ops. + + Returns: + recall: Scalar `float64` `Tensor` with the value of `true_positives` divided + by the sum of `true_positives` and `false_negatives`. 
+ update_op: `Operation` that increments `true_positives` and + `false_negatives` variables appropriately, and whose value matches + `recall`. + + Raises: + ValueError: If `weights` is not `None` and its shape doesn't match + `predictions`, or if either `metrics_collections` or `updates_collections` + are not a list or tuple. + """ + default_name = _at_k_name('recall', k, class_id=class_id) + with ops.name_scope(name, default_name, (predictions, labels)) as scope: + _, top_k_idx = nn.top_k(predictions, k) + top_k_idx = math_ops.to_int64(top_k_idx) + tp, tp_update = _streaming_sparse_true_positive_at_k( + predictions_idx=top_k_idx, labels=labels, k=k, class_id=class_id, + weights=weights) + fn, fn_update = _streaming_sparse_false_negative_at_k( + predictions_idx=top_k_idx, labels=labels, k=k, class_id=class_id, + weights=weights) + + metric = math_ops.div(tp, math_ops.add(tp, fn), name=scope) + update = math_ops.div( + tp_update, math_ops.add(tp_update, fn_update), name='update') + if metrics_collections: + ops.add_to_collections(metrics_collections, metric) + if updates_collections: + ops.add_to_collections(updates_collections, update) + return metric, update + + +def recall_at_thresholds(labels, predictions, thresholds, + weights=None, metrics_collections=None, + updates_collections=None, name=None): + """Computes various recall values for different `thresholds` on `predictions`. + + The `recall_at_thresholds` function creates four local variables, + `true_positives`, `true_negatives`, `false_positives` and `false_negatives` + for various values of thresholds. `recall[i]` is defined as the total weight + of values in `predictions` above `thresholds[i]` whose corresponding entry in + `labels` is `True`, divided by the total weight of `True` values in `labels` + (`true_positives[i] / (true_positives[i] + false_negatives[i])`). + + For estimation of the metric over a stream of data, the function creates an + `update_op` operation that updates these variables and returns the `recall`. + + If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. + + Args: + labels: A `bool` `Tensor` whose shape matches `predictions`. + predictions: A floating point `Tensor` of arbitrary shape and whose values + are in the range `[0, 1]`. + thresholds: A python list or tuple of float thresholds in `[0, 1]`. + weights: An optional `Tensor` whose shape is broadcastable to `predictions`. + metrics_collections: An optional list of collections that `recall` should be + added to. + updates_collections: An optional list of collections that `update_op` should + be added to. + name: An optional variable_scope name. + + Returns: + recall: A float `Tensor` of shape `[len(thresholds)]`. + update_op: An operation that increments the `true_positives`, + `true_negatives`, `false_positives` and `false_negatives` variables that + are used in the computation of `recall`. + + Raises: + ValueError: If `predictions` and `labels` have mismatched shapes, or if + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or + tuple. + """ + with variable_scope.variable_scope(name, 'recall_at_thresholds', + (predictions, labels, weights)): + values, update_ops = _confusion_matrix_at_thresholds( + labels, predictions, thresholds, weights, includes=('tp', 'fn')) + tp = values['tp'] + fn = values['fn'] + + # Avoid division by zero. 
+ epsilon = 1e-7
+ def compute_recall(name):
+ return math_ops.div(tp, epsilon + tp + fn, name='recall_' + name)
+
+ rec = compute_recall('value')
+ with ops.control_dependencies(update_ops.values()):
+ update_op = compute_recall('update_op')
+
+ if metrics_collections:
+ ops.add_to_collections(metrics_collections, rec)
+
+ if updates_collections:
+ ops.add_to_collections(updates_collections, update_op)
+
+ return rec, update_op
+
+
+def root_mean_squared_error(labels, predictions, weights=None,
+ metrics_collections=None,
+ updates_collections=None,
+ name=None):
+ """Computes the root mean squared error between the labels and predictions.
+
+ The `root_mean_squared_error` function creates two local variables,
+ `total` and `count` that are used to compute the root mean squared error.
+ This average is weighted by `weights`, and it is ultimately returned as
+ `root_mean_squared_error`: an idempotent operation that takes the square root
+ of the division of `total` by `count`.
+
+ For estimation of the metric over a stream of data, the function creates an
+ `update_op` operation that updates these variables and returns the
+ `root_mean_squared_error`. Internally, a `squared_error` operation computes
+ the element-wise square of the difference between `predictions` and `labels`.
+ Then `update_op` increments `total` with the reduced sum of the product of
+ `weights` and `squared_error`, and it increments `count` with the reduced sum
+ of `weights`.
+
+ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
+
+ Args:
+ labels: A `Tensor` of the same shape as `predictions`.
+ predictions: A `Tensor` of arbitrary shape.
+ weights: An optional `Tensor` whose shape is broadcastable to `predictions`.
+ metrics_collections: An optional list of collections that
+ `root_mean_squared_error` should be added to.
+ updates_collections: An optional list of collections that `update_op` should
+ be added to.
+ name: An optional variable_scope name.
+
+ Returns:
+ root_mean_squared_error: A `Tensor` representing the current root mean
+ squared error, the square root of the value of `total` divided by `count`.
+ update_op: An operation that increments the `total` and `count` variables
+ appropriately and whose value matches `root_mean_squared_error`.
+
+ Raises:
+ ValueError: If `predictions` and `labels` have mismatched shapes, or if
+ `weights` is not `None` and its shape doesn't match `predictions`, or if
+ either `metrics_collections` or `updates_collections` are not a list or
+ tuple.
+ """
+ labels, predictions, weights = _remove_squeezable_dimensions(
+ labels, predictions, weights)
+ predictions.get_shape().assert_is_compatible_with(labels.get_shape())
+ value_tensor, update_op = mean_squared_error(
+ labels, predictions, weights, None, None,
+ name or 'root_mean_squared_error')
+
+ rmse = math_ops.sqrt(value_tensor)
+ with ops.control_dependencies([update_op]):
+ update_op = math_ops.sqrt(update_op)
+
+ if metrics_collections:
+ ops.add_to_collections(metrics_collections, rmse)
+
+ if updates_collections:
+ ops.add_to_collections(updates_collections, update_op)
+
+ return rmse, update_op
+
+
+def sensitivity_at_specificity(
+ labels, predictions, specificity, weights=None, num_thresholds=200,
+ metrics_collections=None, updates_collections=None, name=None):
+ """Computes the sensitivity at a given specificity.
+ + The `sensitivity_at_specificity` function creates four local + variables, `true_positives`, `true_negatives`, `false_positives` and + `false_negatives` that are used to compute the sensitivity at the given + specificity value. The threshold for the given specificity value is computed + and used to evaluate the corresponding sensitivity. + + For estimation of the metric over a stream of data, the function creates an + `update_op` operation that updates these variables and returns the + `sensitivity`. `update_op` increments the `true_positives`, `true_negatives`, + `false_positives` and `false_negatives` counts with the weight of each case + found in the `predictions` and `labels`. + + If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. + + For additional information about specificity and sensitivity, see the + following: https://en.wikipedia.org/wiki/Sensitivity_and_specificity + + Args: + labels: A `bool` `Tensor` whose shape matches `predictions`. + predictions: A floating point `Tensor` of arbitrary shape and whose values + are in the range `[0, 1]`. + specificity: A scalar value in range `[0, 1]`. + weights: An optional `Tensor` whose shape is broadcastable to `predictions`. + num_thresholds: The number of thresholds to use for matching the given + specificity. + metrics_collections: An optional list of collections that `sensitivity` + should be added to. + updates_collections: An optional list of collections that `update_op` should + be added to. + name: An optional variable_scope name. + + Returns: + sensitivity: A scalar `Tensor` representing the sensitivity at the given + `specificity` value. + update_op: An operation that increments the `true_positives`, + `true_negatives`, `false_positives` and `false_negatives` variables + appropriately and whose value matches `sensitivity`. + + Raises: + ValueError: If `predictions` and `labels` have mismatched shapes, if + `weights` is not `None` and its shape doesn't match `predictions`, or if + `specificity` is not between 0 and 1, or if either `metrics_collections` + or `updates_collections` are not a list or tuple. 
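+
+ For example, a minimal usage sketch (the tensors and session below are
+ illustrative assumptions, not part of this API):
+
+ ```python
+ labels = tf.constant([True, False, True, True])
+ predictions = tf.constant([0.8, 0.6, 0.4, 0.9])
+ sensitivity, update_op = sensitivity_at_specificity(
+     labels, predictions, specificity=0.95)
+
+ with tf.Session() as sess:
+   sess.run(tf.local_variables_initializer())  # the metric uses local variables
+   sess.run(update_op)                         # accumulate confusion counts
+   print(sess.run(sensitivity))
+ ```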
+ """ + if specificity < 0 or specificity > 1: + raise ValueError('`specificity` must be in the range [0, 1].') + + with variable_scope.variable_scope(name, 'sensitivity_at_specificity', + (predictions, labels, weights)): + kepsilon = 1e-7 # to account for floating point imprecisions + thresholds = [(i + 1) * 1.0 / (num_thresholds - 1) + for i in range(num_thresholds-2)] + thresholds = [0.0 - kepsilon] + thresholds + [1.0 + kepsilon] + + values, update_ops = _confusion_matrix_at_thresholds( + labels, predictions, thresholds, weights) + tp = values['tp'] + fn = values['fn'] + tn = values['tn'] + fp = values['fp'] + + def compute_sensitivity_at_specificity(name): + specificities = math_ops.div(tn, tn + fp + kepsilon) + tf_index = math_ops.argmin(math_ops.abs(specificities - specificity), 0) + tf_index = math_ops.cast(tf_index, dtypes.int32) + + # Now, we have the implicit threshold, so compute the sensitivity: + return math_ops.div(tp[tf_index], + tp[tf_index] + fn[tf_index] + kepsilon, + name) + + sensitivity = compute_sensitivity_at_specificity('value') + with ops.control_dependencies(update_ops.values()): + update_op = compute_sensitivity_at_specificity('update_op') + + if metrics_collections: + ops.add_to_collections(metrics_collections, sensitivity) + + if updates_collections: + ops.add_to_collections(updates_collections, update_op) + + return sensitivity, update_op + + +def _expand_and_tile(tensor, multiple, dim=0, name=None): + """Slice `tensor` shape in 2, then tile along the sliced dimension. + + A new dimension is inserted in shape of `tensor` before `dim`, then values are + tiled `multiple` times along the new dimension. + + Args: + tensor: Input `Tensor` or `SparseTensor`. + multiple: Integer, number of times to tile. + dim: Integer, dimension along which to tile. + name: Name of operation. + + Returns: + `Tensor` result of expanding and tiling `tensor`. + + Raises: + ValueError: if `multiple` is less than 1, or `dim` is not in + `[-rank(tensor), rank(tensor)]`. + """ + if multiple < 1: + raise ValueError('Invalid multiple %s, must be > 0.' % multiple) + with ops.name_scope( + name, 'expand_and_tile', (tensor, multiple, dim)) as scope: + # Sparse. + if isinstance(tensor, sparse_tensor.SparseTensorValue): + tensor = sparse_tensor.SparseTensor.from_value(tensor) + if isinstance(tensor, sparse_tensor.SparseTensor): + if dim < 0: + expand_dims = array_ops.reshape( + array_ops.size(tensor.shape) + dim, [1]) + else: + expand_dims = [dim] + expanded_shape = array_ops.concat( + 0, (array_ops.slice(tensor.shape, [0], expand_dims), [1], + array_ops.slice(tensor.shape, expand_dims, [-1])), + name='expanded_shape') + expanded = sparse_ops.sparse_reshape( + tensor, shape=expanded_shape, name='expand') + if multiple == 1: + return expanded + return sparse_ops.sparse_concat( + dim - 1 if dim < 0 else dim, [expanded] * multiple, name=scope) + + # Dense. + expanded = array_ops.expand_dims( + tensor, dim if (dim >= 0) else (dim - 1), name='expand') + if multiple == 1: + return expanded + ones = array_ops.ones_like(array_ops.shape(tensor)) + tile_multiples = array_ops.concat( + 0, (ones[:dim], (multiple,), ones[dim:]), name='multiples') + return array_ops.tile(expanded, tile_multiples, name=scope) + + +def _num_relevant(labels, k): + """Computes number of relevant values for each row in labels. + + For labels with shape [D1, ... DN, num_labels], this is the minimum of + `num_labels` and `k`. + + Args: + labels: `int64` `Tensor` or `SparseTensor` with shape + [D1, ... 
DN, num_labels], where N >= 1 and num_labels is the number of + target classes for the associated prediction. Commonly, N=1 and `labels` + has shape [batch_size, num_labels]. + k: Integer, k for @k metric. + + Returns: + Integer `Tensor` of shape [D1, ... DN], where each value is the number of + relevant values for that row. + + Raises: + ValueError: if inputs have invalid dtypes or values. + """ + if k < 1: + raise ValueError('Invalid k=%s.' % k) + with ops.name_scope(None, 'num_relevant', (labels,)) as scope: + # For SparseTensor, calculate separate count for each row. + if isinstance( + labels, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)): + labels_sizes = sets.set_size(labels) + return math_ops.minimum(labels_sizes, k, name=scope) + + # For dense Tensor, calculate scalar count based on last dimension, and + # tile across labels shape. + labels_shape = array_ops.shape(labels) + labels_size = labels_shape[-1] + num_relevant_scalar = math_ops.minimum(labels_size, k) + return array_ops.fill(labels_shape[0:-1], num_relevant_scalar, name=scope) + + +def _sparse_average_precision_at_k(labels, predictions, k): + """Computes average precision@k of predictions with respect to sparse labels. + + From en.wikipedia.org/wiki/Information_retrieval#Average_precision, formula + for each row is: + + AveP = sum_{i=1...k} P_{i} * rel_{i} / num_relevant_items + + A "row" is the elements in dimension [D1, ... DN] of `predictions`, `labels`, + and the result `Tensors`. In the common case, this is [batch_size]. Each row + of the results contains the average precision for that row. + + Internally, a `top_k` operation computes a `Tensor` indicating the top `k` + `predictions`. Set operations applied to `top_k` and `labels` calculate the + true positives, which are used to calculate the precision ("P_{i}" term, + above). + + Args: + labels: `int64` `Tensor` or `SparseTensor` with shape + [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of + target classes for the associated prediction. Commonly, N=1 and `labels` + has shape [batch_size, num_labels]. [D1, ... DN] must match + `predictions`. Values should be in range [0, num_classes), where + num_classes is the last dimension of `predictions`. Values outside this + range are ignored. + predictions: Float `Tensor` with shape [D1, ... DN, num_classes] where + N >= 1. Commonly, N=1 and `predictions` has shape + [batch size, num_classes]. The final dimension contains the logit values + for each class. [D1, ... DN] must match `labels`. + k: Integer, k for @k metric. This will calculate an average precision for + range `[1,k]`, as documented above. + + Returns: + `float64` `Tensor` of shape [D1, ... DN], where each value is the average + precision for that row. + + Raises: + ValueError: if k is invalid. + """ + if k < 1: + raise ValueError('Invalid k=%s.' % k) + with ops.name_scope( + None, 'average_precision', (predictions, labels, k)) as scope: + # Calculate top k indices to produce [D1, ... DN, k] tensor. + _, predictions_idx = nn.top_k(predictions, k) + predictions_idx = math_ops.to_int64(predictions_idx, name='predictions_idx') + + # Expand dims to produce [D1, ... DN, k, 1] tensor. This gives us a separate + # prediction for each k, so we can calculate separate true positive values + # for each k. + predictions_idx_per_k = array_ops.expand_dims( + predictions_idx, -1, name='predictions_idx_per_k') + + # Replicate labels k times to produce [D1, ... DN, k, num_labels] tensor. 
+ labels_per_k = _expand_and_tile(
+ labels, multiple=k, dim=-1, name='labels_per_k')
+
+ # The following tensors are all of shape [D1, ... DN, k], containing values
+ # per row, per k value.
+ # `relevant_per_k` (int32) - Relevance indicator, 1 if the prediction at
+ # that k value is correct, 0 otherwise. This is the "rel_{i}" term from
+ # the formula above.
+ # `tp_per_k` (int32) - True positive counts.
+ # `retrieved_per_k` (int32) - Number of predicted values at each k. This is
+ # the precision denominator.
+ # `precision_per_k` (float64) - Precision at each k. This is the "P_{i}"
+ # term from the formula above.
+ # `relevant_precision_per_k` (float64) - Relevant precisions; i.e.,
+ # precisions at all k for which relevance indicator is true.
+ relevant_per_k = _sparse_true_positive_at_k(
+ predictions_idx_per_k, labels_per_k, name='relevant_per_k')
+ tp_per_k = math_ops.cumsum(relevant_per_k, axis=-1, name='tp_per_k')
+ retrieved_per_k = math_ops.cumsum(
+ array_ops.ones_like(relevant_per_k), axis=-1, name='retrieved_per_k')
+ precision_per_k = math_ops.div(
+ math_ops.to_double(tp_per_k), math_ops.to_double(retrieved_per_k),
+ name='precision_per_k')
+ relevant_precision_per_k = math_ops.mul(
+ precision_per_k, math_ops.to_double(relevant_per_k),
+ name='relevant_precision_per_k')
+
+ # Reduce along k dimension to get the sum, yielding a [D1, ... DN] tensor.
+ precision_sum = math_ops.reduce_sum(
+ relevant_precision_per_k, reduction_indices=(-1,), name='precision_sum')
+
+ # Divide by number of relevant items to get average precision. These are
+ # the "num_relevant_items" and "AveP" terms from the formula above.
+ num_relevant_items = math_ops.to_double(_num_relevant(labels, k))
+ return math_ops.div(precision_sum, num_relevant_items, name=scope)
+
+
+def sparse_average_precision_at_k(labels,
+ predictions,
+ k,
+ weights=None,
+ metrics_collections=None,
+ updates_collections=None,
+ name=None):
+ """Computes average precision@k of predictions with respect to sparse labels.
+
+ `sparse_average_precision_at_k` creates two local variables,
+ `average_precision_at_<k>/total` and `average_precision_at_<k>/max`, that
+ are used to compute the frequency. This frequency is ultimately returned as
+ `average_precision_at_<k>`: an idempotent operation that simply divides
+ `average_precision_at_<k>/total` by `average_precision_at_<k>/max`.
+
+ For estimation of the metric over a stream of data, the function creates an
+ `update_op` operation that updates these variables and returns the
+ `average_precision_at_<k>`. Internally, a `top_k` operation computes a
+ `Tensor` indicating the top `k` `predictions`. Set operations applied to
+ `top_k` and `labels` calculate the true positives, which are used to compute
+ the per-row average precisions weighted by `weights`. Then `update_op`
+ increments `average_precision_at_<k>/total` and
+ `average_precision_at_<k>/max` using these values.
+
+ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
+
+ Args:
+ labels: `int64` `Tensor` or `SparseTensor` with shape
+ [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
+ target classes for the associated prediction. Commonly, N=1 and `labels`
+ has shape [batch_size, num_labels]. [D1, ... DN] must match
+ `predictions`. Values should be in range [0, num_classes), where
+ num_classes is the last dimension of `predictions`. Values outside this
+ range are ignored.
+ predictions: Float `Tensor` with shape [D1, ... DN, num_classes] where
+ N >= 1. Commonly, N=1 and `predictions` has shape
+ [batch size, num_classes]. The final dimension contains the logit values
+ for each class. [D1, ... DN] must match `labels`.
+ k: Integer, k for @k metric. This will calculate an average precision for
+ range `[1,k]`, as documented above.
+ weights: An optional `Tensor` whose shape is broadcastable to the first
+ [D1, ... DN] dimensions of `predictions` and `labels`.
+ metrics_collections: An optional list of collections that values should
+ be added to.
+ updates_collections: An optional list of collections that updates should
+ be added to.
+ name: Name of new update operation, and namespace for other dependent ops.
+
+ Returns:
+ mean_average_precision: Scalar `float64` `Tensor` with the mean average
+ precision values.
+ update: `Operation` that increments variables appropriately, and whose
+ value matches `mean_average_precision`.
+ """
+ default_name = _at_k_name('average_precision', k)
+ with ops.name_scope(name, default_name, (predictions, labels)) as scope:
+ # Calculate per-example average precision, and apply weights.
+ average_precision = _sparse_average_precision_at_k(
+ predictions=predictions, labels=labels, k=k)
+ if weights is not None:
+ weights = math_ops.to_double(weights)
+ average_precision = math_ops.mul(average_precision, weights)
+
+ # Create accumulation variables and update ops for max average precision and
+ # total average precision.
+ with ops.name_scope(None, 'max', (average_precision,)) as max_scope:
+ # `max` is the max possible precision. Since max for any row is 1.0:
+ # - For the unweighted case, this is just the number of rows.
+ # - For the weighted case, it's the sum of the weights broadcast across
+ # `average_precision` rows.
+ max_var = _local_variable(
+ array_ops.zeros([], dtype=dtypes.float64), name=max_scope)
+ if weights is None:
+ batch_max = math_ops.to_double(
+ array_ops.size(average_precision, name='batch_max'))
+ else:
+ # TODO(ptucker): More efficient way to broadcast?
+ broadcast_weights = math_ops.mul(
+ weights, array_ops.ones_like(average_precision),
+ name='broadcast_weights')
+ batch_max = math_ops.reduce_sum(broadcast_weights, name='batch_max')
+ max_update = state_ops.assign_add(max_var, batch_max, name='update')
+ with ops.name_scope(None, 'total', (average_precision,)) as total_scope:
+ total_var = _local_variable(
+ array_ops.zeros([], dtype=dtypes.float64), name=total_scope)
+ batch_total = math_ops.reduce_sum(average_precision, name='batch_total')
+ total_update = state_ops.assign_add(total_var, batch_total, name='update')
+
+ # Divide total by max to get mean, for both vars and the update ops.
+ mean_average_precision = _safe_scalar_div(total_var, max_var, name='mean')
+ update = _safe_scalar_div(total_update, max_update, name=scope)
+
+ if metrics_collections:
+ ops.add_to_collections(metrics_collections, mean_average_precision)
+ if updates_collections:
+ ops.add_to_collections(updates_collections, update)
+
+ return mean_average_precision, update
+
+
+def _sparse_false_positive_at_k(labels,
+ predictions_idx,
+ class_id=None,
+ weights=None):
+ """Calculates false positives for precision@k.
+
+ If `class_id` is specified, calculate binary false positives for `class_id`
+ only.
+ If `class_id` is not specified, calculate metrics for `k` predicted vs
+ `n` label classes, where `n` is the 2nd dimension of `labels`.
+
+ Args:
+ labels: `int64` `Tensor` or `SparseTensor` with shape
+ [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
+ target classes for the associated prediction. Commonly, N=1 and `labels`
+ has shape [batch_size, num_labels]. [D1, ... DN] must match
+ `predictions_idx`.
+ predictions_idx: 1-D or higher `int64` `Tensor` with last dimension `k`,
+ top `k` predicted classes. For rank `n`, the first `n-1` dimensions must
+ match `labels`.
+ class_id: Class for which we want binary metrics.
+ weights: `Tensor` whose shape is broadcastable to the first [D1, ... DN]
+ dimensions of `predictions_idx` and `labels`.
+
+ Returns:
+ A [D1, ... DN] `Tensor` of false positive counts.
+ """
+ with ops.name_scope(None, 'false_positives', (predictions_idx, labels)):
+ labels, predictions_idx = _maybe_select_class_id(labels,
+ predictions_idx,
+ class_id)
+ fp = sets.set_size(sets.set_difference(
+ predictions_idx, labels, aminusb=True))
+ fp = math_ops.to_double(fp)
+ if weights is not None:
+ weights = math_ops.to_double(weights)
+ fp = math_ops.mul(fp, weights)
+ return fp
+
+
+def _streaming_sparse_false_positive_at_k(labels,
+ predictions_idx,
+ k=None,
+ class_id=None,
+ weights=None,
+ name=None):
+ """Calculates weighted per step false positives for precision@k.
+
+ If `class_id` is specified, calculate binary false positives for `class_id`
+ only.
+ If `class_id` is not specified, calculate metrics for `k` predicted vs
+ `n` label classes, where `n` is the 2nd dimension of `labels`.
+
+ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
+
+ Args:
+ labels: `int64` `Tensor` or `SparseTensor` with shape
+ [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
+ target classes for the associated prediction. Commonly, N=1 and `labels`
+ has shape [batch_size, num_labels]. [D1, ... DN] must match
+ `predictions_idx`.
+ predictions_idx: 1-D or higher `int64` `Tensor` with last dimension `k`,
+ top `k` predicted classes. For rank `n`, the first `n-1` dimensions must
+ match `labels`.
+ k: Integer, k for @k metric. This is only used for default op name.
+ class_id: Class for which we want binary metrics.
+ weights: `Tensor` whose shape is broadcastable to the first [D1, ... DN]
+ dimensions of `predictions_idx` and `labels`.
+ name: Name of new variable, and namespace for other dependent ops.
+
+ Returns:
+ A tuple of `Variable` and update `Operation`.
+
+ Raises:
+ ValueError: If `weights` is not `None` and has an incompatible shape.
+ """
+ default_name = _at_k_name('false_positive', k, class_id=class_id)
+ with ops.name_scope(name, default_name, (predictions_idx, labels)) as scope:
+ fp = _sparse_false_positive_at_k(
+ predictions_idx=predictions_idx, labels=labels, class_id=class_id,
+ weights=weights)
+ batch_total_fp = math_ops.to_double(math_ops.reduce_sum(fp))
+
+ var = _local_variable(array_ops.zeros([], dtype=dtypes.float64), name=scope)
+ return var, state_ops.assign_add(var, batch_total_fp, name='update')
+
+
+def _sparse_precision_at_k(labels,
+ top_k_idx,
+ k=None,
+ class_id=None,
+ weights=None,
+ metrics_collections=None,
+ updates_collections=None,
+ name=None):
+ """Computes precision@k of the top-k indices with respect to sparse labels.
+
+ This method contains the code shared by streaming_sparse_precision_at_k and
+ streaming_sparse_precision_at_top_k. Refer to those methods for more details.
+
+ Args:
+ labels: `int64` `Tensor` or `SparseTensor` with shape
+ [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
+ target classes for the associated prediction. Commonly, N=1 and `labels`
+ has shape [batch_size, num_labels]. [D1, ... DN] must match
+ `predictions_idx`. Values should be in range [0, num_classes), where
+ num_classes is the last dimension of `predictions`. Values outside this
+ range are ignored.
+ top_k_idx: Integer `Tensor` with shape [D1, ... DN, k] where
+ N >= 1. Commonly, N=1 and top_k_idx has shape [batch size, k].
+ The final dimension contains the indices of top-k labels. [D1, ... DN]
+ must match `labels`.
+ k: Integer, k for @k metric or `None`. Only used for default op name.
+ class_id: Integer class ID for which we want binary metrics. This should be
+ in range [0, num_classes), where num_classes is the last dimension of
+ `predictions`. If `class_id` is outside this range, the method returns
+ NAN.
+ weights: An optional `Tensor` whose shape is broadcastable to the first
+ [D1, ... DN] dimensions of `predictions` and `labels`.
+ metrics_collections: An optional list of collections that values should
+ be added to.
+ updates_collections: An optional list of collections that updates should
+ be added to.
+ name: Name of the metric and of the enclosing scope.
+
+ Returns:
+ precision: Scalar `float64` `Tensor` with the value of `true_positives`
+ divided by the sum of `true_positives` and `false_positives`.
+ update_op: `Operation` that increments `true_positives` and
+ `false_positives` variables appropriately, and whose value matches
+ `precision`.
+
+ Raises:
+ ValueError: If `weights` is not `None` and its shape doesn't match
+ `predictions`, or if either `metrics_collections` or `updates_collections`
+ are not a list or tuple.
+ """
+ top_k_idx = math_ops.to_int64(top_k_idx)
+ tp, tp_update = _streaming_sparse_true_positive_at_k(
+ predictions_idx=top_k_idx, labels=labels, k=k, class_id=class_id,
+ weights=weights)
+ fp, fp_update = _streaming_sparse_false_positive_at_k(
+ predictions_idx=top_k_idx, labels=labels, k=k, class_id=class_id,
+ weights=weights)
+
+ metric = math_ops.div(tp, math_ops.add(tp, fp), name=name)
+ update = math_ops.div(
+ tp_update, math_ops.add(tp_update, fp_update), name='update')
+ if metrics_collections:
+ ops.add_to_collections(metrics_collections, metric)
+ if updates_collections:
+ ops.add_to_collections(updates_collections, update)
+ return metric, update
+
+
+def sparse_precision_at_k(labels,
+ predictions,
+ k,
+ class_id=None,
+ weights=None,
+ metrics_collections=None,
+ updates_collections=None,
+ name=None):
+ """Computes precision@k of the predictions with respect to sparse labels.
+
+ If `class_id` is specified, we calculate precision by considering only the
+ entries in the batch for which `class_id` is in the top-k highest
+ `predictions`, and computing the fraction of them for which `class_id` is
+ indeed a correct label.
+ If `class_id` is not specified, we'll calculate precision as how often on
+ average a class among the top-k classes with the highest predicted values
+ of a batch entry is correct and can be found in the label for that entry.
+
+ `sparse_precision_at_k` creates two local variables,
+ `true_positive_at_<k>` and `false_positive_at_<k>`, that are used to compute
+ the precision@k frequency. This frequency is ultimately returned as
+ `precision_at_<k>`: an idempotent operation that simply divides
+ `true_positive_at_<k>` by total (`true_positive_at_<k>` +
+ `false_positive_at_<k>`).
+
+ For estimation of the metric over a stream of data, the function creates an
+ `update_op` operation that updates these variables and returns the
+ `precision_at_<k>`. Internally, a `top_k` operation computes a `Tensor`
+ indicating the top `k` `predictions`. Set operations applied to `top_k` and
+ `labels` calculate the true positives and false positives weighted by
+ `weights`. Then `update_op` increments `true_positive_at_<k>` and
+ `false_positive_at_<k>` using these values.
+
+ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
+
+ Args:
+ labels: `int64` `Tensor` or `SparseTensor` with shape
+ [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
+ target classes for the associated prediction. Commonly, N=1 and `labels`
+ has shape [batch_size, num_labels]. [D1, ... DN] must match
+ `predictions`. Values should be in range [0, num_classes), where
+ num_classes is the last dimension of `predictions`. Values outside this
+ range are ignored.
+ predictions: Float `Tensor` with shape [D1, ... DN, num_classes] where
+ N >= 1. Commonly, N=1 and predictions has shape [batch size, num_classes].
+ The final dimension contains the logit values for each class. [D1, ... DN]
+ must match `labels`.
+ k: Integer, k for @k metric.
+ class_id: Integer class ID for which we want binary metrics. This should be
+ in range [0, num_classes), where num_classes is the last dimension of
+ `predictions`. If `class_id` is outside this range, the method returns
+ NAN.
+ weights: An optional `Tensor` whose shape is broadcastable to the first
+ [D1, ... DN] dimensions of `predictions` and `labels`.
+ metrics_collections: An optional list of collections that values should
+ be added to.
+ updates_collections: An optional list of collections that updates should
+ be added to.
+ name: Name of new update operation, and namespace for other dependent ops.
+
+ Returns:
+ precision: Scalar `float64` `Tensor` with the value of `true_positives`
+ divided by the sum of `true_positives` and `false_positives`.
+ update_op: `Operation` that increments `true_positives` and
+ `false_positives` variables appropriately, and whose value matches
+ `precision`.
+
+ Raises:
+ ValueError: If `weights` is not `None` and its shape doesn't match
+ `predictions`, or if either `metrics_collections` or `updates_collections`
+ are not a list or tuple.
+ """
+ default_name = _at_k_name('precision', k, class_id=class_id)
+ with ops.name_scope(name, default_name,
+ (predictions, labels, weights)) as scope:
+ _, top_k_idx = nn.top_k(predictions, k)
+ return _sparse_precision_at_k(
+ top_k_idx=top_k_idx,
+ labels=labels,
+ k=k,
+ class_id=class_id,
+ weights=weights,
+ metrics_collections=metrics_collections,
+ updates_collections=updates_collections,
+ name=scope)
+
+
+def specificity_at_sensitivity(
+ labels, predictions, sensitivity, weights=None, num_thresholds=200,
+ metrics_collections=None, updates_collections=None, name=None):
+ """Computes the specificity at a given sensitivity.
+
+ The `specificity_at_sensitivity` function creates four local
+ variables, `true_positives`, `true_negatives`, `false_positives` and
+ `false_negatives` that are used to compute the specificity at the given
+ sensitivity value. The threshold for the given sensitivity value is computed
+ and used to evaluate the corresponding specificity.
+
+ For estimation of the metric over a stream of data, the function creates an
+ `update_op` operation that updates these variables and returns the
+ `specificity`. `update_op` increments the `true_positives`, `true_negatives`,
+ `false_positives` and `false_negatives` counts with the weight of each case
+ found in the `predictions` and `labels`.
+
+ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
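+
+ For example, the following sketch (tensor values are illustrative only) drops
+ the third example from the statistics by giving it zero weight:
+
+ ```python
+ labels = tf.constant([True, False, True, False])
+ predictions = tf.constant([0.9, 0.6, 0.4, 0.2])
+ weights = tf.constant([1.0, 1.0, 0.0, 1.0])  # third example is masked out
+ specificity, update_op = specificity_at_sensitivity(
+     labels, predictions, sensitivity=0.9, weights=weights)
+ ```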
+
+ For additional information about specificity and sensitivity, see the
+ following: https://en.wikipedia.org/wiki/Sensitivity_and_specificity
+
+ Args:
+ labels: A `bool` `Tensor` whose shape matches `predictions`.
+ predictions: A floating point `Tensor` of arbitrary shape and whose values
+ are in the range `[0, 1]`.
+ sensitivity: A scalar value in range `[0, 1]`.
+ weights: An optional `Tensor` whose shape is broadcastable to `predictions`.
+ num_thresholds: The number of thresholds to use for matching the given
+ sensitivity.
+ metrics_collections: An optional list of collections that `specificity`
+ should be added to.
+ updates_collections: An optional list of collections that `update_op` should
+ be added to.
+ name: An optional variable_scope name.
+
+ Returns:
+ specificity: A scalar `Tensor` representing the specificity at the given
+ `sensitivity` value.
+ update_op: An operation that increments the `true_positives`,
+ `true_negatives`, `false_positives` and `false_negatives` variables
+ appropriately and whose value matches `specificity`.
+
+ Raises:
+ ValueError: If `predictions` and `labels` have mismatched shapes, if
+ `weights` is not `None` and its shape doesn't match `predictions`, or if
+ `sensitivity` is not between 0 and 1, or if either `metrics_collections`
+ or `updates_collections` are not a list or tuple.
+ """
+ if sensitivity < 0 or sensitivity > 1:
+ raise ValueError('`sensitivity` must be in the range [0, 1].')
+
+ with variable_scope.variable_scope(name, 'specificity_at_sensitivity',
+ (predictions, labels, weights)):
+ kepsilon = 1e-7 # to account for floating point imprecisions
+ thresholds = [(i + 1) * 1.0 / (num_thresholds - 1)
+ for i in range(num_thresholds-2)]
+ thresholds = [0.0 - kepsilon] + thresholds + [1.0 - kepsilon]
+
+ values, update_ops = _confusion_matrix_at_thresholds(
+ labels, predictions, thresholds, weights)
+ tp = values['tp']
+ fn = values['fn']
+ tn = values['tn']
+ fp = values['fp']
+
+ def compute_specificity_at_sensitivity(name):
+ """Computes the specificity at the given sensitivity.
+
+ Args:
+ name: The name of the operation.
+
+ Returns:
+ The specificity using the aggregated values.
+ """
+ sensitivities = math_ops.div(tp, tp + fn + kepsilon)
+
+ # We'll need to use this trick until tf.argmax allows us to specify
+ # whether we should use the first or last index in case of ties.
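+ # The cumulative sum of the 0/1 tie indicators below is non-decreasing and
+ # first reaches its maximum at the last tied index, so argmax over the
+ # cumsum picks the highest threshold among the ties.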
+ min_val = math_ops.reduce_min(math_ops.abs(sensitivities - sensitivity)) + indices_at_minval = math_ops.equal( + math_ops.abs(sensitivities - sensitivity), min_val) + indices_at_minval = math_ops.to_int64(indices_at_minval) + indices_at_minval = math_ops.cumsum(indices_at_minval) + tf_index = math_ops.argmax(indices_at_minval, 0) + tf_index = math_ops.cast(tf_index, dtypes.int32) + + # Now, we have the implicit threshold, so compute the specificity: + return math_ops.div(tn[tf_index], + tn[tf_index] + fp[tf_index] + kepsilon, + name) + + specificity = compute_specificity_at_sensitivity('value') + with ops.control_dependencies(update_ops.values()): + update_op = compute_specificity_at_sensitivity('update_op') + + if metrics_collections: + ops.add_to_collections(metrics_collections, specificity) + + if updates_collections: + ops.add_to_collections(updates_collections, update_op) + + return specificity, update_op diff --git a/tensorflow/python/ops/standard_ops.py b/tensorflow/python/ops/standard_ops.py index 73208a350b9..cc94cf1f38e 100644 --- a/tensorflow/python/ops/standard_ops.py +++ b/tensorflow/python/ops/standard_ops.py @@ -39,6 +39,7 @@ from tensorflow.python.ops.check_ops import * from tensorflow.python.ops.clip_ops import * from tensorflow.python.ops.special_math_ops import * # TODO(vrv): Switch to import * once we're okay with exposing the module. +from tensorflow.python.ops.confusion_matrix import confusion_matrix from tensorflow.python.ops.control_flow_ops import Assert from tensorflow.python.ops.control_flow_ops import group from tensorflow.python.ops.control_flow_ops import no_op @@ -91,6 +92,7 @@ from tensorflow.python.framework import constant_op as _constant_op from tensorflow.python.ops import array_ops as _array_ops from tensorflow.python.ops import check_ops as _check_ops from tensorflow.python.ops import clip_ops as _clip_ops +from tensorflow.python.ops import confusion_matrix as _confusion_matrix from tensorflow.python.ops import control_flow_ops as _control_flow_ops from tensorflow.python.ops import data_flow_ops as _data_flow_ops from tensorflow.python.ops import functional_ops as _functional_ops @@ -244,6 +246,7 @@ _allowed_symbols_misc = [ "parse_single_sequence_example", "serialize_many_sparse", "serialize_sparse", + "confusion_matrix", ] _allowed_symbols = (_allowed_symbols_array_ops + @@ -262,6 +265,7 @@ remove_undocumented(__name__, _allowed_symbols, _array_ops, _check_ops, _clip_ops, + _confusion_matrix, _control_flow_ops, _constant_op, _data_flow_ops,
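With `confusion_matrix` exported from `standard_ops` above, the op can be reached directly from the top-level `tf` namespace. A quick sanity-check sketch (values are illustrative; assumes a TensorFlow build that includes this change):

```python
import tensorflow as tf

labels = [0, 1, 1, 2]
predictions = [0, 1, 2, 2]
with tf.Session() as sess:
  # Rows index `labels`, columns index `predictions`; the result is 3x3 here
  # since the largest class id seen is 2.
  print(sess.run(tf.confusion_matrix(labels, predictions)))
```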