Fix functions in CRF when sequence_lengths contains zero (#18487)

* Fix computation of crf_log_norm when sequence length is zero * fix _single_seq_fn in crf when sequence_lengths contain zero
2018-04-29 03:05:34 +08:00 · 2018-04-29 03:05:34 +08:00 · 17cb3cdd30
commit 17cb3cdd30
parent c65ad957b8
2 changed files with 39 additions and 8 deletions
--- a/tensorflow/contrib/crf/python/kernel_tests/crf_test.py
+++ b/tensorflow/contrib/crf/python/kernel_tests/crf_test.py
@ -152,6 +152,22 @@ class CrfTest(test.TestCase):

        self.assertAllClose(tf_log_norm, tf_brute_force_log_norm)

+  def testCrfLogNormZeroSeqLength(self):
+    """
+    Test `crf_log_norm` when `sequence_lengths` contains one or more zeros.
+    """
+    with self.test_session() as sess:
+      inputs = constant_op.constant(np.ones([2, 10, 5],
+                                            dtype=np.float32))
+      transition_params = constant_op.constant(np.ones([5, 5],
+                                                       dtype=np.float32))
+      sequence_lengths = constant_op.constant(np.zeros([2],
+                                                       dtype=np.int32))
+      expected_log_norm = np.zeros([2], dtype=np.float32)
+      log_norm = crf.crf_log_norm(inputs, sequence_lengths, transition_params)
+      tf_log_norm = sess.run(log_norm)
+      self.assertAllClose(tf_log_norm, expected_log_norm)
+
  def testCrfLogLikelihood(self):
    inputs = np.array(
        [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
@ -292,10 +308,10 @@ class CrfTest(test.TestCase):
                                                       dtype=np.float32))
      sequence_lengths = constant_op.constant(np.zeros([2],
                                                       dtype=np.int32))
-      values = crf.crf_decode(inputs, transition_params, sequence_lengths)
-      tags, scores = sess.run(values)
-      self.assertEqual(len(tags.shape), 2)
-      self.assertEqual(len(scores.shape), 1)
+      tags, scores = crf.crf_decode(inputs, transition_params, sequence_lengths)
+      tf_tags, tf_scores = sess.run([tags, scores])
+      self.assertEqual(len(tf_tags.shape), 2)
+      self.assertEqual(len(tf_scores.shape), 1)

 if __name__ == "__main__":
  test.main()
--- a/tensorflow/contrib/crf/python/ops/crf.py
+++ b/tensorflow/contrib/crf/python/ops/crf.py
@ -90,9 +90,13 @@ def crf_sequence_score(inputs, tag_indices, sequence_lengths,
    batch_size = array_ops.shape(inputs, out_type=tag_indices.dtype)[0]
    example_inds = array_ops.reshape(
        math_ops.range(batch_size, dtype=tag_indices.dtype), [-1, 1])
-    return array_ops.gather_nd(
+    sequence_scores = array_ops.gather_nd(
        array_ops.squeeze(inputs, [1]),
        array_ops.concat([example_inds, tag_indices], axis=1))
+    sequence_scores = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
+                                      array_ops.zeros_like(sequence_scores),
+                                      sequence_scores)
+    return sequence_scores

  def _multi_seq_fn():
    # Compute the scores of the given tag sequence.
@ -128,7 +132,12 @@ def crf_log_norm(inputs, sequence_lengths, transition_params):
  # If max_seq_len is 1, we skip the algorithm and simply reduce_logsumexp over
  # the "initial state" (the unary potentials).
  def _single_seq_fn():
-    return math_ops.reduce_logsumexp(first_input, [1])
+    log_norm = math_ops.reduce_logsumexp(first_input, [1])
+    # Mask `log_norm` of the sequences with length <= zero.
+    log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
+                               array_ops.zeros_like(log_norm),
+                               log_norm)
+    return log_norm

  def _multi_seq_fn():
    """Forward computation of alpha values."""
@ -137,13 +146,19 @@ def crf_log_norm(inputs, sequence_lengths, transition_params):
    # Compute the alpha values in the forward algorithm in order to get the
    # partition function.
    forward_cell = CrfForwardRnnCell(transition_params)
+    # Sequence length is not allowed to be less than zero.
+    sequence_lengths_less_one = math_ops.maximum(0, sequence_lengths - 1)
    _, alphas = rnn.dynamic_rnn(
        cell=forward_cell,
        inputs=rest_of_input,
-        sequence_length=sequence_lengths - 1,
+        sequence_length=sequence_lengths_less_one,
        initial_state=first_input,
        dtype=dtypes.float32)
    log_norm = math_ops.reduce_logsumexp(alphas, [1])
+    # Mask `log_norm` of the sequences with length <= zero.
+    log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
+                               array_ops.zeros_like(log_norm),
+                               log_norm)
    return log_norm

  max_seq_len = array_ops.shape(inputs)[1]
@ -479,7 +494,7 @@ def crf_decode(potentials, transition_params, sequence_length):
    initial_state = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1])
    initial_state = array_ops.squeeze(initial_state, axis=[1])  # [B, O]
    inputs = array_ops.slice(potentials, [0, 1, 0], [-1, -1, -1])  # [B, T-1, O]
-    # sequence length is not allowed to be less than zero
+    # Sequence length is not allowed to be less than zero.
    sequence_length_less_one = math_ops.maximum(0, sequence_length - 1)
    backpointers, last_score = rnn.dynamic_rnn(  # [B, T - 1, O], [B, O]
        crf_fwd_cell,