Fix functions in CRF when sequence_lengths contains zero (#18487)

* Fix computation of crf_log_norm when sequence length is zero

* Fix _single_seq_fn in crf when sequence_lengths contains zero
This commit is contained in:
QingYing Chen 2018-04-29 03:05:34 +08:00 committed by drpngx
parent c65ad957b8
commit 17cb3cdd30
2 changed files with 39 additions and 8 deletions

View File

@ -152,6 +152,22 @@ class CrfTest(test.TestCase):
self.assertAllClose(tf_log_norm, tf_brute_force_log_norm)
def testCrfLogNormZeroSeqLength(self):
  """Checks `crf_log_norm` when every entry of `sequence_lengths` is zero.

  Feeds all-ones unary potentials of shape [2, 10, 5], an all-ones [5, 5]
  transition matrix and two zero sequence lengths, then asserts that the
  returned log-normalizer is zero for both batch entries.
  """
  batch_size, max_seq_len, num_tags = 2, 10, 5
  with self.test_session() as sess:
    unary_potentials = constant_op.constant(
        np.ones([batch_size, max_seq_len, num_tags], dtype=np.float32))
    transitions = constant_op.constant(
        np.ones([num_tags, num_tags], dtype=np.float32))
    zero_lengths = constant_op.constant(
        np.zeros([batch_size], dtype=np.int32))
    # Zero-length sequences are expected to produce a log-norm of zero.
    expected_log_norm = np.zeros([batch_size], dtype=np.float32)
    tf_log_norm = sess.run(
        crf.crf_log_norm(unary_potentials, zero_lengths, transitions))
    self.assertAllClose(tf_log_norm, expected_log_norm)
def testCrfLogLikelihood(self):
inputs = np.array(
[[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
@ -292,10 +308,10 @@ class CrfTest(test.TestCase):
dtype=np.float32))
sequence_lengths = constant_op.constant(np.zeros([2],
dtype=np.int32))
values = crf.crf_decode(inputs, transition_params, sequence_lengths)
tags, scores = sess.run(values)
self.assertEqual(len(tags.shape), 2)
self.assertEqual(len(scores.shape), 1)
tags, scores = crf.crf_decode(inputs, transition_params, sequence_lengths)
tf_tags, tf_scores = sess.run([tags, scores])
self.assertEqual(len(tf_tags.shape), 2)
self.assertEqual(len(tf_scores.shape), 1)
if __name__ == "__main__":
test.main()

View File

@ -90,9 +90,13 @@ def crf_sequence_score(inputs, tag_indices, sequence_lengths,
batch_size = array_ops.shape(inputs, out_type=tag_indices.dtype)[0]
example_inds = array_ops.reshape(
math_ops.range(batch_size, dtype=tag_indices.dtype), [-1, 1])
return array_ops.gather_nd(
sequence_scores = array_ops.gather_nd(
array_ops.squeeze(inputs, [1]),
array_ops.concat([example_inds, tag_indices], axis=1))
sequence_scores = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
array_ops.zeros_like(sequence_scores),
sequence_scores)
return sequence_scores
def _multi_seq_fn():
# Compute the scores of the given tag sequence.
@ -128,7 +132,12 @@ def crf_log_norm(inputs, sequence_lengths, transition_params):
# If max_seq_len is 1, we skip the algorithm and simply reduce_logsumexp over
# the "initial state" (the unary potentials).
def _single_seq_fn():
return math_ops.reduce_logsumexp(first_input, [1])
log_norm = math_ops.reduce_logsumexp(first_input, [1])
# Mask `log_norm` of the sequences with length <= zero.
log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
array_ops.zeros_like(log_norm),
log_norm)
return log_norm
def _multi_seq_fn():
"""Forward computation of alpha values."""
@ -137,13 +146,19 @@ def crf_log_norm(inputs, sequence_lengths, transition_params):
# Compute the alpha values in the forward algorithm in order to get the
# partition function.
forward_cell = CrfForwardRnnCell(transition_params)
# Sequence length is not allowed to be less than zero.
sequence_lengths_less_one = math_ops.maximum(0, sequence_lengths - 1)
_, alphas = rnn.dynamic_rnn(
cell=forward_cell,
inputs=rest_of_input,
sequence_length=sequence_lengths - 1,
sequence_length=sequence_lengths_less_one,
initial_state=first_input,
dtype=dtypes.float32)
log_norm = math_ops.reduce_logsumexp(alphas, [1])
# Mask `log_norm` of the sequences with length <= zero.
log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
array_ops.zeros_like(log_norm),
log_norm)
return log_norm
max_seq_len = array_ops.shape(inputs)[1]
@ -479,7 +494,7 @@ def crf_decode(potentials, transition_params, sequence_length):
initial_state = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1])
initial_state = array_ops.squeeze(initial_state, axis=[1]) # [B, O]
inputs = array_ops.slice(potentials, [0, 1, 0], [-1, -1, -1]) # [B, T-1, O]
# sequence length is not allowed to be less than zero
# Sequence length is not allowed to be less than zero.
sequence_length_less_one = math_ops.maximum(0, sequence_length - 1)
backpointers, last_score = rnn.dynamic_rnn( # [B, T - 1, O], [B, O]
crf_fwd_cell,