Fix functions in CRF when sequence_lengths contains zero (#18487)
* Fix computation of crf_log_norm when sequence length is zero * Fix _single_seq_fn in crf when sequence_lengths contains zero
This commit is contained in:
parent
c65ad957b8
commit
17cb3cdd30
@ -152,6 +152,22 @@ class CrfTest(test.TestCase):
|
||||
|
||||
self.assertAllClose(tf_log_norm, tf_brute_force_log_norm)
|
||||
|
||||
def testCrfLogNormZeroSeqLength(self):
|
||||
"""
|
||||
Test `crf_log_norm` when `sequence_lengths` contains one or more zeros.
|
||||
"""
|
||||
with self.test_session() as sess:
|
||||
inputs = constant_op.constant(np.ones([2, 10, 5],
|
||||
dtype=np.float32))
|
||||
transition_params = constant_op.constant(np.ones([5, 5],
|
||||
dtype=np.float32))
|
||||
sequence_lengths = constant_op.constant(np.zeros([2],
|
||||
dtype=np.int32))
|
||||
expected_log_norm = np.zeros([2], dtype=np.float32)
|
||||
log_norm = crf.crf_log_norm(inputs, sequence_lengths, transition_params)
|
||||
tf_log_norm = sess.run(log_norm)
|
||||
self.assertAllClose(tf_log_norm, expected_log_norm)
|
||||
|
||||
def testCrfLogLikelihood(self):
|
||||
inputs = np.array(
|
||||
[[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
|
||||
@ -292,10 +308,10 @@ class CrfTest(test.TestCase):
|
||||
dtype=np.float32))
|
||||
sequence_lengths = constant_op.constant(np.zeros([2],
|
||||
dtype=np.int32))
|
||||
values = crf.crf_decode(inputs, transition_params, sequence_lengths)
|
||||
tags, scores = sess.run(values)
|
||||
self.assertEqual(len(tags.shape), 2)
|
||||
self.assertEqual(len(scores.shape), 1)
|
||||
tags, scores = crf.crf_decode(inputs, transition_params, sequence_lengths)
|
||||
tf_tags, tf_scores = sess.run([tags, scores])
|
||||
self.assertEqual(len(tf_tags.shape), 2)
|
||||
self.assertEqual(len(tf_scores.shape), 1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
test.main()
|
||||
|
@ -90,9 +90,13 @@ def crf_sequence_score(inputs, tag_indices, sequence_lengths,
|
||||
batch_size = array_ops.shape(inputs, out_type=tag_indices.dtype)[0]
|
||||
example_inds = array_ops.reshape(
|
||||
math_ops.range(batch_size, dtype=tag_indices.dtype), [-1, 1])
|
||||
return array_ops.gather_nd(
|
||||
sequence_scores = array_ops.gather_nd(
|
||||
array_ops.squeeze(inputs, [1]),
|
||||
array_ops.concat([example_inds, tag_indices], axis=1))
|
||||
sequence_scores = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
|
||||
array_ops.zeros_like(sequence_scores),
|
||||
sequence_scores)
|
||||
return sequence_scores
|
||||
|
||||
def _multi_seq_fn():
|
||||
# Compute the scores of the given tag sequence.
|
||||
@ -128,7 +132,12 @@ def crf_log_norm(inputs, sequence_lengths, transition_params):
|
||||
# If max_seq_len is 1, we skip the algorithm and simply reduce_logsumexp over
|
||||
# the "initial state" (the unary potentials).
|
||||
def _single_seq_fn():
|
||||
return math_ops.reduce_logsumexp(first_input, [1])
|
||||
log_norm = math_ops.reduce_logsumexp(first_input, [1])
|
||||
# Mask `log_norm` of the sequences with length <= zero.
|
||||
log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
|
||||
array_ops.zeros_like(log_norm),
|
||||
log_norm)
|
||||
return log_norm
|
||||
|
||||
def _multi_seq_fn():
|
||||
"""Forward computation of alpha values."""
|
||||
@ -137,13 +146,19 @@ def crf_log_norm(inputs, sequence_lengths, transition_params):
|
||||
# Compute the alpha values in the forward algorithm in order to get the
|
||||
# partition function.
|
||||
forward_cell = CrfForwardRnnCell(transition_params)
|
||||
# Clamp `sequence_lengths - 1` at zero: the length passed to the RNN is not allowed to be negative.
|
||||
sequence_lengths_less_one = math_ops.maximum(0, sequence_lengths - 1)
|
||||
_, alphas = rnn.dynamic_rnn(
|
||||
cell=forward_cell,
|
||||
inputs=rest_of_input,
|
||||
sequence_length=sequence_lengths - 1,
|
||||
sequence_length=sequence_lengths_less_one,
|
||||
initial_state=first_input,
|
||||
dtype=dtypes.float32)
|
||||
log_norm = math_ops.reduce_logsumexp(alphas, [1])
|
||||
# Mask `log_norm` of the sequences with length <= zero.
|
||||
log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
|
||||
array_ops.zeros_like(log_norm),
|
||||
log_norm)
|
||||
return log_norm
|
||||
|
||||
max_seq_len = array_ops.shape(inputs)[1]
|
||||
@ -479,7 +494,7 @@ def crf_decode(potentials, transition_params, sequence_length):
|
||||
initial_state = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1])
|
||||
initial_state = array_ops.squeeze(initial_state, axis=[1]) # [B, O]
|
||||
inputs = array_ops.slice(potentials, [0, 1, 0], [-1, -1, -1]) # [B, T-1, O]
|
||||
# sequence length is not allowed to be less than zero
|
||||
# Sequence length is not allowed to be less than zero.
|
||||
sequence_length_less_one = math_ops.maximum(0, sequence_length - 1)
|
||||
backpointers, last_score = rnn.dynamic_rnn( # [B, T - 1, O], [B, O]
|
||||
crf_fwd_cell,
|
||||
|
Loading…
Reference in New Issue
Block a user