Fix functions in CRF when sequence_lengths contains zero (#18487)
* Fix computation of crf_log_norm when sequence length is zero
* Fix _single_seq_fn in crf when sequence_lengths contains zero
This commit is contained in:
parent
c65ad957b8
commit
17cb3cdd30
@ -152,6 +152,22 @@ class CrfTest(test.TestCase):
|
|||||||
|
|
||||||
self.assertAllClose(tf_log_norm, tf_brute_force_log_norm)
|
self.assertAllClose(tf_log_norm, tf_brute_force_log_norm)
|
||||||
|
|
||||||
|
def testCrfLogNormZeroSeqLength(self):
|
||||||
|
"""
|
||||||
|
Test `crf_log_norm` when `sequence_lengths` contains one or more zeros.
|
||||||
|
"""
|
||||||
|
with self.test_session() as sess:
|
||||||
|
inputs = constant_op.constant(np.ones([2, 10, 5],
|
||||||
|
dtype=np.float32))
|
||||||
|
transition_params = constant_op.constant(np.ones([5, 5],
|
||||||
|
dtype=np.float32))
|
||||||
|
sequence_lengths = constant_op.constant(np.zeros([2],
|
||||||
|
dtype=np.int32))
|
||||||
|
expected_log_norm = np.zeros([2], dtype=np.float32)
|
||||||
|
log_norm = crf.crf_log_norm(inputs, sequence_lengths, transition_params)
|
||||||
|
tf_log_norm = sess.run(log_norm)
|
||||||
|
self.assertAllClose(tf_log_norm, expected_log_norm)
|
||||||
|
|
||||||
def testCrfLogLikelihood(self):
|
def testCrfLogLikelihood(self):
|
||||||
inputs = np.array(
|
inputs = np.array(
|
||||||
[[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
|
[[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
|
||||||
@ -292,10 +308,10 @@ class CrfTest(test.TestCase):
|
|||||||
dtype=np.float32))
|
dtype=np.float32))
|
||||||
sequence_lengths = constant_op.constant(np.zeros([2],
|
sequence_lengths = constant_op.constant(np.zeros([2],
|
||||||
dtype=np.int32))
|
dtype=np.int32))
|
||||||
values = crf.crf_decode(inputs, transition_params, sequence_lengths)
|
tags, scores = crf.crf_decode(inputs, transition_params, sequence_lengths)
|
||||||
tags, scores = sess.run(values)
|
tf_tags, tf_scores = sess.run([tags, scores])
|
||||||
self.assertEqual(len(tags.shape), 2)
|
self.assertEqual(len(tf_tags.shape), 2)
|
||||||
self.assertEqual(len(scores.shape), 1)
|
self.assertEqual(len(tf_scores.shape), 1)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
test.main()
|
test.main()
|
||||||
|
@ -90,9 +90,13 @@ def crf_sequence_score(inputs, tag_indices, sequence_lengths,
|
|||||||
batch_size = array_ops.shape(inputs, out_type=tag_indices.dtype)[0]
|
batch_size = array_ops.shape(inputs, out_type=tag_indices.dtype)[0]
|
||||||
example_inds = array_ops.reshape(
|
example_inds = array_ops.reshape(
|
||||||
math_ops.range(batch_size, dtype=tag_indices.dtype), [-1, 1])
|
math_ops.range(batch_size, dtype=tag_indices.dtype), [-1, 1])
|
||||||
return array_ops.gather_nd(
|
sequence_scores = array_ops.gather_nd(
|
||||||
array_ops.squeeze(inputs, [1]),
|
array_ops.squeeze(inputs, [1]),
|
||||||
array_ops.concat([example_inds, tag_indices], axis=1))
|
array_ops.concat([example_inds, tag_indices], axis=1))
|
||||||
|
sequence_scores = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
|
||||||
|
array_ops.zeros_like(sequence_scores),
|
||||||
|
sequence_scores)
|
||||||
|
return sequence_scores
|
||||||
|
|
||||||
def _multi_seq_fn():
|
def _multi_seq_fn():
|
||||||
# Compute the scores of the given tag sequence.
|
# Compute the scores of the given tag sequence.
|
||||||
@ -128,7 +132,12 @@ def crf_log_norm(inputs, sequence_lengths, transition_params):
|
|||||||
# If max_seq_len is 1, we skip the algorithm and simply reduce_logsumexp over
|
# If max_seq_len is 1, we skip the algorithm and simply reduce_logsumexp over
|
||||||
# the "initial state" (the unary potentials).
|
# the "initial state" (the unary potentials).
|
||||||
def _single_seq_fn():
|
def _single_seq_fn():
|
||||||
return math_ops.reduce_logsumexp(first_input, [1])
|
log_norm = math_ops.reduce_logsumexp(first_input, [1])
|
||||||
|
# Mask `log_norm` of the sequences with length <= zero.
|
||||||
|
log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
|
||||||
|
array_ops.zeros_like(log_norm),
|
||||||
|
log_norm)
|
||||||
|
return log_norm
|
||||||
|
|
||||||
def _multi_seq_fn():
|
def _multi_seq_fn():
|
||||||
"""Forward computation of alpha values."""
|
"""Forward computation of alpha values."""
|
||||||
@ -137,13 +146,19 @@ def crf_log_norm(inputs, sequence_lengths, transition_params):
|
|||||||
# Compute the alpha values in the forward algorithm in order to get the
|
# Compute the alpha values in the forward algorithm in order to get the
|
||||||
# partition function.
|
# partition function.
|
||||||
forward_cell = CrfForwardRnnCell(transition_params)
|
forward_cell = CrfForwardRnnCell(transition_params)
|
||||||
|
# Sequence length is not allowed to be less than zero.
|
||||||
|
sequence_lengths_less_one = math_ops.maximum(0, sequence_lengths - 1)
|
||||||
_, alphas = rnn.dynamic_rnn(
|
_, alphas = rnn.dynamic_rnn(
|
||||||
cell=forward_cell,
|
cell=forward_cell,
|
||||||
inputs=rest_of_input,
|
inputs=rest_of_input,
|
||||||
sequence_length=sequence_lengths - 1,
|
sequence_length=sequence_lengths_less_one,
|
||||||
initial_state=first_input,
|
initial_state=first_input,
|
||||||
dtype=dtypes.float32)
|
dtype=dtypes.float32)
|
||||||
log_norm = math_ops.reduce_logsumexp(alphas, [1])
|
log_norm = math_ops.reduce_logsumexp(alphas, [1])
|
||||||
|
# Mask `log_norm` of the sequences with length <= zero.
|
||||||
|
log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
|
||||||
|
array_ops.zeros_like(log_norm),
|
||||||
|
log_norm)
|
||||||
return log_norm
|
return log_norm
|
||||||
|
|
||||||
max_seq_len = array_ops.shape(inputs)[1]
|
max_seq_len = array_ops.shape(inputs)[1]
|
||||||
@ -479,7 +494,7 @@ def crf_decode(potentials, transition_params, sequence_length):
|
|||||||
initial_state = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1])
|
initial_state = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1])
|
||||||
initial_state = array_ops.squeeze(initial_state, axis=[1]) # [B, O]
|
initial_state = array_ops.squeeze(initial_state, axis=[1]) # [B, O]
|
||||||
inputs = array_ops.slice(potentials, [0, 1, 0], [-1, -1, -1]) # [B, T-1, O]
|
inputs = array_ops.slice(potentials, [0, 1, 0], [-1, -1, -1]) # [B, T-1, O]
|
||||||
# sequence length is not allowed to be less than zero
|
# Sequence length is not allowed to be less than zero.
|
||||||
sequence_length_less_one = math_ops.maximum(0, sequence_length - 1)
|
sequence_length_less_one = math_ops.maximum(0, sequence_length - 1)
|
||||||
backpointers, last_score = rnn.dynamic_rnn( # [B, T - 1, O], [B, O]
|
backpointers, last_score = rnn.dynamic_rnn( # [B, T - 1, O], [B, O]
|
||||||
crf_fwd_cell,
|
crf_fwd_cell,
|
||||||
|
Loading…
Reference in New Issue
Block a user