Merge pull request #2111 from mozilla/test-epoch-oom
Revert to a pipelined approach for test epochs to avoid CPU OOM with large alphabets
This commit is contained in:
commit
df5bb31046
40
evaluate.py
40
evaluate.py
@ -19,7 +19,6 @@ from util.evaluate_tools import calculate_report
|
|||||||
from util.feeding import create_dataset
|
from util.feeding import create_dataset
|
||||||
from util.flags import create_flags, FLAGS
|
from util.flags import create_flags, FLAGS
|
||||||
from util.logging import log_error, log_progress, create_progressbar
|
from util.logging import log_error, log_progress, create_progressbar
|
||||||
from util.text import levenshtein
|
|
||||||
|
|
||||||
|
|
||||||
def sparse_tensor_value_to_texts(value, alphabet):
|
def sparse_tensor_value_to_texts(value, alphabet):
|
||||||
@ -88,14 +87,13 @@ def evaluate(test_csvs, create_model, try_loading):
|
|||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
def run_test(init_op, dataset):
|
def run_test(init_op, dataset):
|
||||||
logitses = []
|
|
||||||
losses = []
|
losses = []
|
||||||
seq_lengths = []
|
predictions = []
|
||||||
ground_truths = []
|
ground_truths = []
|
||||||
|
|
||||||
bar = create_progressbar(prefix='Computing acoustic model predictions | ',
|
bar = create_progressbar(prefix='Test epoch | ',
|
||||||
widgets=['Steps: ', progressbar.Counter(), ' | ', progressbar.Timer()]).start()
|
widgets=['Steps: ', progressbar.Counter(), ' | ', progressbar.Timer()]).start()
|
||||||
log_progress('Computing acoustic model predictions...')
|
log_progress('Test epoch...')
|
||||||
|
|
||||||
step_count = 0
|
step_count = 0
|
||||||
|
|
||||||
@ -105,35 +103,23 @@ def evaluate(test_csvs, create_model, try_loading):
|
|||||||
# First pass, compute losses and transposed logits for decoding
|
# First pass, compute losses and transposed logits for decoding
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
logits, loss_, lengths, transcripts = session.run([transposed, loss, batch_x_len, batch_y])
|
batch_logits, batch_loss, batch_lengths, batch_transcripts = \
|
||||||
|
session.run([transposed, loss, batch_x_len, batch_y])
|
||||||
except tf.errors.OutOfRangeError:
|
except tf.errors.OutOfRangeError:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
decoded = ctc_beam_search_decoder_batch(batch_logits, batch_lengths, Config.alphabet, FLAGS.beam_width,
|
||||||
|
num_processes=num_processes, scorer=scorer)
|
||||||
|
predictions.extend(d[0][1] for d in decoded)
|
||||||
|
ground_truths.extend(sparse_tensor_value_to_texts(batch_transcripts, Config.alphabet))
|
||||||
|
losses.extend(batch_loss)
|
||||||
|
|
||||||
step_count += 1
|
step_count += 1
|
||||||
bar.update(step_count)
|
bar.update(step_count)
|
||||||
|
|
||||||
logitses.append(logits)
|
|
||||||
losses.extend(loss_)
|
|
||||||
seq_lengths.append(lengths)
|
|
||||||
ground_truths.extend(sparse_tensor_value_to_texts(transcripts, Config.alphabet))
|
|
||||||
|
|
||||||
bar.finish()
|
bar.finish()
|
||||||
|
|
||||||
predictions = []
|
wer, cer, samples = calculate_report(ground_truths, predictions, losses)
|
||||||
|
|
||||||
bar = create_progressbar(max_value=step_count,
|
|
||||||
prefix='Decoding predictions | ').start()
|
|
||||||
log_progress('Decoding predictions...')
|
|
||||||
|
|
||||||
# Second pass, decode logits and compute WER and edit distance metrics
|
|
||||||
for logits, seq_length in bar(zip(logitses, seq_lengths)):
|
|
||||||
decoded = ctc_beam_search_decoder_batch(logits, seq_length, Config.alphabet, FLAGS.beam_width,
|
|
||||||
num_processes=num_processes, scorer=scorer)
|
|
||||||
predictions.extend(d[0][1] for d in decoded)
|
|
||||||
|
|
||||||
distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]
|
|
||||||
|
|
||||||
wer, cer, samples = calculate_report(ground_truths, predictions, distances, losses)
|
|
||||||
mean_loss = np.mean(losses)
|
mean_loss = np.mean(losses)
|
||||||
|
|
||||||
# Take only the first report_count items
|
# Take only the first report_count items
|
||||||
@ -144,7 +130,7 @@ def evaluate(test_csvs, create_model, try_loading):
|
|||||||
print('-' * 80)
|
print('-' * 80)
|
||||||
for sample in report_samples:
|
for sample in report_samples:
|
||||||
print('WER: %f, CER: %f, loss: %f' %
|
print('WER: %f, CER: %f, loss: %f' %
|
||||||
(sample.wer, sample.distance, sample.loss))
|
(sample.wer, sample.cer, sample.loss))
|
||||||
print(' - src: "%s"' % sample.src)
|
print(' - src: "%s"' % sample.src)
|
||||||
print(' - res: "%s"' % sample.res)
|
print(' - res: "%s"' % sample.res)
|
||||||
print('-' * 80)
|
print('-' * 80)
|
||||||
|
@ -13,7 +13,6 @@ from six.moves import zip, range
|
|||||||
from multiprocessing import JoinableQueue, Pool, Process, Queue, cpu_count
|
from multiprocessing import JoinableQueue, Pool, Process, Queue, cpu_count
|
||||||
from deepspeech import Model
|
from deepspeech import Model
|
||||||
|
|
||||||
from util.text import levenshtein
|
|
||||||
from util.evaluate_tools import process_decode_result, calculate_report
|
from util.evaluate_tools import process_decode_result, calculate_report
|
||||||
|
|
||||||
r'''
|
r'''
|
||||||
@ -96,9 +95,7 @@ def main():
|
|||||||
ground_truths.append(msg['ground_truth'])
|
ground_truths.append(msg['ground_truth'])
|
||||||
predictions.append(msg['prediction'])
|
predictions.append(msg['prediction'])
|
||||||
|
|
||||||
distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]
|
wer, cer, samples = calculate_report(ground_truths, predictions, losses)
|
||||||
|
|
||||||
wer, cer, samples = calculate_report(ground_truths, predictions, distances, losses)
|
|
||||||
mean_loss = np.mean(losses)
|
mean_loss = np.mean(losses)
|
||||||
|
|
||||||
print('Test - WER: %f, CER: %f, loss: %f' %
|
print('Test - WER: %f, CER: %f, loss: %f' %
|
||||||
|
@ -6,7 +6,8 @@ from multiprocessing.dummy import Pool
|
|||||||
|
|
||||||
from attrdict import AttrDict
|
from attrdict import AttrDict
|
||||||
|
|
||||||
from util.text import wer_cer_batch, levenshtein
|
from util.text import levenshtein
|
||||||
|
|
||||||
|
|
||||||
def pmap(fun, iterable):
|
def pmap(fun, iterable):
|
||||||
pool = Pool()
|
pool = Pool()
|
||||||
@ -14,29 +15,53 @@ def pmap(fun, iterable):
|
|||||||
pool.close()
|
pool.close()
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def _capped_error_rate(distance, length):
    r"""
    Return ``distance / length`` capped at 1.0, tolerating ``length == 0``.
    """
    if not length:
        # Without any reference tokens the ratio is undefined. Report a perfect
        # score when nothing had to be edited and the cap (1.0) otherwise,
        # instead of raising ZeroDivisionError.
        return 1.0 if distance else 0.0
    # float() keeps this a true division even where ``/`` on two ints would
    # truncate.
    return min(float(distance) / length, 1.0)


def wer_cer_batch(samples):
    r"""
    Compute the word and character error rates accumulated over ``samples``.

    The WER is defined as the edit/Levenshtein distance on word level divided by
    the amount of words in the original text.
    In case of the original having more words (N) than the result and both
    being totally different (all N words resulting in 1 edit operation each),
    the WER will always be 1 (N / N = 1).
    The CER is the same ratio computed from the character-level attributes.

    ``samples`` is any iterable of objects exposing the ``word_distance``,
    ``word_length``, ``char_distance`` and ``char_length`` attributes. It is
    traversed exactly once, so a one-shot iterator is accepted as well.

    Returns a ``(wer, cer)`` tuple with both rates capped at 1.0. When the
    accumulated length of a level is zero (no samples, or only empty
    originals) that rate is 0.0 if its accumulated distance is zero as well
    and 1.0 otherwise.
    """
    word_distance = word_length = 0
    char_distance = char_length = 0

    # Single pass: totals are accumulated together so that the input does not
    # need to support repeated iteration.
    for sample in samples:
        word_distance += sample.word_distance
        word_length += sample.word_length
        char_distance += sample.char_distance
        char_length += sample.char_length

    wer = _capped_error_rate(word_distance, word_length)
    cer = _capped_error_rate(char_distance, char_length)

    return wer, cer
|
||||||
|
|
||||||
|
|
||||||
def process_decode_result(item):
|
def process_decode_result(item):
|
||||||
label, decoding, distance, loss = item
|
ground_truth, prediction, loss = item
|
||||||
word_distance = levenshtein(label.split(), decoding.split())
|
char_distance = levenshtein(ground_truth, prediction)
|
||||||
word_length = float(len(label.split()))
|
char_length = len(ground_truth)
|
||||||
|
word_distance = levenshtein(ground_truth.split(), prediction.split())
|
||||||
|
word_length = len(ground_truth.split())
|
||||||
return AttrDict({
|
return AttrDict({
|
||||||
'src': label,
|
'src': ground_truth,
|
||||||
'res': decoding,
|
'res': prediction,
|
||||||
'loss': loss,
|
'loss': loss,
|
||||||
'distance': distance,
|
'char_distance': char_distance,
|
||||||
|
'char_length': char_length,
|
||||||
|
'word_distance': word_distance,
|
||||||
|
'word_length': word_length,
|
||||||
|
'cer': char_distance / char_length,
|
||||||
'wer': word_distance / word_length,
|
'wer': word_distance / word_length,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
def calculate_report(labels, decodings, distances, losses):
|
def calculate_report(labels, decodings, losses):
|
||||||
r'''
|
r'''
|
||||||
This routine will calculate a WER report.
|
This routine will calculate a WER report.
|
||||||
It'll compute the `mean` WER and create ``Sample`` objects of the ``report_count`` top lowest
|
It'll compute the `mean` WER and create ``Sample`` objects of the ``report_count`` top lowest
|
||||||
loss items from the provided WER results tuple (only items with WER!=0 and ordered by their WER).
|
loss items from the provided WER results tuple (only items with WER!=0 and ordered by their WER).
|
||||||
'''
|
'''
|
||||||
samples = pmap(process_decode_result, zip(labels, decodings, distances, losses))
|
samples = pmap(process_decode_result, zip(labels, decodings, losses))
|
||||||
|
|
||||||
# Getting the WER and CER from the accumulated edit distances and lengths
|
# Getting the WER and CER from the accumulated edit distances and lengths
|
||||||
samples_wer, samples_cer = wer_cer_batch(labels, decodings)
|
samples_wer, samples_cer = wer_cer_batch(samples)
|
||||||
|
|
||||||
# Order the remaining items by their loss (lowest loss on top)
|
# Order the remaining items by their loss (lowest loss on top)
|
||||||
samples.sort(key=lambda s: s.loss)
|
samples.sort(key=lambda s: s.loss)
|
||||||
|
28
util/text.py
28
util/text.py
@ -55,34 +55,6 @@ def text_to_char_array(original, alphabet):
|
|||||||
return np.asarray([alphabet.label_from_string(c) for c in original])
|
return np.asarray([alphabet.label_from_string(c) for c in original])
|
||||||
|
|
||||||
|
|
||||||
def wer_cer_batch(originals, results):
    r"""
    Compute the word and character error rates accumulated over a batch.

    The WER is defined as the editing/Levenshtein distance on word level
    divided by the amount of words in the original text.
    In case of the original having more words (N) than the result and both
    being totally different (all N words resulting in 1 edit operation each),
    the WER will always be 1 (N / N = 1).
    The CER is the same ratio computed on character level.

    ``originals`` and ``results`` are equally long sequences of strings that
    are compared pairwise.

    Returns a ``(wer, cer)`` tuple of floats. The rates are not capped. When
    the accumulated original length of a level is zero (empty batch, or only
    empty originals) that rate is 0.0 if no edits were counted and
    ``float('inf')`` otherwise, instead of raising ZeroDivisionError.
    """
    assert len(originals) == len(results)

    total_cer = 0.0
    total_char_length = 0.0

    total_wer = 0.0
    total_word_length = 0.0

    for original, result in zip(originals, results):
        total_cer += levenshtein(original, result)
        total_char_length += len(original)

        # The WER is calculated on word (and NOT on character) level.
        # Therefore we split the strings into words first
        original_words = original.split()
        total_wer += levenshtein(original_words, result.split())
        total_word_length += len(original_words)

    def ratio(total_distance, total_length):
        # Guard the division: with nothing to compare against, the rate is
        # undefined, so report "no errors" or "unbounded" explicitly.
        if total_length:
            return total_distance / total_length
        return float('inf') if total_distance else 0.0

    return ratio(total_wer, total_word_length), ratio(total_cer, total_char_length)
|
|
||||||
|
|
||||||
|
|
||||||
# The following code is from: http://hetland.org/coding/python/levenshtein.py
|
# The following code is from: http://hetland.org/coding/python/levenshtein.py
|
||||||
|
|
||||||
# This is a straightforward implementation of a well-known algorithm, and thus
|
# This is a straightforward implementation of a well-known algorithm, and thus
|
||||||
|
Loading…
x
Reference in New Issue
Block a user