Revert to a pipelined approach for test epochs to avoid CPU OOM with large alphabets
commit 699e4ebcd7
parent a4b35d2f24
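The commit message in one line of arithmetic: the old two-pass run_test buffered every batch's transposed logits (one float32 per timestep per alphabet symbol, plus the CTC blank) until a second decoding pass, so peak CPU memory grew with both test-set size and alphabet size. A back-of-envelope sketch with purely illustrative numbers (none of them from this commit):

    # Approximate size of logits buffered across a whole test epoch.
    def logits_bytes(num_samples, timesteps, alphabet_size, bytes_per_float=4):
        return num_samples * timesteps * (alphabet_size + 1) * bytes_per_float

    # ~29-symbol English-style alphabet vs. a large (e.g. CJK) alphabet,
    # for a hypothetical 10k-utterance test set at ~500 timesteps each:
    print(logits_bytes(10_000, 500, 29) / 2**30)     # ~0.6 GiB: fine
    print(logits_bytes(10_000, 500, 5_000) / 2**30)  # ~93 GiB: CPU OOM

Decoding each batch as it is fetched keeps only strings and scalar losses, so this buffer never exists.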
evaluate.py (40 lines changed)
@@ -19,7 +19,6 @@ from util.evaluate_tools import calculate_report
 from util.feeding import create_dataset
 from util.flags import create_flags, FLAGS
 from util.logging import log_error, log_progress, create_progressbar
-from util.text import levenshtein
 
 
 def sparse_tensor_value_to_texts(value, alphabet):
@@ -88,14 +87,13 @@ def evaluate(test_csvs, create_model, try_loading):
         exit(1)
 
     def run_test(init_op, dataset):
-        logitses = []
         losses = []
-        seq_lengths = []
+        predictions = []
         ground_truths = []
 
-        bar = create_progressbar(prefix='Computing acoustic model predictions | ',
+        bar = create_progressbar(prefix='Test epoch | ',
                                  widgets=['Steps: ', progressbar.Counter(), ' | ', progressbar.Timer()]).start()
-        log_progress('Computing acoustic model predictions...')
+        log_progress('Test epoch...')
 
         step_count = 0
 
@@ -105,35 +103,23 @@ def evaluate(test_csvs, create_model, try_loading):
-        # First pass, compute losses and transposed logits for decoding
         while True:
             try:
-                logits, loss_, lengths, transcripts = session.run([transposed, loss, batch_x_len, batch_y])
+                batch_logits, batch_loss, batch_lengths, batch_transcripts = \
+                    session.run([transposed, loss, batch_x_len, batch_y])
             except tf.errors.OutOfRangeError:
                 break
 
+            decoded = ctc_beam_search_decoder_batch(batch_logits, batch_lengths, Config.alphabet, FLAGS.beam_width,
+                                                    num_processes=num_processes, scorer=scorer)
+            predictions.extend(d[0][1] for d in decoded)
+            ground_truths.extend(sparse_tensor_value_to_texts(batch_transcripts, Config.alphabet))
+            losses.extend(batch_loss)
+
             step_count += 1
             bar.update(step_count)
 
-            logitses.append(logits)
-            losses.extend(loss_)
-            seq_lengths.append(lengths)
-            ground_truths.extend(sparse_tensor_value_to_texts(transcripts, Config.alphabet))
-
         bar.finish()
 
-        predictions = []
-
-        bar = create_progressbar(max_value=step_count,
-                                 prefix='Decoding predictions | ').start()
-        log_progress('Decoding predictions...')
-
-        # Second pass, decode logits and compute WER and edit distance metrics
-        for logits, seq_length in bar(zip(logitses, seq_lengths)):
-            decoded = ctc_beam_search_decoder_batch(logits, seq_length, Config.alphabet, FLAGS.beam_width,
-                                                    num_processes=num_processes, scorer=scorer)
-            predictions.extend(d[0][1] for d in decoded)
-
-        distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]
-
-        wer, cer, samples = calculate_report(ground_truths, predictions, distances, losses)
+        wer, cer, samples = calculate_report(ground_truths, predictions, losses)
         mean_loss = np.mean(losses)
 
         # Take only the first report_count items
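The shape of the new loop, reduced to a toy: each batch's bulky array is turned into small Python strings before the next batch is fetched, so peak memory is one batch's worth rather than the whole epoch's. Everything below is a hypothetical stand-in (a greedy argmax replaces ctc_beam_search_decoder_batch):

    import numpy as np

    def fake_batches(n=3, time=5, batch=2, alphabet=4):
        # Stand-in for the dataset: one logits array per batch,
        # shaped [time, batch, alphabet + 1] like the transposed tensor.
        for _ in range(n):
            yield np.random.rand(time, batch, alphabet + 1)

    def fake_decode(logits):
        # Greedy argmax per timestep, standing in for beam search.
        best = logits.argmax(axis=2)  # [time, batch]
        return [''.join(str(s) for s in best[:, b]) for b in range(best.shape[1])]

    predictions = []
    for logits in fake_batches():
        predictions.extend(fake_decode(logits))  # only strings are retained
    print(predictions)                           # the logits array is freed each iteration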
@@ -144,7 +130,7 @@ def evaluate(test_csvs, create_model, try_loading):
         print('-' * 80)
         for sample in report_samples:
             print('WER: %f, CER: %f, loss: %f' %
-                  (sample.wer, sample.distance, sample.loss))
+                  (sample.wer, sample.cer, sample.loss))
             print(' - src: "%s"' % sample.src)
             print(' - res: "%s"' % sample.res)
             print('-' * 80)
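A second fix rides along in this file: the report previously printed sample.distance (a raw edit distance) in the CER column; with the per-sample fields added further down, it now prints the actual rate. A minimal rendering with made-up values:

    from attrdict import AttrDict

    # 'hello world' -> 'hello word': 1 of 2 words wrong (WER 0.5),
    # 1 of 11 characters wrong (CER ~0.09); the loss is arbitrary here.
    sample = AttrDict({'wer': 0.5, 'cer': 1 / 11, 'loss': 14.2,
                       'src': 'hello world', 'res': 'hello word'})
    print('WER: %f, CER: %f, loss: %f' % (sample.wer, sample.cer, sample.loss))
    print(' - src: "%s"' % sample.src)
    print(' - res: "%s"' % sample.res)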

@@ -13,7 +13,6 @@ from six.moves import zip, range
 from multiprocessing import JoinableQueue, Pool, Process, Queue, cpu_count
 from deepspeech import Model
 
-from util.text import levenshtein
 from util.evaluate_tools import process_decode_result, calculate_report
 
 r'''
@@ -96,9 +95,7 @@ def main():
    ground_truths.append(msg['ground_truth'])
    predictions.append(msg['prediction'])
 
-    distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]
-
-    wer, cer, samples = calculate_report(ground_truths, predictions, distances, losses)
+    wer, cer, samples = calculate_report(ground_truths, predictions, losses)
    mean_loss = np.mean(losses)
 
    print('Test - WER: %f, CER: %f, loss: %f' %

util/evaluate_tools.py

@@ -6,7 +6,8 @@ from multiprocessing.dummy import Pool
 
 from attrdict import AttrDict
 
-from util.text import wer_cer_batch, levenshtein
+from util.text import levenshtein
 
 
 def pmap(fun, iterable):
     pool = Pool()
@@ -14,29 +15,53 @@ def pmap(fun, iterable):
     pool.close()
     return results
 
 
+def wer_cer_batch(samples):
+    r"""
+    The WER is defined as the edit/Levenshtein distance on word level divided by
+    the amount of words in the original text.
+    In case of the original having more words (N) than the result and both
+    being totally different (all N words resulting in 1 edit operation each),
+    the WER will always be 1 (N / N = 1).
+    """
+    wer = sum(s.word_distance for s in samples) / sum(s.word_length for s in samples)
+    cer = sum(s.char_distance for s in samples) / sum(s.char_length for s in samples)
+
+    wer = min(wer, 1.0)
+    cer = min(cer, 1.0)
+
+    return wer, cer
+
+
 def process_decode_result(item):
-    label, decoding, distance, loss = item
-    word_distance = levenshtein(label.split(), decoding.split())
-    word_length = float(len(label.split()))
+    ground_truth, prediction, loss = item
+    char_distance = levenshtein(ground_truth, prediction)
+    char_length = len(ground_truth)
+    word_distance = levenshtein(ground_truth.split(), prediction.split())
+    word_length = len(ground_truth.split())
     return AttrDict({
-        'src': label,
-        'res': decoding,
+        'src': ground_truth,
+        'res': prediction,
         'loss': loss,
-        'distance': distance,
+        'char_distance': char_distance,
+        'char_length': char_length,
+        'word_distance': word_distance,
+        'word_length': word_length,
+        'cer': char_distance / char_length,
         'wer': word_distance / word_length,
     })
 
 
-def calculate_report(labels, decodings, distances, losses):
+def calculate_report(labels, decodings, losses):
     r'''
     This routine will calculate a WER report.
     It'll compute the `mean` WER and create ``Sample`` objects of the ``report_count`` top lowest
     loss items from the provided WER results tuple (only items with WER!=0 and ordered by their WER).
     '''
-    samples = pmap(process_decode_result, zip(labels, decodings, distances, losses))
+    samples = pmap(process_decode_result, zip(labels, decodings, losses))
 
     # Getting the WER and CER from the accumulated edit distances and lengths
-    samples_wer, samples_cer = wer_cer_batch(labels, decodings)
+    samples_wer, samples_cer = wer_cer_batch(samples)
 
     # Order the remaining items by their loss (lowest loss on top)
     samples.sort(key=lambda s: s.loss)
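How the relocated wer_cer_batch behaves, worked by hand (AttrDict rows stand in for the Sample objects built by process_decode_result; the distances were computed manually and are purely illustrative): distances and lengths are summed across the corpus before dividing, so long utterances weigh more than short ones, and the clamp matters because insertions can push a raw ratio above 1.

    from attrdict import AttrDict

    samples = [
        # 'the quick fox' -> 'the quack fox': 1 word edit of 3, 1 char edit of 13
        AttrDict({'word_distance': 1, 'word_length': 3,
                  'char_distance': 1, 'char_length': 13}),
        # 'no' -> 'yes yes yes yes': 4 word edits of 1, 15 char edits of 2
        AttrDict({'word_distance': 4, 'word_length': 1,
                  'char_distance': 15, 'char_length': 2}),
    ]

    raw_wer = sum(s.word_distance for s in samples) / sum(s.word_length for s in samples)
    print(raw_wer)            # 1.25 -- insertions pushed it past 1
    print(min(raw_wer, 1.0))  # 1.0  -- what wer_cer_batch reports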
util/text.py (28 lines changed)
@@ -55,34 +55,6 @@ def text_to_char_array(original, alphabet):
     return np.asarray([alphabet.label_from_string(c) for c in original])
 
 
-def wer_cer_batch(originals, results):
-    r"""
-    The WER is defined as the editing/Levenshtein distance on word level
-    divided by the amount of words in the original text.
-    In case of the original having more words (N) than the result and both
-    being totally different (all N words resulting in 1 edit operation each),
-    the WER will always be 1 (N / N = 1).
-    """
-    # The WER is calculated on word (and NOT on character) level.
-    # Therefore we split the strings into words first
-    assert len(originals) == len(results)
-
-    total_cer = 0.0
-    total_char_length = 0.0
-
-    total_wer = 0.0
-    total_word_length = 0.0
-
-    for original, result in zip(originals, results):
-        total_cer += levenshtein(original, result)
-        total_char_length += len(original)
-
-        total_wer += levenshtein(original.split(), result.split())
-        total_word_length += len(original.split())
-
-    return total_wer / total_word_length, total_cer / total_char_length
-
-
 # The following code is from: http://hetland.org/coding/python/levenshtein.py
 
 # This is a straightforward implementation of a well-known algorithm, and thus
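The hunk ends inside the comment pointing at Hetland's Levenshtein implementation, which stays in util/text.py. For readers without the link, a minimal equivalent (the project's levenshtein may differ in details): because it compares elements only for equality, the same function serves character-level CER and word-level WER.

    def levenshtein(a, b):
        # Standard dynamic-programming edit distance.
        prev = list(range(len(b) + 1))
        for i, x in enumerate(a, 1):
            cur = [i]
            for j, y in enumerate(b, 1):
                cur.append(min(prev[j] + 1,               # deletion
                               cur[j - 1] + 1,            # insertion
                               prev[j - 1] + (x != y)))   # substitution
            prev = cur
        return prev[-1]

    print(levenshtein('the quick fox', 'the quack fox'))                  # 1 (characters)
    print(levenshtein('the quick fox'.split(), 'the quack fox'.split()))  # 1 (words)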