From 12c62756c77a850cc0726f0d6cd45dff2ba84bdc Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 5 Feb 2019 09:29:47 -0200 Subject: [PATCH] Switch wer_cer_batch to compute real CER over corpus --- util/text.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/util/text.py b/util/text.py index a3629429..c39d6b9c 100644 --- a/util/text.py +++ b/util/text.py @@ -76,17 +76,20 @@ def wer_cer_batch(originals, results): assert len(originals) == len(results) total_cer = 0.0 + total_char_length = 0.0 total_wer = 0.0 total_word_length = 0.0 for original, result in zip(originals, results): total_cer += levenshtein(original, result) + total_char_length += len(original) total_wer += levenshtein(original.split(), result.split()) total_word_length += len(original.split()) - return total_wer / total_word_length, total_cer / len(originals) + return total_wer / total_word_length, total_cer / total_char_length + # The following code is from: http://hetland.org/coding/python/levenshtein.py