Clean up and split TensorFlow deps of text.py
parent 3378008f5d
commit 7a14bcc4de
evaluate.py | 24
@@ -19,10 +19,11 @@ from multiprocessing import Pool, cpu_count
 from six.moves import zip, range
 from util.audio import audiofile_to_input_vector
 from util.config import Config, initialize_globals
+from util.ctc import ctc_label_dense_to_sparse
 from util.flags import create_flags, FLAGS
 from util.logging import log_error
 from util.preprocess import pmap, preprocess
-from util.text import Alphabet, ctc_label_dense_to_sparse, wer, levenshtein
+from util.text import Alphabet, wer_cer_batch, levenshtein


 def split_data(dataset, batch_size):
@@ -47,15 +48,14 @@ def pad_to_dense(jagged):

 def process_decode_result(item):
     label, decoding, distance, loss = item
-    sample_wer = wer(label, decoding)
+    word_distance = levenshtein(label.split(), decoding.split())
+    word_length = float(len(label.split()))
     return AttrDict({
         'src': label,
         'res': decoding,
         'loss': loss,
         'distance': distance,
-        'wer': sample_wer,
-        'levenshtein': levenshtein(label.split(), decoding.split()),
-        'label_length': float(len(label.split())),
+        'wer': word_distance / word_length,
     })


@@ -67,11 +67,8 @@ def calculate_report(labels, decodings, distances, losses):
     '''
     samples = pmap(process_decode_result, zip(labels, decodings, distances, losses))

-    total_levenshtein = sum(s.levenshtein for s in samples)
-    total_label_length = sum(s.label_length for s in samples)
-
-    # Getting the WER from the accumulated levenshteins and lengths
-    samples_wer = total_levenshtein / total_label_length
+    # Getting the WER and CER from the accumulated edit distances and lengths
+    samples_wer, samples_cer = wer_cer_batch(labels, decodings)

     # Order the remaining items by their loss (lowest loss on top)
     samples.sort(key=lambda s: s.loss)
@@ -79,7 +76,7 @@ def calculate_report(labels, decodings, distances, losses):
     # Then order by WER (highest WER on top)
     samples.sort(key=lambda s: s.wer, reverse=True)

-    return samples_wer, samples
+    return samples_wer, samples_cer, samples


 def evaluate(test_data, inference_graph):
@@ -183,15 +180,14 @@ def evaluate(test_data, inference_graph):

     distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

-    wer, samples = calculate_report(ground_truths, predictions, distances, losses)
-    mean_edit_distance = np.mean(distances)
+    wer, cer, samples = calculate_report(ground_truths, predictions, distances, losses)
     mean_loss = np.mean(losses)

     # Take only the first report_count items
     report_samples = itertools.islice(samples, FLAGS.report_count)

     print('Test - WER: %f, CER: %f, loss: %f' %
-          (wer, mean_edit_distance, mean_loss))
+          (wer, cer, mean_loss))
     print('-' * 80)
     for sample in report_samples:
         print('WER: %f, CER: %f, loss: %f' %
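For reference, the per-sample WER computed in process_decode_result above is just a word-level edit distance divided by the number of words in the label. A minimal sketch of that arithmetic, with made-up transcript strings for illustration:

    from util.text import levenshtein

    label    = 'the quick brown fox'    # ground truth (hypothetical)
    decoding = 'the quick brown box'    # decoder output (hypothetical)

    word_distance = levenshtein(label.split(), decoding.split())  # 1 edit ('fox' -> 'box')
    word_length = float(len(label.split()))                       # 4.0 words
    print(word_distance / word_length)                            # 0.25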
util/ctc.py | 57 (new file)
@@ -0,0 +1,57 @@
+from __future__ import absolute_import, division, print_function
+
+import tensorflow as tf
+
+from functools import reduce
+from six.moves import range
+
+
+# gather_nd is taken from https://github.com/tensorflow/tensorflow/issues/206#issuecomment-229678962
+#
+# Unfortunately we can't just use tf.gather_nd because it does not have gradients
+# implemented yet, so we need this workaround.
+#
+def gather_nd(params, indices, shape):
+    rank = len(shape)
+    flat_params = tf.reshape(params, [-1])
+    multipliers = [reduce(lambda x, y: x*y, shape[i+1:], 1) for i in range(0, rank)]
+    indices_unpacked = tf.unstack(tf.transpose(indices, [rank - 1] + list(range(0, rank - 1))))
+    flat_indices = sum([a*b for a,b in zip(multipliers, indices_unpacked)])
+    return tf.gather(flat_params, flat_indices)
+
+
+# ctc_label_dense_to_sparse is taken from https://github.com/tensorflow/tensorflow/issues/1742#issuecomment-205291527
+#
+# The CTC implementation in TensorFlow needs labels in a sparse representation,
+# but sparse data and queues don't mix well, so we store padded tensors in the
+# queue and convert to a sparse representation after dequeuing a batch.
+#
+def ctc_label_dense_to_sparse(labels, label_lengths, batch_size):
+    # The second dimension of labels must be equal to the longest label length in the batch
+    correct_shape_assert = tf.assert_equal(tf.shape(labels)[1], tf.reduce_max(label_lengths))
+    with tf.control_dependencies([correct_shape_assert]):
+        labels = tf.identity(labels)
+
+    label_shape = tf.shape(labels)
+    num_batches_tns = tf.stack([label_shape[0]])
+    max_num_labels_tns = tf.stack([label_shape[1]])
+    def range_less_than(previous_state, current_input):
+        return tf.expand_dims(tf.range(label_shape[1]), 0) < current_input
+
+    init = tf.cast(tf.fill(max_num_labels_tns, 0), tf.bool)
+    init = tf.expand_dims(init, 0)
+    dense_mask = tf.scan(range_less_than, label_lengths, initializer=init, parallel_iterations=1)
+    dense_mask = dense_mask[:, 0, :]
+
+    label_array = tf.reshape(tf.tile(tf.range(0, label_shape[1]), num_batches_tns),
+                             label_shape)
+    label_ind = tf.boolean_mask(label_array, dense_mask)
+
+    batch_array = tf.transpose(tf.reshape(tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns), tf.reverse(label_shape, [0])))
+    batch_ind = tf.boolean_mask(batch_array, dense_mask)
+
+    indices = tf.transpose(tf.reshape(tf.concat([batch_ind, label_ind], 0), [2, -1]))
+    shape = [batch_size, tf.reduce_max(label_lengths)]
+    vals_sparse = gather_nd(labels, indices, shape)
+
+    return tf.SparseTensor(tf.to_int64(indices), vals_sparse, tf.to_int64(label_shape))
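A minimal sketch of how the relocated helper is typically consumed, assuming a TensorFlow 1.x graph; the placeholder names and shapes below are illustrative and not part of this commit:

    import tensorflow as tf

    from util.ctc import ctc_label_dense_to_sparse

    batch_size = 16
    padded_labels = tf.placeholder(tf.int32, [batch_size, None])  # dense, zero-padded label batch
    label_lengths = tf.placeholder(tf.int32, [batch_size])        # true length of each label
    logits = tf.placeholder(tf.float32, [None, batch_size, 29])   # time-major output (e.g. alphabet + CTC blank)
    input_lengths = tf.placeholder(tf.int32, [batch_size])        # frames per utterance

    # Convert the padded batch into the SparseTensor form the CTC loss expects
    sparse_labels = ctc_label_dense_to_sparse(padded_labels, label_lengths, batch_size)
    loss = tf.nn.ctc_loss(labels=sparse_labels, inputs=logits, sequence_length=input_lengths)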
@@ -4,8 +4,8 @@ import tensorflow as tf
 from math import ceil
 from six.moves import range
 from threading import Thread
+from util.ctc import ctc_label_dense_to_sparse
 from util.gpu import get_available_gpus
-from util.text import ctc_label_dense_to_sparse


 class ModelFeeder(object):
util/text.py | 120
@@ -2,12 +2,10 @@ from __future__ import absolute_import, division, print_function

 import codecs
 import numpy as np
-import tensorflow as tf
 import re
 import sys

 from six.moves import range
-from functools import reduce

 class Alphabet(object):
     def __init__(self, config_file):
@@ -56,6 +54,7 @@ class Alphabet(object):
     def config_file(self):
         return self._config_file

+
 def text_to_char_array(original, alphabet):
     r"""
     Given a Python string ``original``, remove unsupported characters, map characters
@@ -63,44 +62,8 @@ def text_to_char_array(original, alphabet):
     """
     return np.asarray([alphabet.label_from_string(c) for c in original])

-def sparse_tuple_from(sequences, dtype=np.int32):
-    r"""Creates a sparse representention of ``sequences``.
-    Args:
-    * sequences: a list of lists of type dtype where each element is a sequence
-
-    Returns a tuple with (indices, values, shape)
-    """
-    indices = []
-    values = []
-
-    for n, seq in enumerate(sequences):
-        indices.extend(zip([n]*len(seq), range(len(seq))))
-        values.extend(seq)
-
-    indices = np.asarray(indices, dtype=np.int64)
-    values = np.asarray(values, dtype=dtype)
-    shape = np.asarray([len(sequences), indices.max(0)[1]+1], dtype=np.int64)
-
-    return tf.SparseTensor(indices=indices, values=values, shape=shape)
-
-def sparse_tensor_value_to_texts(value, alphabet):
-    r"""
-    Given a :class:`tf.SparseTensor` ``value``, return an array of Python strings
-    representing its values.
-    """
-    return sparse_tuple_to_texts((value.indices, value.values, value.dense_shape), alphabet)
-
-def sparse_tuple_to_texts(tuple, alphabet):
-    indices = tuple[0]
-    values = tuple[1]
-    results = [''] * tuple[2][0]
-    for i in range(len(indices)):
-        index = indices[i][0]
-        results[index] += alphabet.string_from_label(values[i])
-    # List of strings
-    return results
-
-def wer(original, result):
+
+def wer_cer_batch(originals, results):
     r"""
     The WER is defined as the editing/Levenshtein distance on word level
     divided by the amount of words in the original text.
@@ -108,22 +71,22 @@ def wer(original, result):
     being totally different (all N words resulting in 1 edit operation each),
     the WER will always be 1 (N / N = 1).
     """
-    # The WER ist calculated on word (and NOT on character) level.
-    # Therefore we split the strings into words first:
-    original = original.split()
-    result = result.split()
-    return levenshtein(original, result) / float(len(original))
-
-def wers(originals, results):
-    count = len(originals)
-    rates = []
-    mean = 0.0
-    assert count == len(results)
-    for i in range(count):
-        rate = wer(originals[i], results[i])
-        mean = mean + rate
-        rates.append(rate)
-    return rates, mean / float(count)
+    # The WER is calculated on word (and NOT on character) level.
+    # Therefore we split the strings into words first
+    assert len(originals) == len(results)
+
+    total_cer = 0.0
+
+    total_wer = 0.0
+    total_word_length = 0.0
+
+    for original, result in zip(originals, results):
+        total_cer += levenshtein(original, result)
+
+        total_wer += levenshtein(original.split(), result.split())
+        total_word_length += len(original.split())
+
+    return total_wer / total_word_length, total_cer / len(originals)


 # The following code is from: http://hetland.org/coding/python/levenshtein.py
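As a quick sanity check on the new wer_cer_batch, a small worked example (the values follow directly from the code above; note that the second return value is the character-level edit distance averaged over the number of samples):

    from util.text import wer_cer_batch

    originals = ['hello world']
    results   = ['hello word']

    wer, cer = wer_cer_batch(originals, results)
    # word level: levenshtein(['hello', 'world'], ['hello', 'word']) == 1 edit
    #             over 2 words                                       -> wer == 0.5
    # char level: levenshtein('hello world', 'hello word') == 1 edit,
    #             divided by len(originals) == 1                     -> cer == 1.0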
@@ -155,55 +118,6 @@ def levenshtein(a,b):

     return current[n]

-# gather_nd is taken from https://github.com/tensorflow/tensorflow/issues/206#issuecomment-229678962
-#
-# Unfortunately we can't just use tf.gather_nd because it does not have gradients
-# implemented yet, so we need this workaround.
-#
-def gather_nd(params, indices, shape):
-    rank = len(shape)
-    flat_params = tf.reshape(params, [-1])
-    multipliers = [reduce(lambda x, y: x*y, shape[i+1:], 1) for i in range(0, rank)]
-    indices_unpacked = tf.unstack(tf.transpose(indices, [rank - 1] + list(range(0, rank - 1))))
-    flat_indices = sum([a*b for a,b in zip(multipliers, indices_unpacked)])
-    return tf.gather(flat_params, flat_indices)
-
-# ctc_label_dense_to_sparse is taken from https://github.com/tensorflow/tensorflow/issues/1742#issuecomment-205291527
-#
-# The CTC implementation in TensorFlow needs labels in a sparse representation,
-# but sparse data and queues don't mix well, so we store padded tensors in the
-# queue and convert to a sparse representation after dequeuing a batch.
-#
-def ctc_label_dense_to_sparse(labels, label_lengths, batch_size):
-    # The second dimension of labels must be equal to the longest label length in the batch
-    correct_shape_assert = tf.assert_equal(tf.shape(labels)[1], tf.reduce_max(label_lengths))
-    with tf.control_dependencies([correct_shape_assert]):
-        labels = tf.identity(labels)
-
-    label_shape = tf.shape(labels)
-    num_batches_tns = tf.stack([label_shape[0]])
-    max_num_labels_tns = tf.stack([label_shape[1]])
-    def range_less_than(previous_state, current_input):
-        return tf.expand_dims(tf.range(label_shape[1]), 0) < current_input
-
-    init = tf.cast(tf.fill(max_num_labels_tns, 0), tf.bool)
-    init = tf.expand_dims(init, 0)
-    dense_mask = tf.scan(range_less_than, label_lengths, initializer=init, parallel_iterations=1)
-    dense_mask = dense_mask[:, 0, :]
-
-    label_array = tf.reshape(tf.tile(tf.range(0, label_shape[1]), num_batches_tns),
-                             label_shape)
-    label_ind = tf.boolean_mask(label_array, dense_mask)
-
-    batch_array = tf.transpose(tf.reshape(tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns), tf.reverse(label_shape, [0])))
-    batch_ind = tf.boolean_mask(batch_array, dense_mask)
-
-    indices = tf.transpose(tf.reshape(tf.concat([batch_ind, label_ind], 0), [2, -1]))
-    shape = [batch_size, tf.reduce_max(label_lengths)]
-    vals_sparse = gather_nd(labels, indices, shape)
-
-    return tf.SparseTensor(tf.to_int64(indices), vals_sparse, tf.to_int64(label_shape))
-
 # Validate and normalize transcriptions. Returns a cleaned version of the label
 # or None if it's invalid.
 def validate_label(label):