Use tf.contrib.layers.dense_to_sparse instead of util/ctc.py

Reuben Morais 2019-02-04 09:19:48 -02:00
parent 7a14bcc4de
commit f3613da82a
3 changed files with 17 additions and 64 deletions

evaluate.py

@@ -19,7 +19,6 @@ from multiprocessing import Pool, cpu_count
 from six.moves import zip, range
 from util.audio import audiofile_to_input_vector
 from util.config import Config, initialize_globals
-from util.ctc import ctc_label_dense_to_sparse
 from util.flags import create_flags, FLAGS
 from util.logging import log_error
 from util.preprocess import pmap, preprocess
@@ -111,7 +110,14 @@ def evaluate(test_data, inference_graph):
     labels_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size, None], name="labels")
     label_lengths_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size], name="label_lengths")
 
-    sparse_labels = tf.cast(ctc_label_dense_to_sparse(labels_ph, label_lengths_ph, FLAGS.test_batch_size), tf.int32)
+    # We add 1 to all elements of the transcript to avoid any zero values
+    # since we use that as an end-of-sequence token for converting the batch
+    # into a SparseTensor. So here we convert the placeholder back into a
+    # SparseTensor and subtract ones to get the real labels.
+    sparse_labels = tf.contrib.layers.dense_to_sparse(labels_ph)
+    neg_ones = tf.SparseTensor(sparse_labels.indices, -1 * tf.ones_like(sparse_labels.values), sparse_labels.dense_shape)
+    sparse_labels = tf.sparse_add(sparse_labels, neg_ones)
+
     loss = tf.nn.ctc_loss(labels=sparse_labels,
                           inputs=layers['raw_logits'],
                           sequence_length=inputs['input_lengths'])
@@ -143,7 +149,7 @@ def evaluate(test_data, inference_graph):
     features = pad_to_dense(batch['features'].values)
     features_len = batch['features_len'].values
-    labels = pad_to_dense(batch['transcript'].values)
+    labels = pad_to_dense(batch['transcript'].values + 1)
     label_lengths = batch['transcript_len'].values
     logits, loss_ = session.run([transposed, loss], feed_dict={
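
The add-one / dense_to_sparse / subtract-one round trip introduced above is easy to check in isolation. The following is a minimal TF 1.x sketch, not part of this diff, using made-up batch values: transcripts arrive shifted by +1 so that 0 only ever marks padding, dense_to_sparse drops the zeros (its default eos_token), and tf.sparse_add with a SparseTensor of -1s undoes the shift.

import numpy as np
import tensorflow as tf

# Two hypothetical transcripts, already shifted by +1 and padded with 0.
padded_plus_one = np.array([[3, 1, 2, 0],
                            [5, 4, 0, 0]], dtype=np.int32)

labels_ph = tf.placeholder(tf.int32, [None, None])
sparse = tf.contrib.layers.dense_to_sparse(labels_ph)  # drops the 0 padding
neg_ones = tf.SparseTensor(sparse.indices,
                           -1 * tf.ones_like(sparse.values),
                           sparse.dense_shape)
real_labels = tf.sparse_add(sparse, neg_ones)          # undo the +1 shift

with tf.Session() as session:
    result = session.run(real_labels, feed_dict={labels_ph: padded_plus_one})
    print(result.values)  # [2 0 1 4 3] -- the original label values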

util/ctc.py

@@ -1,57 +0,0 @@
-from __future__ import absolute_import, division, print_function
-
-import tensorflow as tf
-
-from functools import reduce
-from six.moves import range
-
-# gather_nd is taken from https://github.com/tensorflow/tensorflow/issues/206#issuecomment-229678962
-#
-# Unfortunately we can't just use tf.gather_nd because it does not have gradients
-# implemented yet, so we need this workaround.
-#
-def gather_nd(params, indices, shape):
-    rank = len(shape)
-    flat_params = tf.reshape(params, [-1])
-    multipliers = [reduce(lambda x, y: x*y, shape[i+1:], 1) for i in range(0, rank)]
-    indices_unpacked = tf.unstack(tf.transpose(indices, [rank - 1] + list(range(0, rank - 1))))
-    flat_indices = sum([a*b for a,b in zip(multipliers, indices_unpacked)])
-    return tf.gather(flat_params, flat_indices)
-
-# ctc_label_dense_to_sparse is taken from https://github.com/tensorflow/tensorflow/issues/1742#issuecomment-205291527
-#
-# The CTC implementation in TensorFlow needs labels in a sparse representation,
-# but sparse data and queues don't mix well, so we store padded tensors in the
-# queue and convert to a sparse representation after dequeuing a batch.
-#
-def ctc_label_dense_to_sparse(labels, label_lengths, batch_size):
-    # The second dimension of labels must be equal to the longest label length in the batch
-    correct_shape_assert = tf.assert_equal(tf.shape(labels)[1], tf.reduce_max(label_lengths))
-    with tf.control_dependencies([correct_shape_assert]):
-        labels = tf.identity(labels)
-
-    label_shape = tf.shape(labels)
-    num_batches_tns = tf.stack([label_shape[0]])
-    max_num_labels_tns = tf.stack([label_shape[1]])
-
-    def range_less_than(previous_state, current_input):
-        return tf.expand_dims(tf.range(label_shape[1]), 0) < current_input
-
-    init = tf.cast(tf.fill(max_num_labels_tns, 0), tf.bool)
-    init = tf.expand_dims(init, 0)
-    dense_mask = tf.scan(range_less_than, label_lengths, initializer=init, parallel_iterations=1)
-    dense_mask = dense_mask[:, 0, :]
-
-    label_array = tf.reshape(tf.tile(tf.range(0, label_shape[1]), num_batches_tns),
-                             label_shape)
-    label_ind = tf.boolean_mask(label_array, dense_mask)
-
-    batch_array = tf.transpose(tf.reshape(tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns), tf.reverse(label_shape, [0])))
-    batch_ind = tf.boolean_mask(batch_array, dense_mask)
-
-    indices = tf.transpose(tf.reshape(tf.concat([batch_ind, label_ind], 0), [2, -1]))
-
-    shape = [batch_size, tf.reduce_max(label_lengths)]
-    vals_sparse = gather_nd(labels, indices, shape)
-
-    return tf.SparseTensor(tf.to_int64(indices), vals_sparse, tf.to_int64(label_shape))
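
For reference, the gather_nd workaround deleted here relies only on row-major index flattening: an index (i, j) into a tensor of shape (R, C) maps to position i*C + j in the flattened tensor, so a plain gather over the flat array emulates gather_nd. A short numpy sketch (illustrative values, not from the repository):

import numpy as np

params = np.array([[10, 11, 12],
                   [20, 21, 22]])
indices = np.array([[0, 2], [1, 0]])          # gather elements (0, 2) and (1, 0)
multipliers = np.array([params.shape[1], 1])  # [3, 1] for shape (2, 3)
flat_indices = indices.dot(multipliers)       # [2, 3]
print(params.reshape(-1)[flat_indices])       # [12 20]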

util/feeding.py

@@ -4,7 +4,6 @@ import tensorflow as tf
 from math import ceil
 from six.moves import range
 from threading import Thread
-from util.ctc import ctc_label_dense_to_sparse
 from util.gpu import get_available_gpus
@@ -143,11 +142,14 @@ class _DataSetLoader(object):
                 (features.strides[0], features.strides[0], features.strides[1]),
                 writeable=False)
 
+            # We add 1 to all elements of the transcript here to avoid any zero
+            # values since we use that as an end-of-sequence token for converting
+            # the batch into a SparseTensor.
             try:
                 session.run(self._enqueue_op, feed_dict={
                     self._model_feeder.ph_x: features,
                     self._model_feeder.ph_x_length: num_strides,
-                    self._model_feeder.ph_y: transcript,
+                    self._model_feeder.ph_y: transcript + 1,
                     self._model_feeder.ph_y_length: transcript_len
                 })
             except tf.errors.CancelledError:
@@ -173,8 +175,10 @@ class _TowerFeeder(object):
        Draw the next batch from the combined switchable queue.
        '''
        source, source_lengths, target, target_lengths = self._queue.dequeue_many(self._model_feeder.ph_batch_size)
-       sparse_labels = ctc_label_dense_to_sparse(target, target_lengths, self._model_feeder.ph_batch_size)
-       return source, source_lengths, sparse_labels
+       # Back to sparse, then subtract one to get the real labels
+       sparse_labels = tf.contrib.layers.dense_to_sparse(target)
+       neg_ones = tf.SparseTensor(sparse_labels.indices, -1 * tf.ones_like(sparse_labels.values), sparse_labels.dense_shape)
+       return source, source_lengths, tf.sparse_add(sparse_labels, neg_ones)
 
    def start_queue_threads(self, session, coord):
        '''
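
The reason the queue holds dense, shifted transcripts in the first place is that TF 1.x queues cannot carry SparseTensors, while a PaddingFIFOQueue pads variable-length dense tensors automatically on dequeue_many. A minimal sketch of that behavior, with illustrative names and values that are not part of this diff:

import tensorflow as tf

# A queue of variable-length int32 transcripts; shapes=[[None]] lets
# PaddingFIFOQueue pad each dequeued batch with 0s up to the longest element.
queue = tf.PaddingFIFOQueue(capacity=8, dtypes=[tf.int32], shapes=[[None]])
transcript_ph = tf.placeholder(tf.int32, [None])
enqueue_op = queue.enqueue([transcript_ph + 1])  # +1 shift before enqueueing
batch = queue.dequeue_many(2)                    # padded with 0s

with tf.Session() as session:
    session.run(enqueue_op, feed_dict={transcript_ph: [2, 0, 1]})
    session.run(enqueue_op, feed_dict={transcript_ph: [4, 3]})
    print(session.run(batch))  # [[3 1 2] [5 4 0]] -- 0 only marks padding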