Use tf.contrib.layers.dense_to_sparse instead of util/ctc.py
commit f3613da82a (parent 7a14bcc4de)
evaluate.py (12 changes)
@@ -19,7 +19,6 @@ from multiprocessing import Pool, cpu_count
 from six.moves import zip, range
 from util.audio import audiofile_to_input_vector
 from util.config import Config, initialize_globals
-from util.ctc import ctc_label_dense_to_sparse
 from util.flags import create_flags, FLAGS
 from util.logging import log_error
 from util.preprocess import pmap, preprocess
@@ -111,7 +110,14 @@ def evaluate(test_data, inference_graph):
     labels_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size, None], name="labels")
     label_lengths_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size], name="label_lengths")
 
-    sparse_labels = tf.cast(ctc_label_dense_to_sparse(labels_ph, label_lengths_ph, FLAGS.test_batch_size), tf.int32)
+    # We add 1 to all elements of the transcript to avoid any zero values
+    # since we use that as an end-of-sequence token for converting the batch
+    # into a SparseTensor. So here we convert the placeholder back into a
+    # SparseTensor and subtract ones to get the real labels.
+    sparse_labels = tf.contrib.layers.dense_to_sparse(labels_ph)
+    neg_ones = tf.SparseTensor(sparse_labels.indices, -1 * tf.ones_like(sparse_labels.values), sparse_labels.dense_shape)
+    sparse_labels = tf.sparse_add(sparse_labels, neg_ones)
+
     loss = tf.nn.ctc_loss(labels=sparse_labels,
                           inputs=layers['raw_logits'],
                           sequence_length=inputs['input_lengths'])
@@ -143,7 +149,7 @@ def evaluate(test_data, inference_graph):
 
         features = pad_to_dense(batch['features'].values)
         features_len = batch['features_len'].values
-        labels = pad_to_dense(batch['transcript'].values)
+        labels = pad_to_dense(batch['transcript'].values + 1)
         label_lengths = batch['transcript_len'].values
 
         logits, loss_ = session.run([transposed, loss], feed_dict={
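The change above relies on a sentinel convention: transcripts are fed with every label shifted up by one, so zero can serve as the padding value that tf.contrib.layers.dense_to_sparse strips out, and the real labels are recovered by subtracting one afterwards. A minimal round-trip sketch (TensorFlow 1.x; the toy values are illustrative, not from this commit):

import tensorflow as tf

# Two transcripts, already shifted by +1 and padded with 0:
# real labels [3, 1, 2] and [5, 4] became [4, 2, 3] and [6, 5, 0].
dense_labels = tf.constant([[4, 2, 3],
                            [6, 5, 0]], dtype=tf.int32)

# dense_to_sparse drops entries equal to its eos_token argument (0 by
# default), so only the shifted labels survive the conversion.
sparse_labels = tf.contrib.layers.dense_to_sparse(dense_labels)

# Subtract one from every surviving value to recover the real labels.
neg_ones = tf.SparseTensor(sparse_labels.indices,
                           -1 * tf.ones_like(sparse_labels.values),
                           sparse_labels.dense_shape)
sparse_labels = tf.sparse_add(sparse_labels, neg_ones)

with tf.Session() as session:
    print(session.run(sparse_labels).values)  # [3 1 2 5 4]

Because the subtracted tensor of ones reuses the indices of the converted labels, only positions that survived the conversion are touched, so padding never leaks into the recovered labels.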
util/ctc.py (57 deletions)
@@ -1,57 +0,0 @@
-from __future__ import absolute_import, division, print_function
-
-import tensorflow as tf
-
-from functools import reduce
-from six.moves import range
-
-
-# gather_nd is taken from https://github.com/tensorflow/tensorflow/issues/206#issuecomment-229678962
-#
-# Unfortunately we can't just use tf.gather_nd because it does not have gradients
-# implemented yet, so we need this workaround.
-#
-def gather_nd(params, indices, shape):
-    rank = len(shape)
-    flat_params = tf.reshape(params, [-1])
-    multipliers = [reduce(lambda x, y: x*y, shape[i+1:], 1) for i in range(0, rank)]
-    indices_unpacked = tf.unstack(tf.transpose(indices, [rank - 1] + list(range(0, rank - 1))))
-    flat_indices = sum([a*b for a,b in zip(multipliers, indices_unpacked)])
-    return tf.gather(flat_params, flat_indices)
-
-
-# ctc_label_dense_to_sparse is taken from https://github.com/tensorflow/tensorflow/issues/1742#issuecomment-205291527
-#
-# The CTC implementation in TensorFlow needs labels in a sparse representation,
-# but sparse data and queues don't mix well, so we store padded tensors in the
-# queue and convert to a sparse representation after dequeuing a batch.
-#
-def ctc_label_dense_to_sparse(labels, label_lengths, batch_size):
-    # The second dimension of labels must be equal to the longest label length in the batch
-    correct_shape_assert = tf.assert_equal(tf.shape(labels)[1], tf.reduce_max(label_lengths))
-    with tf.control_dependencies([correct_shape_assert]):
-        labels = tf.identity(labels)
-
-    label_shape = tf.shape(labels)
-    num_batches_tns = tf.stack([label_shape[0]])
-    max_num_labels_tns = tf.stack([label_shape[1]])
-    def range_less_than(previous_state, current_input):
-        return tf.expand_dims(tf.range(label_shape[1]), 0) < current_input
-
-    init = tf.cast(tf.fill(max_num_labels_tns, 0), tf.bool)
-    init = tf.expand_dims(init, 0)
-    dense_mask = tf.scan(range_less_than, label_lengths, initializer=init, parallel_iterations=1)
-    dense_mask = dense_mask[:, 0, :]
-
-    label_array = tf.reshape(tf.tile(tf.range(0, label_shape[1]), num_batches_tns),
-                             label_shape)
-    label_ind = tf.boolean_mask(label_array, dense_mask)
-
-    batch_array = tf.transpose(tf.reshape(tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns), tf.reverse(label_shape, [0])))
-    batch_ind = tf.boolean_mask(batch_array, dense_mask)
-
-    indices = tf.transpose(tf.reshape(tf.concat([batch_ind, label_ind], 0), [2, -1]))
-    shape = [batch_size, tf.reduce_max(label_lengths)]
-    vals_sparse = gather_nd(labels, indices, shape)
-
-    return tf.SparseTensor(tf.to_int64(indices), vals_sparse, tf.to_int64(label_shape))
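Both deleted helpers were community workarounds. The gather_nd shim predated gradient support for tf.gather_nd, which TensorFlow 1.x releases do provide, and ctc_label_dense_to_sparse derived validity from explicit per-example lengths, whereas dense_to_sparse infers it from a sentinel value, which is what forces the +1/-1 shift at the call sites. A quick check of the built-in op (a hedged sketch, not part of this commit):

import tensorflow as tf

params = tf.constant([[1.0, 2.0],
                      [3.0, 4.0]])
indices = tf.constant([[0, 1], [1, 0]])

# tf.gather_nd now has a registered gradient, so the flatten-and-gather
# workaround from util/ctc.py is no longer needed.
gathered = tf.gather_nd(params, indices)                  # [2.0, 3.0]
grads = tf.gradients(tf.reduce_sum(gathered), params)[0]  # flows back to params

with tf.Session() as session:
    print(session.run(gathered))  # [2. 3.]
    print(session.run(grads))     # [[0. 1.], [1. 0.]]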
@@ -4,7 +4,6 @@ import tensorflow as tf
 from math import ceil
 from six.moves import range
 from threading import Thread
-from util.ctc import ctc_label_dense_to_sparse
 from util.gpu import get_available_gpus
 
 
@@ -143,11 +142,14 @@ class _DataSetLoader(object):
                 (features.strides[0], features.strides[0], features.strides[1]),
                 writeable=False)
 
+            # We add 1 to all elements of the transcript here to avoid any zero
+            # values since we use that as an end-of-sequence token for converting
+            # the batch into a SparseTensor.
             try:
                 session.run(self._enqueue_op, feed_dict={
                     self._model_feeder.ph_x: features,
                     self._model_feeder.ph_x_length: num_strides,
-                    self._model_feeder.ph_y: transcript,
+                    self._model_feeder.ph_y: transcript + 1,
                     self._model_feeder.ph_y_length: transcript_len
                 })
             except tf.errors.CancelledError:
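On the producer side, the shift happens once per transcript before it enters the queue. A tiny NumPy illustration (hypothetical values; in the real code `transcript` comes from the preprocessing pipeline):

import numpy as np

transcript = np.asarray([0, 4, 1], dtype=np.int32)  # real labels; 0 is a valid label
queued = transcript + 1                             # [1, 5, 2]: strictly positive
assert (queued > 0).all()                           # zero is now free to mean padding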
@@ -173,8 +175,10 @@ class _TowerFeeder(object):
         Draw the next batch from the combined switchable queue.
         '''
         source, source_lengths, target, target_lengths = self._queue.dequeue_many(self._model_feeder.ph_batch_size)
-        sparse_labels = ctc_label_dense_to_sparse(target, target_lengths, self._model_feeder.ph_batch_size)
-        return source, source_lengths, sparse_labels
+        # Back to sparse, then subtract one to get the real labels
+        sparse_labels = tf.contrib.layers.dense_to_sparse(target)
+        neg_ones = tf.SparseTensor(sparse_labels.indices, -1 * tf.ones_like(sparse_labels.values), sparse_labels.dense_shape)
+        return source, source_lengths, tf.sparse_add(sparse_labels, neg_ones)
 
     def start_queue_threads(self, session, coord):
         '''
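Downstream, the recovered SparseTensor plugs directly into tf.nn.ctc_loss. An end-to-end sketch under assumed toy shapes (names and values here are illustrative, not from the commit):

import tensorflow as tf

batch_size, max_time, num_classes = 2, 5, 4

logits = tf.zeros([max_time, batch_size, num_classes])  # time-major inputs
seq_len = tf.constant([5, 5], dtype=tf.int32)

# Queue-side representation: transcripts already shifted by +1, 0 = padding.
target = tf.constant([[2, 3, 0],
                      [1, 0, 0]], dtype=tf.int32)

sparse_labels = tf.contrib.layers.dense_to_sparse(target)
neg_ones = tf.SparseTensor(sparse_labels.indices,
                           -1 * tf.ones_like(sparse_labels.values),
                           sparse_labels.dense_shape)
sparse_labels = tf.sparse_add(sparse_labels, neg_ones)

# ctc_loss expects labels as an int32 SparseTensor, which is exactly
# what the dequeue path above now produces.
loss = tf.nn.ctc_loss(labels=sparse_labels,
                      inputs=logits,
                      sequence_length=seq_len)

with tf.Session() as session:
    print(session.run(loss))  # per-example CTC loss values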