Merge pull request #2263 from mozilla/remove-unneeded-ops
Remove use of StridedSlice and update op/kernel deps (Fixes #2179)
commit b68bfdbb6e
@@ -141,11 +141,12 @@ def rnn_impl_static_rnn(x, seq_length, previous_state, reuse):
     return output, output_state
 
 
-def create_model(batch_x, seq_length, dropout, reuse=False, previous_state=None, overlap=True, rnn_impl=rnn_impl_lstmblockfusedcell):
+def create_model(batch_x, batch_size, seq_length, dropout, reuse=False, previous_state=None, overlap=True, rnn_impl=rnn_impl_lstmblockfusedcell):
     layers = {}
 
     # Input shape: [batch_size, n_steps, n_input + 2*n_input*n_context]
-    batch_size = tf.shape(batch_x)[0]
+    if not batch_size:
+        batch_size = tf.shape(batch_x)[0]
 
     # Create overlapping feature windows if needed
     if overlap:
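For context on the hunk above: passing an explicit batch_size is what lets the exported graph drop StridedSlice, because indexing tf.shape(batch_x)[0] emits Shape and StridedSlice nodes, while a plain Python int is folded into the graph as a constant. A minimal sketch of that difference (mine, not part of the commit; the placeholder shape is illustrative):

    import tensorflow.compat.v1 as tfv1
    tfv1.disable_eager_execution()  # assumes TF 1.x-style graph mode, as DeepSpeech used at this point

    with tfv1.Graph().as_default() as g:
        batch_x = tfv1.placeholder(tfv1.float32, [None, 16, 494])  # illustrative input shape
        dynamic_bs = tfv1.shape(batch_x)[0]  # training path: adds Shape + StridedSlice nodes
        static_bs = 1                        # export path: plain int, adds no graph ops (shown only for contrast)
        print(sorted({op.type for op in g.get_operations()}))
        # The printed set contains 'Shape' and 'StridedSlice' only because of dynamic_bs.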
@@ -206,7 +207,7 @@ def create_model(batch_x, seq_length, dropout, reuse=False, previous_state=None,
 # Conveniently, this loss function is implemented in TensorFlow.
 # Thus, we can simply make use of this implementation to define our loss.
 
-def calculate_mean_edit_distance_and_loss(iterator, dropout, reuse):
+def calculate_mean_edit_distance_and_loss(iterator, dropout, batch_size, reuse):
     r'''
     This routine beam search decodes a mini-batch and calculates the loss and mean edit distance.
     Next to total and average loss it returns the mean edit distance,
@@ -221,7 +222,7 @@ def calculate_mean_edit_distance_and_loss(iterator, dropout, reuse):
         rnn_impl = rnn_impl_lstmblockfusedcell
 
     # Calculate the logits of the batch
-    logits, _ = create_model(batch_x, batch_seq_len, dropout, reuse=reuse, rnn_impl=rnn_impl)
+    logits, _ = create_model(batch_x, batch_size, batch_seq_len, dropout, reuse=reuse, rnn_impl=rnn_impl)
 
     # Compute the CTC loss using TensorFlow's `ctc_loss`
     total_loss = tfv1.nn.ctc_loss(labels=batch_y, inputs=logits, sequence_length=batch_seq_len)
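The hunk above only threads batch_size through to the model; the loss itself is unchanged. As a reminder of what the tfv1.nn.ctc_loss call above expects (a hedged sketch with illustrative shapes and names, not code from the repo): labels are a SparseTensor of character indices, inputs are time-major logits, and sequence_length holds the number of valid frames per example.

    import tensorflow.compat.v1 as tfv1
    tfv1.disable_eager_execution()  # TF 1.x-style graph mode assumed

    max_time, batch_size, num_classes = 50, 2, 29  # e.g. 28 characters + CTC blank (illustrative)
    logits = tfv1.placeholder(tfv1.float32, [max_time, batch_size, num_classes])
    labels = tfv1.sparse_placeholder(tfv1.int32)   # sparse transcript indices
    seq_len = tfv1.placeholder(tfv1.int32, [batch_size])

    # ctc_loss returns one loss value per batch element; DeepSpeech then averages them.
    total_loss = tfv1.nn.ctc_loss(labels=labels, inputs=logits, sequence_length=seq_len)
    avg_loss = tfv1.reduce_mean(total_loss)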
@@ -266,7 +267,7 @@ def create_optimizer():
 # on which all operations within the tower execute.
 # For example, all operations of 'tower 0' could execute on the first GPU `tf.device('/gpu:0')`.
 
-def get_tower_results(iterator, optimizer, dropout_rates):
+def get_tower_results(iterator, optimizer, dropout_rates, batch_size):
     r'''
     With this preliminary step out of the way, we can for each GPU introduce a
     tower for which's batch we calculate and return the optimization gradients
@@ -288,7 +289,7 @@ def get_tower_results(iterator, optimizer, dropout_rates):
             with tf.name_scope('tower_%d' % i):
                 # Calculate the avg_loss and mean_edit_distance and retrieve the decoded
                 # batch along with the original batch's labels (Y) of this tower
-                avg_loss = calculate_mean_edit_distance_and_loss(iterator, dropout_rates, reuse=i > 0)
+                avg_loss = calculate_mean_edit_distance_and_loss(iterator, dropout_rates, batch_size, reuse=i > 0)
 
                 # Allow for variables to be re-used by the next tower
                 tfv1.get_variable_scope().reuse_variables()
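The hunk above sits inside the per-GPU tower loop described by the comments in the previous hunk. As a reminder of the pattern being touched (a self-contained toy sketch, with a dummy loss standing in for calculate_mean_edit_distance_and_loss; not the repo's code): each tower pins its ops to one device under its own name scope, tfv1.get_variable_scope().reuse_variables() makes later towers share the first tower's weights, and per-tower gradients are collected for averaging.

    import tensorflow.compat.v1 as tfv1
    tfv1.disable_eager_execution()  # TF 1.x-style graph mode assumed

    def tower_loss(i):
        # Stand-in for calculate_mean_edit_distance_and_loss(iterator, dropout_rates, batch_size, reuse=i > 0)
        w = tfv1.get_variable('w', shape=[], initializer=tfv1.zeros_initializer())
        return tfv1.reduce_sum(w * float(i + 1))

    optimizer = tfv1.train.AdamOptimizer(1e-4)
    tower_gradients, tower_losses = [], []
    for i in range(2):  # one tower per device; use '/gpu:%d' % i on real hardware
        with tfv1.device('/cpu:0'), tfv1.name_scope('tower_%d' % i):
            loss = tower_loss(i)
            tfv1.get_variable_scope().reuse_variables()  # later towers reuse the same weights
            tower_gradients.append(optimizer.compute_gradients(loss))
            tower_losses.append(loss)
    avg_loss = tfv1.reduce_mean(tower_losses)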
@@ -435,7 +436,7 @@ def train():
 
     # Building the graph
     optimizer = create_optimizer()
-    gradients, loss = get_tower_results(iterator, optimizer, dropout_rates)
+    gradients, loss = get_tower_results(iterator, optimizer, dropout_rates, FLAGS.train_batch_size)
 
     # Average tower gradients across GPUs
     avg_tower_gradients = average_gradients(gradients)
@@ -626,6 +627,7 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False):
         rnn_impl = rnn_impl_lstmblockfusedcell
 
     logits, layers = create_model(batch_x=input_tensor,
+                                  batch_size=batch_size,
                                   seq_length=seq_length if not FLAGS.export_tflite else None,
                                   dropout=no_dropout,
                                   previous_state=previous_state,
@@ -57,6 +57,7 @@ def evaluate(test_csvs, create_model, try_loading):
     # One rate per layer
     no_dropout = [None] * 6
     logits, _ = create_model(batch_x=batch_x,
+                             batch_size=FLAGS.test_batch_size,
                              seq_length=batch_x_len,
                              dropout=no_dropout)
 
@@ -30,24 +30,24 @@ KENLM_INCLUDES = [
     "kenlm",
 ]
 
 OPENFST_SOURCES_PLATFORM = select({
     "//tensorflow:windows": glob(["ctcdecode/third_party/openfst-1.6.9-win/src/lib/*.cc"]),
     "//conditions:default": glob(["ctcdecode/third_party/openfst-1.6.7/src/lib/*.cc"]),
 })
 
 DECODER_SOURCES = glob([
     "ctcdecode/*.h",
     "ctcdecode/*.cpp",
 ], exclude=["ctcdecode/*_wrap.cpp"]) + OPENFST_SOURCES_PLATFORM + KENLM_SOURCES
 
 OPENFST_INCLUDES_PLATFORM = select({
     "//tensorflow:windows": ["ctcdecode/third_party/openfst-1.6.9-win/src/include"],
     "//conditions:default": ["ctcdecode/third_party/openfst-1.6.7/src/include"],
 })
 
 DECODER_INCLUDES = [
     ".",
     "ctcdecode/third_party/ThreadPool",
 ] + OPENFST_INCLUDES_PLATFORM + KENLM_INCLUDES
 
 LINUX_LINKOPTS = [
@@ -77,7 +77,7 @@ tf_cc_shared_object(
         "tfmodelstate.h",
         "tfmodelstate.cc"
     ]}),
     copts = select({
         # -fvisibility=hidden is not required on Windows, MSCV hides all declarations by default
         "//tensorflow:windows": ["/w"],
         # -Wno-sign-compare to silent a lot of warnings from tensorflow itself,
@@ -107,28 +107,26 @@ tf_cc_shared_object(
         ### => Trying to be more fine-grained
         ### Use bin/ops_in_graph.py to list all the ops used by a frozen graph.
         ### CPU only build, libdeepspeech.so file size reduced by ~50%
         "//tensorflow/core/kernels:dense_update_ops",        # Assign (remove once prod model no longer depends on it)
         "//tensorflow/core/kernels:spectrogram_op",          # AudioSpectrogram
         "//tensorflow/core/kernels:bias_op",                 # BiasAdd
         "//tensorflow/contrib/rnn:lstm_ops_kernels",         # BlockLSTM
         "//tensorflow/core/kernels:cast_op",                 # Cast
         "//tensorflow/core/kernels:concat_op",               # ConcatV2
+        "//tensorflow/core/kernels:constant_op",             # Const, Placeholder
+        "//tensorflow/core/kernels:shape_ops",               # ExpandDims, Shape
         "//tensorflow/core/kernels:gather_nd_op",            # GatherNd
         "//tensorflow/core/kernels:identity_op",             # Identity
         "//tensorflow/core/kernels:immutable_constant_op",   # ImmutableConst (used in memmapped models)
-        "//tensorflow/core/kernels:deepspeech_cwise_ops",    # Less, Minimum
+        "//tensorflow/core/kernels:deepspeech_cwise_ops",    # Less, Minimum, Mul
         "//tensorflow/core/kernels:matmul_op",               # MatMul
         "//tensorflow/core/kernels:reduction_ops",           # Max
         "//tensorflow/core/kernels:mfcc_op",                 # Mfcc
         "//tensorflow/core/kernels:no_op",                   # NoOp
         "//tensorflow/core/kernels:pack_op",                 # Pack
-        "//tensorflow/core/kernels:constant_op",             # Placeholder
         "//tensorflow/core/kernels:sequence_ops",            # Range
         "//tensorflow/core/kernels:relu_op",                 # Relu
         "//tensorflow/core/kernels:reshape_op",              # Reshape
-        "//tensorflow/core/kernels:shape_ops",               # Shape
-        "//tensorflow/core/kernels:slice_op",                # Slice, needed by StridedSlice
         "//tensorflow/core/kernels:softmax_op",              # Softmax
-        "//tensorflow/core/kernels:strided_slice_op",        # StridedSlice
         "//tensorflow/core/kernels:tile_ops",                # Tile
         "//tensorflow/core/kernels:transpose_op",            # Transpose
         # And we also need the op libs for these ops used in the model:
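The "### Use bin/ops_in_graph.py ..." comment above is the workflow behind this dependency list: dump the set of op types present in the frozen graph, then keep one kernel/op-lib dep here for each of them. A rough sketch of that idea (my approximation, not the actual bin/ops_in_graph.py script):

    import sys
    import tensorflow.compat.v1 as tfv1

    graph_def = tfv1.GraphDef()
    with open(sys.argv[1], 'rb') as fin:  # e.g. output_graph.pb
        graph_def.ParseFromString(fin.read())

    # Every op type printed here needs both an op registration and a kernel linked into libdeepspeech.so.
    print('\n'.join(sorted({node.op for node in graph_def.node})))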
@@ -139,7 +137,6 @@ tf_cc_shared_object(
         "//tensorflow/core:no_op_op_lib",       # NoOp
         "//tensorflow/core:nn_ops_op_lib",      # Relu, Softmax, BiasAdd
         # And op libs for these ops brought in by dependencies of dependencies to silence unknown OpKernel warnings:
         "//tensorflow/core:state_ops_op_lib",   # Assign, AssignSub, AssignAnd, Variable, VariableV2
         "//tensorflow/core:bitwise_ops_op_lib", # BitwiseAnd, BitwiseOr, BitwiseXor, LeftShift, RightShift
         "//tensorflow/core:random_ops_op_lib",  # RandomGammaGrad
         "//tensorflow/core:dataset_ops_op_lib", # UnwrapDatasetVariant, WrapDatasetVariant
@@ -38,8 +38,8 @@ then:
 DEEPSPEECH_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${linux_arm64_build}/artifacts/public
 DEEPSPEECH_NODEJS: https://queue.taskcluster.net/v1/task/${node_package_cpu}/artifacts/public
 DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
-DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.0/output_graph.pb
-DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.0/output_graph.pbmm
+DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.4/output_graph.pb
+DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.4/output_graph.pbmm
 PIP_DEFAULT_TIMEOUT: "60"
 PIP_EXTRA_INDEX_URL: "https://lissyx.github.io/deepspeech-python-wheels/"
 EXTRA_PYTHON_CONFIGURE_OPTS: "--with-fpectl" # Required by Debian Stretch
@@ -43,8 +43,8 @@ then:
 DEEPSPEECH_ARTIFACTS_TFLITE_ROOT: https://queue.taskcluster.net/v1/task/${darwin_amd64_tflite}/artifacts/public
 DEEPSPEECH_NODEJS: https://queue.taskcluster.net/v1/task/${node_package_cpu}/artifacts/public
 DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
-DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.0/output_graph.pb
-DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.0/output_graph.pbmm
+DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.4/output_graph.pb
+DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.4/output_graph.pbmm
 EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"
 
 command:
@@ -43,8 +43,8 @@ then:
 DEEPSPEECH_ARTIFACTS_TFLITE_ROOT: https://queue.taskcluster.net/v1/task/${linux_amd64_tflite}/artifacts/public
 DEEPSPEECH_NODEJS: https://queue.taskcluster.net/v1/task/${node_package_cpu}/artifacts/public
 DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
-DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.0/output_graph.pb
-DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.0/output_graph.pbmm
+DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.4/output_graph.pb
+DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.4/output_graph.pbmm
 DECODER_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${linux_amd64_ctc}/artifacts/public
 PIP_DEFAULT_TIMEOUT: "60"
 EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"
@@ -38,8 +38,8 @@ then:
 DEEPSPEECH_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${linux_rpi3_build}/artifacts/public
 DEEPSPEECH_NODEJS: https://queue.taskcluster.net/v1/task/${node_package_cpu}/artifacts/public
 DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
-DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.0/output_graph.pb
-DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.0/output_graph.pbmm
+DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.4/output_graph.pb
+DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.4/output_graph.pbmm
 PIP_DEFAULT_TIMEOUT: "60"
 PIP_EXTRA_INDEX_URL: "https://www.piwheels.org/simple"
 EXTRA_PYTHON_CONFIGURE_OPTS: "--with-fpectl" # Required by Raspbian Stretch / PiWheels
@@ -45,8 +45,8 @@ then:
 DEEPSPEECH_ARTIFACTS_TFLITE_ROOT: https://queue.taskcluster.net/v1/task/${win_amd64_tflite}/artifacts/public
 DEEPSPEECH_NODEJS: https://queue.taskcluster.net/v1/task/${node_package_cpu}/artifacts/public
 DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
-DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.0/output_graph.pb
-DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.0/output_graph.pbmm
+DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.4/output_graph.pb
+DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.4/output_graph.pbmm
 EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"
 TC_MSYS_VERSION: 'MSYS_NT-6.3'
 MSYS: 'winsymlinks:nativestrict'