diff --git a/DeepSpeech.py b/DeepSpeech.py
index 8492fa66..19e16d3b 100755
--- a/DeepSpeech.py
+++ b/DeepSpeech.py
@@ -141,11 +141,12 @@ def rnn_impl_static_rnn(x, seq_length, previous_state, reuse):
     return output, output_state


-def create_model(batch_x, seq_length, dropout, reuse=False, previous_state=None, overlap=True, rnn_impl=rnn_impl_lstmblockfusedcell):
+def create_model(batch_x, batch_size, seq_length, dropout, reuse=False, previous_state=None, overlap=True, rnn_impl=rnn_impl_lstmblockfusedcell):
     layers = {}

     # Input shape: [batch_size, n_steps, n_input + 2*n_input*n_context]
-    batch_size = tf.shape(batch_x)[0]
+    if not batch_size:
+        batch_size = tf.shape(batch_x)[0]

     # Create overlapping feature windows if needed
     if overlap:
@@ -206,7 +207,7 @@ def create_model(batch_x, seq_length, dropout, reuse=False, previous_state=None,
 # Conveniently, this loss function is implemented in TensorFlow.
 # Thus, we can simply make use of this implementation to define our loss.

-def calculate_mean_edit_distance_and_loss(iterator, dropout, reuse):
+def calculate_mean_edit_distance_and_loss(iterator, dropout, batch_size, reuse):
     r'''
     This routine beam search decodes a mini-batch and calculates the loss and mean edit distance.
     Next to total and average loss it returns the mean edit distance,
@@ -221,7 +222,7 @@ def calculate_mean_edit_distance_and_loss(iterator, dropout, reuse):
         rnn_impl = rnn_impl_lstmblockfusedcell

     # Calculate the logits of the batch
-    logits, _ = create_model(batch_x, batch_seq_len, dropout, reuse=reuse, rnn_impl=rnn_impl)
+    logits, _ = create_model(batch_x, batch_size, batch_seq_len, dropout, reuse=reuse, rnn_impl=rnn_impl)

     # Compute the CTC loss using TensorFlow's `ctc_loss`
     total_loss = tfv1.nn.ctc_loss(labels=batch_y, inputs=logits, sequence_length=batch_seq_len)
@@ -266,7 +267,7 @@ def create_optimizer():
 # on which all operations within the tower execute.
 # For example, all operations of 'tower 0' could execute on the first GPU `tf.device('/gpu:0')`.

-def get_tower_results(iterator, optimizer, dropout_rates):
+def get_tower_results(iterator, optimizer, dropout_rates, batch_size):
     r'''
     With this preliminary step out of the way, we can for each GPU introduce a
     tower for which's batch we calculate and return the optimization gradients
@@ -288,7 +289,7 @@ def get_tower_results(iterator, optimizer, dropout_rates):
                 with tf.name_scope('tower_%d' % i):
                     # Calculate the avg_loss and mean_edit_distance and retrieve the decoded
                     # batch along with the original batch's labels (Y) of this tower
-                    avg_loss = calculate_mean_edit_distance_and_loss(iterator, dropout_rates, reuse=i > 0)
+                    avg_loss = calculate_mean_edit_distance_and_loss(iterator, dropout_rates, batch_size, reuse=i > 0)

                     # Allow for variables to be re-used by the next tower
                     tfv1.get_variable_scope().reuse_variables()
@@ -435,7 +436,7 @@ def train():
     # Building the graph
     optimizer = create_optimizer()

-    gradients, loss = get_tower_results(iterator, optimizer, dropout_rates)
+    gradients, loss = get_tower_results(iterator, optimizer, dropout_rates, FLAGS.train_batch_size)

     # Average tower gradients across GPUs
     avg_tower_gradients = average_gradients(gradients)
@@ -626,6 +627,7 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False):
         rnn_impl = rnn_impl_lstmblockfusedcell

     logits, layers = create_model(batch_x=input_tensor,
+                                  batch_size=batch_size,
                                   seq_length=seq_length if not FLAGS.export_tflite else None,
                                   dropout=no_dropout,
                                   previous_state=previous_state,
diff --git a/evaluate.py b/evaluate.py
index a864935a..a8de7dc7 100755
--- a/evaluate.py
+++ b/evaluate.py
@@ -57,6 +57,7 @@ def evaluate(test_csvs, create_model, try_loading):
     # One rate per layer
     no_dropout = [None] * 6
     logits, _ = create_model(batch_x=batch_x,
+                             batch_size=FLAGS.test_batch_size,
                              seq_length=batch_x_len,
                              dropout=no_dropout)

diff --git a/native_client/BUILD b/native_client/BUILD
index c51dd21c..689c8a38 100644
--- a/native_client/BUILD
+++ b/native_client/BUILD
@@ -30,24 +30,24 @@ KENLM_INCLUDES = [
     "kenlm",
 ]

-OPENFST_SOURCES_PLATFORM = select({
+OPENFST_SOURCES_PLATFORM = select({
     "//tensorflow:windows": glob(["ctcdecode/third_party/openfst-1.6.9-win/src/lib/*.cc"]),
     "//conditions:default": glob(["ctcdecode/third_party/openfst-1.6.7/src/lib/*.cc"]),
 })

 DECODER_SOURCES = glob([
     "ctcdecode/*.h",
-    "ctcdecode/*.cpp",
+    "ctcdecode/*.cpp",
 ], exclude=["ctcdecode/*_wrap.cpp"]) + OPENFST_SOURCES_PLATFORM + KENLM_SOURCES

-OPENFST_INCLUDES_PLATFORM = select({
+OPENFST_INCLUDES_PLATFORM = select({
     "//tensorflow:windows": ["ctcdecode/third_party/openfst-1.6.9-win/src/include"],
     "//conditions:default": ["ctcdecode/third_party/openfst-1.6.7/src/include"],
 })

 DECODER_INCLUDES = [
     ".",
-    "ctcdecode/third_party/ThreadPool",
+    "ctcdecode/third_party/ThreadPool",
 ] + OPENFST_INCLUDES_PLATFORM + KENLM_INCLUDES

 LINUX_LINKOPTS = [
@@ -77,7 +77,7 @@ tf_cc_shared_object(
         "tfmodelstate.h",
         "tfmodelstate.cc"
     ]}),
-    copts = select({
+    copts = select({
         # -fvisibility=hidden is not required on Windows, MSCV hides all declarations by default
         "//tensorflow:windows": ["/w"],
         # -Wno-sign-compare to silent a lot of warnings from tensorflow itself,
@@ -107,28 +107,26 @@ tf_cc_shared_object(
         ### => Trying to be more fine-grained
         ### Use bin/ops_in_graph.py to list all the ops used by a frozen graph.
         ### CPU only build, libdeepspeech.so file size reduced by ~50%
-        "//tensorflow/core/kernels:dense_update_ops", # Assign (remove once prod model no longer depends on it)
         "//tensorflow/core/kernels:spectrogram_op", # AudioSpectrogram
         "//tensorflow/core/kernels:bias_op", # BiasAdd
         "//tensorflow/contrib/rnn:lstm_ops_kernels", # BlockLSTM
         "//tensorflow/core/kernels:cast_op", # Cast
         "//tensorflow/core/kernels:concat_op", # ConcatV2
+        "//tensorflow/core/kernels:constant_op", # Const, Placeholder
+        "//tensorflow/core/kernels:shape_ops", # ExpandDims, Shape
         "//tensorflow/core/kernels:gather_nd_op", # GatherNd
         "//tensorflow/core/kernels:identity_op", # Identity
         "//tensorflow/core/kernels:immutable_constant_op", # ImmutableConst (used in memmapped models)
-        "//tensorflow/core/kernels:deepspeech_cwise_ops", # Less, Minimum
+        "//tensorflow/core/kernels:deepspeech_cwise_ops", # Less, Minimum, Mul
         "//tensorflow/core/kernels:matmul_op", # MatMul
         "//tensorflow/core/kernels:reduction_ops", # Max
         "//tensorflow/core/kernels:mfcc_op", # Mfcc
+        "//tensorflow/core/kernels:no_op", # NoOp
         "//tensorflow/core/kernels:pack_op", # Pack
-        "//tensorflow/core/kernels:constant_op", # Placeholder
         "//tensorflow/core/kernels:sequence_ops", # Range
         "//tensorflow/core/kernels:relu_op", # Relu
         "//tensorflow/core/kernels:reshape_op", # Reshape
-        "//tensorflow/core/kernels:shape_ops", # Shape
-        "//tensorflow/core/kernels:slice_op", # Slice, needed by StridedSlice
         "//tensorflow/core/kernels:softmax_op", # Softmax
-        "//tensorflow/core/kernels:strided_slice_op", # StridedSlice
         "//tensorflow/core/kernels:tile_ops", # Tile
         "//tensorflow/core/kernels:transpose_op", # Transpose
         # And we also need the op libs for these ops used in the model:
@@ -139,7 +137,6 @@ tf_cc_shared_object(
         "//tensorflow/core:no_op_op_lib", # NoOp
         "//tensorflow/core:nn_ops_op_lib", # Relu, Softmax, BiasAdd
         # And op libs for these ops brought in by dependencies of dependencies to silence unknown OpKernel warnings:
-        "//tensorflow/core:state_ops_op_lib", # Assign, AssignSub, AssignAnd, Variable, VariableV2
         "//tensorflow/core:bitwise_ops_op_lib", # BitwiseAnd, BitwiseOr, BitwiseXor, LeftShift, RightShift
         "//tensorflow/core:random_ops_op_lib", # RandomGammaGrad
         "//tensorflow/core:dataset_ops_op_lib", # UnwrapDatasetVariant, WrapDatasetVariant
diff --git a/taskcluster/test-armbian-opt-base.tyml b/taskcluster/test-armbian-opt-base.tyml
index 22115a46..3cf9e437 100644
--- a/taskcluster/test-armbian-opt-base.tyml
+++ b/taskcluster/test-armbian-opt-base.tyml
@@ -38,8 +38,8 @@ then:
         DEEPSPEECH_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${linux_arm64_build}/artifacts/public
         DEEPSPEECH_NODEJS: https://queue.taskcluster.net/v1/task/${node_package_cpu}/artifacts/public
         DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
-        DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.0/output_graph.pb
-        DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.0/output_graph.pbmm
+        DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.4/output_graph.pb
+        DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.4/output_graph.pbmm
         PIP_DEFAULT_TIMEOUT: "60"
         PIP_EXTRA_INDEX_URL: "https://lissyx.github.io/deepspeech-python-wheels/"
         EXTRA_PYTHON_CONFIGURE_OPTS: "--with-fpectl" # Required by Debian Stretch
diff --git a/taskcluster/test-darwin-opt-base.tyml b/taskcluster/test-darwin-opt-base.tyml
index 0007af6a..ea11e02d 100644
--- a/taskcluster/test-darwin-opt-base.tyml
+++ b/taskcluster/test-darwin-opt-base.tyml
@@ -43,8 +43,8 @@ then:
         DEEPSPEECH_ARTIFACTS_TFLITE_ROOT: https://queue.taskcluster.net/v1/task/${darwin_amd64_tflite}/artifacts/public
         DEEPSPEECH_NODEJS: https://queue.taskcluster.net/v1/task/${node_package_cpu}/artifacts/public
         DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
-        DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.0/output_graph.pb
-        DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.0/output_graph.pbmm
+        DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.4/output_graph.pb
+        DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.4/output_graph.pbmm
         EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"

     command:
diff --git a/taskcluster/test-linux-opt-base.tyml b/taskcluster/test-linux-opt-base.tyml
index 59bb7d43..25a9d322 100644
--- a/taskcluster/test-linux-opt-base.tyml
+++ b/taskcluster/test-linux-opt-base.tyml
@@ -43,8 +43,8 @@ then:
         DEEPSPEECH_ARTIFACTS_TFLITE_ROOT: https://queue.taskcluster.net/v1/task/${linux_amd64_tflite}/artifacts/public
         DEEPSPEECH_NODEJS: https://queue.taskcluster.net/v1/task/${node_package_cpu}/artifacts/public
         DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
-        DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.0/output_graph.pb
-        DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.0/output_graph.pbmm
+        DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.4/output_graph.pb
+        DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.4/output_graph.pbmm
         DECODER_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${linux_amd64_ctc}/artifacts/public
         PIP_DEFAULT_TIMEOUT: "60"
         EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"
diff --git a/taskcluster/test-raspbian-opt-base.tyml b/taskcluster/test-raspbian-opt-base.tyml
index 1c333432..444cec05 100644
--- a/taskcluster/test-raspbian-opt-base.tyml
+++ b/taskcluster/test-raspbian-opt-base.tyml
@@ -38,8 +38,8 @@ then:
         DEEPSPEECH_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${linux_rpi3_build}/artifacts/public
         DEEPSPEECH_NODEJS: https://queue.taskcluster.net/v1/task/${node_package_cpu}/artifacts/public
         DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
-        DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.0/output_graph.pb
-        DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.0/output_graph.pbmm
+        DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.4/output_graph.pb
+        DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.4/output_graph.pbmm
         PIP_DEFAULT_TIMEOUT: "60"
         PIP_EXTRA_INDEX_URL: "https://www.piwheels.org/simple"
         EXTRA_PYTHON_CONFIGURE_OPTS: "--with-fpectl" # Required by Raspbian Stretch / PiWheels
diff --git a/taskcluster/test-win-opt-base.tyml b/taskcluster/test-win-opt-base.tyml
index d3249325..9bdb2faa 100644
--- a/taskcluster/test-win-opt-base.tyml
+++ b/taskcluster/test-win-opt-base.tyml
@@ -45,8 +45,8 @@ then:
         DEEPSPEECH_ARTIFACTS_TFLITE_ROOT: https://queue.taskcluster.net/v1/task/${win_amd64_tflite}/artifacts/public
         DEEPSPEECH_NODEJS: https://queue.taskcluster.net/v1/task/${node_package_cpu}/artifacts/public
         DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
-        DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.0/output_graph.pb
-        DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.0/output_graph.pbmm
+        DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.4/output_graph.pb
+        DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.6.0-alpha.4/output_graph.pbmm
         EXPECTED_TENSORFLOW_VERSION: "${build.tensorflow_git_desc}"
         TC_MSYS_VERSION: 'MSYS_NT-6.3'
         MSYS: 'winsymlinks:nativestrict'
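
For context, not part of the patch itself: the DeepSpeech.py hunks above let create_model() receive an explicit batch_size and only fall back to a dynamic tf.shape() lookup when no static value is passed (training and evaluation now forward FLAGS.train_batch_size / FLAGS.test_batch_size, export forwards the inference batch_size). With a fixed export-time batch size the graph no longer emits the Slice/StridedSlice ops whose kernels are dropped from native_client/BUILD. Below is a minimal Python sketch of that fallback pattern only; build_logits() is a hypothetical stand-in for illustration, not the real create_model().

import tensorflow as tf  # assumption: TF 1.x-style graph building, as DeepSpeech used at the time

def build_logits(batch_x, batch_size=None):
    # Hypothetical stand-in for create_model(): prefer a statically known batch size
    # (e.g. a flag value or the export-time argument) when the caller supplies one.
    if not batch_size:
        # No static value supplied: derive the batch dimension at graph-run time,
        # which adds Shape and StridedSlice ops to the exported graph.
        batch_size = tf.shape(batch_x)[0]
    # ... the rest of the model would use batch_size for reshapes and RNN state tensors ...
    return batch_size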