From 69aa316c88ab3f709dd642223d7f51aa68ceb147 Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Tue, 6 Nov 2018 16:37:33 +0100 Subject: [PATCH] Add TFLite engine --- native_client/BUILD | 81 ++++++++------- native_client/deepspeech.cc | 161 +++++++++++++++++++++++++++-- native_client/kenlm/README.mozilla | 16 +++ native_client/kenlm/util/file.cc | 2 +- 4 files changed, 217 insertions(+), 43 deletions(-) diff --git a/native_client/BUILD b/native_client/BUILD index c86a3237..0ca2f3ce 100644 --- a/native_client/BUILD +++ b/native_client/BUILD @@ -3,6 +3,9 @@ load("@org_tensorflow//tensorflow:tensorflow.bzl", "tf_cc_shared_object", "if_cuda") +load("@org_tensorflow//tensorflow/contrib/lite:build_def.bzl", + "tflite_copts", "tflite_linkopts") + genrule( name = "ds_git_version", outs = ["ds_version.h"], @@ -55,47 +58,53 @@ tf_cc_shared_object( DECODER_SOURCES, # -Wno-sign-compare to silent a lot of warnings from tensorflow itself, # which makes it harder to see our own warnings - copts = ["-Wno-sign-compare", "-fvisibility=hidden"], + copts = ["-Wno-sign-compare", "-fvisibility=hidden"] + tflite_copts(), linkopts = select({ "//tensorflow:darwin": [], "//tensorflow:linux_x86_64": LINUX_LINKOPTS, "//tensorflow:rpi3": LINUX_LINKOPTS + ["-l:libstdc++.a"], "//tensorflow:rpi3-armv8": LINUX_LINKOPTS + ["-l:libstdc++.a"], - }), - deps = [ - "//tensorflow/core:core_cpu", - "//tensorflow/core:direct_session", - "//third_party/eigen3", - #"//tensorflow/core:all_kernels", - ### => Trying to be more fine-grained - ### Use bin/ops_in_graph.py to list all the ops used by a frozen graph. - ### CPU only build, libdeepspeech.so file size reduced by ~50% - "//tensorflow/core/kernels:dense_update_ops", # Assign - "//tensorflow/core/kernels:constant_op", # Const - "//tensorflow/core/kernels:immutable_constant_op", # ImmutableConst - "//tensorflow/core/kernels:identity_op", # Identity - "//tensorflow/core/kernels:softmax_op", # Softmax - "//tensorflow/core/kernels:transpose_op", # Transpose - "//tensorflow/core/kernels:reshape_op", # Reshape - "//tensorflow/core/kernels:shape_ops", # Shape - "//tensorflow/core/kernels:concat_op", # ConcatV2 - "//tensorflow/core/kernels:split_op", # Split - "//tensorflow/core/kernels:variable_ops", # VariableV2 - "//tensorflow/core/kernels:relu_op", # Relu - "//tensorflow/core/kernels:bias_op", # BiasAdd - "//tensorflow/core/kernels:math", # Range, MatMul - "//tensorflow/core/kernels:control_flow_ops", # Enter - "//tensorflow/core/kernels:tile_ops", # Tile - "//tensorflow/core/kernels:gather_op", # Gather - "//tensorflow/contrib/rnn:lstm_ops_kernels", # BlockLSTM - "//tensorflow/core/kernels:random_ops", # RandomGammaGrad - "//tensorflow/core/kernels:pack_op", # Pack - "//tensorflow/core/kernels:gather_nd_op", # GatherNd - #### Needed by production model produced without "--use_seq_length False" - #"//tensorflow/core/kernels:logging_ops", # Assert - #"//tensorflow/core/kernels:reverse_sequence_op", # ReverseSequence - ] + if_cuda([ - "//tensorflow/core:core", + "//conditions:default": [] + }) + tflite_linkopts(), + deps = select({ + "//tensorflow:android": [ + "//tensorflow/contrib/lite/kernels:builtin_ops", + ], + "//conditions:default": [ + "//tensorflow/core:core_cpu", + "//tensorflow/core:direct_session", + "//third_party/eigen3", + #"//tensorflow/core:all_kernels", + ### => Trying to be more fine-grained + ### Use bin/ops_in_graph.py to list all the ops used by a frozen graph. + ### CPU only build, libdeepspeech.so file size reduced by ~50% + "//tensorflow/core/kernels:dense_update_ops", # Assign + "//tensorflow/core/kernels:constant_op", # Const + "//tensorflow/core/kernels:immutable_constant_op", # ImmutableConst + "//tensorflow/core/kernels:identity_op", # Identity + "//tensorflow/core/kernels:softmax_op", # Softmax + "//tensorflow/core/kernels:transpose_op", # Transpose + "//tensorflow/core/kernels:reshape_op", # Reshape + "//tensorflow/core/kernels:shape_ops", # Shape + "//tensorflow/core/kernels:concat_op", # ConcatV2 + "//tensorflow/core/kernels:split_op", # Split + "//tensorflow/core/kernels:variable_ops", # VariableV2 + "//tensorflow/core/kernels:relu_op", # Relu + "//tensorflow/core/kernels:bias_op", # BiasAdd + "//tensorflow/core/kernels:math", # Range, MatMul + "//tensorflow/core/kernels:control_flow_ops", # Enter + "//tensorflow/core/kernels:tile_ops", # Tile + "//tensorflow/core/kernels:gather_op", # Gather + "//tensorflow/contrib/rnn:lstm_ops_kernels", # BlockLSTM + "//tensorflow/core/kernels:random_ops", # RandomGammaGrad + "//tensorflow/core/kernels:pack_op", # Pack + "//tensorflow/core/kernels:gather_nd_op", # GatherNd + #### Needed by production model produced without "--use_seq_length False" + #"//tensorflow/core/kernels:logging_ops", # Assert + #"//tensorflow/core/kernels:reverse_sequence_op", # ReverseSequence + ], + }) + if_cuda([ + "//tensorflow/core:core", ]), includes = ["c_speech_features", "kiss_fft130"] + DECODER_INCLUDES, defines = ["KENLM_MAX_ORDER=6"], diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc index 6f627a83..676586c7 100644 --- a/native_client/deepspeech.cc +++ b/native_client/deepspeech.cc @@ -9,12 +9,20 @@ #include "deepspeech.h" #include "alphabet.h" +#ifndef USE_TFLITE #include "tensorflow/core/public/version.h" +#endif // USE_TFLITE + #include "native_client/ds_version.h" -#include "tensorflow/core/public/session.h" -#include "tensorflow/core/platform/env.h" -#include "tensorflow/core/util/memmapped_file_system.h" +#ifndef USE_TFLITE + #include "tensorflow/core/public/session.h" + #include "tensorflow/core/platform/env.h" + #include "tensorflow/core/util/memmapped_file_system.h" +#else // USE_TFLITE + #include "tensorflow/contrib/lite/model.h" + #include "tensorflow/contrib/lite/kernels/register.h" +#endif // USE_TFLITE #include "c_speech_features.h" @@ -51,7 +59,11 @@ std::array calc_hamming_window() { std::array hamming_window = calc_hamming_window(); -using namespace tensorflow; +#ifndef USE_TFLITE + using namespace tensorflow; +#else + using namespace tflite; +#endif using std::vector; @@ -104,9 +116,14 @@ struct StreamingState { }; struct ModelState { +#ifndef USE_TFLITE MemmappedEnv* mmap_env; Session* session; GraphDef graph_def; +#else // USE_TFLITE + std::unique_ptr interpreter; + std::unique_ptr fbmodel; +#endif // USE_TFLITE unsigned int ncep; unsigned int ncontext; Alphabet* alphabet; @@ -116,6 +133,12 @@ struct ModelState { unsigned int mfcc_feats_per_timestep; unsigned int n_context; +#ifdef USE_TFLITE + size_t previous_state_size; + std::unique_ptr previous_state_c_; + std::unique_ptr previous_state_h_; +#endif + ModelState(); ~ModelState(); @@ -144,8 +167,14 @@ struct ModelState { }; ModelState::ModelState() - : mmap_env(nullptr) + : +#ifndef USE_TFLITE + mmap_env(nullptr) , session(nullptr) +#else // USE_TFLITE + interpreter(nullptr) + , fbmodel(nullptr) +#endif // USE_TFLITE , ncep(0) , ncontext(0) , alphabet(nullptr) @@ -154,20 +183,27 @@ ModelState::ModelState() , n_steps(-1) , mfcc_feats_per_timestep(-1) , n_context(-1) +#ifdef USE_TFLITE + , previous_state_size(0) + , previous_state_c_(nullptr) + , previous_state_h_(nullptr) +#endif { } ModelState::~ModelState() { +#ifndef USE_TFLITE if (session) { Status status = session->Close(); if (!status.ok()) { std::cerr << "Error closing TensorFlow session: " << status << std::endl; } } + delete mmap_env; +#endif // USE_TFLITE delete scorer; - delete mmap_env; delete alphabet; } @@ -293,6 +329,7 @@ ModelState::infer(const float* aMfcc, unsigned int n_frames, vector& logi { const size_t num_classes = alphabet->GetSize() + 1; // +1 for blank +#ifndef USE_TFLITE Tensor input(DT_FLOAT, TensorShape({BATCH_SIZE, n_steps, 2*n_context+1, MFCC_FEATURES})); auto input_mapped = input.flat(); @@ -322,6 +359,41 @@ ModelState::infer(const float* aMfcc, unsigned int n_frames, vector& logi for (int t = 0; t < n_frames * BATCH_SIZE * num_classes; ++t) { logits_output.push_back(logits_mapped(t)); } +#else // USE_TFLITE + // Feeding input_node + float* input_node = interpreter->typed_tensor(interpreter->inputs()[0]); + { + int i; + for (i = 0; i < n_frames*mfcc_feats_per_timestep; ++i) { + input_node[i] = aMfcc[i]; + } + for (; i < n_steps*mfcc_feats_per_timestep; ++i) { + input_node[i] = 0; + } + } + + assert(previous_state_size > 0); + + // Feeding previous_state_c, previous_state_h + memcpy(interpreter->typed_tensor(interpreter->inputs()[1]), previous_state_c_.get(), sizeof(float) * previous_state_size); + memcpy(interpreter->typed_tensor(interpreter->inputs()[2]), previous_state_h_.get(), sizeof(float) * previous_state_size); + + TfLiteStatus status = interpreter->Invoke(); + if (status != kTfLiteOk) { + std::cerr << "Error running session: " << status << "\n"; + return; + } + + float* outputs = interpreter->typed_tensor(interpreter->outputs()[0]); + + // The CTCDecoder works with log-probs. + for (int t = 0; t < n_frames * BATCH_SIZE * num_classes; ++t) { + logits_output.push_back(outputs[t]); + } + + memcpy(previous_state_c_.get(), interpreter->typed_tensor(interpreter->outputs()[1]), sizeof(float) * previous_state_size); + memcpy(previous_state_h_.get(), interpreter->typed_tensor(interpreter->outputs()[2]), sizeof(float) * previous_state_size); +#endif // USE_TFLITE } char* @@ -352,7 +424,9 @@ DS_CreateModel(const char* aModelPath, ModelState** retval) { std::unique_ptr model(new ModelState()); +#ifndef USE_TFLITE model->mmap_env = new MemmappedEnv(Env::Default()); +#endif // USE_TFLITE model->ncep = aNCep; model->ncontext = aNContext; model->alphabet = new Alphabet(aAlphabetConfigPath); @@ -364,9 +438,14 @@ DS_CreateModel(const char* aModelPath, if (!aModelPath || strlen(aModelPath) < 1) { std::cerr << "No model specified, cannot continue." << std::endl; +#ifndef USE_TFLITE return error::INVALID_ARGUMENT; +#else // USE_TFLITE + return EINVAL; +#endif // USE_TFLITE } +#ifndef USE_TFLITE Status status; SessionOptions options; @@ -448,6 +527,62 @@ DS_CreateModel(const char* aModelPath, *retval = model.release(); return tensorflow::error::OK; +#else // USE_TFLITE + TfLiteStatus status; + + model->fbmodel = tflite::FlatBufferModel::BuildFromFile(aModelPath); + if (status != kTfLiteOk) { + std::cerr << status << std::endl; + return status; + } + + + tflite::ops::builtin::BuiltinOpResolver resolver; + status = tflite::InterpreterBuilder(*model->fbmodel, resolver)(&model->interpreter); + if (status != kTfLiteOk) { + std::cerr << status << std::endl; + return status; + } + + model->interpreter->AllocateTensors(); + model->interpreter->SetNumThreads(4); + + TfLiteIntArray* dims_input_node = model->interpreter->tensor(model->interpreter->inputs()[0])->dims; + + model->n_steps = dims_input_node->data[1]; + model->n_context = (dims_input_node->data[2] - 1 ) / 2; + model->mfcc_feats_per_timestep = dims_input_node->data[2] * dims_input_node->data[3]; + + TfLiteIntArray* dims_logits = model->interpreter->tensor(model->interpreter->outputs()[0])->dims; + const int final_dim_size = dims_logits->data[1] - 1; + if (final_dim_size != model->alphabet->GetSize()) { + std::cerr << "Error: Alphabet size does not match loaded model: alphabet " + << "has size " << model->alphabet->GetSize() + << ", but model has " << final_dim_size + << " classes in its output. Make sure you're passing an alphabet " + << "file with the same size as the one used for training." + << std::endl; + return EINVAL; + } + + const int previous_state_c_id = model->interpreter->inputs()[1]; + const int previous_state_h_id = model->interpreter->inputs()[2]; + + TfLiteIntArray* dims_c = model->interpreter->tensor(previous_state_c_id)->dims; + TfLiteIntArray* dims_h = model->interpreter->tensor(previous_state_h_id)->dims; + assert(dims_c->data[1] == dims_h->data[1]); + + model->previous_state_size = dims_c->data[1]; + model->previous_state_c_.reset(new float[model->previous_state_size]()); + model->previous_state_h_.reset(new float[model->previous_state_size]()); + + // Set initial values for previous_state_c and previous_state_h + memset(model->previous_state_c_.get(), 0, sizeof(float) * model->previous_state_size); + memset(model->previous_state_h_.get(), 0, sizeof(float) * model->previous_state_size); + + *retval = model.release(); + return kTfLiteOk; +#endif // USE_TFLITE } void @@ -483,7 +618,11 @@ DS_SpeechToText(ModelState* aCtx, { StreamingState* ctx; int status = DS_SetupStream(aCtx, 0, aSampleRate, &ctx); +#ifndef USE_TFLITE if (status != tensorflow::error::OK) { +#else // USE_TFLITE + if (status != kTfLiteOk) { +#endif // USE_TFLITE return nullptr; } DS_FeedAudioContent(ctx, aBuffer, aBufferSize); @@ -498,16 +637,22 @@ DS_SetupStream(ModelState* aCtx, { *retval = nullptr; +#ifndef USE_TFLITE Status status = aCtx->session->Run({}, {}, {"initialize_state"}, nullptr); if (!status.ok()) { std::cerr << "Error running session: " << status << std::endl; return status.code(); } +#endif // USE_TFLITE std::unique_ptr ctx(new StreamingState()); if (!ctx) { std::cerr << "Could not allocate streaming state." << std::endl; +#ifndef USE_TFLITE return status.code(); +#else // USE_TFLITE + return ENOMEM; +#endif // USE_TFLITE } const size_t num_classes = aCtx->alphabet->GetSize() + 1; // +1 for blank @@ -528,7 +673,11 @@ DS_SetupStream(ModelState* aCtx, ctx->model = aCtx; *retval = ctx.release(); +#ifndef USE_TFLITE return tensorflow::error::OK; +#else // USE_TFLITE + return kTfLiteOk; +#endif // USE_TFLITE } void diff --git a/native_client/kenlm/README.mozilla b/native_client/kenlm/README.mozilla index 5ae7d64c..7bad32fd 100644 --- a/native_client/kenlm/README.mozilla +++ b/native_client/kenlm/README.mozilla @@ -10,3 +10,19 @@ rm -rf windows include lm/filter lm/builder util/stream util/getopt.* python This was done in order to ensure uniqueness of double_conversion: git grep 'double_conversion' | cut -d':' -f1 | sort | uniq | xargs sed -ri 's/double_conversion/kenlm_double_conversion/g' + +Please apply this patch to be able to build on Android: +diff --git a/native_client/kenlm/util/file.cc b/native_client/kenlm/util/file.cc +index d53dc0a..b5e36b2 100644 +--- a/native_client/kenlm/util/file.cc ++++ b/native_client/kenlm/util/file.cc +@@ -540,7 +540,7 @@ std::string DefaultTempDirectory() { + const char *const vars[] = {"TMPDIR", "TMP", "TEMPDIR", "TEMP", 0}; + for (int i=0; vars[i]; ++i) { + char *val = +-#if defined(_GNU_SOURCE) ++#if defined(_GNU_SOURCE) && defined(__GLIBC_PREREQ) + #if __GLIBC_PREREQ(2,17) + secure_getenv + #else // __GLIBC_PREREQ + diff --git a/native_client/kenlm/util/file.cc b/native_client/kenlm/util/file.cc index d53dc0a4..b5e36b20 100644 --- a/native_client/kenlm/util/file.cc +++ b/native_client/kenlm/util/file.cc @@ -540,7 +540,7 @@ std::string DefaultTempDirectory() { const char *const vars[] = {"TMPDIR", "TMP", "TEMPDIR", "TEMP", 0}; for (int i=0; vars[i]; ++i) { char *val = -#if defined(_GNU_SOURCE) +#if defined(_GNU_SOURCE) && defined(__GLIBC_PREREQ) #if __GLIBC_PREREQ(2,17) secure_getenv #else // __GLIBC_PREREQ