From 3dad37eb05153cb935183f26e796a1437812d9af Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Thu, 20 Sep 2018 13:04:32 +0200 Subject: [PATCH] Remove AOT --- bin/benchmark_nc.py | 34 ++------- bin/benchmark_plotter.py | 5 +- native_client/BUILD | 59 +-------------- native_client/README.md | 26 ------- native_client/deepspeech.bzl | 5 -- native_client/deepspeech.cc | 90 +++++++---------------- native_client/model_size.sh | 11 --- native_client/tfcompile.config.pbtxt.src | 32 -------- taskcluster/aot-test-model-build.sh | 29 -------- taskcluster/arm64-build.sh | 11 --- taskcluster/host-build.sh | 11 --- taskcluster/rpi3-build.sh | 11 --- taskcluster/test-armbian-opt-base.tyml | 2 - taskcluster/test-linux-opt-base.tyml | 2 - taskcluster/test-raspbian-opt-base.tyml | 2 - tc-benchmark-tests.sh | 29 ++------ tc-cpp-ds-tests.sh | 4 +- tc-node-tests.sh | 7 +- tc-python-tests.sh | 7 +- tc-tests-utils.sh | 94 +----------------------- 20 files changed, 45 insertions(+), 426 deletions(-) delete mode 100644 native_client/deepspeech.bzl delete mode 100755 native_client/model_size.sh delete mode 100644 native_client/tfcompile.config.pbtxt.src delete mode 100755 taskcluster/aot-test-model-build.sh diff --git a/bin/benchmark_nc.py b/bin/benchmark_nc.py index cb42a976..4160ca83 100755 --- a/bin/benchmark_nc.py +++ b/bin/benchmark_nc.py @@ -125,12 +125,10 @@ def maybe_inspect_zip(models): that were inside. ''' - if len(models) > 1: + if not(is_zip_file(models)): return models - # With AOT, we may have just one file that is not a ZIP file - # so verify that we don't have a .zip extension - if not(is_zip_file(models)): + if len(models) > 1: return models if len(models) < 1: @@ -376,23 +374,18 @@ def establish_ssh(target=None, auto_trust=False, allow_agent=True, look_keys=Tru return ssh_conn -def run_benchmarks(dir, models, wav, alphabet, lm_binary=None, trie=None, iters=-1, extra_aot_model=None): +def run_benchmarks(dir, models, wav, alphabet, lm_binary=None, trie=None, iters=-1): r''' Core of the running of the benchmarks. We will run on all of models, against the WAV file provided as wav, and the provided alphabet. - - If supplied extra_aot_model, add another pass with the .so built AOT model. ''' assert_valid_dir(dir) inference_times = [ ] - if extra_aot_model: - models.append(extra_aot_model) - for model in models: - model_filename = '' if model is extra_aot_model else model + model_filename = model current_model = { 'name': model, @@ -457,8 +450,6 @@ def handle_args(): help='Local directory where to copy stuff. This will be mirrored to the remote system if needed (make sure to use path that will work on both).') parser.add_argument('--models', nargs='+', required=False, help='List of files (protocolbuffer) to work on. Might be a zip file.') - parser.add_argument('--so-model', required=False, - help='Perform one step using AOT-based .so model') parser.add_argument('--wav', required=False, help='WAV file to pass to native_client. 
Supply again in plotting mode to draw realine line.') parser.add_argument('--alphabet', required=False, @@ -483,19 +474,6 @@ def do_main(): if not cli_args.models or not cli_args.wav or not cli_args.alphabet: raise AssertionError('Missing arguments (models, wav or alphabet)') - if cli_args.so_model: - ''' - Verify we have a string that matches the format described in - reduce_filename above: NAME.aot.EPOCHS.XXX.YYY.so - - Where XXX is a variation on the model size for example - - And where YYY is a const related to the training dataset - ''' - - parts = cli_args.so_model.split('.') - assert len(parts) == 6 - assert parts[1] == 'aot' - assert parts[-1] == 'so' - if cli_args.dir is not None and not os.path.isdir(cli_args.dir): raise AssertionError('Inexistent temp directory') @@ -514,9 +492,9 @@ def do_main(): if cli_args.lm_binary and cli_args.trie: dest_lm_binary = os.path.join(tempdir, os.path.basename(cli_args.lm_binary)) dest_trie = os.path.join(tempdir, os.path.basename(cli_args.trie)) - inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, extra_aot_model=cli_args.so_model, wav=dest_wav, alphabet=dest_alphabet, lm_binary=dest_lm_binary, trie=dest_trie, iters=cli_args.iters) + inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, wav=dest_wav, alphabet=dest_alphabet, lm_binary=dest_lm_binary, trie=dest_trie, iters=cli_args.iters) else: - inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, extra_aot_model=cli_args.so_model, wav=dest_wav, alphabet=dest_alphabet, iters=cli_args.iters) + inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, wav=dest_wav, alphabet=dest_alphabet, iters=cli_args.iters) if cli_args.csv: produce_csv(input=inference_times, output=cli_args.csv) diff --git a/bin/benchmark_plotter.py b/bin/benchmark_plotter.py index e6da2a38..4584ad36 100755 --- a/bin/benchmark_plotter.py +++ b/bin/benchmark_plotter.py @@ -35,10 +35,7 @@ def reduce_filename(f): ''' f = os.path.basename(f).split('.') - if f[1] == 'aot': - return 'AOT:' + str(keep_only_digits(f[-3])) - else: - return keep_only_digits(f[-3]) + return keep_only_digits(f[-3]) def ingest_csv(datasets=None, range=None): existing_files = filter(lambda x: os.path.isfile(x[1]), datasets) diff --git a/native_client/BUILD b/native_client/BUILD index 2b59a426..693963c8 100644 --- a/native_client/BUILD +++ b/native_client/BUILD @@ -1,49 +1,7 @@ # Description: Deepspeech native client library. load("@org_tensorflow//tensorflow:tensorflow.bzl", - "if_linux_x86_64", "tf_cc_shared_object", "if_cuda") - -load("@org_tensorflow//tensorflow/compiler/aot:tfcompile.bzl", - "tf_library") - -load(":deepspeech.bzl", "if_native_model") - -config_setting( - name = "ds_native_model", - values = { - "define": "DS_NATIVE_MODEL=1" - } -) - -tf_library( - name = "deepspeech_model_core", - cpp_class = "DeepSpeech::nativeModel", - # We don't need tests or benchmark binaries - gen_test=False, gen_benchmark=False, - # graph and config will be generated at build time thanks to the matching - # genrule. 
- graph = "tfcompile.model.pb", - config = "tfcompile.config.pbtxt", - # This depends on //tensorflow:rpi3 condition defined in mozilla/tensorflow - tfcompile_flags = select({ - "//tensorflow:rpi3": str('--target_cpu="cortex-a53"'), - "//conditions:default": str('') - }), -) - -genrule( - name = "tfcompile.config", - srcs = ["tfcompile.config.pbtxt.src"], - outs = ["tfcompile.config.pbtxt"], - cmd = "$(location :model_size.sh) $(SRCS) $(DS_MODEL_TIMESTEPS) $(DS_MODEL_FRAMESIZE) >$@", - tools = [":model_size.sh"] -) - -genrule( - name = "tfcompile.model", - outs = ["tfcompile.model.pb"], - cmd = "cp $(DS_MODEL_FILE) $@" -) + "tf_cc_shared_object", "if_cuda") genrule( name = "ds_git_version", @@ -68,17 +26,13 @@ tf_cc_shared_object( "kiss_fft130/_kiss_fft_guts.h", "kiss_fft130/tools/kiss_fftr.h", "ds_version.h"] + - if_native_model(["deepspeech_model_core.h"]) + glob(["kenlm/lm/*.cc", "kenlm/util/*.cc", "kenlm/util/double-conversion/*.cc", "kenlm/lm/*.hh", "kenlm/util/*.hh", "kenlm/util/double-conversion/*.h"], exclude = ["kenlm/*/*test.cc", "kenlm/*/*main.cc"]) + glob(["boost_locale/**/*.hpp"]), # -Wno-sign-compare to silent a lot of warnings from tensorflow itself, # which makes it harder to see our own warnings - copts = ["-Wno-sign-compare", "-fvisibility=hidden"] + if_native_model([ - "-DDS_MODEL_TIMESTEPS=$(DS_MODEL_TIMESTEPS)", - "-DDS_NATIVE_MODEL=1", - ]), + copts = ["-Wno-sign-compare", "-fvisibility=hidden"], deps = [ "//tensorflow/core:core_cpu", "//tensorflow/core:direct_session", @@ -108,20 +62,13 @@ tf_cc_shared_object( #### Needed by production model produced without "--use_seq_length False" #"//tensorflow/core/kernels:logging_ops", # Assert #"//tensorflow/core/kernels:reverse_sequence_op", # ReverseSequence - ] + if_native_model([ - "//tensorflow/compiler/tf2xla:xla_compiled_cpu_function", - ])+ if_cuda([ + ] + if_cuda([ "//tensorflow/core:core", ]), includes = ["kenlm", "boost_locale", "c_speech_features", "kiss_fft130"], defines = ["KENLM_MAX_ORDER=6"], ) -tf_cc_shared_object( - name = "libdeepspeech_model.so", - deps = [":deepspeech_model_core"] -) - tf_cc_shared_object( name = "libctc_decoder_with_kenlm.so", srcs = [ diff --git a/native_client/README.md b/native_client/README.md index ed53e303..b44b6bf3 100644 --- a/native_client/README.md +++ b/native_client/README.md @@ -92,32 +92,6 @@ cd ../DeepSpeech/native_client make deepspeech ``` -## Building with AOT model - -First, please note that this is still experimental. AOT model relies on TensorFlow's [AOT tfcompile](https://www.tensorflow.org/performance/xla/tfcompile) tooling. It takes a protocol buffer file graph as input, and produces a .so library that one can call from C++ code. -To experiment, you will need to build TensorFlow from [github.com/mozilla/tensorflow r1.6 branch](https://github.com/mozilla/tensorflow/tree/r1.6). Follow TensorFlow's documentation for the configuration of your system. -When building, you will have to add some extra parameter and targets. - -Bazel defines: -* `--define=DS_NATIVE_MODEL=1`: to toggle AOT support. -* `--define=DS_MODEL_TIMESTEPS=x`: to define how many timesteps you want to handle. Relying on prebuilt model implies we need to use a fixed value for how much audio value we want to use. Timesteps defines that value, and an audio file bigger than this will just be dealt with over several samples. This means there's a compromise between quality and minimum audio segment you want to handle. 
-* `--define=DS_MODEL_FRAMESIZE=y`: to define your model framesize, this is the second component of your model's input layer shape. Can be extracted using TensorFlow's `summarize_graph` tool.
-* `--define=DS_MODEL_FILE=/path/to/graph.pb`: the model you want to use
-
-Bazel targets:
-* `//native_client:libdeepspeech_model.so`: to produce `libdeepspeech_model.so`
-
-In the end, the previous example becomes (no change for `libctc_decoder_with_kenlm.so`):
-
-```
-bazel build --config=monolithic -c opt --copt=-O3 --copt=-fvisibility=hidden --define=DS_NATIVE_MODEL=1 --define=DS_MODEL_TIMESTEPS=64 --define=DS_MODEL_FRAMESIZE=494 --define=DS_MODEL_FILE=/tmp/model.ldc93s1.pb //native_client:libdeepspeech_model.so //native_client:libdeepspeech.so //native_client:generate_trie
-```
-
-Later, when building either `deepspeech` binaries or bindings, you will have to add some extra variables to your `make` command-line (assuming `TFDIR` points to your TensorFlow's git clone):
-```
-EXTRA_LIBS="-ldeepspeech_model"
-```
-
 ## Installing
 
 After building, the library files and binary can optionally be installed to a system path for ease of development. This is also a required step for bindings generation.
diff --git a/native_client/deepspeech.bzl b/native_client/deepspeech.bzl
deleted file mode 100644
index 6f3e53f1..00000000
--- a/native_client/deepspeech.bzl
+++ /dev/null
@@ -1,5 +0,0 @@
-def if_native_model(a):
-  return select({
-    ":ds_native_model": a,
-    "//conditions:default": []
-  })
diff --git a/native_client/deepspeech.cc b/native_client/deepspeech.cc
index 6a54b986..5714496c 100644
--- a/native_client/deepspeech.cc
+++ b/native_client/deepspeech.cc
@@ -1,12 +1,3 @@
-#ifdef DS_NATIVE_MODEL
-#define EIGEN_USE_THREADS
-#define EIGEN_USE_CUSTOM_THREAD_POOL
-
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-
-#include "native_client/deepspeech_model_core.h" // generated
-#endif
-
 #include 
 #include 
 #include 
@@ -109,7 +100,6 @@ struct ModelState {
   Alphabet* alphabet;
   KenLMBeamScorer* scorer;
   unsigned int beam_width;
-  bool run_aot;
   unsigned int n_steps;
   unsigned int mfcc_feats_per_timestep;
   unsigned int n_context;
@@ -149,7 +139,6 @@ ModelState::ModelState()
   , alphabet(nullptr)
   , scorer(nullptr)
   , beam_width(0)
-  , run_aot(false)
   , n_steps(-1)
   , mfcc_feats_per_timestep(-1)
   , n_context(-1)
@@ -297,61 +286,34 @@ ModelState::infer(const float* aMfcc, unsigned int n_frames, vector<float>& logits_output)
 {
   const size_t num_classes = alphabet->GetSize() + 1; // +1 for blank
 
-  if (run_aot) {
-#ifdef DS_NATIVE_MODEL
-    Eigen::ThreadPool tp(2);  // Size the thread pool as appropriate.
-    Eigen::ThreadPoolDevice device(&tp, tp.NumThreads());
+  Tensor input(DT_FLOAT, TensorShape({BATCH_SIZE, n_steps, 2*n_context+1, MFCC_FEATURES}));
 
-    nativeModel nm(nativeModel::AllocMode::RESULTS_PROFILES_AND_TEMPS_ONLY);
-    nm.set_thread_pool(&device);
+  auto input_mapped = input.flat<float>();
+  int i;
+  for (i = 0; i < n_frames*mfcc_feats_per_timestep; ++i) {
+    input_mapped(i) = aMfcc[i];
+  }
+  for (; i < n_steps*mfcc_feats_per_timestep; ++i) {
+    input_mapped(i) = 0;
+  }
 
-    for (int ot = 0; ot < n_frames; ot += DS_MODEL_TIMESTEPS) {
-      nm.set_arg0_data(&(aMfcc[ot * mfcc_feats_per_timestep]));
-      nm.Run();
+  Tensor input_lengths(DT_INT32, TensorShape({1}));
+  input_lengths.scalar<int>()() = n_frames;
 
-      // The CTCDecoder works with log-probs.
-      for (int t = 0; t < DS_MODEL_TIMESTEPS, (ot + t) < n_frames; ++t) {
-        for (int b = 0; b < BATCH_SIZE; ++b) {
-          for (int c = 0; c < num_classes; ++c) {
-            logits_output.push_back(nm.result0(t, b, c));
-          }
-        }
-      }
-    }
-#else
-    std::cerr << "No support for native model built-in." << std::endl;
+  vector<Tensor> outputs;
+  Status status = session->Run(
+    {{"input_node", input}, {"input_lengths", input_lengths}},
+    {"logits"}, {}, &outputs);
+
+  if (!status.ok()) {
+    std::cerr << "Error running session: " << status << "\n";
     return;
-#endif // DS_NATIVE_MODEL
-  } else {
-    Tensor input(DT_FLOAT, TensorShape({BATCH_SIZE, n_steps, 2*n_context+1, MFCC_FEATURES}));
+  }
 
-    auto input_mapped = input.flat<float>();
-    int i;
-    for (i = 0; i < n_frames*mfcc_feats_per_timestep; ++i) {
-      input_mapped(i) = aMfcc[i];
-    }
-    for (; i < n_steps*mfcc_feats_per_timestep; ++i) {
-      input_mapped(i) = 0;
-    }
-
-    Tensor input_lengths(DT_INT32, TensorShape({1}));
-    input_lengths.scalar<int>()() = n_frames;
-
-    vector<Tensor> outputs;
-    Status status = session->Run(
-      {{"input_node", input}, {"input_lengths", input_lengths}},
-      {"logits"}, {}, &outputs);
-
-    if (!status.ok()) {
-      std::cerr << "Error running session: " << status << "\n";
-      return;
-    }
-
-    auto logits_mapped = outputs[0].flat<float>();
-    // The CTCDecoder works with log-probs.
-    for (int t = 0; t < n_frames * BATCH_SIZE * num_classes; ++t) {
-      logits_output.push_back(logits_mapped(t));
-    }
+  auto logits_mapped = outputs[0].flat<float>();
+  // The CTCDecoder works with log-probs.
+  for (int t = 0; t < n_frames * BATCH_SIZE * num_classes; ++t) {
+    logits_output.push_back(logits_mapped(t));
   }
 }
@@ -423,16 +385,14 @@ DS_CreateModel(const char* aModelPath,
   model->ncontext = aNContext;
   model->alphabet = new Alphabet(aAlphabetConfigPath);
   model->beam_width = aBeamWidth;
-  model->run_aot = false;
 
   *retval = nullptr;
 
   DS_PrintVersions();
 
   if (!aModelPath || strlen(aModelPath) < 1) {
-    std::cerr << "No model specified, will rely on built-in model." << std::endl;
-    model->run_aot = true;
-    return 0;
+    std::cerr << "No model specified, cannot continue." << std::endl;
+    return error::INVALID_ARGUMENT;
   }
 
   Status status;
diff --git a/native_client/model_size.sh b/native_client/model_size.sh
deleted file mode 100755
index 86c3c489..00000000
--- a/native_client/model_size.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/sh
-
-# Fail if we don't have *three* arguments
-# $1 => file to generate
-# $2 => dimension to use for timesteps
-# $3 => dimension to use for framesize
-if [ $# -ne 3 ]; then
-  exit 1
-fi;
-
-sed -e "s|\$DS_MODEL_TIMESTEPS|$2|g" -e "s|\$DS_MODEL_FRAMESIZE|$3|g" < $1
diff --git a/native_client/tfcompile.config.pbtxt.src b/native_client/tfcompile.config.pbtxt.src
deleted file mode 100644
index f98852a7..00000000
--- a/native_client/tfcompile.config.pbtxt.src
+++ /dev/null
@@ -1,32 +0,0 @@
-# Each feed is a positional input argument for the generated function. The order
-# of each entry matches the order of each input argument. Here “x_hold” and “y_hold”
-# refer to the names of placeholder nodes defined in the graph.
-# -## From: input_tensor = tf.placeholder(tf.float32, [None, None, n_input + 2*n_input*n_context], name='input_node') -feed { - id { node_name: "input_node" } - shape { - dim { size: 1 } # Batch size of 1 - dim { size: $DS_MODEL_TIMESTEPS } # aNFrames: 146 for data/ldc93s1/LDC93S1.wav - dim { size: $DS_MODEL_FRAMESIZE } # frameSize: n_input + 2*n_input*n_context] ; n_input=26, n_context=9 - } -} - -#feed { -# id { node_name: "input_lengths" } -# shape { -# dim { size: 1 } -# } -#} - -# Each fetch is a positional output argument for the generated function. The order -# of each entry matches the order of each output argument. Here “x_y_prod” -# refers to the name of a matmul node defined in the graph. -#fetch { -# id { node_name: "output_node" } -#} - -fetch { - id { node_name: "logits" } -} - diff --git a/taskcluster/aot-test-model-build.sh b/taskcluster/aot-test-model-build.sh deleted file mode 100755 index 2e0db422..00000000 --- a/taskcluster/aot-test-model-build.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -set -xe - -source $(dirname "$0")/../tc-tests-utils.sh - -source ${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh - -BAZEL_TARGETS=" -//native_client:libdeepspeech.so -${BAZEL_AOT_TARGETS}" - -BAZEL_ENV_FLAGS="TF_NEED_CUDA=0" -SYSTEM_TARGET=host - -EXTRA_LOCAL_CFLAGS="${EXTRA_AOT_CFLAGS}" -EXTRA_LOCAL_LDFLAGS="${EXTRA_AOT_LDFLAGS}" -EXTRA_LOCAL_LIBS="${EXTRA_AOT_LIBS}" - -do_get_model_parameters "${DEEPSPEECH_TEST_MODEL}" AOT_MODEL_PARAMS -BAZEL_BUILD_FLAGS="${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BAZEL_AOT_BUILD_FLAGS} ${AOT_MODEL_PARAMS}" - -do_bazel_build - -do_deepspeech_binary_build - -do_deepspeech_python_build - -do_deepspeech_nodejs_build diff --git a/taskcluster/arm64-build.sh b/taskcluster/arm64-build.sh index 347221b6..dddbac02 100644 --- a/taskcluster/arm64-build.sh +++ b/taskcluster/arm64-build.sh @@ -16,17 +16,6 @@ BAZEL_ENV_FLAGS="TF_NEED_CUDA=0" SYSTEM_TARGET=rpi3-armv8 SYSTEM_RASPBIAN=/tmp/multistrap-armbian64-stretch -if [ $1 = "--aot" ]; then - EXTRA_LOCAL_CFLAGS="${EXTRA_AOT_CFLAGS}" - EXTRA_LOCAL_LDFLAGS="${EXTRA_AOT_LDFLAGS}" - EXTRA_LOCAL_LIBS="${EXTRA_AOT_LIBS}" - - do_get_model_parameters "${DEEPSPEECH_PROD_MODEL}" AOT_MODEL_PARAMS - - BAZEL_TARGETS="${BAZEL_AOT_TARGETS} ${BAZEL_TARGETS}" - BAZEL_BUILD_FLAGS="${BAZEL_BUILD_FLAGS} ${BAZEL_AOT_BUILD_FLAGS} ${AOT_MODEL_PARAMS}" -fi; - maybe_install_xldd do_bazel_build diff --git a/taskcluster/host-build.sh b/taskcluster/host-build.sh index d322ee99..66b5ba7c 100755 --- a/taskcluster/host-build.sh +++ b/taskcluster/host-build.sh @@ -15,17 +15,6 @@ BAZEL_BUILD_FLAGS="${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS}" BAZEL_ENV_FLAGS="TF_NEED_CUDA=0" SYSTEM_TARGET=host -if [ $1 = "--aot" ]; then - EXTRA_LOCAL_CFLAGS="${EXTRA_AOT_CFLAGS}" - EXTRA_LOCAL_LDFLAGS="${EXTRA_AOT_LDFLAGS}" - EXTRA_LOCAL_LIBS="${EXTRA_AOT_LIBS}" - - do_get_model_parameters "${DEEPSPEECH_PROD_MODEL}" AOT_MODEL_PARAMS - - BAZEL_TARGETS="${BAZEL_AOT_TARGETS} ${BAZEL_TARGETS}" - BAZEL_BUILD_FLAGS="${BAZEL_BUILD_FLAGS} ${BAZEL_AOT_BUILD_FLAGS} ${AOT_MODEL_PARAMS}" -fi; - do_bazel_build do_deepspeech_binary_build diff --git a/taskcluster/rpi3-build.sh b/taskcluster/rpi3-build.sh index cde29a0d..0d879049 100755 --- a/taskcluster/rpi3-build.sh +++ b/taskcluster/rpi3-build.sh @@ -16,17 +16,6 @@ BAZEL_ENV_FLAGS="TF_NEED_CUDA=0" SYSTEM_TARGET=rpi3 SYSTEM_RASPBIAN=/tmp/multistrap-raspbian-stretch -if [ $1 = "--aot" ]; then - EXTRA_LOCAL_CFLAGS="${EXTRA_AOT_CFLAGS}" - EXTRA_LOCAL_LDFLAGS="${EXTRA_AOT_LDFLAGS}" - EXTRA_LOCAL_LIBS="${EXTRA_AOT_LIBS}" - - 
do_get_model_parameters "${DEEPSPEECH_PROD_MODEL}" AOT_MODEL_PARAMS - - BAZEL_TARGETS="${BAZEL_AOT_TARGETS} ${BAZEL_TARGETS}" - BAZEL_BUILD_FLAGS="${BAZEL_BUILD_FLAGS} ${BAZEL_AOT_BUILD_FLAGS} ${AOT_MODEL_PARAMS}" -fi; - maybe_install_xldd do_bazel_build diff --git a/taskcluster/test-armbian-opt-base.tyml b/taskcluster/test-armbian-opt-base.tyml index f07e0899..79310dbf 100644 --- a/taskcluster/test-armbian-opt-base.tyml +++ b/taskcluster/test-armbian-opt-base.tyml @@ -32,14 +32,12 @@ then: env: $let: training: { $eval: as_slugid("test-training_upstream-linux-amd64-py27mu-opt") } - linux_arm64_aot_prod: { $eval: as_slugid("linux-arm64-cpu-aot_prod-opt") } linux_arm64_build: { $eval: as_slugid("linux-arm64-cpu-opt") } node_package: { $eval: as_slugid("node-package") } in: CONVERT_GRAPHDEF_MEMMAPPED: ${build.convert_graphdef} DEEPSPEECH_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${linux_arm64_build}/artifacts/public DEEPSPEECH_NODEJS: https://queue.taskcluster.net/v1/task/${node_package}/artifacts/public - DEEPSPEECH_AOT_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${linux_arm64_aot_prod}/artifacts/public DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod/output_graph.pbmm diff --git a/taskcluster/test-linux-opt-base.tyml b/taskcluster/test-linux-opt-base.tyml index 95aba59d..a5c5176d 100644 --- a/taskcluster/test-linux-opt-base.tyml +++ b/taskcluster/test-linux-opt-base.tyml @@ -32,7 +32,6 @@ then: env: $let: training: { $eval: as_slugid("test-training_upstream-linux-amd64-py27mu-opt") } - linux_amd64_aot_test: { $eval: as_slugid("linux-amd64-cpu-aot_test-opt") } linux_amd64_build: { $eval: as_slugid("linux-amd64-cpu-opt") } linux_amd64_ctc: { $eval: as_slugid("linux-amd64-ctc-opt") } node_package: { $eval: as_slugid("node-package") } @@ -40,7 +39,6 @@ then: CONVERT_GRAPHDEF_MEMMAPPED: ${build.convert_graphdef} DEEPSPEECH_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${linux_amd64_build}/artifacts/public DEEPSPEECH_NODEJS: https://queue.taskcluster.net/v1/task/${node_package}/artifacts/public - DEEPSPEECH_AOT_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${linux_amd64_aot_test}/artifacts/public DEEPSPEECH_LIBCTC: https://queue.taskcluster.net/v1/task/${linux_amd64_ctc}/artifacts/public/decoder.tar.xz DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod/output_graph.pb diff --git a/taskcluster/test-raspbian-opt-base.tyml b/taskcluster/test-raspbian-opt-base.tyml index 18facd50..fdbc6d3e 100644 --- a/taskcluster/test-raspbian-opt-base.tyml +++ b/taskcluster/test-raspbian-opt-base.tyml @@ -32,14 +32,12 @@ then: env: $let: training: { $eval: as_slugid("test-training_upstream-linux-amd64-py27mu-opt") } - linux_rpi3_aot_prod: { $eval: as_slugid("linux-rpi3-cpu-aot_prod-opt") } linux_rpi3_build: { $eval: as_slugid("linux-rpi3-cpu-opt") } node_package: { $eval: as_slugid("node-package") } in: CONVERT_GRAPHDEF_MEMMAPPED: ${build.convert_graphdef} DEEPSPEECH_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${linux_rpi3_build}/artifacts/public DEEPSPEECH_NODEJS: https://queue.taskcluster.net/v1/task/${node_package}/artifacts/public - DEEPSPEECH_AOT_ARTIFACTS_ROOT: 
https://queue.taskcluster.net/v1/task/${linux_rpi3_aot_prod}/artifacts/public DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod/output_graph.pb DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod/output_graph.pbmm diff --git a/tc-benchmark-tests.sh b/tc-benchmark-tests.sh index 1b2833b5..47833cd5 100755 --- a/tc-benchmark-tests.sh +++ b/tc-benchmark-tests.sh @@ -8,7 +8,6 @@ exec_benchmark() { model_file="$1" run_postfix=$2 - aot_model=$3 mkdir -p /tmp/bench-ds/ || true mkdir -p /tmp/bench-ds-nolm/ || true @@ -18,14 +17,9 @@ exec_benchmark() png=${TASKCLUSTER_ARTIFACTS}/benchmark-${run_postfix}.png svg=${TASKCLUSTER_ARTIFACTS}/benchmark-${run_postfix}.svg - AOT_MODEL_ARGS="" - if [ ! -z "${aot_model}" ]; then - AOT_MODEL_ARGS="--so-model ${aot_model}" - fi; - python ${DS_ROOT_TASK}/DeepSpeech/ds/bin/benchmark_nc.py \ --dir /tmp/bench-ds/ \ - --models ${model_file} ${AOT_MODEL_ARGS} \ + --models ${model_file} \ --wav /tmp/LDC93S1.wav \ --alphabet /tmp/alphabet.txt \ --lm_binary /tmp/lm.binary \ @@ -34,7 +28,7 @@ exec_benchmark() python ${DS_ROOT_TASK}/DeepSpeech/ds/bin/benchmark_nc.py \ --dir /tmp/bench-ds-nolm/ \ - --models ${model_file} ${AOT_MODEL_ARGS} \ + --models ${model_file} \ --wav /tmp/LDC93S1.wav \ --alphabet /tmp/alphabet.txt \ --csv ${csv_nolm} @@ -91,12 +85,7 @@ done; # Let's prepare another model for single-model codepath mv /tmp/${model_name} /tmp/test.frozen.e75.lstm494.ldc93s1.pb -# We don't need download_material here, benchmark code should take care of it. -if [ "$1" = "--aot" ]; then - export TASKCLUSTER_SCHEME=${DEEPSPEECH_AOT_ARTIFACTS_ROOT}/native_client.tar.xz -else - export TASKCLUSTER_SCHEME=${DEEPSPEECH_ARTIFACTS_ROOT}/native_client.tar.xz -fi; +export TASKCLUSTER_SCHEME=${DEEPSPEECH_ARTIFACTS_ROOT}/native_client.tar.xz install_pyenv "${PYENV_ROOT}" install_pyenv_virtualenv "$(pyenv root)/plugins/pyenv-virtualenv" @@ -108,15 +97,9 @@ source ${PYENV_ROOT}/versions/${pyver}/envs/${PYENV_NAME}/bin/activate pip install -r ${DS_ROOT_TASK}/DeepSpeech/ds/requirements.txt | cat -exec_benchmark "/tmp/test.frozen.e75.lstm494.ldc93s1.pb" "single-model_noAOT" -exec_benchmark "/tmp/test.frozen.e75.lstm100-900.ldc93s1.zip" "zipfile-model_noAOT" -exec_benchmark "${model_list}" "multi-model_noAOT" - -if [ "$1" = "--aot" ]; then - exec_benchmark "/tmp/test.frozen.e75.lstm494.ldc93s1.pb" "single-model_AOT" "test.aot.e5.lstm494.ldc93s1.so" - exec_benchmark "/tmp/test.frozen.e75.lstm100-900.ldc93s1.zip" "zipfile-model_AOT" "test.aot.e5.lstm494.ldc93s1.so" - exec_benchmark "${model_list}" "multi-model_AOT" "test.aot.e5.lstm494.ldc93s1.so" -fi; +exec_benchmark "/tmp/test.frozen.e75.lstm494.ldc93s1.pb" "single-model" +exec_benchmark "/tmp/test.frozen.e75.lstm100-900.ldc93s1.zip" "zipfile-model" +exec_benchmark "${model_list}" "multi-model" deactivate pyenv uninstall --force ${PYENV_NAME} diff --git a/tc-cpp-ds-tests.sh b/tc-cpp-ds-tests.sh index b093cb25..7acd27a7 100644 --- a/tc-cpp-ds-tests.sh +++ b/tc-cpp-ds-tests.sh @@ -4,9 +4,7 @@ set -xe source $(dirname "$0")/tc-tests-utils.sh -aot_model=$1 - -download_material "${TASKCLUSTER_TMP_DIR}/ds" "${aot_model}" +download_material "${TASKCLUSTER_TMP_DIR}/ds" export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH diff --git a/tc-node-tests.sh b/tc-node-tests.sh index f91ae379..60eac4a1 100644 --- a/tc-node-tests.sh +++ b/tc-node-tests.sh @@ -5,7 +5,6 @@ set -xe 
source $(dirname "$0")/tc-tests-utils.sh nodever=$1 -aot_model=$2 if [ -z "${nodever}" ]; then echo "No node version given, aborting." @@ -21,10 +20,6 @@ NODE_ROOT="${DS_ROOT_TASK}/ds-test/" export NODE_PATH="${NODE_ROOT}/node_modules/" export PATH="${NODE_PATH}/.bin/:$PATH" -if [ "${aot_model}" = "--aot" ]; then - npm install --prefix ${NODE_ROOT} ${DEEPSPEECH_AOT_ARTIFACTS_ROOT}/deepspeech-${DS_VERSION}.tgz -else - npm install --prefix ${NODE_ROOT} ${DEEPSPEECH_NODEJS}/deepspeech-${DS_VERSION}.tgz -fi +npm install --prefix ${NODE_ROOT} ${DEEPSPEECH_NODEJS}/deepspeech-${DS_VERSION}.tgz run_all_inference_tests diff --git a/tc-python-tests.sh b/tc-python-tests.sh index cbbc2b83..9227a242 100644 --- a/tc-python-tests.sh +++ b/tc-python-tests.sh @@ -5,7 +5,6 @@ set -xe source $(dirname "$0")/tc-tests-utils.sh pyver_full=$1 -aot_model=$2 if [ -z "${pyver_full}" ]; then echo "No python version given, aborting." @@ -47,11 +46,7 @@ platform=$(python -c 'import sys; import platform; plat = platform.system().lowe whl_ds_version="$(python -c 'from pkg_resources import parse_version; print(parse_version("'${DS_VERSION}'"))')" deepspeech_pkg="deepspeech-${whl_ds_version}-cp${pyver_pkg}-cp${pyver_pkg}${py_unicode_type}-${platform}.whl" -if [ "${aot_model}" = "--aot" ]; then - deepspeech_pkg_url=${DEEPSPEECH_AOT_ARTIFACTS_ROOT}/${deepspeech_pkg} -else - deepspeech_pkg_url=${DEEPSPEECH_ARTIFACTS_ROOT}/${deepspeech_pkg} -fi +deepspeech_pkg_url=${DEEPSPEECH_ARTIFACTS_ROOT}/${deepspeech_pkg} LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH pip install --verbose --only-binary :all: ${PY37_SOURCE_PACKAGE} --upgrade ${deepspeech_pkg_url} | cat run_all_inference_tests diff --git a/tc-tests-utils.sh b/tc-tests-utils.sh index 71b6d25d..87c9f12c 100755 --- a/tc-tests-utils.sh +++ b/tc-tests-utils.sh @@ -28,15 +28,6 @@ export DS_DSDIR=${DS_ROOT_TASK}/DeepSpeech/ds export BAZEL_CTC_TARGETS="//native_client:libctc_decoder_with_kenlm.so" -export EXTRA_AOT_CFLAGS="" -export EXTRA_AOT_LDFLAGS="" -export EXTRA_AOT_LIBS="-ldeepspeech_model" - -export BAZEL_AOT_BUILD_FLAGS="--define=DS_NATIVE_MODEL=1 --define=DS_MODEL_TIMESTEPS=64" -export BAZEL_AOT_TARGETS=" -//native_client:libdeepspeech_model.so -" - export DS_VERSION="$(cat ${DS_DSDIR}/VERSION)" model_source="${DEEPSPEECH_TEST_MODEL}" @@ -160,37 +151,6 @@ assert_correct_ldc93s1_prodmodel() assert_correct_inference "$1" "she had tired or so and greasy wash war or year" } -assert_correct_ldc93s1_somodel() -{ - somodel_nolm=$(strip "$1") - somodel_withlm=$(strip "$2") - - # We want to be able to return non zero value from the function, while not - # failing the whole execution - set +e - - assert_correct_ldc93s1 "${somodel_nolm}" - so_nolm=$? - - assert_correct_ldc93s1 "${somodel_withlm}" - so_lm=$? - - set -e - - # We accept that with no LM there may be errors, but we do not accept that - # for LM. For now. 
- if [ ${so_lm} -eq 1 ] && [ ${so_nolm} -eq 1 -o ${so_nolm} -eq 0 ]; - then - exit 1 - elif [ ${so_lm} -eq 0 ] && [ ${so_nolm} -eq 1 -o ${so_nolm} -eq 0 ]; - then - exit 0 - else - echo "Unexpected status" - exit 2 - fi -} - assert_correct_warning_upsampling() { assert_shows_something "$1" "erratic speech recognition" @@ -232,23 +192,6 @@ run_all_inference_tests() phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null) assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}" - - if [ "${aot_model}" = "--aot" ]; then - phrase_somodel_nolm=$(deepspeech --model "" --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav) - phrase_somodel_withlm=$(deepspeech --model "" --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav) - - assert_correct_ldc93s1_somodel "${phrase_somodel_nolm}" "${phrase_somodel_withlm}" - - phrase_somodel_nolm_stereo_44k=$(deepspeech --model "" --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav) - phrase_somodel_withlm_stereo_44k=$(deepspeech --model "" --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav) - - assert_correct_ldc93s1_somodel "${phrase_somodel_nolm_stereo_44k}" "${phrase_somodel_withlm_stereo_44k}" - - phrase_somodel_nolm_mono_8k=$(deepspeech --model "" --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null) - phrase_somodel_withlm_stereo_44k=$(deepspeech --model "" --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null) - - assert_correct_warning_upsampling "${phrase_somodel_nolm_mono_8k}" "${phrase_somodel_withlm_mono_8k}" - fi; } run_prod_inference_tests() @@ -297,11 +240,6 @@ download_native_client_files() generic_download_tarxz "$1" "${DEEPSPEECH_ARTIFACTS_ROOT}/native_client.tar.xz" } -download_aot_model_files() -{ - generic_download_tarxz "$1" "${DEEPSPEECH_AOT_ARTIFACTS_ROOT}/native_client.tar.xz" -} - download_ctc_kenlm() { generic_download_tarxz "$1" "${DEEPSPEECH_LIBCTC}" @@ -325,14 +263,8 @@ download_for_frozen() download_material() { target_dir=$1 - maybe_aot=$2 - - if [ "${maybe_aot}" = "--aot" ]; then - download_aot_model_files "${target_dir}" - else - download_native_client_files "${target_dir}" - fi + download_native_client_files "${target_dir}" download_data ls -hal ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} ${TASKCLUSTER_TMP_DIR}/LDC93S1*.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt @@ -377,30 +309,6 @@ maybe_install_xldd() fi } -do_get_model_parameters() -{ - local __result=$2 - model_url=$1 - model_file=/tmp/$(basename "${model_url}") - - if [ -z "${model_url}" ]; then - echo "Empty URL for model" - exit 1 - fi; - - wget "${model_url}" -O "${model_file}" - wget -P "/tmp/" "${SUMMARIZE_GRAPH_BINARY}" && chmod +x /tmp/summarize_graph - - if [ ! 
-f "${model_file}" ]; then - echo "No such model: ${model_file}" - exit 1 - fi; - - model_width=$(/tmp/summarize_graph --in_graph="${model_file}" | grep "inputs" | grep -Eo "shape=\[\?,\?,[[:digit:]]+" | cut -d',' -f3) - - eval $__result="'--define=DS_MODEL_FRAMESIZE=${model_width} --define=DS_MODEL_FILE=${model_file}'" -} - # Checks whether we run a patched version of bazel. # Patching is required to dump computeKey() parameters to .ckd files # See bazel.patch