Remove AOT

This commit is contained in:
Alexandre Lissy 2018-09-20 13:04:32 +02:00
parent 04b7f9684e
commit 3dad37eb05
20 changed files with 45 additions and 426 deletions

View File

@@ -125,12 +125,10 @@ def maybe_inspect_zip(models):
that were inside.
'''
if len(models) > 1:
if not(is_zip_file(models)):
return models
# With AOT, we may have just one file that is not a ZIP file
# so verify that we don't have a .zip extension
if not(is_zip_file(models)):
if len(models) > 1:
return models
if len(models) < 1:
@@ -376,23 +374,18 @@ def establish_ssh(target=None, auto_trust=False, allow_agent=True, look_keys=Tru
return ssh_conn
def run_benchmarks(dir, models, wav, alphabet, lm_binary=None, trie=None, iters=-1, extra_aot_model=None):
def run_benchmarks(dir, models, wav, alphabet, lm_binary=None, trie=None, iters=-1):
r'''
Core of the running of the benchmarks. We will run on all of models, against
the WAV file provided as wav, and the provided alphabet.
If extra_aot_model is supplied, add another pass with the AOT-built .so model.
'''
assert_valid_dir(dir)
inference_times = [ ]
if extra_aot_model:
models.append(extra_aot_model)
for model in models:
model_filename = '' if model is extra_aot_model else model
model_filename = model
current_model = {
'name': model,
@@ -457,8 +450,6 @@ def handle_args():
help='Local directory to copy stuff into. This will be mirrored to the remote system if needed (make sure to use a path that will work on both).')
parser.add_argument('--models', nargs='+', required=False,
help='List of files (protocol buffer) to work on. Might be a zip file.')
parser.add_argument('--so-model', required=False,
help='Perform one step using AOT-based .so model')
parser.add_argument('--wav', required=False,
help='WAV file to pass to native_client. Supply again in plotting mode to draw the real-time line.')
parser.add_argument('--alphabet', required=False,
@@ -483,19 +474,6 @@ def do_main():
if not cli_args.models or not cli_args.wav or not cli_args.alphabet:
raise AssertionError('Missing arguments (models, wav or alphabet)')
if cli_args.so_model:
'''
Verify we have a string that matches the format described in
reduce_filename above: NAME.aot.EPOCHS.XXX.YYY.so
- where XXX is, for example, a variation on the model size
- and where YYY is a constant related to the training dataset
'''
parts = cli_args.so_model.split('.')
assert len(parts) == 6
assert parts[1] == 'aot'
assert parts[-1] == 'so'
if cli_args.dir is not None and not os.path.isdir(cli_args.dir):
raise AssertionError('Nonexistent temp directory')
@@ -514,9 +492,9 @@ def do_main():
if cli_args.lm_binary and cli_args.trie:
dest_lm_binary = os.path.join(tempdir, os.path.basename(cli_args.lm_binary))
dest_trie = os.path.join(tempdir, os.path.basename(cli_args.trie))
inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, extra_aot_model=cli_args.so_model, wav=dest_wav, alphabet=dest_alphabet, lm_binary=dest_lm_binary, trie=dest_trie, iters=cli_args.iters)
inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, wav=dest_wav, alphabet=dest_alphabet, lm_binary=dest_lm_binary, trie=dest_trie, iters=cli_args.iters)
else:
inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, extra_aot_model=cli_args.so_model, wav=dest_wav, alphabet=dest_alphabet, iters=cli_args.iters)
inference_times = run_benchmarks(dir=tempdir, models=dest_sorted_models, wav=dest_wav, alphabet=dest_alphabet, iters=cli_args.iters)
if cli_args.csv:
produce_csv(input=inference_times, output=cli_args.csv)
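
The check removed above encoded the AOT .so naming convention (NAME.aot.EPOCHS.XXX.YYY.so). A minimal sketch of that validation, for reference (the helper name is hypothetical, not part of the codebase):

```
def looks_like_so_model(filename):
    # Hypothetical helper mirroring the removed asserts:
    # NAME.aot.EPOCHS.XXX.YYY.so, e.g. test.aot.e5.lstm494.ldc93s1.so
    parts = filename.split('.')
    return len(parts) == 6 and parts[1] == 'aot' and parts[-1] == 'so'

assert looks_like_so_model('test.aot.e5.lstm494.ldc93s1.so')
assert not looks_like_so_model('test.frozen.e75.lstm494.ldc93s1.pb')
```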

View File

@@ -35,9 +35,6 @@ def reduce_filename(f):
'''
f = os.path.basename(f).split('.')
if f[1] == 'aot':
return 'AOT:' + str(keep_only_digits(f[-3]))
else:
return keep_only_digits(f[-3])
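
With the AOT branch gone, reduce_filename keeps only the digits of the size component, which for the model names used in these tests is `f[-3]`. A sketch, assuming `keep_only_digits` simply strips non-digit characters:

```
import os

def reduce_filename(f):
    # 'test.frozen.e75.lstm494.ldc93s1.pb' -> parts[-3] == 'lstm494' -> '494'
    parts = os.path.basename(f).split('.')
    return ''.join(c for c in parts[-3] if c.isdigit())

print(reduce_filename('/tmp/test.frozen.e75.lstm494.ldc93s1.pb'))  # 494
```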
def ingest_csv(datasets=None, range=None):

View File

@@ -1,49 +1,7 @@
# Description: Deepspeech native client library.
load("@org_tensorflow//tensorflow:tensorflow.bzl",
"if_linux_x86_64", "tf_cc_shared_object", "if_cuda")
load("@org_tensorflow//tensorflow/compiler/aot:tfcompile.bzl",
"tf_library")
load(":deepspeech.bzl", "if_native_model")
config_setting(
name = "ds_native_model",
values = {
"define": "DS_NATIVE_MODEL=1"
}
)
tf_library(
name = "deepspeech_model_core",
cpp_class = "DeepSpeech::nativeModel",
# We don't need tests or benchmark binaries
gen_test=False, gen_benchmark=False,
# graph and config will be generated at build time thanks to the matching
# genrule.
graph = "tfcompile.model.pb",
config = "tfcompile.config.pbtxt",
# This depends on //tensorflow:rpi3 condition defined in mozilla/tensorflow
tfcompile_flags = select({
"//tensorflow:rpi3": str('--target_cpu="cortex-a53"'),
"//conditions:default": str('')
}),
)
genrule(
name = "tfcompile.config",
srcs = ["tfcompile.config.pbtxt.src"],
outs = ["tfcompile.config.pbtxt"],
cmd = "$(location :model_size.sh) $(SRCS) $(DS_MODEL_TIMESTEPS) $(DS_MODEL_FRAMESIZE) >$@",
tools = [":model_size.sh"]
)
genrule(
name = "tfcompile.model",
outs = ["tfcompile.model.pb"],
cmd = "cp $(DS_MODEL_FILE) $@"
)
"tf_cc_shared_object", "if_cuda")
genrule(
name = "ds_git_version",
@@ -68,17 +26,13 @@ tf_cc_shared_object(
"kiss_fft130/_kiss_fft_guts.h",
"kiss_fft130/tools/kiss_fftr.h",
"ds_version.h"] +
if_native_model(["deepspeech_model_core.h"]) +
glob(["kenlm/lm/*.cc", "kenlm/util/*.cc", "kenlm/util/double-conversion/*.cc",
"kenlm/lm/*.hh", "kenlm/util/*.hh", "kenlm/util/double-conversion/*.h"],
exclude = ["kenlm/*/*test.cc", "kenlm/*/*main.cc"]) +
glob(["boost_locale/**/*.hpp"]),
# -Wno-sign-compare to silence a lot of warnings from tensorflow itself,
# which makes it harder to see our own warnings
copts = ["-Wno-sign-compare", "-fvisibility=hidden"] + if_native_model([
"-DDS_MODEL_TIMESTEPS=$(DS_MODEL_TIMESTEPS)",
"-DDS_NATIVE_MODEL=1",
]),
copts = ["-Wno-sign-compare", "-fvisibility=hidden"],
deps = [
"//tensorflow/core:core_cpu",
"//tensorflow/core:direct_session",
@@ -108,20 +62,13 @@ tf_cc_shared_object(
#### Needed by production model produced without "--use_seq_length False"
#"//tensorflow/core/kernels:logging_ops", # Assert
#"//tensorflow/core/kernels:reverse_sequence_op", # ReverseSequence
] + if_native_model([
"//tensorflow/compiler/tf2xla:xla_compiled_cpu_function",
])+ if_cuda([
] + if_cuda([
"//tensorflow/core:core",
]),
includes = ["kenlm", "boost_locale", "c_speech_features", "kiss_fft130"],
defines = ["KENLM_MAX_ORDER=6"],
)
tf_cc_shared_object(
name = "libdeepspeech_model.so",
deps = [":deepspeech_model_core"]
)
tf_cc_shared_object(
name = "libctc_decoder_with_kenlm.so",
srcs = [

View File

@@ -92,32 +92,6 @@ cd ../DeepSpeech/native_client
make deepspeech
```
## Building with AOT model
First, please note that this is still experimental. The AOT model relies on TensorFlow's [AOT tfcompile](https://www.tensorflow.org/performance/xla/tfcompile) tooling. It takes a protocol buffer graph file as input and produces a .so library that can be called from C++ code.
To experiment, you will need to build TensorFlow from the [github.com/mozilla/tensorflow r1.6 branch](https://github.com/mozilla/tensorflow/tree/r1.6). Follow TensorFlow's documentation to configure your system.
When building, you will have to add some extra parameters and targets.
Bazel defines:
* `--define=DS_NATIVE_MODEL=1`: toggles AOT support.
* `--define=DS_MODEL_TIMESTEPS=x`: defines how many timesteps you want to handle. Relying on a precompiled model implies a fixed value for how much audio we feed in at once; timesteps defines that value, and an audio file longer than this is simply processed over several passes. This means there is a compromise between quality and the minimum audio segment you want to handle.
* `--define=DS_MODEL_FRAMESIZE=y`: defines your model's framesize, i.e. the second component of your model's input layer shape. It can be extracted with TensorFlow's `summarize_graph` tool (see the short check after this list).
* `--define=DS_MODEL_FILE=/path/to/graph.pb`: the model you want to use.
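As a quick sanity check of those values (a sketch only; `n_input` and `n_context` come from the comments in `tfcompile.config.pbtxt`):
```
# Framesize is the second component of the input layer shape:
# n_input + 2*n_input*n_context, with n_input=26 and n_context=9.
n_input, n_context = 26, 9
framesize = n_input + 2 * n_input * n_context
assert framesize == 494  # hence --define=DS_MODEL_FRAMESIZE=494

# With DS_MODEL_TIMESTEPS=64, longer audio is handled in fixed windows,
# e.g. the 146-frame LDC93S1.wav is processed as 64 + 64 + 18:
DS_MODEL_TIMESTEPS, n_frames = 64, 146
for ot in range(0, n_frames, DS_MODEL_TIMESTEPS):
    window = min(DS_MODEL_TIMESTEPS, n_frames - ot)  # one compiled-model run
```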
Bazel targets:
* `//native_client:libdeepspeech_model.so`: to produce `libdeepspeech_model.so`
In the end, the previous example becomes (no change for `libctc_decoder_with_kenlm.so`):
```
bazel build --config=monolithic -c opt --copt=-O3 --copt=-fvisibility=hidden --define=DS_NATIVE_MODEL=1 --define=DS_MODEL_TIMESTEPS=64 --define=DS_MODEL_FRAMESIZE=494 --define=DS_MODEL_FILE=/tmp/model.ldc93s1.pb //native_client:libdeepspeech_model.so //native_client:libdeepspeech.so //native_client:generate_trie
```
Later, when building either the `deepspeech` binaries or the bindings, you will have to add some extra variables to your `make` command line (assuming `TFDIR` points to your TensorFlow git clone):
```
EXTRA_LIBS="-ldeepspeech_model"
```
## Installing
After building, the library files and binary can optionally be installed to a system path for ease of development. This is also a required step for bindings generation.

View File

@@ -1,5 +0,0 @@
def if_native_model(a):
return select({
":ds_native_model": a,
"//conditions:default": []
})

View File

@@ -1,12 +1,3 @@
#ifdef DS_NATIVE_MODEL
#define EIGEN_USE_THREADS
#define EIGEN_USE_CUSTOM_THREAD_POOL
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "native_client/deepspeech_model_core.h" // generated
#endif
#include <algorithm>
#include <iostream>
#include <memory>
@@ -109,7 +100,6 @@ struct ModelState {
Alphabet* alphabet;
KenLMBeamScorer* scorer;
unsigned int beam_width;
bool run_aot;
unsigned int n_steps;
unsigned int mfcc_feats_per_timestep;
unsigned int n_context;
@@ -149,7 +139,6 @@ ModelState::ModelState()
, alphabet(nullptr)
, scorer(nullptr)
, beam_width(0)
, run_aot(false)
, n_steps(-1)
, mfcc_feats_per_timestep(-1)
, n_context(-1)
@@ -297,32 +286,6 @@ ModelState::infer(const float* aMfcc, unsigned int n_frames, vector<float>& logi
{
const size_t num_classes = alphabet->GetSize() + 1; // +1 for blank
if (run_aot) {
#ifdef DS_NATIVE_MODEL
Eigen::ThreadPool tp(2); // Size the thread pool as appropriate.
Eigen::ThreadPoolDevice device(&tp, tp.NumThreads());
nativeModel nm(nativeModel::AllocMode::RESULTS_PROFILES_AND_TEMPS_ONLY);
nm.set_thread_pool(&device);
for (int ot = 0; ot < n_frames; ot += DS_MODEL_TIMESTEPS) {
nm.set_arg0_data(&(aMfcc[ot * mfcc_feats_per_timestep]));
nm.Run();
// The CTCDecoder works with log-probs.
for (int t = 0; t < DS_MODEL_TIMESTEPS && (ot + t) < n_frames; ++t) {
for (int b = 0; b < BATCH_SIZE; ++b) {
for (int c = 0; c < num_classes; ++c) {
logits_output.push_back(nm.result0(t, b, c));
}
}
}
}
#else
std::cerr << "No support for native model built-in." << std::endl;
return;
#endif // DS_NATIVE_MODEL
} else {
Tensor input(DT_FLOAT, TensorShape({BATCH_SIZE, n_steps, 2*n_context+1, MFCC_FEATURES}));
auto input_mapped = input.flat<float>();
@@ -352,7 +315,6 @@ ModelState::infer(const float* aMfcc, unsigned int n_frames, vector<float>& logi
for (int t = 0; t < n_frames * BATCH_SIZE * num_classes; ++t) {
logits_output.push_back(logits_mapped(t));
}
}
}
char*
@@ -423,16 +385,14 @@ DS_CreateModel(const char* aModelPath,
model->ncontext = aNContext;
model->alphabet = new Alphabet(aAlphabetConfigPath);
model->beam_width = aBeamWidth;
model->run_aot = false;
*retval = nullptr;
DS_PrintVersions();
if (!aModelPath || strlen(aModelPath) < 1) {
std::cerr << "No model specified, will rely on built-in model." << std::endl;
model->run_aot = true;
return 0;
std::cerr << "No model specified, cannot continue." << std::endl;
return error::INVALID_ARGUMENT;
}
Status status;

View File

@@ -1,11 +0,0 @@
#!/bin/sh
# Fail if we don't have *three* arguments
# $1 => file to generate
# $2 => dimension to use for timesteps
# $3 => dimension to use for framesize
if [ $# -ne 3 ]; then
exit 1
fi;
sed -e "s|\$DS_MODEL_TIMESTEPS|$2|g" -e "s|\$DS_MODEL_FRAMESIZE|$3|g" < $1

View File

@@ -1,32 +0,0 @@
# Each feed is a positional input argument for the generated function. The order
# of each entry matches the order of each input argument. Here “x_hold” and “y_hold”
# refer to the names of placeholder nodes defined in the graph.
#
## From: input_tensor = tf.placeholder(tf.float32, [None, None, n_input + 2*n_input*n_context], name='input_node')
feed {
id { node_name: "input_node" }
shape {
dim { size: 1 } # Batch size of 1
dim { size: $DS_MODEL_TIMESTEPS } # aNFrames: 146 for data/ldc93s1/LDC93S1.wav
dim { size: $DS_MODEL_FRAMESIZE } # frameSize: n_input + 2*n_input*n_context] ; n_input=26, n_context=9
}
}
#feed {
# id { node_name: "input_lengths" }
# shape {
# dim { size: 1 }
# }
#}
# Each fetch is a positional output argument for the generated function. The order
# of each entry matches the order of each output argument. Here “x_y_prod”
# refers to the name of a matmul node defined in the graph.
#fetch {
# id { node_name: "output_node" }
#}
fetch {
id { node_name: "logits" }
}

View File

@@ -1,29 +0,0 @@
#!/bin/bash
set -xe
source $(dirname "$0")/../tc-tests-utils.sh
source ${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh
BAZEL_TARGETS="
//native_client:libdeepspeech.so
${BAZEL_AOT_TARGETS}"
BAZEL_ENV_FLAGS="TF_NEED_CUDA=0"
SYSTEM_TARGET=host
EXTRA_LOCAL_CFLAGS="${EXTRA_AOT_CFLAGS}"
EXTRA_LOCAL_LDFLAGS="${EXTRA_AOT_LDFLAGS}"
EXTRA_LOCAL_LIBS="${EXTRA_AOT_LIBS}"
do_get_model_parameters "${DEEPSPEECH_TEST_MODEL}" AOT_MODEL_PARAMS
BAZEL_BUILD_FLAGS="${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BAZEL_AOT_BUILD_FLAGS} ${AOT_MODEL_PARAMS}"
do_bazel_build
do_deepspeech_binary_build
do_deepspeech_python_build
do_deepspeech_nodejs_build

View File

@@ -16,17 +16,6 @@ BAZEL_ENV_FLAGS="TF_NEED_CUDA=0"
SYSTEM_TARGET=rpi3-armv8
SYSTEM_RASPBIAN=/tmp/multistrap-armbian64-stretch
if [ $1 = "--aot" ]; then
EXTRA_LOCAL_CFLAGS="${EXTRA_AOT_CFLAGS}"
EXTRA_LOCAL_LDFLAGS="${EXTRA_AOT_LDFLAGS}"
EXTRA_LOCAL_LIBS="${EXTRA_AOT_LIBS}"
do_get_model_parameters "${DEEPSPEECH_PROD_MODEL}" AOT_MODEL_PARAMS
BAZEL_TARGETS="${BAZEL_AOT_TARGETS} ${BAZEL_TARGETS}"
BAZEL_BUILD_FLAGS="${BAZEL_BUILD_FLAGS} ${BAZEL_AOT_BUILD_FLAGS} ${AOT_MODEL_PARAMS}"
fi;
maybe_install_xldd
do_bazel_build

View File

@@ -15,17 +15,6 @@ BAZEL_BUILD_FLAGS="${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS}"
BAZEL_ENV_FLAGS="TF_NEED_CUDA=0"
SYSTEM_TARGET=host
if [ $1 = "--aot" ]; then
EXTRA_LOCAL_CFLAGS="${EXTRA_AOT_CFLAGS}"
EXTRA_LOCAL_LDFLAGS="${EXTRA_AOT_LDFLAGS}"
EXTRA_LOCAL_LIBS="${EXTRA_AOT_LIBS}"
do_get_model_parameters "${DEEPSPEECH_PROD_MODEL}" AOT_MODEL_PARAMS
BAZEL_TARGETS="${BAZEL_AOT_TARGETS} ${BAZEL_TARGETS}"
BAZEL_BUILD_FLAGS="${BAZEL_BUILD_FLAGS} ${BAZEL_AOT_BUILD_FLAGS} ${AOT_MODEL_PARAMS}"
fi;
do_bazel_build
do_deepspeech_binary_build

View File

@@ -16,17 +16,6 @@ BAZEL_ENV_FLAGS="TF_NEED_CUDA=0"
SYSTEM_TARGET=rpi3
SYSTEM_RASPBIAN=/tmp/multistrap-raspbian-stretch
if [ $1 = "--aot" ]; then
EXTRA_LOCAL_CFLAGS="${EXTRA_AOT_CFLAGS}"
EXTRA_LOCAL_LDFLAGS="${EXTRA_AOT_LDFLAGS}"
EXTRA_LOCAL_LIBS="${EXTRA_AOT_LIBS}"
do_get_model_parameters "${DEEPSPEECH_PROD_MODEL}" AOT_MODEL_PARAMS
BAZEL_TARGETS="${BAZEL_AOT_TARGETS} ${BAZEL_TARGETS}"
BAZEL_BUILD_FLAGS="${BAZEL_BUILD_FLAGS} ${BAZEL_AOT_BUILD_FLAGS} ${AOT_MODEL_PARAMS}"
fi;
maybe_install_xldd
do_bazel_build

View File

@@ -32,14 +32,12 @@ then:
env:
$let:
training: { $eval: as_slugid("test-training_upstream-linux-amd64-py27mu-opt") }
linux_arm64_aot_prod: { $eval: as_slugid("linux-arm64-cpu-aot_prod-opt") }
linux_arm64_build: { $eval: as_slugid("linux-arm64-cpu-opt") }
node_package: { $eval: as_slugid("node-package") }
in:
CONVERT_GRAPHDEF_MEMMAPPED: ${build.convert_graphdef}
DEEPSPEECH_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${linux_arm64_build}/artifacts/public
DEEPSPEECH_NODEJS: https://queue.taskcluster.net/v1/task/${node_package}/artifacts/public
DEEPSPEECH_AOT_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${linux_arm64_aot_prod}/artifacts/public
DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod/output_graph.pb
DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod/output_graph.pbmm

View File

@@ -32,7 +32,6 @@ then:
env:
$let:
training: { $eval: as_slugid("test-training_upstream-linux-amd64-py27mu-opt") }
linux_amd64_aot_test: { $eval: as_slugid("linux-amd64-cpu-aot_test-opt") }
linux_amd64_build: { $eval: as_slugid("linux-amd64-cpu-opt") }
linux_amd64_ctc: { $eval: as_slugid("linux-amd64-ctc-opt") }
node_package: { $eval: as_slugid("node-package") }
@@ -40,7 +39,6 @@ then:
CONVERT_GRAPHDEF_MEMMAPPED: ${build.convert_graphdef}
DEEPSPEECH_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${linux_amd64_build}/artifacts/public
DEEPSPEECH_NODEJS: https://queue.taskcluster.net/v1/task/${node_package}/artifacts/public
DEEPSPEECH_AOT_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${linux_amd64_aot_test}/artifacts/public
DEEPSPEECH_LIBCTC: https://queue.taskcluster.net/v1/task/${linux_amd64_ctc}/artifacts/public/decoder.tar.xz
DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod/output_graph.pb

View File

@@ -32,14 +32,12 @@ then:
env:
$let:
training: { $eval: as_slugid("test-training_upstream-linux-amd64-py27mu-opt") }
linux_rpi3_aot_prod: { $eval: as_slugid("linux-rpi3-cpu-aot_prod-opt") }
linux_rpi3_build: { $eval: as_slugid("linux-rpi3-cpu-opt") }
node_package: { $eval: as_slugid("node-package") }
in:
CONVERT_GRAPHDEF_MEMMAPPED: ${build.convert_graphdef}
DEEPSPEECH_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${linux_rpi3_build}/artifacts/public
DEEPSPEECH_NODEJS: https://queue.taskcluster.net/v1/task/${node_package}/artifacts/public
DEEPSPEECH_AOT_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${linux_rpi3_aot_prod}/artifacts/public
DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod/output_graph.pb
DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod/output_graph.pbmm

View File

@@ -8,7 +8,6 @@ exec_benchmark()
{
model_file="$1"
run_postfix=$2
aot_model=$3
mkdir -p /tmp/bench-ds/ || true
mkdir -p /tmp/bench-ds-nolm/ || true
@@ -18,14 +17,9 @@ exec_benchmark()
png=${TASKCLUSTER_ARTIFACTS}/benchmark-${run_postfix}.png
svg=${TASKCLUSTER_ARTIFACTS}/benchmark-${run_postfix}.svg
AOT_MODEL_ARGS=""
if [ ! -z "${aot_model}" ]; then
AOT_MODEL_ARGS="--so-model ${aot_model}"
fi;
python ${DS_ROOT_TASK}/DeepSpeech/ds/bin/benchmark_nc.py \
--dir /tmp/bench-ds/ \
--models ${model_file} ${AOT_MODEL_ARGS} \
--models ${model_file} \
--wav /tmp/LDC93S1.wav \
--alphabet /tmp/alphabet.txt \
--lm_binary /tmp/lm.binary \
@@ -34,7 +28,7 @@ exec_benchmark()
python ${DS_ROOT_TASK}/DeepSpeech/ds/bin/benchmark_nc.py \
--dir /tmp/bench-ds-nolm/ \
--models ${model_file} ${AOT_MODEL_ARGS} \
--models ${model_file} \
--wav /tmp/LDC93S1.wav \
--alphabet /tmp/alphabet.txt \
--csv ${csv_nolm}
@@ -91,12 +85,7 @@ done;
# Let's prepare another model for single-model codepath
mv /tmp/${model_name} /tmp/test.frozen.e75.lstm494.ldc93s1.pb
# We don't need download_material here; the benchmark code should take care of it.
if [ "$1" = "--aot" ]; then
export TASKCLUSTER_SCHEME=${DEEPSPEECH_AOT_ARTIFACTS_ROOT}/native_client.tar.xz
else
export TASKCLUSTER_SCHEME=${DEEPSPEECH_ARTIFACTS_ROOT}/native_client.tar.xz
fi;
export TASKCLUSTER_SCHEME=${DEEPSPEECH_ARTIFACTS_ROOT}/native_client.tar.xz
install_pyenv "${PYENV_ROOT}"
install_pyenv_virtualenv "$(pyenv root)/plugins/pyenv-virtualenv"
@@ -108,15 +97,9 @@ source ${PYENV_ROOT}/versions/${pyver}/envs/${PYENV_NAME}/bin/activate
pip install -r ${DS_ROOT_TASK}/DeepSpeech/ds/requirements.txt | cat
exec_benchmark "/tmp/test.frozen.e75.lstm494.ldc93s1.pb" "single-model_noAOT"
exec_benchmark "/tmp/test.frozen.e75.lstm100-900.ldc93s1.zip" "zipfile-model_noAOT"
exec_benchmark "${model_list}" "multi-model_noAOT"
if [ "$1" = "--aot" ]; then
exec_benchmark "/tmp/test.frozen.e75.lstm494.ldc93s1.pb" "single-model_AOT" "test.aot.e5.lstm494.ldc93s1.so"
exec_benchmark "/tmp/test.frozen.e75.lstm100-900.ldc93s1.zip" "zipfile-model_AOT" "test.aot.e5.lstm494.ldc93s1.so"
exec_benchmark "${model_list}" "multi-model_AOT" "test.aot.e5.lstm494.ldc93s1.so"
fi;
exec_benchmark "/tmp/test.frozen.e75.lstm494.ldc93s1.pb" "single-model"
exec_benchmark "/tmp/test.frozen.e75.lstm100-900.ldc93s1.zip" "zipfile-model"
exec_benchmark "${model_list}" "multi-model"
deactivate
pyenv uninstall --force ${PYENV_NAME}

View File

@@ -4,9 +4,7 @@ set -xe
source $(dirname "$0")/tc-tests-utils.sh
aot_model=$1
download_material "${TASKCLUSTER_TMP_DIR}/ds" "${aot_model}"
download_material "${TASKCLUSTER_TMP_DIR}/ds"
export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH

View File

@@ -5,7 +5,6 @@ set -xe
source $(dirname "$0")/tc-tests-utils.sh
nodever=$1
aot_model=$2
if [ -z "${nodever}" ]; then
echo "No node version given, aborting."
@@ -21,10 +20,6 @@ NODE_ROOT="${DS_ROOT_TASK}/ds-test/"
export NODE_PATH="${NODE_ROOT}/node_modules/"
export PATH="${NODE_PATH}/.bin/:$PATH"
if [ "${aot_model}" = "--aot" ]; then
npm install --prefix ${NODE_ROOT} ${DEEPSPEECH_AOT_ARTIFACTS_ROOT}/deepspeech-${DS_VERSION}.tgz
else
npm install --prefix ${NODE_ROOT} ${DEEPSPEECH_NODEJS}/deepspeech-${DS_VERSION}.tgz
fi
npm install --prefix ${NODE_ROOT} ${DEEPSPEECH_NODEJS}/deepspeech-${DS_VERSION}.tgz
run_all_inference_tests

View File

@@ -5,7 +5,6 @@ set -xe
source $(dirname "$0")/tc-tests-utils.sh
pyver_full=$1
aot_model=$2
if [ -z "${pyver_full}" ]; then
echo "No python version given, aborting."
@@ -47,11 +46,7 @@ platform=$(python -c 'import sys; import platform; plat = platform.system().lowe
whl_ds_version="$(python -c 'from pkg_resources import parse_version; print(parse_version("'${DS_VERSION}'"))')"
deepspeech_pkg="deepspeech-${whl_ds_version}-cp${pyver_pkg}-cp${pyver_pkg}${py_unicode_type}-${platform}.whl"
if [ "${aot_model}" = "--aot" ]; then
deepspeech_pkg_url=${DEEPSPEECH_AOT_ARTIFACTS_ROOT}/${deepspeech_pkg}
else
deepspeech_pkg_url=${DEEPSPEECH_ARTIFACTS_ROOT}/${deepspeech_pkg}
fi
deepspeech_pkg_url=${DEEPSPEECH_ARTIFACTS_ROOT}/${deepspeech_pkg}
LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH pip install --verbose --only-binary :all: ${PY37_SOURCE_PACKAGE} --upgrade ${deepspeech_pkg_url} | cat
run_all_inference_tests

View File

@@ -28,15 +28,6 @@ export DS_DSDIR=${DS_ROOT_TASK}/DeepSpeech/ds
export BAZEL_CTC_TARGETS="//native_client:libctc_decoder_with_kenlm.so"
export EXTRA_AOT_CFLAGS=""
export EXTRA_AOT_LDFLAGS=""
export EXTRA_AOT_LIBS="-ldeepspeech_model"
export BAZEL_AOT_BUILD_FLAGS="--define=DS_NATIVE_MODEL=1 --define=DS_MODEL_TIMESTEPS=64"
export BAZEL_AOT_TARGETS="
//native_client:libdeepspeech_model.so
"
export DS_VERSION="$(cat ${DS_DSDIR}/VERSION)"
model_source="${DEEPSPEECH_TEST_MODEL}"
@@ -160,37 +151,6 @@ assert_correct_ldc93s1_prodmodel()
assert_correct_inference "$1" "she had tired or so and greasy wash war or year"
}
assert_correct_ldc93s1_somodel()
{
somodel_nolm=$(strip "$1")
somodel_withlm=$(strip "$2")
# We want to be able to return a non-zero value from the function without
# failing the whole execution
set +e
assert_correct_ldc93s1 "${somodel_nolm}"
so_nolm=$?
assert_correct_ldc93s1 "${somodel_withlm}"
so_lm=$?
set -e
# We accept that with no LM there may be errors, but we do not accept that
# for LM. For now.
if [ ${so_lm} -eq 1 ] && [ ${so_nolm} -eq 1 -o ${so_nolm} -eq 0 ];
then
exit 1
elif [ ${so_lm} -eq 0 ] && [ ${so_nolm} -eq 1 -o ${so_nolm} -eq 0 ];
then
exit 0
else
echo "Unexpected status"
exit 2
fi
}
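
The removed check above boils down to a simple rule: the LM-assisted decode must be correct, while the no-LM decode is allowed to be wrong (the `-o` conditions on the no-LM status are tautological). A sketch of the same decision in Python:

```
def somodel_acceptable(nolm_ok, withlm_ok):
    # With-LM errors are fatal; no-LM errors are tolerated, "for now".
    return bool(withlm_ok)
```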
assert_correct_warning_upsampling()
{
assert_shows_something "$1" "erratic speech recognition"
@@ -232,23 +192,6 @@ run_all_inference_tests()
phrase_pbmodel_withlm_mono_8k=$(deepspeech --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}"
if [ "${aot_model}" = "--aot" ]; then
phrase_somodel_nolm=$(deepspeech --model "" --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav)
phrase_somodel_withlm=$(deepspeech --model "" --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav)
assert_correct_ldc93s1_somodel "${phrase_somodel_nolm}" "${phrase_somodel_withlm}"
phrase_somodel_nolm_stereo_44k=$(deepspeech --model "" --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav)
phrase_somodel_withlm_stereo_44k=$(deepspeech --model "" --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_2_44100.wav)
assert_correct_ldc93s1_somodel "${phrase_somodel_nolm_stereo_44k}" "${phrase_somodel_withlm_stereo_44k}"
phrase_somodel_nolm_mono_8k=$(deepspeech --model "" --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
phrase_somodel_withlm_stereo_44k=$(deepspeech --model "" --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt --lm ${TASKCLUSTER_TMP_DIR}/lm.binary --trie ${TASKCLUSTER_TMP_DIR}/trie --audio ${TASKCLUSTER_TMP_DIR}/LDC93S1_pcms16le_1_8000.wav 2>&1 1>/dev/null)
assert_correct_warning_upsampling "${phrase_somodel_nolm_mono_8k}" "${phrase_somodel_withlm_mono_8k}"
fi;
}
run_prod_inference_tests()
@@ -297,11 +240,6 @@ download_native_client_files()
generic_download_tarxz "$1" "${DEEPSPEECH_ARTIFACTS_ROOT}/native_client.tar.xz"
}
download_aot_model_files()
{
generic_download_tarxz "$1" "${DEEPSPEECH_AOT_ARTIFACTS_ROOT}/native_client.tar.xz"
}
download_ctc_kenlm()
{
generic_download_tarxz "$1" "${DEEPSPEECH_LIBCTC}"
@@ -325,14 +263,8 @@ download_for_frozen()
download_material()
{
target_dir=$1
maybe_aot=$2
if [ "${maybe_aot}" = "--aot" ]; then
download_aot_model_files "${target_dir}"
else
download_native_client_files "${target_dir}"
fi
download_data
ls -hal ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} ${TASKCLUSTER_TMP_DIR}/LDC93S1*.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt
@@ -377,30 +309,6 @@ maybe_install_xldd()
fi
}
do_get_model_parameters()
{
local __result=$2
model_url=$1
model_file=/tmp/$(basename "${model_url}")
if [ -z "${model_url}" ]; then
echo "Empty URL for model"
exit 1
fi;
wget "${model_url}" -O "${model_file}"
wget -P "/tmp/" "${SUMMARIZE_GRAPH_BINARY}" && chmod +x /tmp/summarize_graph
if [ ! -f "${model_file}" ]; then
echo "No such model: ${model_file}"
exit 1
fi;
model_width=$(/tmp/summarize_graph --in_graph="${model_file}" | grep "inputs" | grep -Eo "shape=\[\?,\?,[[:digit:]]+" | cut -d',' -f3)
eval $__result="'--define=DS_MODEL_FRAMESIZE=${model_width} --define=DS_MODEL_FILE=${model_file}'"
}
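
The wget/grep/cut pipeline above extracts the third dimension of the `shape=[?,?,N]` that `summarize_graph` reports for the model input. An equivalent sketch in Python (the sample line is illustrative, not verbatim tool output):

```
import re

def model_framesize(summary):
    # Equivalent of: grep "inputs" | grep -Eo "shape=\[\?,\?,[[:digit:]]+" | cut -d',' -f3
    match = re.search(r'shape=\[\?,\?,(\d+)', summary)
    return int(match.group(1)) if match else None

print(model_framesize('1 possible inputs: (name=input_node, shape=[?,?,494])'))  # 494
```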
# Checks whether we run a patched version of bazel.
# Patching is required to dump computeKey() parameters to .ckd files
# See bazel.patch