From 2fd98de56f8b73e0402fb8c71296c3fa629592ed Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Wed, 25 Aug 2021 13:08:23 +0200 Subject: [PATCH] Split train.py into separate modules Currently train.py is overloaded with many independent features. Understanding the code and what will be the result of a training call requires untangling the entire script. It's also an error prone UX. This is a first step at separating independent parts into their own scripts. --- .github/actions/numpy_vers/action.yml | 10 +- bin/run-ci-ldc93s1_new.sh | 3 +- bin/run-ci-ldc93s1_new_bytes.sh | 3 +- bin/run-ldc93s1.py | 4 +- setup.py | 1 + .../coqui_stt_training/deepspeech_model.py | 403 +++++++++++ training/coqui_stt_training/evaluate.py | 29 +- training/coqui_stt_training/export.py | 216 ++++++ training/coqui_stt_training/train.py | 672 +----------------- .../training_graph_inference.py | 87 +++ training/coqui_stt_training/util/config.py | 2 +- training/coqui_stt_training/util/io.py | 7 + 12 files changed, 780 insertions(+), 657 deletions(-) create mode 100644 training/coqui_stt_training/deepspeech_model.py mode change 100755 => 100644 training/coqui_stt_training/evaluate.py create mode 100644 training/coqui_stt_training/export.py create mode 100644 training/coqui_stt_training/training_graph_inference.py diff --git a/.github/actions/numpy_vers/action.yml b/.github/actions/numpy_vers/action.yml index d93dfff7..41c7ebdd 100644 --- a/.github/actions/numpy_vers/action.yml +++ b/.github/actions/numpy_vers/action.yml @@ -28,15 +28,15 @@ runs: case "${{ inputs.pyver }}" in 3.7*) NUMPY_BUILD_VERSION="==1.14.5" - NUMPY_DEP_VERSION=">=1.14.5" + NUMPY_DEP_VERSION=">=1.14.5,<=1.19.4" ;; 3.8*) NUMPY_BUILD_VERSION="==1.17.3" - NUMPY_DEP_VERSION=">=1.17.3" + NUMPY_DEP_VERSION=">=1.17.3,<=1.19.4" ;; 3.9*) NUMPY_BUILD_VERSION="==1.19.4" - NUMPY_DEP_VERSION=">=1.19.4" + NUMPY_DEP_VERSION=">=1.19.4,<=1.19.4" ;; esac ;; @@ -57,7 +57,7 @@ runs: ;; 3.9*) NUMPY_BUILD_VERSION="==1.19.4" - NUMPY_DEP_VERSION=">=1.19.4" + NUMPY_DEP_VERSION=">=1.19.4,<=1.19.4" ;; esac ;; @@ -82,7 +82,7 @@ runs: ;; 3.9*) NUMPY_BUILD_VERSION="==1.19.4" - NUMPY_DEP_VERSION=">=1.19.4" + NUMPY_DEP_VERSION=">=1.19.4,<=1.19.4" ;; esac ;; diff --git a/bin/run-ci-ldc93s1_new.sh b/bin/run-ci-ldc93s1_new.sh index a0261257..6c7ac939 100755 --- a/bin/run-ci-ldc93s1_new.sh +++ b/bin/run-ci-ldc93s1_new.sh @@ -27,4 +27,5 @@ python -u train.py --alphabet_config_path "data/alphabet.txt" \ --max_to_keep 1 --checkpoint_dir '/tmp/ckpt' \ --learning_rate 0.001 --dropout_rate 0.05 --export_dir '/tmp/train' \ --scorer_path 'data/smoke_test/pruned_lm.scorer' \ - --audio_sample_rate ${audio_sample_rate} + --audio_sample_rate ${audio_sample_rate} \ + --export_tflite false diff --git a/bin/run-ci-ldc93s1_new_bytes.sh b/bin/run-ci-ldc93s1_new_bytes.sh index 5dec1fed..d08c729e 100755 --- a/bin/run-ci-ldc93s1_new_bytes.sh +++ b/bin/run-ci-ldc93s1_new_bytes.sh @@ -27,4 +27,5 @@ python -u train.py --show_progressbar false --early_stop false \ --learning_rate 0.001 --dropout_rate 0.05 --export_dir '/tmp/train_bytes' \ --scorer_path 'data/smoke_test/pruned_lm.bytes.scorer' \ --audio_sample_rate ${audio_sample_rate} \ - --bytes_output_mode true + --bytes_output_mode true \ + --export_tflite false diff --git a/bin/run-ldc93s1.py b/bin/run-ldc93s1.py index 5a2746d8..b25cc998 100755 --- a/bin/run-ldc93s1.py +++ b/bin/run-ldc93s1.py @@ -2,7 +2,8 @@ import os from import_ldc93s1 import _download_and_preprocess_data as download_ldc from coqui_stt_training.util.config import 
initialize_globals_from_args -from coqui_stt_training.train import train, test +from coqui_stt_training.train import train +from coqui_stt_training.evaluate import test # only one GPU for only one training sample os.environ["CUDA_VISIBLE_DEVICES"] = "0" @@ -21,5 +22,4 @@ initialize_globals_from_args( ) train() - test() diff --git a/setup.py b/setup.py index 84b4364a..93cdf094 100644 --- a/setup.py +++ b/setup.py @@ -18,6 +18,7 @@ def main(): "coqpit", "numpy", "optuna", + "numba <= 0.53.1", "opuslib == 2.0.0", "pandas", "progressbar2", diff --git a/training/coqui_stt_training/deepspeech_model.py b/training/coqui_stt_training/deepspeech_model.py new file mode 100644 index 00000000..c0579f63 --- /dev/null +++ b/training/coqui_stt_training/deepspeech_model.py @@ -0,0 +1,403 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import os +import sys + +LOG_LEVEL_INDEX = sys.argv.index("--log_level") + 1 if "--log_level" in sys.argv else 0 +DESIRED_LOG_LEVEL = ( + sys.argv[LOG_LEVEL_INDEX] if 0 < LOG_LEVEL_INDEX < len(sys.argv) else "3" +) +os.environ["TF_CPP_MIN_LOG_LEVEL"] = DESIRED_LOG_LEVEL + +import numpy as np +import tensorflow as tf +import tensorflow.compat.v1 as tfv1 + +tfv1.logging.set_verbosity( + { + "0": tfv1.logging.DEBUG, + "1": tfv1.logging.INFO, + "2": tfv1.logging.WARN, + "3": tfv1.logging.ERROR, + }.get(DESIRED_LOG_LEVEL) +) + +from .util.config import Config +from .util.feeding import audio_to_features + + +def variable_on_cpu(name, shape, initializer): + r""" + Next we concern ourselves with graph creation. + However, before we do so we must introduce a utility function ``variable_on_cpu()`` + used to create a variable in CPU memory. + """ + # Use the /cpu:0 device for scoped operations + with tf.device(Config.cpu_device): + # Create or get apropos variable + var = tfv1.get_variable(name=name, shape=shape, initializer=initializer) + return var + + +def create_overlapping_windows(batch_x): + batch_size = tf.shape(input=batch_x)[0] + window_width = 2 * Config.n_context + 1 + num_channels = Config.n_input + + # Create a constant convolution filter using an identity matrix, so that the + # convolution returns patches of the input tensor as is, and we can create + # overlapping windows over the MFCCs. 
+ eye_filter = tf.constant( + np.eye(window_width * num_channels).reshape( + window_width, num_channels, window_width * num_channels + ), + tf.float32, + ) # pylint: disable=bad-continuation + + # Create overlapping windows + batch_x = tf.nn.conv1d(input=batch_x, filters=eye_filter, stride=1, padding="SAME") + + # Remove dummy depth dimension and reshape into [batch_size, n_windows, window_width, n_input] + batch_x = tf.reshape(batch_x, [batch_size, -1, window_width, num_channels]) + + return batch_x + + +def dense(name, x, units, dropout_rate=None, relu=True, layer_norm=False): + with tfv1.variable_scope(name): + bias = variable_on_cpu("bias", [units], tfv1.zeros_initializer()) + weights = variable_on_cpu( + "weights", + [x.shape[-1], units], + tfv1.keras.initializers.VarianceScaling( + scale=1.0, mode="fan_avg", distribution="uniform" + ), + ) + + output = tf.nn.bias_add(tf.matmul(x, weights), bias) + + if relu: + output = tf.minimum(tf.nn.relu(output), Config.relu_clip) + + if layer_norm: + with tfv1.variable_scope(name): + output = tf.contrib.layers.layer_norm(output) + + if dropout_rate is not None: + output = tf.nn.dropout(output, rate=dropout_rate) + + return output + + +def rnn_impl_lstmblockfusedcell(x, seq_length, previous_state, reuse): + with tfv1.variable_scope("cudnn_lstm/rnn/multi_rnn_cell/cell_0"): + fw_cell = tf.contrib.rnn.LSTMBlockFusedCell( + Config.n_cell_dim, + forget_bias=0, + reuse=reuse, + name="cudnn_compatible_lstm_cell", + ) + + output, output_state = fw_cell( + inputs=x, + dtype=tf.float32, + sequence_length=seq_length, + initial_state=previous_state, + ) + + return output, output_state + + +def rnn_impl_cudnn_rnn(x, seq_length, previous_state, _): + assert ( + previous_state is None + ) # 'Passing previous state not supported with CuDNN backend' + + # Hack: CudnnLSTM works similarly to Keras layers in that when you instantiate + # the object it creates the variables, and then you just call it several times + # to enable variable re-use. Because all of our code is structure in an old + # school TensorFlow structure where you can just call tf.get_variable again with + # reuse=True to reuse variables, we can't easily make use of the object oriented + # way CudnnLSTM is implemented, so we save a singleton instance in the function, + # emulating a static function variable. 
+ if not rnn_impl_cudnn_rnn.cell: + # Forward direction cell: + fw_cell = tf.contrib.cudnn_rnn.CudnnLSTM( + num_layers=1, + num_units=Config.n_cell_dim, + input_mode="linear_input", + direction="unidirectional", + dtype=tf.float32, + ) + rnn_impl_cudnn_rnn.cell = fw_cell + + output, output_state = rnn_impl_cudnn_rnn.cell( + inputs=x, sequence_lengths=seq_length + ) + + return output, output_state + + +rnn_impl_cudnn_rnn.cell = None + + +def rnn_impl_static_rnn(x, seq_length, previous_state, reuse): + with tfv1.variable_scope("cudnn_lstm/rnn/multi_rnn_cell"): + # Forward direction cell: + fw_cell = tfv1.nn.rnn_cell.LSTMCell( + Config.n_cell_dim, + forget_bias=0, + reuse=reuse, + name="cudnn_compatible_lstm_cell", + ) + + # Split rank N tensor into list of rank N-1 tensors + x = [x[l] for l in range(x.shape[0])] + + output, output_state = tfv1.nn.static_rnn( + cell=fw_cell, + inputs=x, + sequence_length=seq_length, + initial_state=previous_state, + dtype=tf.float32, + scope="cell_0", + ) + + output = tf.concat(output, 0) + + return output, output_state + + +def create_model( + batch_x, + seq_length, + dropout, + reuse=False, + batch_size=None, + previous_state=None, + overlap=True, + rnn_impl=rnn_impl_lstmblockfusedcell, +): + layers = {} + + # Input shape: [batch_size, n_steps, n_input + 2*n_input*n_context] + if not batch_size: + batch_size = tf.shape(input=batch_x)[0] + + # Create overlapping feature windows if needed + if overlap: + batch_x = create_overlapping_windows(batch_x) + + # Reshaping `batch_x` to a tensor with shape `[n_steps*batch_size, n_input + 2*n_input*n_context]`. + # This is done to prepare the batch for input into the first layer which expects a tensor of rank `2`. + + # Permute n_steps and batch_size + batch_x = tf.transpose(a=batch_x, perm=[1, 0, 2, 3]) + # Reshape to prepare input for first layer + batch_x = tf.reshape( + batch_x, [-1, Config.n_input + 2 * Config.n_input * Config.n_context] + ) # (n_steps*batch_size, n_input + 2*n_input*n_context) + layers["input_reshaped"] = batch_x + + # The next three blocks will pass `batch_x` through three hidden layers with + # clipped RELU activation and dropout. + layers["layer_1"] = layer_1 = dense( + "layer_1", + batch_x, + Config.n_hidden_1, + dropout_rate=dropout[0], + layer_norm=Config.layer_norm, + ) + layers["layer_2"] = layer_2 = dense( + "layer_2", + layer_1, + Config.n_hidden_2, + dropout_rate=dropout[1], + layer_norm=Config.layer_norm, + ) + layers["layer_3"] = layer_3 = dense( + "layer_3", + layer_2, + Config.n_hidden_3, + dropout_rate=dropout[2], + layer_norm=Config.layer_norm, + ) + + # `layer_3` is now reshaped into `[n_steps, batch_size, 2*n_cell_dim]`, + # as the LSTM RNN expects its input to be of shape `[max_time, batch_size, input_size]`. 
+ layer_3 = tf.reshape(layer_3, [-1, batch_size, Config.n_hidden_3]) + + # Run through parametrized RNN implementation, as we use different RNNs + # for training and inference + output, output_state = rnn_impl(layer_3, seq_length, previous_state, reuse) + + # Reshape output from a tensor of shape [n_steps, batch_size, n_cell_dim] + # to a tensor of shape [n_steps*batch_size, n_cell_dim] + output = tf.reshape(output, [-1, Config.n_cell_dim]) + layers["rnn_output"] = output + layers["rnn_output_state"] = output_state + + # Now we feed `output` to the fifth hidden layer with clipped RELU activation + layers["layer_5"] = layer_5 = dense( + "layer_5", + output, + Config.n_hidden_5, + dropout_rate=dropout[5], + layer_norm=Config.layer_norm, + ) + + # Now we apply a final linear layer creating `n_classes` dimensional vectors, the logits. + layers["layer_6"] = layer_6 = dense( + "layer_6", layer_5, Config.n_hidden_6, relu=False + ) + + # Finally we reshape layer_6 from a tensor of shape [n_steps*batch_size, n_hidden_6] + # to the slightly more useful shape [n_steps, batch_size, n_hidden_6]. + # Note, that this differs from the input in that it is time-major. + layer_6 = tf.reshape( + layer_6, [-1, batch_size, Config.n_hidden_6], name="raw_logits" + ) + layers["raw_logits"] = layer_6 + + # Output shape: [n_steps, batch_size, n_hidden_6] + return layer_6, layers + + +def create_inference_graph(batch_size=1, n_steps=16, tflite=False): + batch_size = batch_size if batch_size > 0 else None + + # Create feature computation graph + + # native_client: this node's name and shape are part of the API boundary + # with the native client, if you change them you should sync changes with + # the C++ code. + input_samples = tfv1.placeholder( + tf.float32, [Config.audio_window_samples], "input_samples" + ) + samples = tf.expand_dims(input_samples, -1) + mfccs, _ = audio_to_features(samples, Config.audio_sample_rate) + # native_client: this node's name and shape are part of the API boundary + # with the native client, if you change them you should sync changes with + # the C++ code. + mfccs = tf.identity(mfccs, name="mfccs") + + # Input tensor will be of shape [batch_size, n_steps, 2*n_context+1, n_input] + # This shape is read by the native_client in STT_CreateModel to know the + # value of n_steps, n_context and n_input. Make sure you update the code + # there if this shape is changed. + # + # native_client: this node's name and shape are part of the API boundary + # with the native client, if you change them you should sync changes with + # the C++ code. + input_tensor = tfv1.placeholder( + tf.float32, + [ + batch_size, + n_steps if n_steps > 0 else None, + 2 * Config.n_context + 1, + Config.n_input, + ], + name="input_node", + ) + # native_client: this node's name and shape are part of the API boundary + # with the native client, if you change them you should sync changes with + # the C++ code. + seq_length = tfv1.placeholder(tf.int32, [batch_size], name="input_lengths") + + if batch_size <= 0: + # no state management since n_step is expected to be dynamic too (see below) + previous_state = None + else: + # native_client: this node's name and shape are part of the API boundary + # with the native client, if you change them you should sync changes with + # the C++ code. 
+ previous_state_c = tfv1.placeholder( + tf.float32, [batch_size, Config.n_cell_dim], name="previous_state_c" + ) + # native_client: this node's name and shape are part of the API boundary + # with the native client, if you change them you should sync changes with + # the C++ code. + previous_state_h = tfv1.placeholder( + tf.float32, [batch_size, Config.n_cell_dim], name="previous_state_h" + ) + + previous_state = tf.nn.rnn_cell.LSTMStateTuple( + previous_state_c, previous_state_h + ) + + # One rate per layer + no_dropout = [None] * 6 + + if tflite: + rnn_impl = rnn_impl_static_rnn + else: + rnn_impl = rnn_impl_lstmblockfusedcell + + logits, layers = create_model( + batch_x=input_tensor, + batch_size=batch_size, + seq_length=seq_length if not Config.export_tflite else None, + dropout=no_dropout, + previous_state=previous_state, + overlap=False, + rnn_impl=rnn_impl, + ) + + # TF Lite runtime will check that input dimensions are 1, 2 or 4 + # by default we get 3, the middle one being batch_size which is forced to + # one on inference graph, so remove that dimension + # + # native_client: this node's name and shape are part of the API boundary + # with the native client, if you change them you should sync changes with + # the C++ code. + if tflite: + logits = tf.squeeze(logits, [1]) + + # Apply softmax for CTC decoder + probs = tf.nn.softmax(logits, name="logits") + + if batch_size <= 0: + if tflite: + raise NotImplementedError( + "dynamic batch_size does not support tflite nor streaming" + ) + if n_steps > 0: + raise NotImplementedError( + "dynamic batch_size expect n_steps to be dynamic too" + ) + return ( + { + "input": input_tensor, + "input_lengths": seq_length, + }, + { + "outputs": probs, + }, + layers, + ) + + new_state_c, new_state_h = layers["rnn_output_state"] + new_state_c = tf.identity(new_state_c, name="new_state_c") + new_state_h = tf.identity(new_state_h, name="new_state_h") + + inputs = { + "input": input_tensor, + "previous_state_c": previous_state_c, + "previous_state_h": previous_state_h, + "input_samples": input_samples, + } + + if not Config.export_tflite: + inputs["input_lengths"] = seq_length + + outputs = { + "outputs": probs, + "new_state_c": new_state_c, + "new_state_h": new_state_h, + "mfccs": mfccs, + # Expose internal layers for downstream applications + "layer_3": layers["layer_3"], + "layer_5": layers["layer_5"], + } + + return inputs, outputs, layers diff --git a/training/coqui_stt_training/evaluate.py b/training/coqui_stt_training/evaluate.py old mode 100755 new mode 100644 index 4425582b..6c4f6dca --- a/training/coqui_stt_training/evaluate.py +++ b/training/coqui_stt_training/evaluate.py @@ -13,6 +13,7 @@ from six.moves import zip import tensorflow as tf +from .deepspeech_model import create_model from .util.augmentations import NormalizeSampleRate from .util.checkpoints import load_graph_for_evaluation from .util.config import ( @@ -168,25 +169,25 @@ def evaluate(test_csvs, create_model): return samples -def main(): - initialize_globals_from_cli() - - if not Config.test_files: - log_error( - "You need to specify what files to use for evaluation via " - "the --test_files flag." 
- ) - sys.exit(1) - - from .train import ( # pylint: disable=cyclic-import,import-outside-toplevel - create_model, - ) +def test(): + tfv1.reset_default_graph() samples = evaluate(Config.test_files, create_model) - if Config.test_output_file: save_samples_json(samples, Config.test_output_file) +def main(): + initialize_globals_from_cli() + + if not Config.test_files: + raise RuntimeError( + "You need to specify what files to use for evaluation via " + "the --test_files flag." + ) + + test() + + if __name__ == "__main__": main() diff --git a/training/coqui_stt_training/export.py b/training/coqui_stt_training/export.py new file mode 100644 index 00000000..22c31ad6 --- /dev/null +++ b/training/coqui_stt_training/export.py @@ -0,0 +1,216 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import os +import sys + +LOG_LEVEL_INDEX = sys.argv.index("--log_level") + 1 if "--log_level" in sys.argv else 0 +DESIRED_LOG_LEVEL = ( + sys.argv[LOG_LEVEL_INDEX] if 0 < LOG_LEVEL_INDEX < len(sys.argv) else "3" +) +os.environ["TF_CPP_MIN_LOG_LEVEL"] = DESIRED_LOG_LEVEL + +import tensorflow as tf +import tensorflow.compat.v1 as tfv1 +import shutil + +from .deepspeech_model import create_inference_graph +from .util.checkpoints import load_graph_for_evaluation +from .util.config import Config, initialize_globals_from_cli, log_error, log_info +from .util.io import ( + open_remote, + rmtree_remote, + listdir_remote, + is_remote_path, + isdir_remote, +) + + +def file_relative_read(fname): + return open(os.path.join(os.path.dirname(__file__), fname)).read() + + +def export(): + r""" + Restores the trained variables into a simpler graph that will be exported for serving. + """ + log_info("Exporting the model...") + + tfv1.reset_default_graph() + + inputs, outputs, _ = create_inference_graph( + batch_size=Config.export_batch_size, + n_steps=Config.n_steps, + tflite=Config.export_tflite, + ) + + graph_version = int(file_relative_read("GRAPH_VERSION").strip()) + assert graph_version > 0 + + # native_client: these nodes's names and shapes are part of the API boundary + # with the native client, if you change them you should sync changes with + # the C++ code. 
+ outputs["metadata_version"] = tf.constant([graph_version], name="metadata_version") + outputs["metadata_sample_rate"] = tf.constant( + [Config.audio_sample_rate], name="metadata_sample_rate" + ) + outputs["metadata_feature_win_len"] = tf.constant( + [Config.feature_win_len], name="metadata_feature_win_len" + ) + outputs["metadata_feature_win_step"] = tf.constant( + [Config.feature_win_step], name="metadata_feature_win_step" + ) + outputs["metadata_beam_width"] = tf.constant( + [Config.export_beam_width], name="metadata_beam_width" + ) + outputs["metadata_alphabet"] = tf.constant( + [Config.alphabet.Serialize()], name="metadata_alphabet" + ) + + if Config.export_language: + outputs["metadata_language"] = tf.constant( + [Config.export_language.encode("utf-8")], name="metadata_language" + ) + + # Prevent further graph changes + tfv1.get_default_graph().finalize() + + output_names_tensors = [ + tensor.op.name for tensor in outputs.values() if isinstance(tensor, tf.Tensor) + ] + output_names_ops = [ + op.name for op in outputs.values() if isinstance(op, tf.Operation) + ] + output_names = output_names_tensors + output_names_ops + + with tf.Session() as session: + # Restore variables from checkpoint + load_graph_for_evaluation(session) + + output_filename = Config.export_file_name + ".pb" + if Config.remove_export: + if isdir_remote(Config.export_dir): + log_info("Removing old export") + rmtree_remote(Config.export_dir) + + output_graph_path = os.path.join(Config.export_dir, output_filename) + + if not is_remote_path(Config.export_dir) and not os.path.isdir( + Config.export_dir + ): + os.makedirs(Config.export_dir) + + frozen_graph = tfv1.graph_util.convert_variables_to_constants( + sess=session, + input_graph_def=tfv1.get_default_graph().as_graph_def(), + output_node_names=output_names, + ) + + frozen_graph = tfv1.graph_util.extract_sub_graph( + graph_def=frozen_graph, dest_nodes=output_names + ) + + if not Config.export_tflite: + with open_remote(output_graph_path, "wb") as fout: + fout.write(frozen_graph.SerializeToString()) + else: + output_tflite_path = os.path.join( + Config.export_dir, output_filename.replace(".pb", ".tflite") + ) + + converter = tf.lite.TFLiteConverter( + frozen_graph, + input_tensors=inputs.values(), + output_tensors=outputs.values(), + ) + + if Config.export_quantize: + converter.optimizations = [tf.lite.Optimize.DEFAULT] + + # AudioSpectrogram and Mfcc ops are custom but have built-in kernels in TFLite + converter.allow_custom_ops = True + tflite_model = converter.convert() + + with open_remote(output_tflite_path, "wb") as fout: + fout.write(tflite_model) + + log_info("Models exported at %s" % (Config.export_dir)) + + metadata_fname = os.path.join( + Config.export_dir, + "{}_{}_{}.md".format( + Config.export_author_id, + Config.export_model_name, + Config.export_model_version, + ), + ) + + model_runtime = "tflite" if Config.export_tflite else "tensorflow" + with open_remote(metadata_fname, "w") as f: + f.write("---\n") + f.write("author: {}\n".format(Config.export_author_id)) + f.write("model_name: {}\n".format(Config.export_model_name)) + f.write("model_version: {}\n".format(Config.export_model_version)) + f.write("contact_info: {}\n".format(Config.export_contact_info)) + f.write("license: {}\n".format(Config.export_license)) + f.write("language: {}\n".format(Config.export_language)) + f.write("runtime: {}\n".format(model_runtime)) + f.write("min_stt_version: {}\n".format(Config.export_min_stt_version)) + f.write("max_stt_version: 
{}\n".format(Config.export_max_stt_version)) + f.write( + "acoustic_model_url: \n" + ) + f.write( + "scorer_url: \n" + ) + f.write("---\n") + f.write("{}\n".format(Config.export_description)) + + log_info( + "Model metadata file saved to {}. Before submitting the exported model for publishing make sure all information in the metadata file is correct, and complete the URL fields.".format( + metadata_fname + ) + ) + + +def package_zip(): + # --export_dir path/to/export/LANG_CODE/ => path/to/export/LANG_CODE.zip + export_dir = os.path.join( + os.path.abspath(Config.export_dir), "" + ) # Force ending '/' + if is_remote_path(export_dir): + log_error( + "Cannot package remote path zip %s. Please do this manually." % export_dir + ) + return + + zip_filename = os.path.dirname(export_dir) + + shutil.copy(Config.scorer_path, export_dir) + + archive = shutil.make_archive(zip_filename, "zip", export_dir) + log_info("Exported packaged model {}".format(archive)) + + +def main(_): + initialize_globals_from_cli() + + if not Config.export_dir: + raise RuntimeError( + "Calling export script directly but no --export_dir specified" + ) + + if not Config.export_zip: + # Export to folder + export() + else: + if listdir_remote(Config.export_dir): + raise RuntimeError( + "Directory {} is not empty, please fix this.".format(Config.export_dir) + ) + + export() + package_zip() + + +if __name__ == "__main__": + main() diff --git a/training/coqui_stt_training/train.py b/training/coqui_stt_training/train.py index acfe15e2..6032184c 100644 --- a/training/coqui_stt_training/train.py +++ b/training/coqui_stt_training/train.py @@ -14,12 +14,13 @@ os.environ["TF_CPP_MIN_LOG_LEVEL"] = DESIRED_LOG_LEVEL import json import shutil import time +from datetime import datetime import numpy as np import progressbar import tensorflow.compat.v1 as tfv1 - import tensorflow as tf +from coqui_stt_ctcdecoder import Scorer tfv1.logging.set_verbosity( { @@ -30,12 +31,15 @@ tfv1.logging.set_verbosity( }.get(DESIRED_LOG_LEVEL) ) -from datetime import datetime -from coqui_stt_ctcdecoder import Scorer, ctc_beam_search_decoder -from six.moves import range, zip - -from .evaluate import evaluate +from . import evaluate +from . import export +from . import training_graph_inference +from .deepspeech_model import ( + create_model, + rnn_impl_lstmblockfusedcell, + rnn_impl_cudnn_rnn, +) from .util.augmentations import NormalizeSampleRate from .util.checkpoints import ( load_graph_for_evaluation, @@ -52,260 +56,16 @@ from .util.config import ( log_progress, log_warn, ) -from .util.evaluate_tools import save_samples_json -from .util.feeding import audio_to_features, audiofile_to_features, create_dataset +from .util.feeding import create_dataset from .util.helpers import ExceptionBox, check_ctcdecoder_version from .util.io import ( is_remote_path, - isdir_remote, - listdir_remote, open_remote, remove_remote, ) check_ctcdecoder_version() -# Graph Creation -# ============== - - -def variable_on_cpu(name, shape, initializer): - r""" - Next we concern ourselves with graph creation. - However, before we do so we must introduce a utility function ``variable_on_cpu()`` - used to create a variable in CPU memory. 
- """ - # Use the /cpu:0 device for scoped operations - with tf.device(Config.cpu_device): - # Create or get apropos variable - var = tfv1.get_variable(name=name, shape=shape, initializer=initializer) - return var - - -def create_overlapping_windows(batch_x): - batch_size = tf.shape(input=batch_x)[0] - window_width = 2 * Config.n_context + 1 - num_channels = Config.n_input - - # Create a constant convolution filter using an identity matrix, so that the - # convolution returns patches of the input tensor as is, and we can create - # overlapping windows over the MFCCs. - eye_filter = tf.constant( - np.eye(window_width * num_channels).reshape( - window_width, num_channels, window_width * num_channels - ), - tf.float32, - ) # pylint: disable=bad-continuation - - # Create overlapping windows - batch_x = tf.nn.conv1d(input=batch_x, filters=eye_filter, stride=1, padding="SAME") - - # Remove dummy depth dimension and reshape into [batch_size, n_windows, window_width, n_input] - batch_x = tf.reshape(batch_x, [batch_size, -1, window_width, num_channels]) - - return batch_x - - -def dense(name, x, units, dropout_rate=None, relu=True, layer_norm=False): - with tfv1.variable_scope(name): - bias = variable_on_cpu("bias", [units], tfv1.zeros_initializer()) - weights = variable_on_cpu( - "weights", - [x.shape[-1], units], - tfv1.keras.initializers.VarianceScaling( - scale=1.0, mode="fan_avg", distribution="uniform" - ), - ) - - output = tf.nn.bias_add(tf.matmul(x, weights), bias) - - if relu: - output = tf.minimum(tf.nn.relu(output), Config.relu_clip) - - if layer_norm: - with tfv1.variable_scope(name): - output = tf.contrib.layers.layer_norm(output) - - if dropout_rate is not None: - output = tf.nn.dropout(output, rate=dropout_rate) - - return output - - -def rnn_impl_lstmblockfusedcell(x, seq_length, previous_state, reuse): - with tfv1.variable_scope("cudnn_lstm/rnn/multi_rnn_cell/cell_0"): - fw_cell = tf.contrib.rnn.LSTMBlockFusedCell( - Config.n_cell_dim, - forget_bias=0, - reuse=reuse, - name="cudnn_compatible_lstm_cell", - ) - - output, output_state = fw_cell( - inputs=x, - dtype=tf.float32, - sequence_length=seq_length, - initial_state=previous_state, - ) - - return output, output_state - - -def rnn_impl_cudnn_rnn(x, seq_length, previous_state, _): - assert ( - previous_state is None - ) # 'Passing previous state not supported with CuDNN backend' - - # Hack: CudnnLSTM works similarly to Keras layers in that when you instantiate - # the object it creates the variables, and then you just call it several times - # to enable variable re-use. Because all of our code is structure in an old - # school TensorFlow structure where you can just call tf.get_variable again with - # reuse=True to reuse variables, we can't easily make use of the object oriented - # way CudnnLSTM is implemented, so we save a singleton instance in the function, - # emulating a static function variable. 
- if not rnn_impl_cudnn_rnn.cell: - # Forward direction cell: - fw_cell = tf.contrib.cudnn_rnn.CudnnLSTM( - num_layers=1, - num_units=Config.n_cell_dim, - input_mode="linear_input", - direction="unidirectional", - dtype=tf.float32, - ) - rnn_impl_cudnn_rnn.cell = fw_cell - - output, output_state = rnn_impl_cudnn_rnn.cell( - inputs=x, sequence_lengths=seq_length - ) - - return output, output_state - - -rnn_impl_cudnn_rnn.cell = None - - -def rnn_impl_static_rnn(x, seq_length, previous_state, reuse): - with tfv1.variable_scope("cudnn_lstm/rnn/multi_rnn_cell"): - # Forward direction cell: - fw_cell = tfv1.nn.rnn_cell.LSTMCell( - Config.n_cell_dim, - forget_bias=0, - reuse=reuse, - name="cudnn_compatible_lstm_cell", - ) - - # Split rank N tensor into list of rank N-1 tensors - x = [x[l] for l in range(x.shape[0])] - - output, output_state = tfv1.nn.static_rnn( - cell=fw_cell, - inputs=x, - sequence_length=seq_length, - initial_state=previous_state, - dtype=tf.float32, - scope="cell_0", - ) - - output = tf.concat(output, 0) - - return output, output_state - - -def create_model( - batch_x, - seq_length, - dropout, - reuse=False, - batch_size=None, - previous_state=None, - overlap=True, - rnn_impl=rnn_impl_lstmblockfusedcell, -): - layers = {} - - # Input shape: [batch_size, n_steps, n_input + 2*n_input*n_context] - if not batch_size: - batch_size = tf.shape(input=batch_x)[0] - - # Create overlapping feature windows if needed - if overlap: - batch_x = create_overlapping_windows(batch_x) - - # Reshaping `batch_x` to a tensor with shape `[n_steps*batch_size, n_input + 2*n_input*n_context]`. - # This is done to prepare the batch for input into the first layer which expects a tensor of rank `2`. - - # Permute n_steps and batch_size - batch_x = tf.transpose(a=batch_x, perm=[1, 0, 2, 3]) - # Reshape to prepare input for first layer - batch_x = tf.reshape( - batch_x, [-1, Config.n_input + 2 * Config.n_input * Config.n_context] - ) # (n_steps*batch_size, n_input + 2*n_input*n_context) - layers["input_reshaped"] = batch_x - - # The next three blocks will pass `batch_x` through three hidden layers with - # clipped RELU activation and dropout. - layers["layer_1"] = layer_1 = dense( - "layer_1", - batch_x, - Config.n_hidden_1, - dropout_rate=dropout[0], - layer_norm=Config.layer_norm, - ) - layers["layer_2"] = layer_2 = dense( - "layer_2", - layer_1, - Config.n_hidden_2, - dropout_rate=dropout[1], - layer_norm=Config.layer_norm, - ) - layers["layer_3"] = layer_3 = dense( - "layer_3", - layer_2, - Config.n_hidden_3, - dropout_rate=dropout[2], - layer_norm=Config.layer_norm, - ) - - # `layer_3` is now reshaped into `[n_steps, batch_size, 2*n_cell_dim]`, - # as the LSTM RNN expects its input to be of shape `[max_time, batch_size, input_size]`. 
- layer_3 = tf.reshape(layer_3, [-1, batch_size, Config.n_hidden_3]) - - # Run through parametrized RNN implementation, as we use different RNNs - # for training and inference - output, output_state = rnn_impl(layer_3, seq_length, previous_state, reuse) - - # Reshape output from a tensor of shape [n_steps, batch_size, n_cell_dim] - # to a tensor of shape [n_steps*batch_size, n_cell_dim] - output = tf.reshape(output, [-1, Config.n_cell_dim]) - layers["rnn_output"] = output - layers["rnn_output_state"] = output_state - - # Now we feed `output` to the fifth hidden layer with clipped RELU activation - layers["layer_5"] = layer_5 = dense( - "layer_5", - output, - Config.n_hidden_5, - dropout_rate=dropout[5], - layer_norm=Config.layer_norm, - ) - - # Now we apply a final linear layer creating `n_classes` dimensional vectors, the logits. - layers["layer_6"] = layer_6 = dense( - "layer_6", layer_5, Config.n_hidden_6, relu=False - ) - - # Finally we reshape layer_6 from a tensor of shape [n_steps*batch_size, n_hidden_6] - # to the slightly more useful shape [n_steps, batch_size, n_hidden_6]. - # Note, that this differs from the input in that it is time-major. - layer_6 = tf.reshape( - layer_6, [-1, batch_size, Config.n_hidden_6], name="raw_logits" - ) - layers["raw_logits"] = layer_6 - - # Output shape: [n_steps, batch_size, n_hidden_6] - return layer_6, layers - - # Accuracy and Loss # ================= @@ -900,371 +660,6 @@ def train(): log_debug("Session closed.") -def test(): - tfv1.reset_default_graph() - - samples = evaluate(Config.test_files, create_model) - if Config.test_output_file: - save_samples_json(samples, Config.test_output_file) - - -def create_inference_graph(batch_size=1, n_steps=16, tflite=False): - batch_size = batch_size if batch_size > 0 else None - - # Create feature computation graph - - # native_client: this node's name and shape are part of the API boundary - # with the native client, if you change them you should sync changes with - # the C++ code. - input_samples = tfv1.placeholder( - tf.float32, [Config.audio_window_samples], "input_samples" - ) - samples = tf.expand_dims(input_samples, -1) - mfccs, _ = audio_to_features(samples, Config.audio_sample_rate) - # native_client: this node's name and shape are part of the API boundary - # with the native client, if you change them you should sync changes with - # the C++ code. - mfccs = tf.identity(mfccs, name="mfccs") - - # Input tensor will be of shape [batch_size, n_steps, 2*n_context+1, n_input] - # This shape is read by the native_client in STT_CreateModel to know the - # value of n_steps, n_context and n_input. Make sure you update the code - # there if this shape is changed. - # - # native_client: this node's name and shape are part of the API boundary - # with the native client, if you change them you should sync changes with - # the C++ code. - input_tensor = tfv1.placeholder( - tf.float32, - [ - batch_size, - n_steps if n_steps > 0 else None, - 2 * Config.n_context + 1, - Config.n_input, - ], - name="input_node", - ) - # native_client: this node's name and shape are part of the API boundary - # with the native client, if you change them you should sync changes with - # the C++ code. 
- seq_length = tfv1.placeholder(tf.int32, [batch_size], name="input_lengths") - - if batch_size <= 0: - # no state management since n_step is expected to be dynamic too (see below) - previous_state = None - else: - # native_client: this node's name and shape are part of the API boundary - # with the native client, if you change them you should sync changes with - # the C++ code. - previous_state_c = tfv1.placeholder( - tf.float32, [batch_size, Config.n_cell_dim], name="previous_state_c" - ) - # native_client: this node's name and shape are part of the API boundary - # with the native client, if you change them you should sync changes with - # the C++ code. - previous_state_h = tfv1.placeholder( - tf.float32, [batch_size, Config.n_cell_dim], name="previous_state_h" - ) - - previous_state = tf.nn.rnn_cell.LSTMStateTuple( - previous_state_c, previous_state_h - ) - - # One rate per layer - no_dropout = [None] * 6 - - if tflite: - rnn_impl = rnn_impl_static_rnn - else: - rnn_impl = rnn_impl_lstmblockfusedcell - - logits, layers = create_model( - batch_x=input_tensor, - batch_size=batch_size, - seq_length=seq_length if not Config.export_tflite else None, - dropout=no_dropout, - previous_state=previous_state, - overlap=False, - rnn_impl=rnn_impl, - ) - - # TF Lite runtime will check that input dimensions are 1, 2 or 4 - # by default we get 3, the middle one being batch_size which is forced to - # one on inference graph, so remove that dimension - # - # native_client: this node's name and shape are part of the API boundary - # with the native client, if you change them you should sync changes with - # the C++ code. - if tflite: - logits = tf.squeeze(logits, [1]) - - # Apply softmax for CTC decoder - probs = tf.nn.softmax(logits, name="logits") - - if batch_size <= 0: - if tflite: - raise NotImplementedError( - "dynamic batch_size does not support tflite nor streaming" - ) - if n_steps > 0: - raise NotImplementedError( - "dynamic batch_size expect n_steps to be dynamic too" - ) - return ( - { - "input": input_tensor, - "input_lengths": seq_length, - }, - { - "outputs": probs, - }, - layers, - ) - - new_state_c, new_state_h = layers["rnn_output_state"] - new_state_c = tf.identity(new_state_c, name="new_state_c") - new_state_h = tf.identity(new_state_h, name="new_state_h") - - inputs = { - "input": input_tensor, - "previous_state_c": previous_state_c, - "previous_state_h": previous_state_h, - "input_samples": input_samples, - } - - if not Config.export_tflite: - inputs["input_lengths"] = seq_length - - outputs = { - "outputs": probs, - "new_state_c": new_state_c, - "new_state_h": new_state_h, - "mfccs": mfccs, - # Expose internal layers for downstream applications - "layer_3": layers["layer_3"], - "layer_5": layers["layer_5"], - } - - return inputs, outputs, layers - - -def file_relative_read(fname): - return open(os.path.join(os.path.dirname(__file__), fname)).read() - - -def export(): - r""" - Restores the trained variables into a simpler graph that will be exported for serving. - """ - log_info("Exporting the model...") - - tfv1.reset_default_graph() - - inputs, outputs, _ = create_inference_graph( - batch_size=Config.export_batch_size, - n_steps=Config.n_steps, - tflite=Config.export_tflite, - ) - - graph_version = int(file_relative_read("GRAPH_VERSION").strip()) - assert graph_version > 0 - - # native_client: these nodes's names and shapes are part of the API boundary - # with the native client, if you change them you should sync changes with - # the C++ code. 
- outputs["metadata_version"] = tf.constant([graph_version], name="metadata_version") - outputs["metadata_sample_rate"] = tf.constant( - [Config.audio_sample_rate], name="metadata_sample_rate" - ) - outputs["metadata_feature_win_len"] = tf.constant( - [Config.feature_win_len], name="metadata_feature_win_len" - ) - outputs["metadata_feature_win_step"] = tf.constant( - [Config.feature_win_step], name="metadata_feature_win_step" - ) - outputs["metadata_beam_width"] = tf.constant( - [Config.export_beam_width], name="metadata_beam_width" - ) - outputs["metadata_alphabet"] = tf.constant( - [Config.alphabet.Serialize()], name="metadata_alphabet" - ) - - if Config.export_language: - outputs["metadata_language"] = tf.constant( - [Config.export_language.encode("utf-8")], name="metadata_language" - ) - - # Prevent further graph changes - tfv1.get_default_graph().finalize() - - output_names_tensors = [ - tensor.op.name for tensor in outputs.values() if isinstance(tensor, tf.Tensor) - ] - output_names_ops = [ - op.name for op in outputs.values() if isinstance(op, tf.Operation) - ] - output_names = output_names_tensors + output_names_ops - - with tf.Session() as session: - # Restore variables from checkpoint - load_graph_for_evaluation(session) - - output_filename = Config.export_file_name + ".pb" - if Config.remove_export: - if isdir_remote(Config.export_dir): - log_info("Removing old export") - remove_remote(Config.export_dir) - - output_graph_path = os.path.join(Config.export_dir, output_filename) - - if not is_remote_path(Config.export_dir) and not os.path.isdir( - Config.export_dir - ): - os.makedirs(Config.export_dir) - - frozen_graph = tfv1.graph_util.convert_variables_to_constants( - sess=session, - input_graph_def=tfv1.get_default_graph().as_graph_def(), - output_node_names=output_names, - ) - - frozen_graph = tfv1.graph_util.extract_sub_graph( - graph_def=frozen_graph, dest_nodes=output_names - ) - - if not Config.export_tflite: - with open_remote(output_graph_path, "wb") as fout: - fout.write(frozen_graph.SerializeToString()) - else: - output_tflite_path = os.path.join( - Config.export_dir, output_filename.replace(".pb", ".tflite") - ) - - converter = tf.lite.TFLiteConverter( - frozen_graph, - input_tensors=inputs.values(), - output_tensors=outputs.values(), - ) - - if Config.export_quantize: - converter.optimizations = [tf.lite.Optimize.DEFAULT] - - # AudioSpectrogram and Mfcc ops are custom but have built-in kernels in TFLite - converter.allow_custom_ops = True - tflite_model = converter.convert() - - with open_remote(output_tflite_path, "wb") as fout: - fout.write(tflite_model) - - log_info("Models exported at %s" % (Config.export_dir)) - - metadata_fname = os.path.join( - Config.export_dir, - "{}_{}_{}.md".format( - Config.export_author_id, - Config.export_model_name, - Config.export_model_version, - ), - ) - - model_runtime = "tflite" if Config.export_tflite else "tensorflow" - with open_remote(metadata_fname, "w") as f: - f.write("---\n") - f.write("author: {}\n".format(Config.export_author_id)) - f.write("model_name: {}\n".format(Config.export_model_name)) - f.write("model_version: {}\n".format(Config.export_model_version)) - f.write("contact_info: {}\n".format(Config.export_contact_info)) - f.write("license: {}\n".format(Config.export_license)) - f.write("language: {}\n".format(Config.export_language)) - f.write("runtime: {}\n".format(model_runtime)) - f.write("min_stt_version: {}\n".format(Config.export_min_stt_version)) - f.write("max_stt_version: 
{}\n".format(Config.export_max_stt_version)) - f.write( - "acoustic_model_url: \n" - ) - f.write( - "scorer_url: \n" - ) - f.write("---\n") - f.write("{}\n".format(Config.export_description)) - - log_info( - "Model metadata file saved to {}. Before submitting the exported model for publishing make sure all information in the metadata file is correct, and complete the URL fields.".format( - metadata_fname - ) - ) - - -def package_zip(): - # --export_dir path/to/export/LANG_CODE/ => path/to/export/LANG_CODE.zip - export_dir = os.path.join( - os.path.abspath(Config.export_dir), "" - ) # Force ending '/' - if is_remote_path(export_dir): - log_error( - "Cannot package remote path zip %s. Please do this manually." % export_dir - ) - return - - zip_filename = os.path.dirname(export_dir) - - shutil.copy(Config.scorer_path, export_dir) - - archive = shutil.make_archive(zip_filename, "zip", export_dir) - log_info("Exported packaged model {}".format(archive)) - - -def do_single_file_inference(input_file_path): - tfv1.reset_default_graph() - - with tfv1.Session(config=Config.session_config) as session: - inputs, outputs, _ = create_inference_graph(batch_size=1, n_steps=-1) - - # Restore variables from training checkpoint - load_graph_for_evaluation(session) - - features, features_len = audiofile_to_features(input_file_path) - previous_state_c = np.zeros([1, Config.n_cell_dim]) - previous_state_h = np.zeros([1, Config.n_cell_dim]) - - # Add batch dimension - features = tf.expand_dims(features, 0) - features_len = tf.expand_dims(features_len, 0) - - # Evaluate - features = create_overlapping_windows(features).eval(session=session) - features_len = features_len.eval(session=session) - - probs = outputs["outputs"].eval( - feed_dict={ - inputs["input"]: features, - inputs["input_lengths"]: features_len, - inputs["previous_state_c"]: previous_state_c, - inputs["previous_state_h"]: previous_state_h, - }, - session=session, - ) - - probs = np.squeeze(probs) - - if Config.scorer_path: - scorer = Scorer( - Config.lm_alpha, Config.lm_beta, Config.scorer_path, Config.alphabet - ) - else: - scorer = None - decoded = ctc_beam_search_decoder( - probs, - Config.alphabet, - Config.beam_width, - scorer=scorer, - cutoff_prob=Config.cutoff_prob, - cutoff_top_n=Config.cutoff_top_n, - ) - # Print highest probability result - print(decoded[0][1]) - - def early_training_checks(): # Check for proper scorer early if Config.scorer_path: @@ -1289,36 +684,47 @@ def early_training_checks(): ) if not Config.alphabet_config_path and not Config.bytes_output_mode: - log_error("Missing --alphabet_config_path flag, can't continue") - sys.exit(1) + raise RuntimeError("Missing --alphabet_config_path flag, can't continue") def main(): initialize_globals_from_cli() + def deprecated_msg(prefix): + return ( + f"{prefix} Using the training script as a generic driver for all training " + "related functionality is deprecated and will be removed soon. Use " + "the specific scripts: train.py/evaluate.py/export.py/training_graph_inference.py." + ) + if Config.train_files: train() + else: + log_warn(deprecated_msg("Calling training script without --train_files.")) if Config.test_files: - test() - - if Config.export_dir and not Config.export_zip: - export() - - if Config.export_zip: - Config.export_tflite = True - - if listdir_remote(Config.export_dir): - log_error( - "Directory {} is not empty, please fix this.".format(Config.export_dir) + log_warn( + deprecated_msg( + "Specifying --test_files when calling train.py script. Use evaluate.py." 
)
- sys.exit(1)
+ )
+ evaluate.test()

- export()
- package_zip()
+ if Config.export_dir:
+ log_warn(
+ deprecated_msg(
+ "Specifying --export_dir when calling train.py script. Use export.py."
+ )
+ )
+ export.export()

 if Config.one_shot_infer:
- do_single_file_inference(Config.one_shot_infer)
+ log_warn(
+ deprecated_msg(
+ "Specifying --one_shot_infer when calling train.py script. Use training_graph_inference.py."
+ )
+ )
+ training_graph_inference.do_single_file_inference(Config.one_shot_infer)

 if __name__ == "__main__":
diff --git a/training/coqui_stt_training/training_graph_inference.py b/training/coqui_stt_training/training_graph_inference.py
new file mode 100644
index 00000000..b5399a91
--- /dev/null
+++ b/training/coqui_stt_training/training_graph_inference.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import os
+import sys
+
+LOG_LEVEL_INDEX = sys.argv.index("--log_level") + 1 if "--log_level" in sys.argv else 0
+DESIRED_LOG_LEVEL = (
+ sys.argv[LOG_LEVEL_INDEX] if 0 < LOG_LEVEL_INDEX < len(sys.argv) else "3"
+)
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = DESIRED_LOG_LEVEL
+
+import numpy as np
+import tensorflow as tf
+import tensorflow.compat.v1 as tfv1
+
+from coqui_stt_ctcdecoder import ctc_beam_search_decoder, Scorer
+from .deepspeech_model import create_inference_graph, create_overlapping_windows
+from .util.checkpoints import load_graph_for_evaluation
+from .util.config import Config, initialize_globals_from_cli, log_error
+from .util.feeding import audiofile_to_features
+
+
+def do_single_file_inference(input_file_path):
+ tfv1.reset_default_graph()
+
+ with tfv1.Session(config=Config.session_config) as session:
+ inputs, outputs, _ = create_inference_graph(batch_size=1, n_steps=-1)
+
+ # Restore variables from training checkpoint
+ load_graph_for_evaluation(session)
+
+ features, features_len = audiofile_to_features(input_file_path)
+ previous_state_c = np.zeros([1, Config.n_cell_dim])
+ previous_state_h = np.zeros([1, Config.n_cell_dim])
+
+ # Add batch dimension
+ features = tf.expand_dims(features, 0)
+ features_len = tf.expand_dims(features_len, 0)
+
+ # Evaluate
+ features = create_overlapping_windows(features).eval(session=session)
+ features_len = features_len.eval(session=session)
+
+ probs = outputs["outputs"].eval(
+ feed_dict={
+ inputs["input"]: features,
+ inputs["input_lengths"]: features_len,
+ inputs["previous_state_c"]: previous_state_c,
+ inputs["previous_state_h"]: previous_state_h,
+ },
+ session=session,
+ )
+
+ probs = np.squeeze(probs)
+
+ if Config.scorer_path:
+ scorer = Scorer(
+ Config.lm_alpha, Config.lm_beta, Config.scorer_path, Config.alphabet
+ )
+ else:
+ scorer = None
+ decoded = ctc_beam_search_decoder(
+ probs,
+ Config.alphabet,
+ Config.beam_width,
+ scorer=scorer,
+ cutoff_prob=Config.cutoff_prob,
+ cutoff_top_n=Config.cutoff_top_n,
+ )
+ # Print highest probability result
+ print(decoded[0][1])
+
+
+def main():
+ initialize_globals_from_cli()
+
+ if Config.one_shot_infer:
+ tfv1.reset_default_graph()
+ do_single_file_inference(Config.one_shot_infer)
+ else:
+ raise RuntimeError(
+ "Calling training_graph_inference script directly but no --one_shot_infer input audio file specified"
+ )
+
+
+if __name__ == "__main__":
+ main()
diff --git a/training/coqui_stt_training/util/config.py b/training/coqui_stt_training/util/config.py
index 34794f1b..96114e25 100755
--- a/training/coqui_stt_training/util/config.py
+++ b/training/coqui_stt_training/util/config.py
@@ -477,7 +477,7 @@ class _SttConfig(Coqpit): default=False, 
metadata=dict(help="whether to remove old exported models") ) export_tflite: bool = field( - default=False, metadata=dict(help="export a graph ready for TF Lite engine") + default=True, metadata=dict(help="export a graph ready for TF Lite engine") ) export_quantize: bool = field( default=True, diff --git a/training/coqui_stt_training/util/io.py b/training/coqui_stt_training/util/io.py index a3fb3368..6d466631 100644 --- a/training/coqui_stt_training/util/io.py +++ b/training/coqui_stt_training/util/io.py @@ -90,3 +90,10 @@ def remove_remote(filename): """ # Conditional import return gfile.remove(filename) + + +def rmtree_remote(foldername): + """ + Wrapper that can remove local and remote directories like `gs://...` + """ + return gfile.rmtree(foldername)
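
Usage sketch (not part of the diff above): how a downstream script is expected to drive the split modules, modelled on the bin/run-ldc93s1.py change in this patch. Only the imports and function names come from the patch itself; the file paths and keyword overrides below are illustrative assumptions.

    # hypothetical train_then_test.py, mirroring bin/run-ldc93s1.py
    from coqui_stt_training.util.config import initialize_globals_from_args
    from coqui_stt_training.train import train
    from coqui_stt_training.evaluate import test

    # Populate Config programmatically instead of via CLI flags
    # (field names match the flags used by the CI scripts in this patch;
    # the values are placeholders).
    initialize_globals_from_args(
        alphabet_config_path="data/alphabet.txt",
        train_files=["data/ldc93s1/ldc93s1.csv"],
        test_files=["data/ldc93s1/ldc93s1.csv"],
        checkpoint_dir="/tmp/ckpt",
    )

    train()  # runs training only; no implicit test/export/inference
    test()   # evaluation now lives in coqui_stt_training.evaluate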
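
A second hedged sketch, driving the new export module on its own. export_tflite now defaults to True (see the config.py hunk above), which is why the CI scripts in this patch pass --export_tflite false to keep producing a frozen .pb. Passing these fields as keyword overrides to initialize_globals_from_args is assumed to work the same way as in bin/run-ldc93s1.py; the paths are placeholders.

    # hypothetical export_model.py
    from coqui_stt_training.util.config import initialize_globals_from_args
    from coqui_stt_training.export import export

    initialize_globals_from_args(
        checkpoint_dir="/tmp/ckpt",   # trained checkpoint to restore
        export_dir="/tmp/export",     # destination directory for the export
        export_tflite=False,          # default is now True; False writes a frozen .pb instead of .tflite
    )

    export()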