From 2835151951094e167d5d2c767f01f5f2337c1cbb Mon Sep 17 00:00:00 2001
From: Reuben Morais <reuben.morais@gmail.com>
Date: Mon, 27 Jul 2020 21:09:32 +0200
Subject: [PATCH 1/2] Remove external scorer file and documentation and flag
 references

---
 data/README.rst                            | 2 +-
 data/lm/kenlm.scorer                       | 3 ---
 doc/BUILDING.rst                           | 5 +++--
 training/deepspeech_training/util/flags.py | 2 +-
 4 files changed, 5 insertions(+), 7 deletions(-)
 delete mode 100644 data/lm/kenlm.scorer

diff --git a/data/README.rst b/data/README.rst
index 3a60ea5a..f731a31c 100644
--- a/data/README.rst
+++ b/data/README.rst
@@ -5,7 +5,7 @@ This directory contains language-specific data files. Most importantly, you will
 
 1. A list of unique characters for the target language (e.g. English) in ``data/alphabet.txt``. After installing the training code, you can check ``python -m deepspeech_training.util.check_characters --help`` for a tool that creates an alphabet file from a list of training CSV files.
 
-2. A scorer package (``data/lm/kenlm.scorer``) generated with ``generate_scorer_package`` (``native_client/generate_scorer_package.cpp``). The scorer package includes a binary n-gram language model generated with ``data/lm/generate_lm.py``.
+2. A script used to generate a binary n-gram language model: ``data/lm/generate_lm.py``.
 
 For more information on how to build these resources from scratch, see the ``External scorer scripts`` section on `deepspeech.readthedocs.io <https://deepspeech.readthedocs.io/>`_.
 
diff --git a/data/lm/kenlm.scorer b/data/lm/kenlm.scorer
deleted file mode 100644
index d8581c05..00000000
--- a/data/lm/kenlm.scorer
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d0cf926ab9cab54a8a7d70003b931b2d62ebd9105ed392d1ec9c840029867799
-size 953363776
diff --git a/doc/BUILDING.rst b/doc/BUILDING.rst
index bcc4d374..4d25359a 100644
--- a/doc/BUILDING.rst
+++ b/doc/BUILDING.rst
@@ -282,8 +282,9 @@ Please push DeepSpeech data to ``/sdcard/deepspeech/``\ , including:
 
 
 * ``output_graph.tflite`` which is the TF Lite model
-* ``kenlm.scorer``, if you want to use the scorer; please be aware that too big
-  scorer will make the device run out of memory
+* External scorer file (available from one of our releases), if you want to use
+  the scorer; please be aware that a scorer that is too large will make the
+  device run out of memory
 
 Then, push binaries from ``native_client.tar.xz`` to ``/data/local/tmp/ds``\ :
 
diff --git a/training/deepspeech_training/util/flags.py b/training/deepspeech_training/util/flags.py
index 128441fd..e5ad8758 100644
--- a/training/deepspeech_training/util/flags.py
+++ b/training/deepspeech_training/util/flags.py
@@ -157,7 +157,7 @@ def create_flags():
 
     f.DEFINE_boolean('utf8', False, 'enable UTF-8 mode. When this is used the model outputs UTF-8 sequences directly rather than using an alphabet mapping.')
     f.DEFINE_string('alphabet_config_path', 'data/alphabet.txt', 'path to the configuration file specifying the alphabet used by the network. See the comment in data/alphabet.txt for a description of the format.')
-    f.DEFINE_string('scorer_path', 'data/lm/kenlm.scorer', 'path to the external scorer file.')
+    f.DEFINE_string('scorer_path', '', 'path to the external scorer file.')
     f.DEFINE_alias('scorer', 'scorer_path')
     f.DEFINE_integer('beam_width', 1024, 'beam width used in the CTC decoder when building candidate transcriptions')
     f.DEFINE_float('lm_alpha', 0.931289039105002, 'the alpha hyperparameter of the CTC decoder. Language Model weight.')

From 216da91842ca38a4c56f0f8412eee7fabb89f322 Mon Sep 17 00:00:00 2001
From: Reuben Morais <reuben.morais@gmail.com>
Date: Tue, 28 Jul 2020 11:05:10 +0200
Subject: [PATCH 2/2] Remove Git LFS from docs

---
 Dockerfile.build.tmpl           | 1 -
 Dockerfile.train.tmpl           | 2 --
 doc/TRAINING.rst                | 4 +---
 native_client/dotnet/README.rst | 1 -
 4 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/Dockerfile.build.tmpl b/Dockerfile.build.tmpl
index d5f24344..58bea150 100644
--- a/Dockerfile.build.tmpl
+++ b/Dockerfile.build.tmpl
@@ -19,7 +19,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
         g++ \
         gcc \
         git \
-        git-lfs \
         libbz2-dev \
         libboost-all-dev \
         libgsm1-dev \
diff --git a/Dockerfile.train.tmpl b/Dockerfile.train.tmpl
index cdcf1d3c..e3b47795 100644
--- a/Dockerfile.train.tmpl
+++ b/Dockerfile.train.tmpl
@@ -13,7 +13,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
         cmake \
         curl \
         git \
-        git-lfs \
         libboost-all-dev \
         libbz2-dev \
         locales \
@@ -32,7 +31,6 @@ RUN apt-get install -y --no-install-recommends libopus0 libsndfile1
 RUN rm -rf /var/lib/apt/lists/*
 
 WORKDIR /
-RUN git lfs install
 RUN git clone $DEEPSPEECH_REPO
 
 WORKDIR /DeepSpeech
diff --git a/doc/TRAINING.rst b/doc/TRAINING.rst
index 0463ba26..7de40e6a 100644
--- a/doc/TRAINING.rst
+++ b/doc/TRAINING.rst
@@ -6,15 +6,13 @@ Training Your Own Model
 Prerequisites for training a model
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-
 * `Python 3.6 <https://www.python.org/>`_
-* `Git Large File Storage <https://git-lfs.github.com/>`_
 * Mac or Linux environment
 
 Getting the training code
 ^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Install `Git Large File Storage <https://git-lfs.github.com/>`_ either manually or through a package-manager if available on your system. Then clone the DeepSpeech repository normally:
+Clone the DeepSpeech repository:
 
 .. code-block:: bash
 
diff --git a/native_client/dotnet/README.rst b/native_client/dotnet/README.rst
index 9f50f446..b1025573 100644
--- a/native_client/dotnet/README.rst
+++ b/native_client/dotnet/README.rst
@@ -31,7 +31,6 @@ Prerequisites
 * Windows 10
 * `Windows 10 SDK <https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk>`_
 * `Visual Studio 2019 Community <https://visualstudio.microsoft.com/vs/community/>`_ 
-* `Git Large File Storage <https://git-lfs.github.com/>`_
 * `TensorFlow Windows pre-requisites <https://www.tensorflow.org/install/source_windows>`_
 
 Inside the Visual Studio Installer enable ``MS Build Tools`` and ``VC++ 2019 v16.00 (v160) toolset for desktop``.