Fixes for Dockerfile.{train,build} and adjust instructions for new image
This commit is contained in:
parent
1029d06a23
commit
214a150c19
2
.gitignore
vendored
2
.gitignore
vendored
@ -32,8 +32,6 @@
|
|||||||
/doc/.build/
|
/doc/.build/
|
||||||
/doc/xml-c/
|
/doc/xml-c/
|
||||||
/doc/xml-java/
|
/doc/xml-java/
|
||||||
Dockerfile.build
|
|
||||||
Dockerfile.train
|
|
||||||
doc/xml-c
|
doc/xml-c
|
||||||
doc/xml-java
|
doc/xml-java
|
||||||
doc/xml-dotnet
|
doc/xml-dotnet
|
||||||
|
@ -3,8 +3,8 @@
|
|||||||
# Need the devel version because we need /usr/include/cudnn.h
|
# Need the devel version because we need /usr/include/cudnn.h
|
||||||
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
|
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
|
||||||
|
|
||||||
ENV STT_REPO=#STT_REPO#
|
ARG STT_REPO=https://github.com/coqui-ai/STT.git
|
||||||
ENV STT_SHA=#STT_SHA#
|
ARG STT_SHA=origin/main
|
||||||
|
|
||||||
# >> START Install base software
|
# >> START Install base software
|
||||||
|
|
63
Dockerfile.train
Normal file
63
Dockerfile.train
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
# Please refer to the TRAINING documentation, "Basic Dockerfile for training"
|
||||||
|
|
||||||
|
FROM tensorflow/tensorflow:1.15.4-gpu-py3
|
||||||
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
|
|
||||||
|
# We need to purge python3-xdg because it's breaking STT install later with
|
||||||
|
# weird errors about setuptools
|
||||||
|
#
|
||||||
|
# libopus0 and libsndfile1 are dependencies for audio augmentation
|
||||||
|
#
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y --no-install-recommends \
|
||||||
|
build-essential \
|
||||||
|
cmake \
|
||||||
|
curl \
|
||||||
|
git \
|
||||||
|
libboost-all-dev \
|
||||||
|
libbz2-dev \
|
||||||
|
libopus0 \
|
||||||
|
libsndfile1 \
|
||||||
|
unzip \
|
||||||
|
wget && \
|
||||||
|
apt-get purge -y python3-xdg && \
|
||||||
|
rm -rf /var/lib/apt/lists/
|
||||||
|
|
||||||
|
# Make sure pip and its deps are up-to-date
|
||||||
|
RUN pip3 install --upgrade pip wheel setuptools
|
||||||
|
|
||||||
|
WORKDIR /code
|
||||||
|
|
||||||
|
# Tool to convert output graph for inference
|
||||||
|
RUN wget https://github.com/coqui-ai/STT/releases/download/v0.9.3/convert_graphdef_memmapped_format.linux.amd64.zip -O temp.zip && \
|
||||||
|
unzip temp.zip && rm temp.zip
|
||||||
|
|
||||||
|
COPY native_client /code/native_client
|
||||||
|
COPY .git /code/.git
|
||||||
|
COPY training/coqui_stt_training/VERSION /code/training/coqui_stt_training/VERSION
|
||||||
|
COPY training/coqui_stt_training/GRAPH_VERSION /code/training/coqui_stt_training/GRAPH_VERSION
|
||||||
|
|
||||||
|
# Build the CTC decoder first, to avoid clashes caused by incompatible version upgrades
|
||||||
|
RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
|
||||||
|
RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
|
||||||
|
|
||||||
|
# Install STT
|
||||||
|
# - No need for the decoder since we did it earlier
|
||||||
|
# - There is already correct TensorFlow GPU installed on the base image,
|
||||||
|
# we don't want to break that
|
||||||
|
COPY setup.py /code/setup.py
|
||||||
|
COPY VERSION /code/VERSION
|
||||||
|
COPY training /code/training
|
||||||
|
RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e .
|
||||||
|
|
||||||
|
# Build KenLM to generate new scorers
|
||||||
|
COPY kenlm /code/kenlm
|
||||||
|
RUN cd /code/kenlm && \
|
||||||
|
mkdir -p build && \
|
||||||
|
cd build && \
|
||||||
|
cmake .. && \
|
||||||
|
make -j $(nproc)
|
||||||
|
|
||||||
|
# Copy rest of the code and test training
|
||||||
|
COPY . /code
|
||||||
|
RUN ./bin/run-ldc93s1.sh && rm -rf ~/.local/share/stt
|
@ -1,68 +0,0 @@
|
|||||||
# Please refer to the TRAINING documentation, "Basic Dockerfile for training"
|
|
||||||
|
|
||||||
FROM tensorflow/tensorflow:1.15.4-gpu-py3
|
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
|
||||||
|
|
||||||
ENV STT_REPO=#STT_REPO#
|
|
||||||
ENV STT_SHA=#STT_SHA#
|
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
||||||
apt-utils \
|
|
||||||
bash-completion \
|
|
||||||
build-essential \
|
|
||||||
cmake \
|
|
||||||
curl \
|
|
||||||
git \
|
|
||||||
libboost-all-dev \
|
|
||||||
libbz2-dev \
|
|
||||||
locales \
|
|
||||||
python3-venv \
|
|
||||||
unzip \
|
|
||||||
wget
|
|
||||||
|
|
||||||
# We need to remove it because it's breaking STT install later with
|
|
||||||
# weird errors about setuptools
|
|
||||||
RUN apt-get purge -y python3-xdg
|
|
||||||
|
|
||||||
# Install dependencies for audio augmentation
|
|
||||||
RUN apt-get install -y --no-install-recommends libopus0 libsndfile1
|
|
||||||
|
|
||||||
# Try and free some space
|
|
||||||
RUN rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
WORKDIR /
|
|
||||||
RUN git clone $STT_REPO STT
|
|
||||||
|
|
||||||
WORKDIR /STT
|
|
||||||
RUN git checkout $STT_SHA
|
|
||||||
|
|
||||||
# Build CTC decoder first, to avoid clashes on incompatible versions upgrades
|
|
||||||
RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
|
|
||||||
RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
|
|
||||||
|
|
||||||
# Prepare deps
|
|
||||||
RUN pip3 install --upgrade pip==20.2.2 wheel==0.34.2 setuptools==49.6.0
|
|
||||||
|
|
||||||
# Install STT
|
|
||||||
# - No need for the decoder since we did it earlier
|
|
||||||
# - There is already correct TensorFlow GPU installed on the base image,
|
|
||||||
# we don't want to break that
|
|
||||||
RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e .
|
|
||||||
|
|
||||||
# Tool to convert output graph for inference
|
|
||||||
RUN python3 util/taskcluster.py --source tensorflow --branch r1.15 \
|
|
||||||
--artifact convert_graphdef_memmapped_format --target .
|
|
||||||
|
|
||||||
# Build KenLM to generate new scorers
|
|
||||||
WORKDIR /STT/native_client
|
|
||||||
RUN rm -rf kenlm && \
|
|
||||||
git clone https://github.com/kpu/kenlm && \
|
|
||||||
cd kenlm && \
|
|
||||||
git checkout 87e85e66c99ceff1fab2500a7c60c01da7315eec && \
|
|
||||||
mkdir -p build && \
|
|
||||||
cd build && \
|
|
||||||
cmake .. && \
|
|
||||||
make -j $(nproc)
|
|
||||||
WORKDIR /STT
|
|
||||||
|
|
||||||
RUN ./bin/run-ldc93s1.sh
|
|
@ -181,17 +181,12 @@ Dockerfile for building from source
|
|||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
We provide ``Dockerfile.build`` to automatically build ``libstt.so``, the C++ native client, Python bindings, and KenLM.
|
We provide ``Dockerfile.build`` to automatically build ``libstt.so``, the C++ native client, Python bindings, and KenLM.
|
||||||
You need to generate the Dockerfile from the template using:
|
|
||||||
|
If you want to build from a different repository or branch, you can pass the ``STT_REPO`` or ``STT_SHA`` build arguments:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
make Dockerfile.build
|
docker build . -f Dockerfile.build --build-arg STT_REPO=git://your/fork --build-arg STT_SHA=origin/your-branch
|
||||||
|
|
||||||
If you want to specify a different repository or branch, you can pass ``STT_REPO`` or ``STT_SHA`` parameters:
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
make Dockerfile.build STT_REPO=git://your/fork STT_SHA=origin/your-branch
|
|
||||||
|
|
||||||
.. _runtime-deps:
|
.. _runtime-deps:
|
||||||
|
|
||||||
|
@ -88,18 +88,7 @@ Setting the ``TF_FORCE_GPU_ALLOW_GROWTH`` environment variable to ``true`` seems
|
|||||||
Basic Dockerfile for training
|
Basic Dockerfile for training
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
We provide ``Dockerfile.train`` to automatically set up a basic training environment in Docker. You need to generate the Dockerfile from the template using:
|
We provide ``Dockerfile.train`` to automatically set up a basic training environment in Docker. It builds on the upstream Python 3 TensorFlow GPU-enabled Docker image, so that image is re-used rather than rebuilt. The image can be used as a base in your own Dockerfile with ``FROM ghcr.io/coqui-ai/stt-train``.
|
||||||
This should ensure that you'll re-use the upstream Python 3 TensorFlow GPU-enabled Docker image.
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
make Dockerfile.train
|
|
||||||
|
|
||||||
If you want to specify a different 🐸STT repository / branch, you can pass ``STT_REPO`` or ``STT_SHA`` parameters:
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
make Dockerfile.train STT_REPO=git://your/fork STT_SHA=origin/your-branch
|
|
||||||
|
|
||||||
Common Voice training data
|
Common Voice training data
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
Loading…
Reference in New Issue
Block a user