Decouple Dockerfile into build and train

This commit is contained in:
Alexandre Lissy 2020-06-02 21:23:20 +02:00
parent cbb9c28e2c
commit 4d541394e8
9 changed files with 163 additions and 108 deletions

View File

@ -1,68 +1,56 @@
# Please refer to the USING documentation, "Dockerfile for building from source"
# Need devel version cause we need /usr/include/cudnn.h # Need devel version cause we need /usr/include/cudnn.h
# for compiling libctc_decoder_with_kenlm.so
FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04 FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
# >> START Install base software # >> START Install base software
# Get basic packages # Get basic packages
RUN apt-get update && apt-get install -y --no-install-recommends \ RUN apt-get update && apt-get install -y --no-install-recommends \
apt-utils \ apt-utils \
bash-completion \
build-essential \ build-essential \
ca-certificates \
cmake \
curl \ curl \
wget \ g++ \
gcc \
git \ git \
git-lfs \
libbz2-dev \
libboost-all-dev \
libgsm1-dev \
libltdl-dev \
liblzma-dev \
libmagic-dev \
libpng-dev \
libsox-fmt-mp3 \
libsox-dev \
locales \
openjdk-8-jdk \
pkg-config \
python3 \ python3 \
python3-dev \ python3-dev \
python3-pip \ python3-pip \
python3-wheel \ python3-wheel \
python3-numpy \ python3-numpy \
libcurl3-dev \
ca-certificates \
gcc \
sox \ sox \
libsox-fmt-mp3 \ unzip \
htop \ wget \
nano \ zlib1g-dev
cmake \
libboost-all-dev \
zlib1g-dev \
libbz2-dev \
liblzma-dev \
locales \
pkg-config \
libpng-dev \
libsox-dev \
libmagic-dev \
libgsm1-dev \
libltdl-dev \
openjdk-8-jdk \
bash-completion \
g++ \
unzip
RUN ln -s -f /usr/bin/python3 /usr/bin/python RUN update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1
# Install NCCL 2.2
RUN apt-get --no-install-recommends install -qq -y --allow-downgrades --allow-change-held-packages libnccl2=2.3.7-1+cuda10.0 libnccl-dev=2.3.7-1+cuda10.0
# Install Bazel # Install Bazel
RUN curl -LO "https://github.com/bazelbuild/bazel/releases/download/0.24.1/bazel_0.24.1-linux-x86_64.deb" RUN curl -LO "https://github.com/bazelbuild/bazel/releases/download/0.24.1/bazel_0.24.1-linux-x86_64.deb"
RUN dpkg -i bazel_*.deb RUN dpkg -i bazel_*.deb
# Install CUDA CLI Tools
RUN apt-get --no-install-recommends install -qq -y cuda-command-line-tools-10-0
# Install pip
RUN wget https://bootstrap.pypa.io/get-pip.py && \
python3 get-pip.py && \
rm get-pip.py
# << END Install base software # << END Install base software
# >> START Configure Tensorflow Build # >> START Configure Tensorflow Build
# Clone TensorFlow from Mozilla repo # Clone TensorFlow from Mozilla repo
@ -70,14 +58,13 @@ RUN git clone https://github.com/mozilla/tensorflow/
WORKDIR /tensorflow WORKDIR /tensorflow
RUN git checkout r1.15 RUN git checkout r1.15
# GPU Environment Setup # GPU Environment Setup
ENV TF_NEED_CUDA 1 ENV TF_NEED_CUDA 1
ENV TF_CUDA_PATHS "/usr/local/cuda,/usr/lib/x86_64-linux-gnu/" ENV TF_CUDA_PATHS "/usr,/usr/local/cuda,/usr/lib/x86_64-linux-gnu/"
ENV TF_CUDA_VERSION 10.0 ENV TF_CUDA_VERSION 10.0
ENV TF_CUDNN_VERSION 7 ENV TF_CUDNN_VERSION 7.6
ENV TF_CUDA_COMPUTE_CAPABILITIES 6.0 ENV TF_CUDA_COMPUTE_CAPABILITIES 6.0
ENV TF_NCCL_VERSION 2.3 ENV TF_NCCL_VERSION 2.4
# Common Environment Setup # Common Environment Setup
ENV TF_BUILD_CONTAINER_TYPE GPU ENV TF_BUILD_CONTAINER_TYPE GPU
@ -105,14 +92,12 @@ ENV TF_NEED_TENSORRT 0
ENV TF_NEED_GDR 0 ENV TF_NEED_GDR 0
ENV TF_NEED_VERBS 0 ENV TF_NEED_VERBS 0
ENV TF_NEED_OPENCL_SYCL 0 ENV TF_NEED_OPENCL_SYCL 0
ENV PYTHON_BIN_PATH /usr/bin/python3.6 ENV PYTHON_BIN_PATH /usr/bin/python3.6
ENV PYTHON_LIB_PATH /usr/lib/python3.6/dist-packages ENV PYTHON_LIB_PATH /usr/lib/python3.6/dist-packages
# << END Configure Tensorflow Build # << END Configure Tensorflow Build
# >> START Configure Bazel # >> START Configure Bazel
# Running bazel inside a `docker build` command causes trouble, cf: # Running bazel inside a `docker build` command causes trouble, cf:
@ -124,39 +109,17 @@ RUN echo "startup --batch" >>/etc/bazel.bazelrc
RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \ RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
>>/etc/bazel.bazelrc >>/etc/bazel.bazelrc
# Put cuda libraries to where they are expected to be
RUN mkdir /usr/local/cuda/lib && \
ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/local/cuda/lib/libnccl.so.2 && \
ln -s /usr/include/nccl.h /usr/local/cuda/include/nccl.h && \
ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
ln -s /usr/include/cudnn.h /usr/local/cuda/include/cudnn.h
# Set library paths
ENV LD_LIBRARY_PATH $LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/lib/x86_64-linux-gnu/:/usr/local/cuda/lib64/stubs/
# << END Configure Bazel # << END Configure Bazel
WORKDIR /
# Copy DeepSpeech repo contents to container's /DeepSpeech RUN git clone $DEEPSPEECH_REPO
COPY . /DeepSpeech/
# Alternative clone from GitHub
# RUN apt-get update && apt-get install -y git-lfs
# WORKDIR /
# RUN git lfs install
# RUN git clone https://github.com/mozilla/DeepSpeech.git
WORKDIR /DeepSpeech WORKDIR /DeepSpeech
RUN git checkout $DEEPSPEECH_SHA
RUN DS_NODECODER=1 pip3 --no-cache-dir install .
# Link DeepSpeech native_client libs to tf folder # Link DeepSpeech native_client libs to tf folder
RUN ln -s /DeepSpeech/native_client /tensorflow RUN ln -s /DeepSpeech/native_client /tensorflow
# >> START Build and bind # >> START Build and bind
WORKDIR /tensorflow WORKDIR /tensorflow
@ -170,59 +133,60 @@ RUN ./configure
# passing LD_LIBRARY_PATH is required because Bazel doesn't pick it up from the environment # passing LD_LIBRARY_PATH is required because Bazel doesn't pick it up from the environment
# Build DeepSpeech # Build DeepSpeech
RUN bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=cuda -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx --copt=-fvisibility=hidden //native_client:libdeepspeech.so --verbose_failures --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH} RUN bazel build \
--workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \
### --config=monolithic \
### Using TensorFlow upstream should work --config=cuda \
### -c opt \
# # Build TF pip package --copt=-O3 \
# RUN bazel build --config=opt --config=cuda --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx //tensorflow/tools/pip_package:build_pip_package --verbose_failures --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH} --copt="-D_GLIBCXX_USE_CXX11_ABI=0" \
# --copt=-mtune=generic \
# # Build wheel --copt=-march=x86-64 \
# RUN bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg --copt=-msse \
# --copt=-msse2 \
# # Install tensorflow from our custom wheel --copt=-msse3 \
# RUN pip3 install /tmp/tensorflow_pkg/*.whl --copt=-msse4.1 \
--copt=-msse4.2 \
--copt=-mavx \
--copt=-fvisibility=hidden \
//native_client:libdeepspeech.so \
--verbose_failures \
--action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
# Copy built libs to /DeepSpeech/native_client # Copy built libs to /DeepSpeech/native_client
RUN cp /tensorflow/bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/ RUN cp /tensorflow/bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/
# Install TensorFlow
WORKDIR /DeepSpeech/
RUN pip3 install tensorflow-gpu==1.15.0
# Build client.cc and install Python client and decoder bindings # Build client.cc and install Python client and decoder bindings
ENV TFDIR /tensorflow ENV TFDIR /tensorflow
RUN nproc
WORKDIR /DeepSpeech/native_client WORKDIR /DeepSpeech/native_client
RUN make deepspeech RUN make NUM_PROCESSES=$(nproc) deepspeech
WORKDIR /DeepSpeech WORKDIR /DeepSpeech
RUN cd native_client/python && make bindings RUN cd native_client/python && make NUM_PROCESSES=$(nproc) bindings
RUN pip3 install --upgrade native_client/python/dist/*.whl RUN pip3 install --upgrade native_client/python/dist/*.whl
RUN cd native_client/ctcdecode && make bindings RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
# << END Build and bind # << END Build and bind
# Allow Python printing utf-8 # Allow Python printing utf-8
ENV PYTHONIOENCODING UTF-8 ENV PYTHONIOENCODING UTF-8
# Build KenLM in /DeepSpeech/native_client/kenlm folder # Build KenLM in /DeepSpeech/native_client/kenlm folder
WORKDIR /DeepSpeech/native_client WORKDIR /DeepSpeech/native_client
RUN rm -rf kenlm \ RUN rm -rf kenlm && \
&& git clone --depth 1 https://github.com/kpu/kenlm && cd kenlm \ git clone https://github.com/kpu/kenlm && \
&& mkdir -p build \ cd kenlm && \
&& cd build \ git checkout 87e85e66c99ceff1fab2500a7c60c01da7315eec && \
&& cmake .. \ mkdir -p build && \
&& make -j 4 cd build && \
cmake .. && \
make -j $(nproc)
# Done # Done
WORKDIR /DeepSpeech WORKDIR /DeepSpeech

44
Dockerfile.train.tmpl Normal file
View File

@ -0,0 +1,44 @@
# Please refer to the TRAINING documentation, "Basic Dockerfile for training"

# Base on the upstream TensorFlow GPU image so we do not build TF ourselves.
FROM tensorflow/tensorflow:1.15.2-gpu-py3

# Placeholders substituted by the Makefile (see `make Dockerfile.train`).
ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#

# Base tooling needed to fetch and build the repo. Clean the apt lists in the
# same layer that created them, otherwise the cache stays baked into the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        apt-utils \
        bash-completion \
        build-essential \
        curl \
        git \
        git-lfs \
        libbz2-dev \
        locales \
        python3-venv \
        unzip \
        wget \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /
RUN git lfs install
RUN git clone $DEEPSPEECH_REPO

WORKDIR /DeepSpeech
RUN git checkout $DEEPSPEECH_SHA

# Setup a virtualenv otherwise we mess with the system and this is BAD.
RUN python3 -m venv venv/

# Putting the venv first on PATH makes every later pip3/python call use it.
ENV VIRTUAL_ENV=/DeepSpeech/venv
ENV PATH=$VIRTUAL_ENV/bin:$PATH

# Build CTC decoder first, to avoid clashes on incompatible versions upgrades
RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl

# Prepare deps (pinned for reproducible image builds)
RUN pip3 install --upgrade pip==20.0.2 wheel==0.34.2 setuptools==46.1.3

# Install DeepSpeech, no need for the decoder since we did it earlier
RUN DS_NODECODER=y pip3 install --upgrade --force-reinstall -e .

# Smoke-test: run a single tiny training pass on the LDC93S1 sample so a
# broken training setup fails the image build instead of failing at runtime.
RUN ./bin/run-ldc93s1.sh

8
Makefile Normal file
View File

@ -0,0 +1,8 @@
# Default DeepSpeech source location and revision; override on the command
# line, e.g. `make Dockerfile.train DEEPSPEECH_REPO=git://your/fork DEEPSPEECH_SHA=origin/your-branch`.
DEEPSPEECH_REPO ?= https://github.com/mozilla/DeepSpeech.git
DEEPSPEECH_SHA ?= origin/master

# Pattern rule: generate Dockerfile.build / Dockerfile.train from their .tmpl
# counterparts by substituting the #DEEPSPEECH_REPO# / #DEEPSPEECH_SHA#
# placeholders ($< = the .tmpl prerequisite, $@ = the generated Dockerfile).
Dockerfile%: Dockerfile%.tmpl
sed \
-e "s|#DEEPSPEECH_REPO#|$(DEEPSPEECH_REPO)|g" \
-e "s|#DEEPSPEECH_SHA#|$(DEEPSPEECH_SHA)|g" \
< $< > $@

View File

@ -24,7 +24,7 @@ Then use the ``generate_lm.py`` script to generate ``lm.binary`` and ``vocab-500
As input you can use a plain text (e.g. ``file.txt``) or gzipped (e.g. ``file.txt.gz``) text file with one sentence in each line. As input you can use a plain text (e.g. ``file.txt``) or gzipped (e.g. ``file.txt.gz``) text file with one sentence in each line.
If you are using a container created from the Dockerfile, you can use ``--kenlm_bins /DeepSpeech/native_client/kenlm/build/bin/``. If you are using a container created from ``Dockerfile.build``, you can use ``--kenlm_bins /DeepSpeech/native_client/kenlm/build/bin/``.
Else you have to build `KenLM <https://github.com/kpu/kenlm>`_ first and then pass the build directory to the script. Else you have to build `KenLM <https://github.com/kpu/kenlm>`_ first and then pass the build directory to the script.
.. code-block:: bash .. code-block:: bash
@ -54,4 +54,4 @@ The LibriSpeech LM training text used by our scorer is around 4GB uncompressed,
With a text corpus in hand, you can then re-use the ``generate_lm.py`` and ``generate_package.py`` scripts to create your own scorer that is compatible with DeepSpeech clients and language bindings. Before building the language model, you must first familiarize yourself with the `KenLM toolkit <https://kheafield.com/code/kenlm/>`_. Most of the options exposed by the ``generate_lm.py`` script are simply forwarded to KenLM options of the same name, so you must read the KenLM documentation in order to fully understand their behavior. With a text corpus in hand, you can then re-use the ``generate_lm.py`` and ``generate_package.py`` scripts to create your own scorer that is compatible with DeepSpeech clients and language bindings. Before building the language model, you must first familiarize yourself with the `KenLM toolkit <https://kheafield.com/code/kenlm/>`_. Most of the options exposed by the ``generate_lm.py`` script are simply forwarded to KenLM options of the same name, so you must read the KenLM documentation in order to fully understand their behavior.
After using ``generate_lm.py`` to create a KenLM language model binary file, you can use ``generate_package.py`` to create a scorer package as described in the previous section. Note that we have a :github:`lm_optimizer.py script <lm_optimizer.py>` which can be used to find good default values for alpha and beta. To use it, you must first After using ``generate_lm.py`` to create a KenLM language model binary file, you can use ``generate_package.py`` to create a scorer package as described in the previous section. Note that we have a :github:`lm_optimizer.py script <lm_optimizer.py>` which can be used to find good default values for alpha and beta. To use it, you must first
generate a package with any value set for default alpha and beta flags. For this step, it doesn't matter what values you use, as they'll be overridden by ``lm_optimizer.py``. Then, use ``lm_optimizer.py`` with this scorer file to find good alpha and beta values. Finally, use ``generate_package.py`` again, this time with the new values. generate a package with any value set for default alpha and beta flags. For this step, it doesn't matter what values you use, as they'll be overridden by ``lm_optimizer.py``. Then, use ``lm_optimizer.py`` with this scorer file to find good alpha and beta values. Finally, use ``generate_package.py`` again, this time with the new values.

View File

@ -76,6 +76,22 @@ It has been reported for some people failure at training:
Setting the ``TF_FORCE_GPU_ALLOW_GROWTH`` environment variable to ``true`` seems to help in such cases. This could also be due to an incorrect version of libcudnn. Double check your versions with the :ref:`TensorFlow 1.15 documentation <cuda-deps>`. Setting the ``TF_FORCE_GPU_ALLOW_GROWTH`` environment variable to ``true`` seems to help in such cases. This could also be due to an incorrect version of libcudnn. Double check your versions with the :ref:`TensorFlow 1.15 documentation <cuda-deps>`.
Basic Dockerfile for training
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
We provide ``Dockerfile.train`` to automatically set up a basic training environment in Docker. You need to generate the Dockerfile from the template using:
This should ensure that you'll re-use the upstream Python 3 TensorFlow GPU-enabled Docker image.
.. code-block:: bash
make Dockerfile.train
If you want to specify a different DeepSpeech repository / branch, you can pass ``DEEPSPEECH_REPO`` or ``DEEPSPEECH_SHA`` parameters:
.. code-block:: bash
make Dockerfile.train DEEPSPEECH_REPO=git://your/fork DEEPSPEECH_SHA=origin/your-branch
Common Voice training data Common Voice training data
^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -186,6 +186,22 @@ Installing bindings from source
If pre-built binaries aren't available for your system, you'll need to install them from scratch. Follow the :github:`native client build and installation instructions <native_client/README.rst>`. If pre-built binaries aren't available for your system, you'll need to install them from scratch. Follow the :github:`native client build and installation instructions <native_client/README.rst>`.
Dockerfile for building from source
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
We provide ``Dockerfile.build`` to automatically build ``libdeepspeech.so``, the C++ native client, Python bindings, and KenLM.
You need to generate the Dockerfile from the template using:
.. code-block:: bash
make Dockerfile.build
If you want to specify a different DeepSpeech repository / branch, you can pass ``DEEPSPEECH_REPO`` or ``DEEPSPEECH_SHA`` parameters:
.. code-block:: bash
make Dockerfile.build DEEPSPEECH_REPO=git://your/fork DEEPSPEECH_SHA=origin/your-branch
Third party bindings Third party bindings
^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^

View File

@ -31,11 +31,12 @@ then:
in: > in: >
apt-get -qq -y remove --purge ubuntu-advantage-tools && apt-get -qq -y remove --purge ubuntu-advantage-tools &&
${aptEc2Mirrors} && ${aptEc2Mirrors} &&
apt-get -qq update && apt-get -qq -y install git wget pkg-config apt-transport-https ca-certificates curl software-properties-common && apt-get -qq update && apt-get -qq -y install git wget pkg-config apt-transport-https ca-certificates curl software-properties-common make &&
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - && curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - &&
add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" && add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" &&
apt-get -qq update && apt-get -qq -y install docker-ce && mkdir -p /opt/deepspeech && apt-get -qq update && apt-get -qq -y install docker-ce && mkdir -p /opt/deepspeech &&
git clone --quiet ${event.head.repo.url} /opt/deepspeech && cd /opt/deepspeech && git checkout --quiet ${event.head.sha} && git clone --quiet ${event.head.repo.url} /opt/deepspeech && cd /opt/deepspeech && git checkout --quiet ${event.head.sha} &&
make ${dockerfile} DEEPSPEECH_REPO=${event.head.repo.url} DEEPSPEECH_SHA=${event.head.sha} &&
docker build --file ${dockerfile} . docker build --file ${dockerfile} .
artifacts: artifacts:

View File

@ -1,6 +1,6 @@
build: build:
template_file: docker-build-base.tyml template_file: docker-build-base.tyml
dockerfile: "Dockerfile" dockerfile: "Dockerfile.build"
metadata: metadata:
name: "DeepSpeech Docker build" name: "DeepSpeech Docker build"
description: "Testing |docker build| of DeepSpeech" description: "Testing |docker build| of DeepSpeech build image"

View File

@ -0,0 +1,6 @@
# Taskcluster CI job: build the DeepSpeech *training* Docker image
# (Dockerfile.train) via the shared docker-build-base template.
build:
template_file: docker-build-base.tyml
dockerfile: "Dockerfile.train"
metadata:
name: "DeepSpeech Docker train"
description: "Testing |docker build| of DeepSpeech train image"