diff --git a/Dockerfile.build.tmpl b/Dockerfile.build.tmpl index c8ad3e74..f64d8e0b 100644 --- a/Dockerfile.build.tmpl +++ b/Dockerfile.build.tmpl @@ -114,14 +114,14 @@ RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \ WORKDIR / RUN git clone --recursive $MOZILLA_VOICE_STT_REPO -WORKDIR /DeepSpeech +WORKDIR /STT RUN git checkout $MOZILLA_VOICE_STT_SHA RUN git submodule sync tensorflow/ RUN git submodule update --init tensorflow/ # >> START Build and bind -WORKDIR /DeepSpeech/tensorflow +WORKDIR /STT/tensorflow # Fix for not found script https://github.com/tensorflow/tensorflow/issues/471 RUN ./configure @@ -132,7 +132,7 @@ RUN ./configure # passing LD_LIBRARY_PATH is required cause Bazel doesn't pickup it from environment -# Build DeepSpeech +# Build Mozilla Voice STT RUN bazel build \ --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \ --config=monolithic \ @@ -153,18 +153,18 @@ RUN bazel build \ --verbose_failures \ --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH} -# Copy built libs to /DeepSpeech/native_client -RUN cp bazel-bin/native_client/libmozilla_voice_stt.so /DeepSpeech/native_client/ +# Copy built libs to /STT/native_client +RUN cp bazel-bin/native_client/libmozilla_voice_stt.so /STT/native_client/ # Build client.cc and install Python client and decoder bindings -ENV TFDIR /DeepSpeech/tensorflow +ENV TFDIR /STT/tensorflow RUN nproc -WORKDIR /DeepSpeech/native_client +WORKDIR /STT/native_client RUN make NUM_PROCESSES=$(nproc) mozilla_voice_stt -WORKDIR /DeepSpeech +WORKDIR /STT RUN cd native_client/python && make NUM_PROCESSES=$(nproc) bindings RUN pip3 install --upgrade native_client/python/dist/*.whl @@ -176,8 +176,8 @@ RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl # Allow Python printing utf-8 ENV PYTHONIOENCODING UTF-8 -# Build KenLM in /DeepSpeech/native_client/kenlm folder -WORKDIR /DeepSpeech/native_client +# Build KenLM in /STT/native_client/kenlm folder +WORKDIR /STT/native_client RUN rm -rf kenlm && \ git clone https://github.com/kpu/kenlm && \ cd kenlm && \ @@ -188,4 +188,4 @@ RUN rm -rf kenlm && \ make -j $(nproc) # Done -WORKDIR /DeepSpeech +WORKDIR /STT diff --git a/Dockerfile.train.tmpl b/Dockerfile.train.tmpl index b289cf7a..790f35f4 100644 --- a/Dockerfile.train.tmpl +++ b/Dockerfile.train.tmpl @@ -31,9 +31,11 @@ RUN apt-get install -y --no-install-recommends libopus0 libsndfile1 RUN rm -rf /var/lib/apt/lists/* WORKDIR / +RUN echo git clone $MOZILLA_VOICE_STT_REPO RUN git clone $MOZILLA_VOICE_STT_REPO -WORKDIR /DeepSpeech +WORKDIR /STT +RUN echo git checkout $MOZILLA_VOICE_STT_SHA RUN git checkout $MOZILLA_VOICE_STT_SHA # Build CTC decoder first, to avoid clashes on incompatible versions upgrades @@ -43,7 +45,7 @@ RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl # Prepare deps RUN pip3 install --upgrade pip==20.0.2 wheel==0.34.2 setuptools==46.1.3 -# Install DeepSpeech +# Install Mozilla Voice STT # - No need for the decoder since we did it earlier # - There is already correct TensorFlow GPU installed on the base image, # we don't want to break that @@ -54,7 +56,7 @@ RUN python3 util/taskcluster.py --source tensorflow --branch r1.15 \ --artifact convert_graphdef_memmapped_format --target . # Build KenLM to generate new scorers -WORKDIR /DeepSpeech/native_client +WORKDIR /STT/native_client RUN rm -rf kenlm && \ git clone https://github.com/kpu/kenlm && \ cd kenlm && \ @@ -63,6 +65,6 @@ RUN rm -rf kenlm && \ cd build && \ cmake .. && \ make -j $(nproc) -WORKDIR /DeepSpeech +WORKDIR /STT RUN ./bin/run-ldc93s1.sh diff --git a/setup.py b/setup.py index b617fee1..72679fdf 100644 --- a/setup.py +++ b/setup.py @@ -98,7 +98,7 @@ def main(): setup( name='mozilla_voice_stt_training', version=version, - description='Training code for mozilla DeepSpeech', + description='Training code for Mozilla Voice STT', url='https://github.com/mozilla/STT', author='Mozilla', license='MPL-2.0',