diff --git a/.github/actions/cpp-tests/action.yml b/.github/actions/cpp-tests/action.yml
index ce32c593..5de92837 100644
--- a/.github/actions/cpp-tests/action.yml
+++ b/.github/actions/cpp-tests/action.yml
@@ -18,5 +18,12 @@ runs:
         if [ "${{ inputs.build-flavor }}" = "tflite" ]; then
           build="_tflite"
         fi
-        ./ci_scripts/cpp${build}-ds-tests${{ inputs.model-kind }}.sh ${{ inputs.bitrate }}
+
+        # Only "prod" has a dedicated script suffix; any other model kind
+        # selects the un-suffixed cpp${build}-ds-tests.sh script.
+        model_kind=""
+        if [ "${{ inputs.model-kind }}" = "prod" ]; then
+          model_kind="-prod"
+        fi
+        ./ci_scripts/cpp${build}-ds-tests${model_kind}.sh ${{ inputs.bitrate }}
       shell: bash
diff --git a/.github/workflows/macOS-amd64.yml b/.github/workflows/macOS-amd64.yml
index 58f22bf3..38f5e37a 100644
--- a/.github/workflows/macOS-amd64.yml
+++ b/.github/workflows/macOS-amd64.yml
@@ -38,6 +38,114 @@ jobs:
         with:
           name: ${{ github.job }}
           path: ${{ github.workspace }}/build-static/
+  # Build the ds_ctcdecoder wheel that the train-test-model job installs.
+  # NOTE(review): this job runs on ubuntu-20.04 but downloads the swig_macOS
+  # artifact -- confirm that ds-swig binary can actually execute on Linux.
+  build-ctc-decoder:
+    name: "Build CTC decoder Python package for testing"
+    needs: [ swig_macOS ]
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+      - uses: actions/setup-python@v2
+        with:
+          python-version: "3.6"
+      - run: |
+          python --version
+          pip --version
+      - uses: actions/download-artifact@v2
+        with:
+          name: "swig_macOS"
+          path: ${{ github.workspace }}/native_client/ds-swig/
+      - run: |
+          ls -hal ${{ github.workspace }}/native_client/ds-swig/bin
+          ln -s ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig
+          chmod +x ${{ github.workspace }}/native_client/ds-swig/bin/ds-swig ${{ github.workspace }}/native_client/ds-swig/bin/swig
+      - run: |
+          make -C native_client/ctcdecode/ \
+            NUM_PROCESSES=$(nproc) \
+            bindings
+      - uses: actions/upload-artifact@v2
+        with:
+          name: "ds_ctcdecoder-test.whl"
+          path: ${{ github.workspace }}/native_client/ctcdecode/dist/*.whl
+      - run: |
+          make -C native_client/ctcdecode clean-keep-third-party
+  # Train a test model for every build flavor / bitrate combination and
+  # upload the exported graph and the checkpoint archive as artifacts.
+  train-test-model:
+    name: "Train a test model"
+    needs: [ "build-ctc-decoder" ]
+    runs-on: ubuntu-20.04
+    strategy:
+      matrix:
+        build-flavor: ["tf", "tflite"]
+        bitrate: ["8k", "16k"]
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 1
+      - uses: actions/setup-python@v2
+        with:
+          python-version: "3.6"
+      - uses: actions/download-artifact@v2
+        with:
+          name: "ds_ctcdecoder-test.whl"
+      - run: |
+          python --version
+          pip --version
+      - run: |
+          pip install --upgrade pip==19.3.1 setuptools==45.0.0 wheel==0.33.6
+      - run: |
+          pip install ds_ctcdecoder-*-cp36-cp36m-manylinux1_x86_64.whl
+          DS_NODECODER=y pip install --upgrade .
+      - run: |
+          # Map the matrix bitrate label to its numeric value; fail fast otherwise.
+          case "${{ matrix.bitrate }}" in
+            8k)  bits=8000 ;;
+            16k) bits=16000 ;;
+            *)
+              echo "Unsupported bitrate: ${{ matrix.bitrate }}" >&2
+              exit 1
+              ;;
+          esac
+
+          # Easier to rename so that we can exercise the LDC93S1 importer code to
+          # generate the CSV file.
+          echo "Moving ${bits} to LDC93S1.wav"
+          mv data/smoke_test/LDC93S1_pcms16le_1_${bits}.wav data/smoke_test/LDC93S1.wav
+
+          ./bin/run-tc-ldc93s1_new.sh 249 ${bits}
+          if [ "${{ matrix.build-flavor }}" = "tflite" ]; then
+            ./bin/run-tc-ldc93s1_tflite.sh ${bits}
+          fi
+      - run: |
+          # No explicit `shell:` here, so bash runs without pipefail; enable it
+          # so a failed download is not hidden by the xz stage of the pipeline.
+          set -o pipefail
+          curl -vsSL https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/linux.amd64.convert_graphdef_memmapped_format.xz | xz -d > /tmp/convert_graphdef_memmapped_format
+          chmod +x /tmp/convert_graphdef_memmapped_format
+          /tmp/convert_graphdef_memmapped_format --in_graph=/tmp/train/output_graph.pb --out_graph=/tmp/train/output_graph.pbmm
+        if: matrix.build-flavor == 'tf'
+      - run: |
+          cp /tmp/train*/output_graph.* /tmp/
+      - run: |
+          # Enable pipefail so a tar failure (e.g. missing /tmp/ckpt) is not
+          # masked by xz successfully compressing an empty stream.
+          set -o pipefail
+          tar -cf - \
+            -C /tmp/ckpt/ . \
+            | xz -9 -T0 > /tmp/checkpoint.tar.xz
+      - uses: actions/upload-artifact@v2
+        with:
+          name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip"
+          path: /tmp/output_graph.*
+      - uses: actions/upload-artifact@v2
+        with:
+          name: "test-checkpoint.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip"
+          path: /tmp/checkpoint.tar.xz
   tensorflow_opt-macOS:
     name: "Check cache for TensorFlow"
     runs-on: ubuntu-20.04
@@ -216,16 +324,17 @@ jobs:
   test-cpp-macOS:
     name: "Test C++ binary on macOS"
     runs-on: macos-10.15
-    needs: build-lib_macOS
+    needs: [ build-lib_macOS, train-test-model ]
     strategy:
       matrix:
         build-flavor: ["tf", "tflite"]
-        models: ["-prod"]
+        models: ["test", "prod"]
         bitrate: ["8k", "16k"]
     env:
       TASKCLUSTER_TMP_DIR: ${{ github.workspace }}/tmp/
       DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pb
       DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.7.0-alpha.3/output_graph.pbmm
+      DEEPSPEECH_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb
       EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988"
     steps:
       - uses: actions/checkout@v2
@@ -238,6 +347,14 @@ jobs:
       - run: |
           cd ${{ env.TASKCLUSTER_TMP_DIR }}
           mkdir ds && cd ds && tar xf ../native_client.tar.xz
+      - uses: actions/download-artifact@v2
+        with:
+          name: "test-model.${{ matrix.build-flavor }}-${{ matrix.bitrate }}.zip"
+          path: ${{ env.TASKCLUSTER_TMP_DIR }}
+        if: matrix.models == 'test'
+      - run: |
+          ls -hal ${{ env.TASKCLUSTER_TMP_DIR }}/
+        if: matrix.models == 'test'
       - uses: ./.github/actions/cpp-tests
         with:
           build-flavor: ${{ matrix.build-flavor }}
diff --git a/ci_scripts/all-utils.sh b/ci_scripts/all-utils.sh
index 19f7f9ed..0be2bf2f 100755
--- a/ci_scripts/all-utils.sh
+++ b/ci_scripts/all-utils.sh
@@ -21,14 +21,20 @@ set_ldc_sample_filename()
   esac
 }
 
-download_data()
+# Download ${model_source} and ${model_source_mmap} with ${WGET}, gunzip them,
+# and store the results in ${TASKCLUSTER_TMP_DIR} under their base names.
+download_model_prod()
 {
   local _model_source_file=$(basename "${model_source}")
   ${WGET} "${model_source}" -O - | gunzip --force > "${TASKCLUSTER_TMP_DIR}/${_model_source_file}"
 
   local _model_source_mmap_file=$(basename "${model_source_mmap}")
   ${WGET} "${model_source_mmap}" -O - | gunzip --force > "${TASKCLUSTER_TMP_DIR}/${_model_source_mmap_file}"
+}
 
+# Copy the smoke-test fixtures from ${DS_DSDIR} into ${TASKCLUSTER_TMP_DIR}.
+download_data()
+{
   cp ${DS_DSDIR}/data/smoke_test/*.wav ${TASKCLUSTER_TMP_DIR}/
   cp ${DS_DSDIR}/data/smoke_test/pruned_lm.scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer
   cp ${DS_DSDIR}/data/smoke_test/pruned_lm.bytes.scorer ${TASKCLUSTER_TMP_DIR}/kenlm.bytes.scorer
@@ -38,9 +44,6 @@ download_data()
 
 download_material()
 {
-  target_dir=$1
-
-  # TODO: FIXME download_native_client_files "${target_dir}"
   download_data
 
   ls -hal ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} ${TASKCLUSTER_TMP_DIR}/LDC93S1*.wav
diff --git a/ci_scripts/cpp-ds-tests-prod.sh b/ci_scripts/cpp-ds-tests-prod.sh
index 138aa788..1a9e1ea3 100755
--- a/ci_scripts/cpp-ds-tests-prod.sh
+++ b/ci_scripts/cpp-ds-tests-prod.sh
@@ -15,7 +15,9 @@ model_name=$(basename "${model_source}")
 model_source_mmap=${DEEPSPEECH_PROD_MODEL_MMAP}
 model_name_mmap=$(basename "${model_source_mmap}")
 
-download_material "${TASKCLUSTER_TMP_DIR}/ds"
+download_model_prod
+
+download_material
 
 export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH
 
diff --git a/ci_scripts/cpp-ds-tests.sh b/ci_scripts/cpp-ds-tests.sh
index 597b2c73..5d010f0d 100755
--- a/ci_scripts/cpp-ds-tests.sh
+++ b/ci_scripts/cpp-ds-tests.sh
@@ -9,7 +9,7 @@ source $(dirname "$0")/asserts.sh
 bitrate=$1
 set_ldc_sample_filename "${bitrate}"
 
-download_material "${TASKCLUSTER_TMP_DIR}/ds"
+download_data
 
 export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH
 
diff --git a/ci_scripts/cpp_tflite-ds-tests-prod.sh b/ci_scripts/cpp_tflite-ds-tests-prod.sh
index aff737a1..9ecb1528 100755
--- a/ci_scripts/cpp_tflite-ds-tests-prod.sh
+++ b/ci_scripts/cpp_tflite-ds-tests-prod.sh
@@ -15,6 +15,8 @@ model_name_mmap=$(basename "${model_source}")
 model_source_mmap=${DEEPSPEECH_PROD_MODEL_MMAP//.pbmm/.tflite}
 export DATA_TMP_DIR=${TASKCLUSTER_TMP_DIR}
 
+download_model_prod
+
-download_material "${TASKCLUSTER_TMP_DIR}/ds"
+download_material
 
 export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH
diff --git a/native_client/ctcdecode/Makefile b/native_client/ctcdecode/Makefile
index 4cadaefb..2a565b69 100644
--- a/native_client/ctcdecode/Makefile
+++ b/native_client/ctcdecode/Makefile
@@ -3,6 +3,8 @@ include ../definitions.mk
 
 
 NUM_PROCESSES ?= 1
+# Allows disabling the ds-swig dependency; useful for the move to GitHub Actions
+DS_SWIG_DEP ?= ds-swig
 
 # ARM64 can't find the proper libm.so without this
 ifeq ($(TARGET),rpi3-armv8)
@@ -43,14 +45,14 @@ workspace_status.cc:
 
 # Enforce PATH here because swig calls from build_ext looses track of some
 # variables over several runs
-bindings: clean-keep-third-party workspace_status.cc ds-swig
+bindings: clean-keep-third-party workspace_status.cc $(DS_SWIG_DEP)
 	pip3 install --quiet $(PYTHON_PACKAGES) wheel==0.33.6 setuptools==45.0.0
 	DISTUTILS_USE_SDK=1 PATH=$(DS_SWIG_BIN_PATH):$(TOOLCHAIN):$$PATH SWIG_LIB="$(SWIG_LIB)" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python3 ./setup.py build_ext --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
 	find temp_build -type f -name "*.o" -delete
 	DISTUTILS_USE_SDK=1 AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python3 ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
 	rm -rf temp_build
 
-bindings-debug: clean-keep-third-party workspace_status.cc ds-swig
+bindings-debug: clean-keep-third-party workspace_status.cc $(DS_SWIG_DEP)
 	pip3 install --quiet $(PYTHON_PACKAGES) wheel==0.33.6 setuptools==45.0.0
 	DISTUTILS_USE_SDK=1 PATH=$(DS_SWIG_BIN_PATH):$(TOOLCHAIN):$$PATH SWIG_LIB="$(SWIG_LIB)" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS) -DDEBUG" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python3 ./setup.py build_ext --debug --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS) $(GENERATE_DEBUG_SYMS)