Add Linux training test jobs (basic and extra) to the build-and-test
workflow, together with the CI scripts that drive them. Also switch the
SDB+CSV smoke test to space-separated file lists and annotate
bytes_output_mode as bool.

diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
index 591bfb4c..e8c01bd8 100644
--- a/.github/workflows/build-and-test.yml
+++ b/.github/workflows/build-and-test.yml
@@ -607,6 +607,102 @@ jobs:
           bitrate: ${{ matrix.bitrate }}
           model-kind: ${{ matrix.models }}
         timeout-minutes: 5
+  training-basic-tests:
+    name: "Lin|Basic training tests"
+    runs-on: ubuntu-20.04
+    needs: [build-ctc-decoder-Linux]
+    strategy:
+      matrix:
+        bitrate: [8k, 16k]
+        # Quoted so the versions stay strings instead of YAML floats.
+        pyver: ["3.6", "3.7"]
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.pyver }}
+      - uses: actions/download-artifact@v2
+        with:
+          name: "coqui_stt_ctcdecoder-Linux-${{ matrix.pyver }}.whl"
+      - run: |
+          wget -O temp.zip https://github.com/coqui-ai/STT/releases/download/v0.9.3/convert_graphdef_memmapped_format.linux.amd64.zip
+          unzip temp.zip
+          rm temp.zip
+          chmod +x convert_graphdef_memmapped_format
+          mv convert_graphdef_memmapped_format /tmp
+      - run: |
+          mkdir -p ${CI_ARTIFACTS_DIR} || true
+      - name: Run basic training tests
+        run: |
+          python -m pip install coqui_stt_ctcdecoder-*.whl
+          ./ci_scripts/train-tests.sh ${{ matrix.bitrate }}
+      # Artifact names include the matrix values so that parallel matrix
+      # cells do not upload into (and overwrite) the same artifact.
+      - uses: actions/upload-artifact@v2
+        with:
+          name: ${{ github.job }}-${{ matrix.bitrate }}-py${{ matrix.pyver }}-output_graph.pb
+          path: ${{ github.workspace }}/artifacts/output_graph.pb
+      - uses: actions/upload-artifact@v2
+        with:
+          name: ${{ github.job }}-${{ matrix.bitrate }}-py${{ matrix.pyver }}-output_graph.pbmm
+          path: ${{ github.workspace }}/artifacts/output_graph.pbmm
+      - uses: actions/upload-artifact@v2
+        with:
+          name: ${{ github.job }}-${{ matrix.bitrate }}-py${{ matrix.pyver }}-output_graph.tflite
+          path: ${{ github.workspace }}/artifacts/output_graph.tflite
+      - uses: actions/upload-artifact@v2
+        with:
+          name: ${{ github.job }}-${{ matrix.bitrate }}-py${{ matrix.pyver }}-checkpoint.tar.xz
+          path: ${{ github.workspace }}/artifacts/checkpoint.tar.xz
+  training-extra-tests:
+    name: "Lin|Extra training tests"
+    runs-on: ubuntu-20.04
+    needs: [build-ctc-decoder-Linux]
+    strategy:
+      matrix:
+        bitrate: [8k, 16k]
+        # Quoted so the versions stay strings instead of YAML floats.
+        pyver: ["3.6", "3.7"]
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.pyver }}
+      - uses: actions/download-artifact@v2
+        with:
+          name: "coqui_stt_ctcdecoder-Linux-${{ matrix.pyver }}.whl"
+      - run: |
+          wget -O temp.zip https://github.com/coqui-ai/STT/releases/download/v0.9.3/convert_graphdef_memmapped_format.linux.amd64.zip
+          unzip temp.zip
+          rm temp.zip
+          chmod +x convert_graphdef_memmapped_format
+          mv convert_graphdef_memmapped_format /tmp
+      - run: |
+          mkdir -p ${CI_ARTIFACTS_DIR} || true
+      - run: |
+          sudo apt-get install -y --no-install-recommends libopus0
+      - name: Run extra training tests
+        run: |
+          python -m pip install coqui_stt_ctcdecoder-*.whl
+          ./ci_scripts/train-extra-tests.sh ${{ matrix.bitrate }}
+      # Artifact names include the matrix values so that parallel matrix
+      # cells do not upload into (and overwrite) the same artifact.
+      - uses: actions/upload-artifact@v2
+        with:
+          name: ${{ github.job }}-${{ matrix.bitrate }}-py${{ matrix.pyver }}-output_graph.pb
+          path: ${{ github.workspace }}/artifacts/output_graph.pb
+      - uses: actions/upload-artifact@v2
+        with:
+          name: ${{ github.job }}-${{ matrix.bitrate }}-py${{ matrix.pyver }}-output_graph.pbmm
+          path: ${{ github.workspace }}/artifacts/output_graph.pbmm
+      - uses: actions/upload-artifact@v2
+        with:
+          name: ${{ github.job }}-${{ matrix.bitrate }}-py${{ matrix.pyver }}-output_graph.tflite
+          path: ${{ github.workspace }}/artifacts/output_graph.tflite
+      - uses: actions/upload-artifact@v2
+        with:
+          name: ${{ github.job }}-${{ matrix.bitrate }}-py${{ matrix.pyver }}-checkpoint.tar.xz
+          path: ${{ github.workspace }}/artifacts/checkpoint.tar.xz
   # macOS jobs
   swig_macOS:
     name: "Mac|Build SWIG"
diff --git a/bin/run-ci-ldc93s1_new_sdb_csv.sh b/bin/run-ci-ldc93s1_new_sdb_csv.sh
index 36729501..ca8cd388 100755
--- a/bin/run-ci-ldc93s1_new_sdb_csv.sh
+++ b/bin/run-ci-ldc93s1_new_sdb_csv.sh
@@ -24,10 +24,10 @@ fi;
 export CUDA_VISIBLE_DEVICES=0
 
 python -u train.py --show_progressbar false --early_stop false \
-  --train_files ${ldc93s1_sdb},${ldc93s1_csv} --train_batch_size 1 \
+  --train_files ${ldc93s1_sdb} ${ldc93s1_csv} --train_batch_size 1 \
   --feature_cache '/tmp/ldc93s1_cache_sdb_csv' \
-  --dev_files ${ldc93s1_sdb},${ldc93s1_csv} --dev_batch_size 1 \
-  --test_files ${ldc93s1_sdb},${ldc93s1_csv} --test_batch_size 1 \
+  --dev_files ${ldc93s1_sdb} ${ldc93s1_csv} --dev_batch_size 1 \
+  --test_files ${ldc93s1_sdb} ${ldc93s1_csv} --test_batch_size 1 \
   --n_hidden 100 --epochs $epoch_count \
   --max_to_keep 1 --checkpoint_dir '/tmp/ckpt_sdb_csv' \
   --learning_rate 0.001 --dropout_rate 0.05 --export_dir '/tmp/train_sdb_csv' \
diff --git a/ci_scripts/train-extra-tests.sh b/ci_scripts/train-extra-tests.sh
new file mode 100755
index 00000000..1f76a0ed
--- /dev/null
+++ b/ci_scripts/train-extra-tests.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+# Usage: train-extra-tests.sh <8k|16k> [--pypi]
+set -xe
+
+source $(dirname "$0")/all-vars.sh
+source $(dirname "$0")/all-utils.sh
+
+bitrate=$1
+decoder_src=$2
+if [ "$decoder_src" != "--pypi" ]; then
+    # Use decoder package built in this CI group
+    export DS_NODECODER=1
+fi
+
+mkdir -p /tmp/train || true
+mkdir -p /tmp/train_tflite || true
+
+set -o pipefail
+python -m pip install --upgrade pip setuptools wheel | cat
+python -m pip install --upgrade . | cat
+set +o pipefail
+
+# Prepare correct arguments for training
+case "${bitrate}" in
+    8k)
+        sample_rate=8000
+        sample_name='LDC93S1_pcms16le_1_8000.wav'
+    ;;
+    16k)
+        sample_rate=16000
+        sample_name='LDC93S1_pcms16le_1_16000.wav'
+    ;;
+    *)
+        # Fail fast instead of continuing with empty sample_rate/sample_name.
+        echo "Unsupported bitrate '${bitrate}' (expected 8k or 16k)" >&2
+        exit 1
+    ;;
+esac
+
+# Easier to rename so that we can exercise the LDC93S1 importer code to
+# generate the CSV file.
+echo "Moving ${sample_name} to LDC93S1.wav"
+mv "data/smoke_test/${sample_name}" "data/smoke_test/LDC93S1.wav"
+
+# Testing single SDB source
+time ./bin/run-ci-ldc93s1_new_sdb.sh 220 "${sample_rate}"
+# Testing interleaved source (SDB+CSV combination) - run twice to test preprocessed features
+time ./bin/run-ci-ldc93s1_new_sdb_csv.sh 109 "${sample_rate}"
+time ./bin/run-ci-ldc93s1_new_sdb_csv.sh 1 "${sample_rate}"
+
+# Test --metrics_files training argument
+time ./bin/run-ci-ldc93s1_new_metrics.sh 2 "${sample_rate}"
+
+# Test training with bytes output mode
+time ./bin/run-ci-ldc93s1_new_bytes.sh 200 "${sample_rate}"
+time ./bin/run-ci-ldc93s1_new_bytes_tflite.sh "${sample_rate}"
+
+# NOTE(review): pipefail is off at this point, so a tar failure would be
+# masked by ${XZ}'s exit status. Confirm the runs above really write their
+# checkpoints to /tmp/ckpt.
+tar -cf - \
+    -C /tmp/ckpt/ . \
+    | ${XZ} > ${CI_ARTIFACTS_DIR}/checkpoint.tar.xz
+
+# Save exported model artifacts from bytes output mode training
+cp /tmp/train_bytes/output_graph.pb ${CI_ARTIFACTS_DIR}/output_graph.pb
+cp /tmp/train_bytes_tflite/output_graph.tflite ${CI_ARTIFACTS_DIR}/output_graph.tflite
+
+/tmp/convert_graphdef_memmapped_format --in_graph=/tmp/train_bytes/output_graph.pb --out_graph=/tmp/train_bytes/output_graph.pbmm
+cp /tmp/train_bytes/output_graph.pbmm ${CI_ARTIFACTS_DIR}
+
+# Test resuming from checkpoints created above
+# SDB, resuming from checkpoint
+time ./bin/run-ci-ldc93s1_checkpoint_sdb.sh
+
+# Bytes output mode, resuming from checkpoint
+time ./bin/run-ci-ldc93s1_checkpoint_bytes.sh
diff --git a/ci_scripts/train-tests.sh b/ci_scripts/train-tests.sh
new file mode 100755
index 00000000..9af8eb8e
--- /dev/null
+++ b/ci_scripts/train-tests.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+# Usage: train-tests.sh <8k|16k> [--pypi]
+set -xe
+
+source $(dirname "$0")/all-vars.sh
+source $(dirname "$0")/all-utils.sh
+
+bitrate=$1
+decoder_src=$2
+if [ "$decoder_src" != "--pypi" ]; then
+    # Use decoder package built in this CI group
+    export DS_NODECODER=1
+fi
+
+mkdir -p /tmp/train || true
+mkdir -p /tmp/train_tflite || true
+
+set -o pipefail
+python -m pip install --upgrade pip setuptools wheel | cat
+python -m pip install --upgrade . | cat
+set +o pipefail
+
+# Prepare correct arguments for training
+case "${bitrate}" in
+    8k)
+        sample_rate=8000
+        sample_name='LDC93S1_pcms16le_1_8000.wav'
+    ;;
+    16k)
+        sample_rate=16000
+        sample_name='LDC93S1_pcms16le_1_16000.wav'
+    ;;
+    *)
+        # Fail fast instead of continuing with empty sample_rate/sample_name.
+        echo "Unsupported bitrate '${bitrate}' (expected 8k or 16k)" >&2
+        exit 1
+    ;;
+esac
+
+# Easier to rename so that we can exercise the LDC93S1 importer code to
+# generate the CSV file.
+echo "Moving ${sample_name} to LDC93S1.wav"
+mv "data/smoke_test/${sample_name}" "data/smoke_test/LDC93S1.wav"
+
+# Run twice to test preprocessed features
+time ./bin/run-ci-ldc93s1_new.sh 249 "${sample_rate}"
+time ./bin/run-ci-ldc93s1_new.sh 1 "${sample_rate}"
+time ./bin/run-ci-ldc93s1_tflite.sh "${sample_rate}"
+
+tar -cf - \
+    -C /tmp/ckpt/ . \
+    | ${XZ} > ${CI_ARTIFACTS_DIR}/checkpoint.tar.xz
+
+cp /tmp/train/output_graph.pb ${CI_ARTIFACTS_DIR}
+cp /tmp/train_tflite/output_graph.tflite ${CI_ARTIFACTS_DIR}
+
+/tmp/convert_graphdef_memmapped_format --in_graph=/tmp/train/output_graph.pb --out_graph=/tmp/train/output_graph.pbmm
+cp /tmp/train/output_graph.pbmm ${CI_ARTIFACTS_DIR}
+
+time ./bin/run-ci-ldc93s1_checkpoint.sh
diff --git a/training/coqui_stt_training/util/config.py b/training/coqui_stt_training/util/config.py
index 683ba113..d2897497 100755
--- a/training/coqui_stt_training/util/config.py
+++ b/training/coqui_stt_training/util/config.py
@@ -466,7 +466,7 @@ class _SttConfig(Coqpit):
     )
 
     # Decoder
-    bytes_output_mode: float = field(
+    bytes_output_mode: bool = field(
         default=False,
         metadata=dict(
             help="enable Bytes Output Mode mode. When this is used the model outputs UTF-8 byte values directly rather than using an alphabet mapping. The --alphabet_config_path option will be ignored. See the training documentation for more details."