diff --git a/.github/actions/get_cache_key/action.yml b/.github/actions/get_cache_key/action.yml index d0b53878..2c8266c7 100644 --- a/.github/actions/get_cache_key/action.yml +++ b/.github/actions/get_cache_key/action.yml @@ -16,6 +16,7 @@ runs: steps: - id: compute_cache_key run: | + set -xe JOB=${{ github.job }} SUBMODULE=$(echo $JOB | cut -d'-' -f1 | cut -d'_' -f1) FLAVOR=$(echo $JOB | cut -d'-' -f1 | cut -d'_' -f2) diff --git a/.github/actions/host-build/action.yml b/.github/actions/libstt-build/action.yml similarity index 80% rename from .github/actions/host-build/action.yml rename to .github/actions/libstt-build/action.yml index 34522ca2..df246a25 100644 --- a/.github/actions/host-build/action.yml +++ b/.github/actions/libstt-build/action.yml @@ -1,5 +1,5 @@ -name: "Run build lib" -description: "Run build of lib" +name: "Build libstt.so" +description: "Build libstt.so" inputs: arch: description: "Target arch for loading script (host/armv7/aarch64)" diff --git a/.github/actions/setup-tensorflow/action.yml b/.github/actions/setup-tensorflow/action.yml index ec86a45f..f0b234dd 100644 --- a/.github/actions/setup-tensorflow/action.yml +++ b/.github/actions/setup-tensorflow/action.yml @@ -1,7 +1,12 @@ name: "Setup TensorFlow" description: "Setup TensorFlow Build" +inputs: + flavor: + description: "Target flavor flag passed to the setup script (empty/--android-armv7/--android-arm64)" + required: false + default: "" runs: using: "composite" steps: - - run: ./ci_scripts/tf-setup.sh + - run: ./ci_scripts/tf-setup.sh ${{ inputs.flavor }} shell: bash diff --git a/.github/actions/upload-release-asset/action.yml b/.github/actions/upload-release-asset/action.yml index 93327473..9fc584f3 100644 --- a/.github/actions/upload-release-asset/action.yml +++ b/.github/actions/upload-release-asset/action.yml @@ -19,10 +19,6 @@ inputs: description: "Tag of release to check artifacts under" required: false default: "v0.10.0-alpha.7" - should-create-release: - description: "Whether this action should 
automatically create a release for the given tag if one doesn't already exist" - required: false - default: false runs: using: "composite" steps: @@ -46,15 +42,11 @@ runs: fi done - # If no asset name is specified, use filename - [ "$asset_name" ] || asset_name=$(basename "$filename") - AUTH="Authorization: token ${{ inputs.token }}" owner=$(echo "${{inputs.repo}}" | cut -f1 -d/) repo=$(echo "${{inputs.repo}}" | cut -f2 -d/) tag="${{ inputs.release-tag }}" - should_create="${{ inputs.should-create-release }}" GH_REPO="https://api.github.com/repos/${owner}/${repo}" @@ -78,30 +70,20 @@ runs: response=$(curl -sH "$AUTH" $GH_TAGS) eval $(echo "$response" | grep -m 1 "id.:" | grep -w id | tr : = | tr -cd '[[:alnum:]]=') [ "$id" ] || { - # If release does not exist, create it - if [[ "$should_create" == "true" ]]; then - echo "Tag does not have corresponding release, creating release for tag: $tag..." - response=$(curl -X POST -sH "$AUTH" -H "Content-Type: application/json" "${GH_REPO}/releases" -d '{"tag_name":"$tag","name":"Coqui STT $tag","prerelease":true}') - eval $(echo "$response" | grep -m 1 "id.:" | grep -w id | tr : = | tr -cd '[[:alnum:]]=') - [ "$id" ] || { - echo "Error: Could not create release for tag: $tag" - echo "$response" | awk 'length($0)<100' >&2 - exit 1 - } - else - echo "Error: Could not find release for tag: $tag" - echo "$response" | awk 'length($0)<100' >&2 - exit 1 - fi + echo "Error: Could not find release for tag: $tag" + echo "$response" | awk 'length($0)<100' >&2 + exit 1 } # Upload assets - for $file in $filenames; do + for file in $filenames; do if [ -z $asset_name ]; then - asset_name=$(basename $file) + asset=$(basename "$file") + else + asset=$asset_name fi - echo "Uploading asset with name: $asset_name from file: $file" - GH_ASSET="https://uploads.github.com/repos/${owner}/${repo}/releases/${id}/assets?name=${asset_name}" - curl -T $filename -X POST -H "${AUTH}" -H "Content-Type: application/octet-stream" $GH_ASSET + echo "Uploading 
asset with name: $asset from file: $file" + GH_ASSET="https://uploads.github.com/repos/${owner}/${repo}/releases/${id}/assets?name=${asset}" + curl -T "$file" -X POST -H "${AUTH}" -H "Content-Type: application/octet-stream" "$GH_ASSET" done shell: bash diff --git a/.github/actions/win-install-sox/action.yml b/.github/actions/win-install-sox/action.yml index 81ebdbd7..c232192b 100644 --- a/.github/actions/win-install-sox/action.yml +++ b/.github/actions/win-install-sox/action.yml @@ -5,7 +5,7 @@ runs: steps: - run: | set -ex - wget https://sourceforge.net/projects/sox/files/sox/14.4.2/sox-14.4.2-win32.zip/download -O sox-14.4.2-win32.zip + curl -sSfLO https://github.com/coqui-ai/STT/releases/download/v0.10.0-alpha.7/sox-14.4.2-win32.zip "C:/Program Files/7-Zip/7z.exe" x -o`pwd`/bin/ -tzip -aoa sox-14.4.2-win32.zip rm sox-*zip echo "`pwd`/bin/sox-14.4.2/" >> $GITHUB_PATH diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 09b2af3a..89946849 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -10,6 +10,7 @@ env: # Shared variables CI_TASK_DIR: ${{ github.workspace }} CI_ARTIFACTS_DIR: ${{ github.workspace }}/artifacts + EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-14-g4bdd3955115" # macOS specific MACOSX_DEPLOYMENT_TARGET: "10.10" @@ -22,6 +23,59 @@ defaults: run: shell: bash jobs: + create-release: + name: "Create release for tag" + runs-on: ubuntu-20.04 + outputs: + release-tag: ${{ steps.check-version.outputs.release-tag }} + is-prerelease: ${{ steps.check-version.outputs.is-prerelease }} + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Check VERSION file matches pushed Git tag and check if prerelease + id: check-version + run: | + set -xe + if [[ "${{ startsWith(github.ref, 'refs/tags/') }}" != "true" ]]; then + echo 
"Should never happen (this job only runs on tag pushes)" + exit 1 + fi + + VERSION="v$(cat VERSION)" + if [[ "${{ github.ref }}" != "refs/tags/${VERSION}" ]]; then + echo "Pushed tag does not match VERSION file. Aborting release." + exit 1 + fi + + # Tag for this release (version with leading v) + tag=$(echo "${{ github.ref }}" | sed -e 's|^refs/tags/||') + echo ::set-output name=release-tag::${tag} + + # Version without leading v + version=$(cat VERSION) + echo ::set-output name=version::${version} + + # Is this a prerelease or not? + pip install semver + cat <> $GITHUB_PATH @@ -1945,7 +2008,6 @@ jobs: STT_PROD_MODEL: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pb STT_PROD_MODEL_MMAP: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pbmm STT_TEST_MODEL: tmp/output_graph.pb - EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - name: Switch git-bash shell to MSYS2 shell by adding MSYS2 path to PATH front run: echo "$MSYS2_SHELL_PATH" >> $GITHUB_PATH @@ -1999,7 +2061,6 @@ jobs: STT_PROD_MODEL: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pb STT_PROD_MODEL_MMAP: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pbmm STT_TEST_MODEL: tmp/output_graph.pb - EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - name: Switch git-bash shell to MSYS2 shell by adding MSYS2 path to PATH front run: echo "$MSYS2_SHELL_PATH" >> $GITHUB_PATH @@ -2062,7 +2123,6 @@ jobs: STT_PROD_MODEL: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pb STT_PROD_MODEL_MMAP: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pbmm STT_TEST_MODEL: tmp/output_graph.pb - EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - name: Switch git-bash shell to MSYS2 shell by adding MSYS2 path to PATH front run: echo "$MSYS2_SHELL_PATH" >> $GITHUB_PATH @@ -2175,7 +2235,6 @@ jobs: STT_PROD_MODEL: 
https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pb STT_PROD_MODEL_MMAP: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pbmm STT_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb - EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - uses: actions/checkout@v2 with: @@ -2229,7 +2288,6 @@ jobs: STT_PROD_MODEL: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pb STT_PROD_MODEL_MMAP: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pbmm STT_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb - EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - uses: actions/checkout@v2 with: @@ -2285,7 +2343,6 @@ jobs: STT_PROD_MODEL: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pb STT_PROD_MODEL_MMAP: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pbmm STT_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb - EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - uses: actions/checkout@v2 with: @@ -2337,7 +2394,6 @@ jobs: STT_PROD_MODEL: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pb STT_PROD_MODEL_MMAP: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pbmm STT_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb - EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - uses: actions/checkout@v2 with: @@ -2391,7 +2447,6 @@ jobs: STT_PROD_MODEL: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pb STT_PROD_MODEL_MMAP: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pbmm STT_TEST_MODEL: tmp/output_graph.pb - EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - name: Switch git-bash shell to MSYS2 shell by adding MSYS2 path to PATH front run: echo "$MSYS2_SHELL_PATH" >> $GITHUB_PATH @@ -2455,7 +2510,6 @@ jobs: 
STT_PROD_MODEL: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pb STT_PROD_MODEL_MMAP: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pbmm STT_TEST_MODEL: tmp/output_graph.pb - EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" steps: - name: Switch git-bash shell to MSYS2 shell by adding MSYS2 path to PATH front run: echo "$MSYS2_SHELL_PATH" >> $GITHUB_PATH @@ -2624,7 +2678,7 @@ jobs: uses: ./.github/actions/multistrap with: arch: armv7 - - uses: ./.github/actions/host-build + - uses: ./.github/actions/libstt-build with: arch: armv7 - uses: ./.github/actions/package @@ -2661,7 +2715,7 @@ jobs: uses: ./.github/actions/multistrap with: arch: aarch64 - - uses: ./.github/actions/host-build + - uses: ./.github/actions/libstt-build with: arch: aarch64 - uses: ./.github/actions/package @@ -3004,7 +3058,6 @@ jobs: STT_PROD_MODEL: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pb STT_PROD_MODEL_MMAP: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pbmm STT_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb - EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" SYSTEM_RASPBIAN: ${{ github.workspace }}/chroot-${{ matrix.arch }} steps: - name: "Install QEMU" @@ -3066,7 +3119,6 @@ jobs: STT_PROD_MODEL: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pb STT_PROD_MODEL_MMAP: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pbmm STT_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb - EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" SYSTEM_RASPBIAN: ${{ github.workspace }}/chroot-${{ matrix.arch }} PIP_EXTRA_INDEX_URL: "https://www.piwheels.org/simple https://lissyx.github.io/deepspeech-python-wheels/" steps: @@ -3130,7 +3182,6 @@ jobs: STT_PROD_MODEL: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pb STT_PROD_MODEL_MMAP: 
https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pbmm STT_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb - EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" SYSTEM_RASPBIAN: ${{ github.workspace }}/chroot-${{ matrix.arch }} steps: - name: "Install QEMU" @@ -3194,7 +3245,6 @@ jobs: STT_PROD_MODEL: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pb STT_PROD_MODEL_MMAP: https://github.com/reuben/STT/releases/download/v0.7.0-alpha.3/output_graph.pbmm STT_TEST_MODEL: ${{ github.workspace }}/tmp/output_graph.pb - EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v2.3.0-6-g23ad988" SYSTEM_RASPBIAN: ${{ github.workspace }}/chroot-${{ matrix.arch }} DISPLAY: ":99.0" steps: @@ -3258,3 +3308,254 @@ jobs: run: | cat ${{ env.CI_TMP_DIR }}/xvfb.pid sudo kill -9 $(cat ${{ env.CI_TMP_DIR }}/xvfb.pid) + # Android jobs + tensorflow_opt-AndroidArmv7: + name: "AndroidArmv7|Check TensorFlow cache" + runs-on: ubuntu-20.04 + outputs: + status: ${{ steps.check_artifact_exists.outputs.status }} + cache_key: ${{ steps.get_cache_key.outputs.key }} + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 1 + - id: get_cache_key + uses: ./.github/actions/get_cache_key + with: + extras: "1" + - id: check_artifact_exists + uses: ./.github/actions/check_artifact_exists + with: + name: ${{ steps.get_cache_key.outputs.key }}.tar.xz + build-tensorflow-AndroidArmv7: + name: "AndroidArmv7|Build TensorFlow (opt)" + needs: tensorflow_opt-AndroidArmv7 + runs-on: ubuntu-20.04 + steps: + - run: true + if: needs.tensorflow_opt-AndroidArmv7.outputs.status == 'found' + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + submodules: 'recursive' + if: needs.tensorflow_opt-AndroidArmv7.outputs.status == 'missing' + - name: Use Java 8 instead of Java 11 + run: echo "JAVA_HOME=$JAVA_HOME_8_X64" >> $GITHUB_ENV + if: needs.tensorflow_opt-AndroidArmv7.outputs.status == 'missing' + - uses: ./.github/actions/setup-tensorflow + with: + flavor: 
"--android-armv7" + if: needs.tensorflow_opt-AndroidArmv7.outputs.status == 'missing' + - uses: ./.github/actions/build-tensorflow + with: + flavor: "--android-armv7" + if: needs.tensorflow_opt-AndroidArmv7.outputs.status == 'missing' + - uses: ./.github/actions/package-tensorflow + if: needs.tensorflow_opt-AndroidArmv7.outputs.status == 'missing' + - uses: ./.github/actions/upload-release-asset + with: + name: ${{ needs.tensorflow_opt-AndroidArmv7.outputs.cache_key }}.tar.xz + path: ${{ github.workspace }}/artifacts/home.tar.xz + if: needs.tensorflow_opt-AndroidArmv7.outputs.status == 'missing' + build-lib_AndroidArmv7: + name: "AndroidArmv7|Build libstt+client" + runs-on: ubuntu-20.04 + needs: [ build-tensorflow-AndroidArmv7, tensorflow_opt-AndroidArmv7 ] + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - uses: ./.github/actions/check_artifact_exists + with: + name: ${{ needs.tensorflow_opt-AndroidArmv7.outputs.cache_key }}.tar.xz + path: ${{ github.workspace }}/ + download: true + - run: | + tar --skip-old-files -xf ${{ needs.tensorflow_opt-AndroidArmv7.outputs.cache_key }}.tar.xz + rm ${{ needs.tensorflow_opt-AndroidArmv7.outputs.cache_key }}.tar.xz + - uses: ./.github/actions/libstt-build + with: + arch: android-armv7 + - run: ./ci_scripts/android-package.sh armeabi-v7a + - uses: actions/upload-artifact@v2 + with: + name: "native_client.tflite.android.armv7.tar.xz" + path: ${{ github.workspace }}/artifacts/native_client.tar.xz + tensorflow_opt-AndroidArm64: + name: "AndroidArm64|Check TensorFlow cache" + runs-on: ubuntu-20.04 + outputs: + status: ${{ steps.check_artifact_exists.outputs.status }} + cache_key: ${{ steps.get_cache_key.outputs.key }} + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 1 + - id: get_cache_key + uses: ./.github/actions/get_cache_key + with: + extras: "1" + - id: check_artifact_exists + uses: ./.github/actions/check_artifact_exists + with: + name: ${{ steps.get_cache_key.outputs.key }}.tar.xz + 
build-tensorflow-AndroidArm64: + name: "AndroidArm64|Build TensorFlow (opt)" + needs: tensorflow_opt-AndroidArm64 + runs-on: ubuntu-20.04 + steps: + - run: true + if: needs.tensorflow_opt-AndroidArm64.outputs.status == 'found' + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + submodules: 'recursive' + if: needs.tensorflow_opt-AndroidArm64.outputs.status == 'missing' + - name: Use Java 8 instead of Java 11 + run: echo "JAVA_HOME=$JAVA_HOME_8_X64" >> $GITHUB_ENV + if: needs.tensorflow_opt-AndroidArm64.outputs.status == 'missing' + - uses: ./.github/actions/setup-tensorflow + with: + flavor: "--android-arm64" + if: needs.tensorflow_opt-AndroidArm64.outputs.status == 'missing' + - uses: ./.github/actions/build-tensorflow + with: + flavor: "--android-arm64" + if: needs.tensorflow_opt-AndroidArm64.outputs.status == 'missing' + - uses: ./.github/actions/package-tensorflow + if: needs.tensorflow_opt-AndroidArm64.outputs.status == 'missing' + - uses: ./.github/actions/upload-release-asset + with: + name: ${{ needs.tensorflow_opt-AndroidArm64.outputs.cache_key }}.tar.xz + path: ${{ github.workspace }}/artifacts/home.tar.xz + if: needs.tensorflow_opt-AndroidArm64.outputs.status == 'missing' + build-lib_AndroidArm64: + name: "AndroidArm64|Build libstt+client" + runs-on: ubuntu-20.04 + needs: [ build-tensorflow-AndroidArm64, tensorflow_opt-AndroidArm64 ] + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - uses: ./.github/actions/check_artifact_exists + with: + name: ${{ needs.tensorflow_opt-AndroidArm64.outputs.cache_key }}.tar.xz + path: ${{ github.workspace }}/ + download: true + - run: | + tar --skip-old-files -xf ${{ needs.tensorflow_opt-AndroidArm64.outputs.cache_key }}.tar.xz + rm ${{ needs.tensorflow_opt-AndroidArm64.outputs.cache_key }}.tar.xz + - uses: ./.github/actions/libstt-build + with: + arch: android-arm64 + - run: ./ci_scripts/android-package.sh arm64-v8a + - uses: actions/upload-artifact@v2 + with: + name: 
"native_client.tflite.android.arm64.tar.xz" + path: ${{ github.workspace }}/artifacts/native_client.tar.xz + build-lib_Androidx86_64: + name: "Androidx86_64|Build libstt+client" + runs-on: ubuntu-20.04 + needs: [ build-tensorflow-AndroidArm64, tensorflow_opt-AndroidArm64 ] + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - uses: ./.github/actions/check_artifact_exists + with: + name: ${{ needs.tensorflow_opt-AndroidArm64.outputs.cache_key }}.tar.xz + path: ${{ github.workspace }}/ + download: true + - run: | + tar --skip-old-files -xf ${{ needs.tensorflow_opt-AndroidArm64.outputs.cache_key }}.tar.xz + rm ${{ needs.tensorflow_opt-AndroidArm64.outputs.cache_key }}.tar.xz + - uses: ./.github/actions/libstt-build + with: + arch: android-x86_64 + - run: ./ci_scripts/android-package.sh x86_64 + - uses: actions/upload-artifact@v2 + with: + name: "native_client.tflite.android.x86_64.tar.xz" + path: ${{ github.workspace }}/artifacts/native_client.tar.xz + build-android-apk-aar: + name: "Android|Build AAR+APK" + runs-on: ubuntu-20.04 + needs: [build-lib_AndroidArmv7, build-lib_AndroidArm64, build-lib_Androidx86_64] + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - uses: actions/download-artifact@v2 + with: + name: native_client.tflite.android.armv7.tar.xz + path: /tmp/nc + - run: | + mkdir -p native_client/java/libstt/libs/armeabi-v7a + cd /tmp/nc + tar xvf native_client.tar.xz + mv libstt.so ${CI_TASK_DIR}/native_client/java/libstt/libs/armeabi-v7a/libstt.so + rm -f * + - uses: actions/download-artifact@v2 + with: + name: native_client.tflite.android.arm64.tar.xz + path: /tmp/nc + - run: | + mkdir -p native_client/java/libstt/libs/arm64-v8a + cd /tmp/nc + tar xvf native_client.tar.xz + mv libstt.so ${CI_TASK_DIR}/native_client/java/libstt/libs/arm64-v8a/libstt.so + rm -f * + - uses: actions/download-artifact@v2 + with: + name: native_client.tflite.android.x86_64.tar.xz + path: /tmp/nc + - run: | + mkdir -p 
native_client/java/libstt/libs/x86_64 + cd /tmp/nc + tar xvf native_client.tar.xz + mv libstt.so ${CI_TASK_DIR}/native_client/java/libstt/libs/x86_64/libstt.so + rm -f * + - name: Use Java 8 instead of Java 11 + run: echo "JAVA_HOME=$JAVA_HOME_8_X64" >> $GITHUB_ENV + # This particular version of CMake confuses Gradle by not being semver. + # We're fine with 3.10.2 which is also installed. Keep an eye on the + # virtual environments though: + # https://github.com/actions/virtual-environments/blob/main/images/macos/macos-10.15-Readme.md#android + - name: Remove CMake 3.18.1-g262b901 + run: | + ${ANDROID_SDK_ROOT}/cmdline-tools/latest/bin/sdkmanager --uninstall 'cmake;3.18.1' + - run: | + make GRADLE="./gradlew " -C native_client/java + - run: | + make GRADLE="./gradlew " -C native_client/java maven-bundle + - uses: actions/upload-artifact@v2 + with: + name: "app.apk" + path: ${{ github.workspace }}/native_client/java/app/build/outputs/apk/release/app*.apk + - uses: actions/upload-artifact@v2 + with: + name: "libstt.aar" + path: ${{ github.workspace }}/native_client/java/libstt/build/outputs/aar/libstt*.aar + - uses: actions/upload-artifact@v2 + with: + name: "libstt.maven.zip" + path: ${{ github.workspace }}/native_client/java/libstt/build/libstt-*.maven.zip + publish-android-aar: + name: "Android|Publish AAR" + runs-on: ubuntu-20.04 + needs: [create-release, build-android-apk-aar] + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - uses: actions/download-artifact@v2 + with: + name: "libstt.aar" + path: ${{ github.workspace }}/ + - run: ls -lh + - uses: ./.github/actions/upload-release-asset + with: + name: '' # use filename + path: "*.aar" + release-tag: ${{ needs.create-release.outputs.release-tag }} diff --git a/Dockerfile.train b/Dockerfile.train index 3fbf3dcb..dba45e6e 100644 --- a/Dockerfile.train +++ b/Dockerfile.train @@ -2,7 +2,7 @@ # You can train "acoustic models" 
with audio + Tensorflow, and # you can create "scorers" with text + KenLM. -FROM ubuntu:20.04 AS kenlm-build +FROM nvcr.io/nvidia/tensorflow:20.06-tf1-py3 AS kenlm-build ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md new file mode 100644 index 00000000..2dc02cdf --- /dev/null +++ b/RELEASE_NOTES.md @@ -0,0 +1 @@ +Test automatic release notes. diff --git a/bin/run-ldc93s1.sh b/bin/run-ldc93s1.sh index 8fe87e87..2bd80c59 100755 --- a/bin/run-ldc93s1.sh +++ b/bin/run-ldc93s1.sh @@ -20,7 +20,8 @@ fi # and when trying to run on multiple devices (like GPUs), this will break export CUDA_VISIBLE_DEVICES=0 -python -u train.py --alphabet_config_path "data/alphabet.txt" \ +python -m coqui_stt_training.train \ + --alphabet_config_path "data/alphabet.txt" \ --show_progressbar false \ --train_files data/ldc93s1/ldc93s1.csv \ --test_files data/ldc93s1/ldc93s1.csv \ diff --git a/ci_scripts/android-arm64-build.sh b/ci_scripts/android-arm64-build.sh new file mode 100755 index 00000000..f7f7d62c --- /dev/null +++ b/ci_scripts/android-arm64-build.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +set -xe + +source $(dirname "$0")/all-vars.sh +source $(dirname "$0")/all-utils.sh +source $(dirname "$0")/build-utils.sh + +source $(dirname "$0")/tf-vars.sh + +BAZEL_TARGETS=" +//native_client:libstt.so +//native_client:generate_scorer_package +" + +BAZEL_BUILD_FLAGS="${BAZEL_ANDROID_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS}" +BAZEL_ENV_FLAGS="TF_NEED_CUDA=0" +SYSTEM_TARGET= +SYSTEM_RASPBIAN= + +do_bazel_build + +do_stt_ndk_build "arm64-v8a" diff --git a/ci_scripts/android-armv7-build.sh b/ci_scripts/android-armv7-build.sh new file mode 100755 index 00000000..e67e8173 --- /dev/null +++ b/ci_scripts/android-armv7-build.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +set -xe + +source $(dirname "$0")/all-vars.sh +source $(dirname "$0")/all-utils.sh +source $(dirname "$0")/build-utils.sh + +source $(dirname "$0")/tf-vars.sh + +BAZEL_TARGETS=" 
+//native_client:libstt.so +//native_client:generate_scorer_package +" + +BAZEL_BUILD_FLAGS="${BAZEL_ANDROID_ARM_FLAGS} ${BAZEL_EXTRA_FLAGS}" +BAZEL_ENV_FLAGS="TF_NEED_CUDA=0" +SYSTEM_TARGET= +SYSTEM_RASPBIAN= + +do_bazel_build + +do_stt_ndk_build "armeabi-v7a" diff --git a/ci_scripts/android-package.sh b/ci_scripts/android-package.sh new file mode 100755 index 00000000..1c1ffdf6 --- /dev/null +++ b/ci_scripts/android-package.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +set -xe + +source $(dirname "$0")/all-vars.sh +source $(dirname "$0")/package-utils.sh + +mkdir -p ${CI_ARTIFACTS_DIR} || true + +cp ${DS_DSDIR}/tensorflow/bazel*.log ${CI_ARTIFACTS_DIR}/ + +arm_flavor=$1 + +package_native_client_ndk "native_client.tar.xz" "${arm_flavor}" diff --git a/ci_scripts/android-x86_64-build.sh b/ci_scripts/android-x86_64-build.sh new file mode 100755 index 00000000..9df15ca5 --- /dev/null +++ b/ci_scripts/android-x86_64-build.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +set -xe + +source $(dirname "$0")/all-vars.sh +source $(dirname "$0")/all-utils.sh +source $(dirname "$0")/build-utils.sh + +source $(dirname "$0")/tf-vars.sh + +BAZEL_TARGETS=" +//native_client:libstt.so +//native_client:generate_scorer_package +" + +BAZEL_BUILD_FLAGS="${BAZEL_ANDROID_X86_64_FLAGS} ${BAZEL_EXTRA_FLAGS}" +BAZEL_ENV_FLAGS="TF_NEED_CUDA=0" +SYSTEM_TARGET= +SYSTEM_RASPBIAN= + +do_bazel_build + +do_stt_ndk_build "x86_64" diff --git a/ci_scripts/build-utils.sh b/ci_scripts/build-utils.sh index 77106b3a..6190f3e1 100755 --- a/ci_scripts/build-utils.sh +++ b/ci_scripts/build-utils.sh @@ -37,3 +37,18 @@ do_stt_binary_build() EXTRA_LIBS="${EXTRA_LOCAL_LIBS}" \ stt${PLATFORM_EXE_SUFFIX} } + +do_stt_ndk_build() +{ + arch_abi=$1 + + cd ${DS_DSDIR}/native_client/ + + ${ANDROID_NDK_HOME}/ndk-build \ + APP_PLATFORM=android-21 \ + APP_BUILD_SCRIPT=$(pwd)/Android.mk \ + NDK_PROJECT_PATH=$(pwd) \ + APP_STL=c++_shared \ + TFDIR=${DS_TFDIR} \ + TARGET_ARCH_ABI=${arch_abi} +} diff --git a/ci_scripts/notebook-tests.sh 
b/ci_scripts/notebook-tests.sh new file mode 100755 index 00000000..3872b14c --- /dev/null +++ b/ci_scripts/notebook-tests.sh @@ -0,0 +1,14 @@ +#!/bin/bash +set -xe + +source "$(dirname "$0")/all-vars.sh" +source "$(dirname "$0")/all-utils.sh" + +set -o pipefail +pip install --upgrade pip setuptools wheel | cat +pip install --upgrade . | cat +set +o pipefail + +for python_notebook in ./notebooks/*.ipynb; do + time jupyter nbconvert --to notebook --execute "$python_notebook" +done diff --git a/ci_scripts/package-utils.sh b/ci_scripts/package-utils.sh index 088b28d1..66747312 100755 --- a/ci_scripts/package-utils.sh +++ b/ci_scripts/package-utils.sh @@ -30,6 +30,10 @@ package_native_client() win_lib="$win_lib -C ${tensorflow_dir}/bazel-bin/native_client/ libkenlm.so.if.lib" fi; + if [ -f "${tensorflow_dir}/bazel-bin/native_client/libtflitedelegates.so.if.lib" ]; then + win_lib="$win_lib -C ${tensorflow_dir}/bazel-bin/native_client/ libtflitedelegates.so.if.lib" + fi; + if [ -f "${tensorflow_dir}/bazel-bin/tensorflow/lite/libtensorflowlite.so.if.lib" ]; then win_lib="$win_lib -C ${tensorflow_dir}/bazel-bin/tensorflow/lite/ libtensorflowlite.so.if.lib" fi; @@ -43,6 +47,7 @@ package_native_client() --transform='flags=r;s|README.coqui|KenLM_License_Info.txt|' \ -C ${tensorflow_dir}/bazel-bin/native_client/ libstt.so \ -C ${tensorflow_dir}/bazel-bin/native_client/ libkenlm.so \ + -C ${tensorflow_dir}/bazel-bin/native_client/ libtflitedelegates.so \ -C ${tensorflow_dir}/bazel-bin/tensorflow/lite/ libtensorflowlite.so \ ${win_lib} \ ${libsox_lib} \ @@ -80,6 +85,9 @@ package_native_client_ndk() ${TAR} --verbose -cf - \ -C ${stt_dir}/native_client/libs/${arch_abi}/ stt \ -C ${stt_dir}/native_client/libs/${arch_abi}/ libstt.so \ + -C ${stt_dir}/native_client/libs/${arch_abi}/ libkenlm.so \ + -C ${stt_dir}/native_client/libs/${arch_abi}/ libtflitedelegates.so \ + -C ${stt_dir}/native_client/libs/${arch_abi}/ libtensorflowlite.so \ -C ${tensorflow_dir}/bazel-bin/native_client/ 
generate_scorer_package \ -C ${stt_dir}/native_client/libs/${arch_abi}/ libc++_shared.so \ -C ${stt_dir}/native_client/ coqui-stt.h \ @@ -114,6 +122,7 @@ package_libstt_as_zip() ${ZIP} -r9 --junk-paths "${artifacts_dir}/${artifact_name}" \ ${tensorflow_dir}/bazel-bin/native_client/libstt.so \ ${tensorflow_dir}/bazel-bin/native_client/libkenlm.so \ + ${tensorflow_dir}/bazel-bin/native_client/libtflitedelegates.so \ ${libsox_lib} \ ${tensorflow_dir}/bazel-bin/tensorflow/lite/libtensorflowlite.so } diff --git a/ci_scripts/tf-vars.sh b/ci_scripts/tf-vars.sh index 191941bc..14022e91 100755 --- a/ci_scripts/tf-vars.sh +++ b/ci_scripts/tf-vars.sh @@ -151,12 +151,13 @@ export BAZEL_OUTPUT_USER_ROOT NVCC_COMPUTE="3.5" -BAZEL_ARM_FLAGS="--config=rpi3 --config=rpi3_opt --copt=-DTFLITE_WITH_RUY_GEMV" -BAZEL_ARM64_FLAGS="--config=rpi3-armv8 --config=rpi3-armv8_opt --copt=-DTFLITE_WITH_RUY_GEMV" -BAZEL_ANDROID_ARM_FLAGS="--config=android --config=android_arm --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 --copt=-DTFLITE_WITH_RUY_GEMV" -BAZEL_ANDROID_ARM64_FLAGS="--config=android --config=android_arm64 --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 --copt=-DTFLITE_WITH_RUY_GEMV" -BAZEL_IOS_ARM64_FLAGS="--config=ios_arm64 --define=runtime=tflite --copt=-DTFLITE_WITH_RUY_GEMV" -BAZEL_IOS_X86_64_FLAGS="--config=ios_x86_64 --define=runtime=tflite --copt=-DTFLITE_WITH_RUY_GEMV" +BAZEL_ARM_FLAGS="--config=rpi3_opt" +BAZEL_ARM64_FLAGS="--config=rpi3-armv8_opt" +BAZEL_ANDROID_ARM_FLAGS="--config=android_arm" +BAZEL_ANDROID_ARM64_FLAGS="--config=android_arm64" +BAZEL_ANDROID_X86_64_FLAGS="--config=android_x86_64" +BAZEL_IOS_ARM64_FLAGS="--config=ios_arm64" +BAZEL_IOS_X86_64_FLAGS="--config=ios_x86_64" if [ "${OS}" != "${CI_MSYS_VERSION}" ]; then BAZEL_EXTRA_FLAGS="--config=noaws --config=nogcp --config=nohdfs --config=nonccl" diff --git a/doc/BUILDING.rst b/doc/BUILDING.rst index ff4ed381..87281a6d 100644 --- 
a/doc/BUILDING.rst +++ b/doc/BUILDING.rst @@ -76,7 +76,7 @@ You can now use Bazel to build the main 🐸STT library, ``libstt.so``. Add ``-- .. code-block:: - bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-fvisibility=hidden //native_client:libstt.so + bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" -c opt --copt="-D_GLIBCXX_USE_CXX11_ABI=0" //native_client:libstt.so The generated binaries will be saved to ``bazel-bin/native_client/``. @@ -90,7 +90,7 @@ Using the example from above you can build the library and that binary at the sa .. code-block:: - bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-fvisibility=hidden //native_client:libstt.so //native_client:generate_scorer_package + bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" -c opt --copt="-D_GLIBCXX_USE_CXX11_ABI=0" //native_client:libstt.so //native_client:generate_scorer_package The generated binaries will be saved to ``bazel-bin/native_client/``. @@ -126,7 +126,7 @@ Included are a set of generated Python bindings. After following the above build make bindings pip install dist/stt-* -The API mirrors the C++ API and is demonstrated in `client.py `_. Refer to `coqui-stt.h `_ for documentation. +`Reference documentation `_ is available for the Python bindings, as well as examples in the `STT-examples repository `_ and the `source code for the CLI tool installed alongside the Python bindings `_. Install NodeJS / ElectronJS bindings ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -186,22 +186,22 @@ Cross-building RPi3 ARMv7 and LePotato ARM64 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -We do support cross-compilation. 
Please refer to our ``coqui-ai/tensorflow`` fork, where we define the following ``--config`` flags: +We support cross-compilation from Linux hosts. The following ``--config`` flags can be specified when building with bazel: -* ``--config=rpi3`` and ``--config=rpi3_opt`` for Raspbian / ARMv7 -* ``--config=rpi3-armv8`` and ``--config=rpi3-armv8_opt`` for ARMBian / ARM64 +* ``--config=rpi3_opt`` for Raspbian / ARMv7 +* ``--config=rpi3-armv8_opt`` for ARMBian / ARM64 So your command line for ``RPi3`` and ``ARMv7`` should look like: .. code-block:: - bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=rpi3 --config=rpi3_opt -c opt --copt=-O3 --copt=-fvisibility=hidden //native_client:libstt.so + bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" -c opt --config=rpi3_opt //native_client:libstt.so And your command line for ``LePotato`` and ``ARM64`` should look like: .. code-block:: - bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=rpi3-armv8 --config=rpi3-armv8_opt -c opt --copt=-O3 --copt=-fvisibility=hidden //native_client:libstt.so + bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" -c opt --config=rpi3-armv8_opt //native_client:libstt.so While we test only on RPi3 Raspbian Buster and LePotato ARMBian Buster, anything compatible with ``armv7-a cortex-a53`` or ``armv8-a cortex-a53`` should be fine. @@ -213,63 +213,40 @@ The path of the system tree can be overridden from the default values defined in cd ../STT/native_client make TARGET= stt -Android devices support ------------------------ - -We have support for Android relying on TensorFlow Lite, with Java and JNI bindinds. For more details on how to experiment with those, please refer to the section below. 
- -Please refer to TensorFlow documentation on how to setup the environment to build for Android (SDK and NDK required). - -Using the library from Android project -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Due to the discontinuation of Bintray JCenter we do not have pre-built Android packages published for now. We are working to move to Maven Central and will update this section when it's available. - -.. We provide uptodate and tested ``libstt`` usable as an ``AAR`` package, - for Android versions starting with 7.0 to 11.0. The package is published on - `JCenter `_, - and the ``JCenter`` repository should be available by default in any Android - project. Please make sure your project is setup to pull from this repository. - You can then include the library by just adding this line to your - ``gradle.build``, adjusting ``VERSION`` to the version you need: - - .. code-block:: - - implementation 'stt.coqui.ai:libstt:VERSION@aar' - Building ``libstt.so`` for Android -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +---------------------------------- -You can build the ``libstt.so`` using (ARMv7): +Prerequisites +^^^^^^^^^^^^^ + +Beyond the general prerequisites listed above, you'll also need the Android-specific dependencies for TensorFlow, namely you'll need to install the `Android SDK `_ and the `Android NDK version r18b `_. After that's done, export the environment variables ``ANDROID_SDK_HOME`` and ``ANDROID_NDK_HOME`` to the corresponding folders where the SDK and NDK were installed. Finally, configure the TensorFlow build and make sure you answer yes when the script asks if you want to set-up an Android build. + +Then, you can build the ``libstt.so`` using (ARMv7): .. 
code-block:: - bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=android --config=android_arm --define=runtime=tflite --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 //native_client:libstt.so + bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=android_arm --action_env ANDROID_NDK_API_LEVEL=21 //native_client:libstt.so Or (ARM64): .. code-block:: - bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=android --config=android_arm64 --define=runtime=tflite --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 //native_client:libstt.so + bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=android_arm64 --action_env ANDROID_NDK_API_LEVEL=21 //native_client:libstt.so Building ``libstt.aar`` ^^^^^^^^^^^^^^^^^^^^^^^ -In the unlikely event you have to rebuild the JNI bindings, source code is -available under the ``libstt`` subdirectory. Building depends on shared -object: please ensure to place ``libstt.so`` into the -``libstt/libs/{arm64-v8a,armeabi-v7a,x86_64}/`` matching subdirectories. +In order to build the JNI bindings, source code is available under the ``native_client/java/libstt`` directory. Building the AAR package requires having previously built ``libstt.so`` for all desired architectures and placed the corresponding binaries into the ``native_client/java/libstt/libs/{arm64-v8a,armeabi-v7a,x86_64}/`` subdirectories. If you don't want to build the AAR package for all of ARM64, ARMv7 and x86_64, you can edit the ``native_client/java/libstt/gradle.properties`` file to remove unneeded architectures. -Building the bindings is managed by ``gradle`` and should be limited to issuing -``./gradlew libstt:build``, producing an ``AAR`` package in -``./libstt/build/outputs/aar/``. 
+Building the bindings is managed by ``gradle`` and can be done by calling ``./gradlew libstt:build`` inside the ``native_client/java`` folder, producing an ``AAR`` package in +``native_client/java/libstt/build/outputs/aar/``. Please note that you might have to copy the file to a local Maven repository and adapt file naming (when missing, the error message should states what filename it expects and where). -Building C++ ``stt`` binary -^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Building C++ ``stt`` binary for Android +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Building the ``stt`` binary will happen through ``ndk-build`` (ARMv7): diff --git a/doc/C-Examples.rst b/doc/C-Examples.rst index ae16b7e7..fd24b057 100644 --- a/doc/C-Examples.rst +++ b/doc/C-Examples.rst @@ -13,8 +13,8 @@ Creating a model instance and loading model :start-after: sphinx-doc: c_ref_model_start :end-before: sphinx-doc: c_ref_model_stop -Deploying trained model ------------------------ +Transcribing audio with the loaded model +---------------------------------------- .. literalinclude:: ../native_client/client.cc :language: c diff --git a/doc/DEPLOYMENT.rst b/doc/DEPLOYMENT.rst index acf36f19..dab8be1c 100644 --- a/doc/DEPLOYMENT.rst +++ b/doc/DEPLOYMENT.rst @@ -14,6 +14,7 @@ You can deploy 🐸STT models either via a command-line client or a language bin * :ref:`The Python package + language binding ` * :ref:`The Node.JS package + language binding ` +* :ref:`The Android libstt AAR package ` * :ref:`The command-line client ` * :ref:`The native C API ` @@ -133,6 +134,31 @@ See the `release notes `_ to find whic See the :ref:`TypeScript client ` for an example of how to use the bindings programatically. +.. _android-usage: + +Using the Android AAR libstt package +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A pre-built ``libstt`` Android AAR package can be downloaded from GitHub Releases, for Android versions 7.0+. 
In order to use it in your Android application, first modify your app's ``build.gradle`` file to add a local dir as a repository. In the ``repository`` section, add the following definition: + +.. code-block:: groovy + + repositories { + flatDir { + dirs 'libs' + } + } + +Then, create a libs directory inside your app's folder, and place the libstt AAR file there. Finally, add the following dependency declaration in your app's ``build.gradle`` file: + +.. code-block:: groovy + + dependencies { + implementation fileTree(dir: 'libs', include: ['*.aar']) + } + +This will link all .aar files in the ``libs`` directory you just created, including libstt. + .. _cli-usage: Using the command-line client diff --git a/doc/DotNet-Examples.rst b/doc/DotNet-Examples.rst index 7d4e14c9..ef422900 100644 --- a/doc/DotNet-Examples.rst +++ b/doc/DotNet-Examples.rst @@ -13,8 +13,8 @@ Creating a model instance and loading model :start-after: sphinx-doc: csharp_ref_model_start :end-before: sphinx-doc: csharp_ref_model_stop -Deploying trained model ------------------------ +Transcribing audio with the loaded model +---------------------------------------- .. literalinclude:: ../native_client/dotnet/STTConsole/Program.cs :language: csharp diff --git a/doc/Java-Examples.rst b/doc/Java-Examples.rst index 5d90d9eb..64eb0bd2 100644 --- a/doc/Java-Examples.rst +++ b/doc/Java-Examples.rst @@ -13,8 +13,8 @@ Creating a model instance and loading model :start-after: sphinx-doc: java_ref_model_start :end-before: sphinx-doc: java_ref_model_stop -Deploying trained model ------------------------ +Transcribing audio with the loaded model +---------------------------------------- .. 
literalinclude:: ../native_client/java/app/src/main/java/ai/coqui/sttexampleapp/STTActivity.java :language: java diff --git a/doc/NodeJS-Examples.rst b/doc/NodeJS-Examples.rst index 211b6691..830c3486 100644 --- a/doc/NodeJS-Examples.rst +++ b/doc/NodeJS-Examples.rst @@ -15,8 +15,8 @@ Creating a model instance and loading model :start-after: sphinx-doc: js_ref_model_start :end-before: sphinx-doc: js_ref_model_stop -Deploying trained model ------------------------ +Transcribing audio with the loaded model +---------------------------------------- .. literalinclude:: ../native_client/javascript/client.ts :language: javascript diff --git a/doc/Python-API.rst b/doc/Python-API.rst index 9aec57f0..ea87a0f5 100644 --- a/doc/Python-API.rst +++ b/doc/Python-API.rst @@ -1,3 +1,5 @@ +.. _python-api: + Python ====== diff --git a/doc/Python-Examples.rst b/doc/Python-Examples.rst index fe871a46..3e3e60c8 100644 --- a/doc/Python-Examples.rst +++ b/doc/Python-Examples.rst @@ -15,8 +15,8 @@ Creating a model instance and loading model :start-after: sphinx-doc: python_ref_model_start :end-before: sphinx-doc: python_ref_model_stop -Deploying trained model ------------------------ +Transcribing audio with the loaded model +---------------------------------------- .. literalinclude:: ../native_client/python/client.py :language: python diff --git a/doc/index.rst b/doc/index.rst index 1707d2ac..01dbfe87 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -26,7 +26,7 @@ Quickstart: Deployment ^^^^^^^^^^^^^^^^^^^^^^ -The fastest way to deploy a pre-trained 🐸STT model is with `pip` with Python 3.5 or higher (*Note - only Linux supported at this time. We are working to get our normally supported packages back up and running.*): +The fastest way to deploy a pre-trained 🐸STT model is with `pip` with Python 3.6, 3.7, 3.8 or 3.9: .. 
code-block:: bash @@ -39,7 +39,7 @@ The fastest way to deploy a pre-trained 🐸STT model is with `pip` with Python $ python -m pip install stt # Download 🐸's pre-trained English models - $ curl -LO https://github.com/coqui-ai/STT/releases/download/v0.9.3/coqui-stt-0.9.3-models.pbmm + $ curl -LO https://github.com/coqui-ai/STT/releases/download/v0.9.3/coqui-stt-0.9.3-models.tflite $ curl -LO https://github.com/coqui-ai/STT/releases/download/v0.9.3/coqui-stt-0.9.3-models.scorer # Download some example audio files @@ -47,7 +47,7 @@ The fastest way to deploy a pre-trained 🐸STT model is with `pip` with Python $ tar -xvf audio-0.9.3.tar.gz # Transcribe an audio file - $ stt --model coqui-stt-0.9.3-models.pbmm --scorer coqui-stt-0.9.3-models.scorer --audio audio/2830-3980-0043.wav + $ stt --model coqui-stt-0.9.3-models.tflite --scorer coqui-stt-0.9.3-models.scorer --audio audio/2830-3980-0043.wav .. toctree:: :maxdepth: 1 diff --git a/native_client/Android.mk b/native_client/Android.mk index 49bf8f93..7eff72a4 100644 --- a/native_client/Android.mk +++ b/native_client/Android.mk @@ -5,10 +5,25 @@ LOCAL_MODULE := stt-prebuilt LOCAL_SRC_FILES := $(TFDIR)/bazel-bin/native_client/libstt.so include $(PREBUILT_SHARED_LIBRARY) +include $(CLEAR_VARS) +LOCAL_MODULE := kenlm-prebuilt +LOCAL_SRC_FILES := $(TFDIR)/bazel-bin/native_client/libkenlm.so +include $(PREBUILT_SHARED_LIBRARY) + +include $(CLEAR_VARS) +LOCAL_MODULE := tensorflowlite-prebuilt +LOCAL_SRC_FILES := $(TFDIR)/bazel-bin/tensorflow/lite/libtensorflowlite.so +include $(PREBUILT_SHARED_LIBRARY) + +include $(CLEAR_VARS) +LOCAL_MODULE := tflitedelegates-prebuilt +LOCAL_SRC_FILES := $(TFDIR)/bazel-bin/native_client/libtflitedelegates.so +include $(PREBUILT_SHARED_LIBRARY) + include $(CLEAR_VARS) LOCAL_CPP_EXTENSION := .cc .cxx .cpp LOCAL_MODULE := stt LOCAL_SRC_FILES := client.cc -LOCAL_SHARED_LIBRARIES := stt-prebuilt +LOCAL_SHARED_LIBRARIES := stt-prebuilt kenlm-prebuilt tensorflowlite-prebuilt tflitedelegates-prebuilt 
LOCAL_LDFLAGS := -Wl,--no-as-needed include $(BUILD_EXECUTABLE) diff --git a/native_client/BUILD b/native_client/BUILD index d0a5f5d9..54a5d993 100644 --- a/native_client/BUILD +++ b/native_client/BUILD @@ -110,12 +110,12 @@ cc_binary( ) cc_library( - name="kenlm", + name = "kenlm", hdrs = glob([ "kenlm/lm/*.hh", "kenlm/util/*.hh", ]), - srcs = ["libkenlm.so"], + srcs = [":libkenlm.so"], copts = ["-std=c++11"], defines = ["KENLM_MAX_ORDER=6"], includes = ["kenlm"], @@ -131,12 +131,17 @@ cc_library( ) cc_library( - name="tflite", + name = "tflite", hdrs = [ "//tensorflow/lite:model.h", "//tensorflow/lite/kernels:register.h", - "//tensorflow/lite/tools/evaluation:utils.h", - ], + ] + select({ + "//tensorflow:android": [ + "//tensorflow/lite/delegates/gpu:delegate.h", + "//tensorflow/lite/delegates/hexagon:hexagon_delegate.h", + ], + "//conditions:default": [], + }), srcs = [ "//tensorflow/lite:libtensorflowlite.so", ], @@ -144,6 +149,37 @@ cc_library( deps = ["//tensorflow/lite:libtensorflowlite.so"], ) +cc_binary( + name = "libtflitedelegates.so", + deps = [ + "//tensorflow/lite/tools/evaluation:utils", + ], + linkshared = 1, + linkopts = select({ + "//tensorflow:ios": [ + "-Wl,-install_name,@rpath/libtflitedelegates.so", + ], + "//tensorflow:macos": [ + "-Wl,-install_name,@rpath/libtflitedelegates.so", + ], + "//tensorflow:windows": [], + "//conditions:default": [ + "-Wl,-soname,libtflitedelegates.so", + ], + }), +) + +cc_library( + name = "tflitedelegates", + hdrs = [ + "//tensorflow/lite/tools/evaluation:utils.h", + ], + deps = [ + "//tensorflow/lite/tools/evaluation:utils", + ], + srcs = [":libtflitedelegates.so"], +) + cc_library( name = "coqui_stt_bundle", srcs = [ @@ -178,12 +214,13 @@ cc_library( # We simply force the linker option manually here as a hacky fix. 
"//tensorflow:windows": [ "bazel-out/x64_windows-opt/bin/native_client/libkenlm.so.if.lib", + "bazel-out/x64_windows-opt/bin/native_client/libtflitedelegates.so.if.lib", "bazel-out/x64_windows-opt/bin/tensorflow/lite/libtensorflowlite.so.if.lib", ], "//conditions:default": [], }) + DECODER_LINKOPTS, includes = DECODER_INCLUDES, - deps = [":kenlm", ":tflite"], + deps = [":kenlm", ":tflite", ":tflitedelegates"], ) cc_binary( diff --git a/native_client/definitions.mk b/native_client/definitions.mk index 1f55b83b..99aba3b2 100644 --- a/native_client/definitions.mk +++ b/native_client/definitions.mk @@ -21,7 +21,7 @@ endif STT_BIN := stt$(PLATFORM_EXE_SUFFIX) CFLAGS_STT := -std=c++11 -o $(STT_BIN) -LINK_STT := -lstt -lkenlm -ltensorflowlite +LINK_STT := -lstt -lkenlm -ltflitedelegates -ltensorflowlite LINK_PATH_STT := -L${TFDIR}/bazel-bin/native_client -L${TFDIR}/bazel-bin/tensorflow/lite ifeq ($(TARGET),host) @@ -61,7 +61,7 @@ TOOL_CC := cl.exe TOOL_CXX := cl.exe TOOL_LD := link.exe TOOL_LIBEXE := lib.exe -LINK_STT := $(shell cygpath "$(TFDIR)/bazel-bin/native_client/libstt.so.if.lib") $(shell cygpath "$(TFDIR)/bazel-bin/native_client/libkenlm.so.if.lib") $(shell cygpath "$(TFDIR)/bazel-bin/tensorflow/lite/libtensorflowlite.so.if.lib") +LINK_STT := $(shell cygpath "$(TFDIR)/bazel-bin/native_client/libstt.so.if.lib") $(shell cygpath "$(TFDIR)/bazel-bin/native_client/libkenlm.so.if.lib") $(shell cygpath "$(TFDIR)/bazel-bin/native_client/libtflitedelegates.so.if.lib") $(shell cygpath "$(TFDIR)/bazel-bin/tensorflow/lite/libtensorflowlite.so.if.lib") LINK_PATH_STT := CFLAGS_STT := -nologo -Fe$(STT_BIN) SOX_CFLAGS := @@ -184,8 +184,8 @@ define copy_missing_libs if [ "$(OS)" = "Darwin" ]; then \ new_missing="$$( (for f in $$(otool -L $$lib 2>/dev/null | tail -n +2 | awk '{ print $$1 }' | grep -v '$$lib'); do ls -hal $$f; done;) 2>&1 | grep 'No such' | cut -d':' -f2 | xargs basename -a)"; \ missing_libs="$$missing_libs $$new_missing"; \ - elif [ "$(OS)" = "${CI_MSYS_VERSION}" 
]; then \ - missing_libs="libstt.so libkenlm.so libtensorflowlite.so"; \ + elif [ "$(OS)" = "${CI_MSYS_VERSION}" ]; then \ + missing_libs="libstt.so libkenlm.so libtflitedelegates.so libtensorflowlite.so"; \ else \ missing_libs="$$missing_libs $$($(LDD) $$lib | grep 'not found' | awk '{ print $$1 }')"; \ fi; \ diff --git a/native_client/java/app/src/androidTest/java/ai/coqui/sttexampleapp/ExampleInstrumentedTest.java b/native_client/java/app/src/androidTest/java/ai/coqui/sttexampleapp/ExampleInstrumentedTest.java index 0a68a324..77c8e376 100644 --- a/native_client/java/app/src/androidTest/java/ai/coqui/sttexampleapp/ExampleInstrumentedTest.java +++ b/native_client/java/app/src/androidTest/java/ai/coqui/sttexampleapp/ExampleInstrumentedTest.java @@ -1,4 +1,4 @@ -package ai.coqui.sttexampleapp +package ai.coqui.sttexampleapp; import android.content.Context; import android.support.test.InstrumentationRegistry; diff --git a/native_client/java/app/src/test/java/ai/coqui/sttexampleapp/ExampleUnitTest.java b/native_client/java/app/src/test/java/ai/coqui/sttexampleapp/ExampleUnitTest.java index a2b67ba8..0f0a6ebd 100644 --- a/native_client/java/app/src/test/java/ai/coqui/sttexampleapp/ExampleUnitTest.java +++ b/native_client/java/app/src/test/java/ai/coqui/sttexampleapp/ExampleUnitTest.java @@ -1,4 +1,4 @@ -package ai.coqui.sttexampleapp +package ai.coqui.sttexampleapp; import org.junit.Test; diff --git a/native_client/java/libstt/src/main/java/ai/coqui/libstt/STTModel.java b/native_client/java/libstt/src/main/java/ai/coqui/libstt/STTModel.java index 3fce310f..c986a95c 100644 --- a/native_client/java/libstt/src/main/java/ai/coqui/libstt/STTModel.java +++ b/native_client/java/libstt/src/main/java/ai/coqui/libstt/STTModel.java @@ -15,8 +15,8 @@ public class STTModel { private SWIGTYPE_p_ModelState _msp; private void evaluateErrorCode(int errorCode) { - STT_Error_Codes code = STT_Error_Codes.swigToEnum(errorCode); - if (code != STT_Error_Codes.ERR_OK) { + Error_Codes code = 
Error_Codes.swigToEnum(errorCode); + if (code != Error_Codes.ERR_OK) { throw new RuntimeException("Error: " + impl.ErrorCodeToErrorMessage(errorCode) + " (0x" + Integer.toHexString(errorCode) + ")."); } } diff --git a/native_client/javascript/binding.gyp b/native_client/javascript/binding.gyp index af949066..4ce4140c 100644 --- a/native_client/javascript/binding.gyp +++ b/native_client/javascript/binding.gyp @@ -27,6 +27,7 @@ "libraries": [ "../../../tensorflow/bazel-bin/native_client/libstt.so.if.lib", "../../../tensorflow/bazel-bin/native_client/libkenlm.so.if.lib", + "../../../tensorflow/bazel-bin/native_client/libtflitedelegates.so.if.lib", "../../../tensorflow/bazel-bin/tensorflow/lite/libtensorflowlite.so.if.lib", ], }, @@ -34,6 +35,7 @@ "libraries": [ "../../../tensorflow/bazel-bin/native_client/libstt.so", "../../../tensorflow/bazel-bin/native_client/libkenlm.so", + "../../../tensorflow/bazel-bin/native_client/libtflitedelegates.so", "../../../tensorflow/bazel-bin/tensorflow/lite/libtensorflowlite.so", ], }, diff --git a/native_client/tflitemodelstate.cc b/native_client/tflitemodelstate.cc index 20ac855d..a18ba062 100644 --- a/native_client/tflitemodelstate.cc +++ b/native_client/tflitemodelstate.cc @@ -1,7 +1,9 @@ #include "tflitemodelstate.h" -#include "tensorflow/lite/string_util.h" #include "workspace_status.h" +#include "tensorflow/lite/string_util.h" +#include "tensorflow/lite/tools/evaluation/utils.h" + #ifdef __ANDROID__ #include #define LOG_TAG "libstt" diff --git a/native_client/tflitemodelstate.h b/native_client/tflitemodelstate.h index ace62ecf..11532e64 100644 --- a/native_client/tflitemodelstate.h +++ b/native_client/tflitemodelstate.h @@ -6,7 +6,6 @@ #include "tensorflow/lite/model.h" #include "tensorflow/lite/kernels/register.h" -#include "tensorflow/lite/tools/evaluation/utils.h" #include "modelstate.h" diff --git a/notebooks/README.md b/notebooks/README.md index 06fd5867..d2c1bc02 100644 --- a/notebooks/README.md +++ b/notebooks/README.md 
@@ -1,4 +1,7 @@ # Python Notebooks for 🐸 STT -1. Train a new Speech-to-Text model from scratch [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/coqui-ai/STT/blob/main/notebooks/train-your-first-coqui-STT-model.ipynb) -2. Transfer learning (English --> Russian) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/coqui-ai/STT/blob/main/notebooks/easy-transfer-learning.ipynb) +| Notebook title | Language(s) | Link to Colab | +|----------------|---------------|-------------| +|Train your first 🐸 STT model | English | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/coqui-ai/STT/blob/main/notebooks/train_your_first_coqui_STT_model.ipynb) | +|Easy Transfer learning | English --> Russian | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/coqui-ai/STT/blob/main/notebooks/easy_transfer_learning.ipynb)| +| Train a model with Common Voice | Serbian | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/coqui-ai/STT/blob/main/notebooks/train_with_common_voice.ipynb) | diff --git a/notebooks/easy-transfer-learning.ipynb b/notebooks/easy_transfer_learning.ipynb similarity index 97% rename from notebooks/easy-transfer-learning.ipynb rename to notebooks/easy_transfer_learning.ipynb index 4631db82..0ce1466d 100644 --- a/notebooks/easy-transfer-learning.ipynb +++ b/notebooks/easy_transfer_learning.ipynb @@ -34,9 +34,9 @@ "metadata": {}, "outputs": [], "source": [ - "## Install Coqui STT if you need to\n", - "# !git clone --depth 1 https://github.com/coqui-ai/STT.git\n", - "# !cd STT; pip install -U pip wheel setuptools; pip install ." + "## Install Coqui STT\n", + "! pip install -U pip\n", + "! 
pip install coqui_stt_training" ] }, { @@ -147,7 +147,7 @@ " alphabet_config_path=\"russian/alphabet.txt\",\n", " train_files=[\"russian/ru.csv\"],\n", " dev_files=[\"russian/ru.csv\"],\n", - " epochs=200,\n", + " epochs=100,\n", " load_cudnn=True,\n", ")" ] @@ -251,7 +251,7 @@ "metadata": {}, "outputs": [], "source": [ - "from coqui_stt_training.train import test\n", + "from coqui_stt_training.evaluate import test\n", "\n", "test()" ] diff --git a/notebooks/train_with_common_voice.ipynb b/notebooks/train_with_common_voice.ipynb new file mode 100644 index 00000000..b492f4f2 --- /dev/null +++ b/notebooks/train_with_common_voice.ipynb @@ -0,0 +1,265 @@ +{ + "nbformat": 4, + "nbformat_minor": 5, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + }, + "colab": { + "name": "train-with-common-voice-data.ipynb", + "private_outputs": true, + "provenance": [], + "collapsed_sections": [] + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "f79d99ef" + }, + "source": [ + "# Train a 🐸 STT model with Common Voice data 💫\n", + "\n", + "👋 Hello and welcome to Coqui (🐸) STT \n", + "\n", + "This notebook shows a **typical workflow** for **training** and **testing** an 🐸 STT model on data from Common Voice.\n", + "\n", + "In this notebook, we will:\n", + "\n", + "1. Download Common Voice data (pre-formatted for 🐸 STT)\n", + "2. Configure the training and testing runs\n", + "3. Train a new model\n", + "4. 
Test the model and display its performance\n", + "\n", + "So, let's jump right in!\n", + "\n", + "*PS - If you just want a working, off-the-shelf model, check out the [🐸 Model Zoo](https://www.coqui.ai/models)*" + ], + "id": "f79d99ef" + }, + { + "cell_type": "code", + "metadata": { + "id": "fa2aec78" + }, + "source": [ + "## Install Coqui STT\n", + "! pip install -U pip\n", + "! pip install coqui_stt_training\n", + "## Install opus tools\n", + "! apt-get install libopusfile0 libopus-dev libopusfile-dev" + ], + "id": "fa2aec78", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "be5fe49c" + }, + "source": [ + "## ✅ Download & format sample data for Serbian\n", + "\n", + "**First things first**: we need some data.\n", + "\n", + "We're training a Speech-to-Text model, so we want _speech_ and we want _text_. Specifically, we want _transcribed speech_. Let's download some audio and transcripts.\n", + "\n", + "To focus on model training, we formatted the Common Voice data for you already, and you will find CSV files for `{train,test,dev}.csv` in the data directory.\n", + "\n", + "Let's download some data for Serbian 😊\n" + ], + "id": "be5fe49c" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "608d203f", + "metadata": {}, + "outputs": [], + "source": [ + "### Download pre-formatted Common Voice data\n", + "import os\n", + "import tarfile\n", + "from coqui_stt_training.util.downloader import maybe_download\n", + "\n", + "def download_preformatted_data():\n", + " if not os.path.exists(\"serbian/sr-data\"):\n", + " maybe_download(\"sr-data.tar\", \"serbian/\", \"https://coqui-ai-public-data.s3.amazonaws.com/cv/7.0/sr-data.tar\")\n", + " print('\\nExtracting data...')\n", + " tar = tarfile.open(\"serbian/sr-data.tar\", mode=\"r:\")\n", + " tar.extractall(\"serbian/\")\n", + " tar.close()\n", + " print('\\nFinished extracting data...')\n", + " else:\n", + " print('Found data - not extracting.')\n", + 
"\n", + "# Download + extract Common Voice data\n", + "download_preformatted_data()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "96e8b708" + }, + "source": [ + "### 👀 Take a look at the data" + ], + "id": "96e8b708" + }, + { + "cell_type": "code", + "metadata": { + "id": "fa2aec77" + }, + "source": [ + "! ls serbian/sr-data\n", + "! wc -l serbian/sr-data/*.csv" + ], + "id": "fa2aec77", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d9dfac21" + }, + "source": [ + "## ✅ Configure & set hyperparameters\n", + "\n", + "Coqui STT comes with a long list of hyperparameters you can tweak. We've set default values, but you will often want to set your own. You can use `initialize_globals_from_args()` to do this. \n", + "\n", + "You must **always** configure the paths to your data, and you must **always** configure your alphabet. Additionally, here we show how you can specify the size of hidden layers (`n_hidden`), the number of epochs to train for (`epochs`), and to initialize a new model from scratch (`load_train=\"init\"`).\n", + "\n", + "If you're training on a GPU, you can uncomment the (larger) training batch sizes for faster training." 
+ ], + "id": "d9dfac21" + }, + { + "cell_type": "code", + "metadata": { + "id": "d264fdec" + }, + "source": [ + "from coqui_stt_training.util.config import initialize_globals_from_args\n", + "\n", + "initialize_globals_from_args(\n", + " train_files=[\"serbian/sr-data/train.csv\"],\n", + " dev_files=[\"serbian/sr-data/dev.csv\"],\n", + " test_files=[\"serbian/sr-data/test.csv\"],\n", + " checkpoint_dir=\"serbian/checkpoints/\",\n", + " load_train=\"init\",\n", + " n_hidden=200,\n", + " epochs=1,\n", + " beam_width=1,\n", + " #train_batch_size=128,\n", + " #dev_batch_size=128,\n", + " #test_batch_size=128,\n", + ")" + ], + "id": "d264fdec", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "799c1425" + }, + "source": [ + "### 👀 View all config settings" + ], + "id": "799c1425" + }, + { + "cell_type": "code", + "metadata": { + "id": "03b33d2b" + }, + "source": [ + "from coqui_stt_training.util.config import Config\n", + "\n", + "print(Config.to_json())" + ], + "id": "03b33d2b", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ae82fd75" + }, + "source": [ + "## ✅ Train a new model\n", + "\n", + "Let's kick off a training run 🚀🚀🚀 (using the configuration you set above)." + ], + "id": "ae82fd75" + }, + { + "cell_type": "code", + "metadata": { + "scrolled": true, + "id": "550a504e" + }, + "source": [ + "from coqui_stt_training.train import train\n", + "\n", + "train()" + ], + "id": "550a504e", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9f6dc959" + }, + "source": [ + "## ✅ Test the model\n", + "\n", + "We made it! 🙌\n", + "\n", + "Let's kick off the testing run, which displays performance metrics.\n", + "\n", + "The settings we used here are for demonstration purposes, so you don't want to deploy this model into production. 
In this notebook we're focusing on the workflow itself, so it's forgivable 😇\n", + "\n", + "You can still train a more State-of-the-Art model by finding better hyperparameters, so go for it 💪" + ], + "id": "9f6dc959" + }, + { + "cell_type": "code", + "metadata": { + "id": "dd42bc7a" + }, + "source": [ + "from coqui_stt_training.evaluate import test\n", + "\n", + "test()" + ], + "id": "dd42bc7a", + "execution_count": null, + "outputs": [] + } + ] +} diff --git a/notebooks/train-your-first-coqui-STT-model.ipynb b/notebooks/train_your_first_coqui_STT_model.ipynb similarity index 91% rename from notebooks/train-your-first-coqui-STT-model.ipynb rename to notebooks/train_your_first_coqui_STT_model.ipynb index bcb10d89..df885b2d 100644 --- a/notebooks/train-your-first-coqui-STT-model.ipynb +++ b/notebooks/train_your_first_coqui_STT_model.ipynb @@ -32,9 +32,9 @@ "metadata": {}, "outputs": [], "source": [ - "## Install Coqui STT if you need to\n", - "# !git clone --depth 1 https://github.com/coqui-ai/STT.git\n", - "# !cd STT; pip install -U pip wheel setuptools; pip install ." + "## Install Coqui STT\n", + "! pip install -U pip\n", + "! pip install coqui_stt_training" ] }, { @@ -54,9 +54,9 @@ "2. the **size** of that audio file\n", "3. the **transcript** of that audio file.\n", "\n", - "Formatting the audio and transcript isn't too difficult in this case. We define a custom data importer called `download_sample_data()` which does all the work. If you have a custom dataset, you will probably want to write a custom data importer.\n", + "Formatting the audio and transcript isn't too difficult in this case. We define `download_sample_data()` which does all the work. If you have a custom dataset, you will want to write a custom data importer.\n", "\n", - "**Second things second**: we want an alphabet. The output layer of a typical* 🐸 STT model represents letters in the alphabet, and you should specify this alphabet before training. 
Let's download an English alphabet from Coqui and use that.\n", + "**Second things second**: we want an alphabet. The output layer of a typical* 🐸 STT model represents letters in the alphabet. Let's download an English alphabet from Coqui and use that.\n", "\n", "*_If you are working with languages with large character sets (e.g. Chinese), you can set `bytes_output_mode=True` instead of supplying an `alphabet.txt` file. In this case, the output layer of the STT model will correspond to individual UTF-8 bytes instead of individual characters._" ] @@ -98,7 +98,7 @@ "id": "96e8b708", "metadata": {}, "source": [ - "### Take a look at the data (*Optional* )" + "### 👀 Take a look at the data" ] }, { @@ -150,8 +150,8 @@ " dev_files=[\"english/ldc93s1.csv\"],\n", " test_files=[\"english/ldc93s1.csv\"],\n", " load_train=\"init\",\n", - " n_hidden=100,\n", - " epochs=200,\n", + " n_hidden=200,\n", + " epochs=100,\n", ")" ] }, { @@ -160,7 +160,7 @@ "id": "799c1425", "metadata": {}, "source": [ - "### View all Config settings (*Optional*) " + "### 👀 View all Config settings" ] }, { @@ -230,7 +230,7 @@ "metadata": {}, "outputs": [], "source": [ - "from coqui_stt_training.train import test\n", + "from coqui_stt_training.evaluate import test\n", "\n", "test()" ] diff --git a/tensorflow b/tensorflow index 23ad988f..4bdd3955 160000 --- a/tensorflow +++ b/tensorflow @@ -1 +1 @@ -Subproject commit 23ad988fcde60fb01f9533e95004bbc4877a9143 +Subproject commit 4bdd3955115cc08df61cf94e16a4ea8e0f4847c4 diff --git a/training/coqui_stt_training/VERSION b/training/coqui_stt_training/VERSION index 51ca89e3..651fe010 100644 --- a/training/coqui_stt_training/VERSION +++ b/training/coqui_stt_training/VERSION @@ -1 +1 @@ -0.10.0-alpha.14 +0.10.0-alpha.29 diff --git a/training/coqui_stt_training/train.py b/training/coqui_stt_training/train.py index 98f9e407..d396fd7f 100644 --- a/training/coqui_stt_training/train.py +++ b/training/coqui_stt_training/train.py @@ -266,11 +266,9 @@ def 
early_training_checks(): ) -def create_training_datasets() -> ( - tf.data.Dataset, - [tf.data.Dataset], - [tf.data.Dataset], -): +def create_training_datasets( + epoch_ph: tf.Tensor = None, +) -> (tf.data.Dataset, [tf.data.Dataset], [tf.data.Dataset],): """Creates training datasets from input flags. Returns a single training dataset and two lists of datasets for validation @@ -288,6 +286,7 @@ def create_training_datasets() -> ( reverse=Config.reverse_train, limit=Config.limit_train, buffering=Config.read_buffer, + epoch_ph=epoch_ph, ) dev_sets = [] @@ -331,7 +330,8 @@ def train(): tfv1.reset_default_graph() tfv1.set_random_seed(Config.random_seed) - train_set, dev_sets, metrics_sets = create_training_datasets() + epoch_ph = tf.placeholder(tf.int64, name="epoch_ph") + train_set, dev_sets, metrics_sets = create_training_datasets(epoch_ph) iterator = tfv1.data.Iterator.from_structure( tfv1.data.get_output_types(train_set), @@ -488,7 +488,7 @@ def train(): ).start() # Initialize iterator to the appropriate dataset - session.run(init_op) + session.run(init_op, {epoch_ph: epoch}) # Batch loop while True: @@ -507,7 +507,7 @@ def train(): non_finite_files, step_summaries_op, ], - feed_dict=feed_dict, + feed_dict={**feed_dict, **{epoch_ph: epoch}}, ) except tf.errors.OutOfRangeError: break diff --git a/training/coqui_stt_training/util/config.py b/training/coqui_stt_training/util/config.py index 85493d4b..4ddc62b2 100644 --- a/training/coqui_stt_training/util/config.py +++ b/training/coqui_stt_training/util/config.py @@ -340,6 +340,22 @@ class _SttConfig(Coqpit): help='after how many epochs the feature cache is invalidated again - 0 for "never"' ), ) + shuffle_batches: bool = field( + default=False, + metadata=dict( + help="reshuffle batches every epoch, starting after N epochs, where N is set by the shuffle_start flag." 
+ ), + ) + shuffle_start: int = field( + default=1, + metadata=dict(help="epoch to start shuffling batches from (zero-based)."), + ) + shuffle_buffer: int = field( + default=1000, + metadata=dict( + help="how many batches to keep in shuffle buffer when shuffling batches." + ), + ) feature_win_len: int = field( default=32, diff --git a/training/coqui_stt_training/util/feeding.py b/training/coqui_stt_training/util/feeding.py index 80ff0c20..bf506375 100644 --- a/training/coqui_stt_training/util/feeding.py +++ b/training/coqui_stt_training/util/feeding.py @@ -140,6 +140,7 @@ def create_dataset( limit=0, process_ahead=None, buffering=1 * MEGABYTE, + epoch_ph=None, ): epoch_counter = Counter() # survives restarts of the dataset and its generator @@ -207,11 +208,18 @@ def create_dataset( ).map(process_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE) if cache_path: dataset = dataset.cache(cache_path) - dataset = ( - dataset.window(batch_size, drop_remainder=train_phase) - .flat_map(batch_fn) - .prefetch(len(Config.available_devices)) - ) + dataset = dataset.window(batch_size, drop_remainder=train_phase).flat_map(batch_fn) + + if Config.shuffle_batches and epoch_ph is not None: + with tf.control_dependencies([tf.print("epoch:", epoch_ph)]): + epoch_buffer_size = tf.cond( + tf.less(epoch_ph, Config.shuffle_start), + lambda: tf.constant(1, tf.int64), + lambda: tf.constant(Config.shuffle_buffer, tf.int64), + ) + dataset = dataset.shuffle(epoch_buffer_size, seed=epoch_ph) + + dataset = dataset.prefetch(len(Config.available_devices)) return dataset