From 058f53af3ab66082b1e566c9e6c0c19e6c77020f Mon Sep 17 00:00:00 2001 From: Karan Sagar Date: Fri, 10 Jul 2020 15:24:45 -0400 Subject: [PATCH 01/33] Update TRAINING.rst I'm new to DeepSpeech, but I noticed when following the training instructions that the filenames _appear_ to be relative paths in the CSV. Let me know if I'm misinterpreting. Thanks! --- doc/TRAINING.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/TRAINING.rst b/doc/TRAINING.rst index f42cf819..4cdf41e5 100644 --- a/doc/TRAINING.rst +++ b/doc/TRAINING.rst @@ -124,7 +124,7 @@ It will also add the following ``.csv`` files: * ``clips/dev.csv`` * ``clips/test.csv`` -All entries in these CSV files refer to their samples by absolute paths. So moving this sub-directory would require another import or tweaking the CSV files accordingly. +All entries in these CSV files refer to their samples by relative paths. To use Common Voice data during training, validation and testing, you pass (comma separated combinations of) their filenames into ``--train_files``\ , ``--dev_files``\ , ``--test_files`` parameters of ``DeepSpeech.py``. From 36a2f3b38d7527e3603e96692f449b773a5b6fff Mon Sep 17 00:00:00 2001 From: Karan Sagar Date: Thu, 16 Jul 2020 12:38:59 -0400 Subject: [PATCH 02/33] Update TRAINING.rst Update wording on relative / absolute paths. --- doc/TRAINING.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/TRAINING.rst b/doc/TRAINING.rst index 4cdf41e5..764088b5 100644 --- a/doc/TRAINING.rst +++ b/doc/TRAINING.rst @@ -124,7 +124,7 @@ It will also add the following ``.csv`` files: * ``clips/dev.csv`` * ``clips/test.csv`` -All entries in these CSV files refer to their samples by relative paths. +Entries in CSV files can refer to samples by their absolute or relative paths. Here, the importer produces relative paths. 
To use Common Voice data during training, validation and testing, you pass (comma separated combinations of) their filenames into ``--train_files``\ , ``--dev_files``\ , ``--test_files`` parameters of ``DeepSpeech.py``. From 0e8a28de578aa36e8040774952515b5c029ba6da Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Sun, 19 Jul 2020 21:15:44 +0200 Subject: [PATCH 03/33] Bump caches and fix linker issues in new workers --- taskcluster/.shared.yml | 48 ++++++++++----------- taskcluster/tc-cpp-ds-tests-prod.sh | 4 -- taskcluster/tc-cpp-ds-tests.sh | 4 -- taskcluster/tc-cpp_tflite-ds-tests.sh | 4 -- taskcluster/tc-cpp_tflite-tests-prod.sh | 4 -- taskcluster/tc-cpp_tflite_basic-ds-tests.sh | 4 -- 6 files changed, 24 insertions(+), 44 deletions(-) diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index 765ad3a0..7ec3f366 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -68,11 +68,11 @@ system: url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.node-gyp-cache.7/artifacts/public/node-gyp-cache.tar.gz' namespace: 'project.deepspeech.node-gyp-cache.7' homebrew_builds: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.homebrew_builds.7/artifacts/public/homebrew_builds.tar.gz' - namespace: 'project.deepspeech.homebrew_builds.7' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.homebrew_builds.8/artifacts/public/homebrew_builds.tar.gz' + namespace: 'project.deepspeech.homebrew_builds.8' homebrew_tests: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.homebrew_tests.8/artifacts/public/homebrew_tests.tar.gz' - namespace: 'project.deepspeech.homebrew_tests.8' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.homebrew_tests.9/artifacts/public/homebrew_tests.tar.gz' + namespace: 'project.deepspeech.homebrew_tests.9' android_cache: arm64_v8a: android_24: @@ -122,8 +122,8 
@@ system: url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.linux.8/artifacts/public/pyenv.tar.gz' namespace: 'project.deepspeech.pyenv.linux.8' osx: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.osx.8/artifacts/public/pyenv.tar.gz' - namespace: 'project.deepspeech.pyenv.osx.8' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.osx.9/artifacts/public/pyenv.tar.gz' + namespace: 'project.deepspeech.pyenv.osx.9' win: url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.win.8/artifacts/public/pyenv.tar.gz' namespace: 'project.deepspeech.pyenv.win.8' @@ -142,32 +142,32 @@ system: namespace: "project.deepspeech.swig.win.amd64.b5fea54d39832d1d132d7dd921b69c0c2c9d5118" tensorflow: linux_amd64_cpu: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.1.cpu/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.1.cpu" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.cpu/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.cpu" linux_amd64_cuda: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.0.cuda/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.0.cuda" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.cuda/artifacts/public/home.tar.xz" + namespace: 
"project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.cuda" linux_armv7: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.0.arm/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.0.arm" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm" linux_arm64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.0.arm64/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.0.arm64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm64" darwin_amd64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.0.osx/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.0.osx" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.3.osx/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.3.osx" android_arm64: - url: 
"https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.518c1.1.bf55d362bb11e973b8f5.1.a3e5bf44d.1.android-arm64/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.518c1.1.bf55d362bb11e973b8f5.1.a3e5bf44d.1.android-arm64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-arm64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-arm64" android_armv7: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.518c1.1.bf55d362bb11e973b8f5.1.a3e5bf44d.1.android-armv7/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.518c1.1.bf55d362bb11e973b8f5.1.a3e5bf44d.1.android-armv7" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-armv7/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-armv7" win_amd64_cpu: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.1.win/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.1.win" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win" win_amd64_cuda: - url: 
"https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.1.win-cuda/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.1.win-cuda" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win-cuda/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win-cuda" username: 'build-user' homedir: linux: '/home/build-user' diff --git a/taskcluster/tc-cpp-ds-tests-prod.sh b/taskcluster/tc-cpp-ds-tests-prod.sh index c1dcfeb6..9edd5673 100644 --- a/taskcluster/tc-cpp-ds-tests-prod.sh +++ b/taskcluster/tc-cpp-ds-tests-prod.sh @@ -17,10 +17,6 @@ download_material "${TASKCLUSTER_TMP_DIR}/ds" export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH -if [ "${OS}" = "Darwin" ]; then - export DYLD_LIBRARY_PATH=$TESTS_BREW/lib/:$DYLD_LIBRARY_PATH -fi; - check_versions run_prod_inference_tests "${bitrate}" diff --git a/taskcluster/tc-cpp-ds-tests.sh b/taskcluster/tc-cpp-ds-tests.sh index 4147941a..67d5d92f 100644 --- a/taskcluster/tc-cpp-ds-tests.sh +++ b/taskcluster/tc-cpp-ds-tests.sh @@ -11,10 +11,6 @@ download_material "${TASKCLUSTER_TMP_DIR}/ds" export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH -if [ "${OS}" = "Darwin" ]; then - export DYLD_LIBRARY_PATH=$TESTS_BREW/lib/:$DYLD_LIBRARY_PATH -fi; - check_versions run_all_inference_tests diff --git a/taskcluster/tc-cpp_tflite-ds-tests.sh b/taskcluster/tc-cpp_tflite-ds-tests.sh index 48b46ab5..313475ef 100644 --- a/taskcluster/tc-cpp_tflite-ds-tests.sh +++ b/taskcluster/tc-cpp_tflite-ds-tests.sh @@ -16,10 +16,6 @@ download_material "${TASKCLUSTER_TMP_DIR}/ds" export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH -if [ "${OS}" = "Darwin" ]; then - export DYLD_LIBRARY_PATH=$TESTS_BREW/lib/:$DYLD_LIBRARY_PATH -fi; - check_versions 
run_all_inference_tests diff --git a/taskcluster/tc-cpp_tflite-tests-prod.sh b/taskcluster/tc-cpp_tflite-tests-prod.sh index 631eabc3..5acd4016 100644 --- a/taskcluster/tc-cpp_tflite-tests-prod.sh +++ b/taskcluster/tc-cpp_tflite-tests-prod.sh @@ -18,10 +18,6 @@ download_material "${TASKCLUSTER_TMP_DIR}/ds" export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH -if [ "${OS}" = "Darwin" ]; then - export DYLD_LIBRARY_PATH=$TESTS_BREW/lib/:$DYLD_LIBRARY_PATH -fi; - check_versions run_prodtflite_inference_tests "${bitrate}" diff --git a/taskcluster/tc-cpp_tflite_basic-ds-tests.sh b/taskcluster/tc-cpp_tflite_basic-ds-tests.sh index ddf88ec4..7370e8a3 100644 --- a/taskcluster/tc-cpp_tflite_basic-ds-tests.sh +++ b/taskcluster/tc-cpp_tflite_basic-ds-tests.sh @@ -16,10 +16,6 @@ download_material "${TASKCLUSTER_TMP_DIR}/ds" export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH -if [ "${OS}" = "Darwin" ]; then - export DYLD_LIBRARY_PATH=$TESTS_BREW/lib/:$DYLD_LIBRARY_PATH -fi; - check_versions run_tflite_basic_inference_tests From 3ce07afae045ce9bf014a36719a0dfec75662187 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 30 Jun 2020 13:01:11 +0200 Subject: [PATCH 04/33] Add TensorFlow iOS tasks --- taskcluster/.build.yml | 2 +- taskcluster/.shared.yml | 42 +++++++++++++---------- taskcluster/tf_darwin-amd64-opt.yml | 2 +- taskcluster/tf_ios-arm64-opt.yml | 18 ++++++++++ taskcluster/tf_ios-x86_64-opt.yml | 18 ++++++++++ taskcluster/tf_linux-amd64-cpu-opt.yml | 2 +- taskcluster/tf_tc-build.sh | 46 +++++++++++++------------- taskcluster/tf_tc-vars.sh | 2 ++ taskcluster/tf_win-amd64-cpu-opt.yml | 2 +- tensorflow | 2 +- 10 files changed, 90 insertions(+), 46 deletions(-) create mode 100644 taskcluster/tf_ios-arm64-opt.yml create mode 100644 taskcluster/tf_ios-x86_64-opt.yml diff --git a/taskcluster/.build.yml b/taskcluster/.build.yml index fd261359..76fc9703 100644 --- a/taskcluster/.build.yml +++ b/taskcluster/.build.yml @@ -25,7 +25,7 @@ build: nc_asset_name: 'native_client.tar.xz' args: 
tests_cmdline: '' - tensorflow_git_desc: 'TensorFlow: v2.2.0-15-g518c1d0' + tensorflow_git_desc: 'TensorFlow: v2.2.0-16-gfc46411' test_model_task: '' homebrew: url: '' diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index 7ec3f366..9323c995 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -142,32 +142,38 @@ system: namespace: "project.deepspeech.swig.win.amd64.b5fea54d39832d1d132d7dd921b69c0c2c9d5118" tensorflow: linux_amd64_cpu: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.cpu/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.cpu" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.cpu/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.cpu" linux_amd64_cuda: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.cuda/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.cuda" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.cuda/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.cuda" linux_armv7: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm" + url: 
"https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.arm/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.arm" linux_arm64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm64/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.arm64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.arm64" darwin_amd64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.3.osx/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.3.osx" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.osx/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.osx" android_arm64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-arm64/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-arm64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.android-arm64/artifacts/public/home.tar.xz" + namespace: 
"project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.android-arm64" android_armv7: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-armv7/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-armv7" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.android-armv7/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.android-armv7" win_amd64_cpu: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.win/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.win" win_amd64_cuda: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win-cuda/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win-cuda" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.win-cuda/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.win-cuda" + ios_arm64: + url: 
"https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.ios_arm64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.ios_arm64" + ios_x86_64: + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.ios_x86_64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.ios_x86_64" username: 'build-user' homedir: linux: '/home/build-user' diff --git a/taskcluster/tf_darwin-amd64-opt.yml b/taskcluster/tf_darwin-amd64-opt.yml index 21c19021..365e1700 100644 --- a/taskcluster/tf_darwin-amd64-opt.yml +++ b/taskcluster/tf_darwin-amd64-opt.yml @@ -10,7 +10,7 @@ build: ${tensorflow.packages_macos.brew} scripts: setup: "taskcluster/tf_tc-setup.sh" - build: "taskcluster/tf_tc-build.sh --osx" + build: "taskcluster/tf_tc-build.sh --cpu" package: "taskcluster/tf_tc-package.sh" maxRunTime: 28800 metadata: diff --git a/taskcluster/tf_ios-arm64-opt.yml b/taskcluster/tf_ios-arm64-opt.yml new file mode 100644 index 00000000..9f253b3f --- /dev/null +++ b/taskcluster/tf_ios-arm64-opt.yml @@ -0,0 +1,18 @@ +build: + template_file: generic_tc_caching-darwin-opt-base.tyml + cache: + artifact_url: ${system.tensorflow.ios_arm64.url} + artifact_namespace: ${system.tensorflow.ios_arm64.namespace} + generic: + workerType: "ds-macos-heavy" + system_config: + > + ${tensorflow.packages_macos.brew} + scripts: + setup: "taskcluster/tf_tc-setup.sh" + build: "taskcluster/tf_tc-build.sh --ios-arm64" + package: "taskcluster/tf_tc-package.sh" + maxRunTime: 28800 + metadata: + name: "TensorFlow iOS ARM64 TFLite" + description: "Building TensorFlow for iOS ARM64, TFLite, optimized version" diff --git a/taskcluster/tf_ios-x86_64-opt.yml b/taskcluster/tf_ios-x86_64-opt.yml new file mode 100644 index 
00000000..c56ad3ca --- /dev/null +++ b/taskcluster/tf_ios-x86_64-opt.yml @@ -0,0 +1,18 @@ +build: + template_file: generic_tc_caching-darwin-opt-base.tyml + cache: + artifact_url: ${system.tensorflow.ios_x86_64.url} + artifact_namespace: ${system.tensorflow.ios_x86_64.namespace} + generic: + workerType: "ds-macos-heavy" + system_config: + > + ${tensorflow.packages_macos.brew} + scripts: + setup: "taskcluster/tf_tc-setup.sh" + build: "taskcluster/tf_tc-build.sh --ios-x86_64" + package: "taskcluster/tf_tc-package.sh" + maxRunTime: 28800 + metadata: + name: "TensorFlow iOS x86_64 TFLite" + description: "Building TensorFlow for iOS x86_64, TFLite, optimized version" diff --git a/taskcluster/tf_linux-amd64-cpu-opt.yml b/taskcluster/tf_linux-amd64-cpu-opt.yml index d869d280..36b051b1 100644 --- a/taskcluster/tf_linux-amd64-cpu-opt.yml +++ b/taskcluster/tf_linux-amd64-cpu-opt.yml @@ -8,7 +8,7 @@ build: ${tensorflow.packages_xenial.apt} && ${java.packages_xenial.apt} scripts: setup: "taskcluster/tf_tc-setup.sh" - build: "taskcluster/tf_tc-build.sh" + build: "taskcluster/tf_tc-build.sh --cpu" package: "taskcluster/tf_tc-package.sh" maxRunTime: 14400 metadata: diff --git a/taskcluster/tf_tc-build.sh b/taskcluster/tf_tc-build.sh index ad8085ee..cb620906 100755 --- a/taskcluster/tf_tc-build.sh +++ b/taskcluster/tf_tc-build.sh @@ -4,56 +4,48 @@ set -ex source $(dirname $0)/tf_tc-vars.sh -build_amd64=yes +build_amd64=no build_gpu=no build_android_arm=no build_android_arm64=no build_linux_arm=no build_linux_arm64=no +build_ios_arm64=no +build_ios_x86_64=no + +if [ "$1" = "--cpu" ]; then + build_amd64=yes +fi if [ "$1" = "--gpu" ]; then build_amd64=yes build_gpu=yes - build_android_arm=no - build_android_arm64=no - build_linux_arm=no - build_linux_arm64=no fi if [ "$1" = "--arm" ]; then build_amd64=yes - build_gpu=no - build_android_arm=no - build_android_arm64=no build_linux_arm=yes - build_linux_arm64=no fi if [ "$1" = "--arm64" ]; then build_amd64=yes - build_gpu=no - 
build_android_arm=no - build_android_arm64=no - build_linux_arm=no build_linux_arm64=yes fi if [ "$1" = "--android-armv7" ]; then - build_amd64=no - build_gpu=no build_android_arm=yes - build_android_arm64=no - build_linux_arm=no - build_linux_arm64=no fi if [ "$1" = "--android-arm64" ]; then - build_amd64=no - build_gpu=no - build_android_arm=no build_android_arm64=yes - build_linux_arm=no - build_linux_arm64=no +fi + +if [ "$1" = "--ios-arm64" ]; then + build_ios_arm64=yes +fi + +if [ "$1" = "--ios-x86_64" ]; then + build_ios_x86_64=yes fi pushd ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/ @@ -98,6 +90,14 @@ pushd ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/ echo "" | TF_SET_ANDROID_WORKSPACE=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_ANDROID_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} fi; + if [ "${build_ios_arm64}" = "yes" ]; then + echo "" | TF_NEED_CUDA=0 TF_CONFIGURE_IOS=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_IOS_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} + fi; + + if [ "${build_ios_x86_64}" = "yes" ]; then + echo "" | TF_NEED_CUDA=0 TF_CONFIGURE_IOS=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_IOS_X86_64_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} + fi; + if [ $? -ne 0 ]; then # There was a failure, just account for it. echo "Build failure, please check the output above. Exit code was: $?" 
diff --git a/taskcluster/tf_tc-vars.sh b/taskcluster/tf_tc-vars.sh index 8150bb8d..8c30ea2a 100755 --- a/taskcluster/tf_tc-vars.sh +++ b/taskcluster/tf_tc-vars.sh @@ -171,6 +171,8 @@ BAZEL_ARM64_FLAGS="--config=rpi3-armv8 --config=rpi3-armv8_opt --copt=-DTFLITE_W BAZEL_ANDROID_ARM_FLAGS="--config=android --config=android_arm --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 --copt=-DTFLITE_WITH_RUY_GEMV" BAZEL_ANDROID_ARM64_FLAGS="--config=android --config=android_arm64 --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 --copt=-DTFLITE_WITH_RUY_GEMV" BAZEL_CUDA_FLAGS="--config=cuda" +BAZEL_IOS_ARM64_FLAGS="--config=ios_arm64 --copt=-DTFLITE_WITH_RUY_GEMV" +BAZEL_IOS_X86_64_FLAGS="--config=ios_x86_64 --copt=-DTFLITE_WITH_RUY_GEMV" if [ "${OS}" = "${TC_MSYS_VERSION}" ]; then # Somehow, even with Python being in the PATH, Bazel on windows struggles diff --git a/taskcluster/tf_win-amd64-cpu-opt.yml b/taskcluster/tf_win-amd64-cpu-opt.yml index 4b4d1105..99b4d8a3 100644 --- a/taskcluster/tf_win-amd64-cpu-opt.yml +++ b/taskcluster/tf_win-amd64-cpu-opt.yml @@ -8,7 +8,7 @@ build: ${tensorflow.packages_win.pacman} && ${tensorflow.packages_win.msys64} scripts: setup: "taskcluster/tf_tc-setup.sh" - build: "taskcluster/tf_tc-build.sh" + build: "taskcluster/tf_tc-build.sh --cpu" package: "taskcluster/tf_tc-package.sh" maxRunTime: 14400 metadata: diff --git a/tensorflow b/tensorflow index 518c1d04..fc464111 160000 --- a/tensorflow +++ b/tensorflow @@ -1 +1 @@ -Subproject commit 518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d +Subproject commit fc464111ac5c49791e44122e5946e521b25840bd From c85f95f781ee8c007502bcfc4ab2e24fc3e3e24c Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 9 Jun 2020 19:58:04 +0200 Subject: [PATCH 05/33] Add DeepSpeech iOS tasks --- taskcluster/ios-arm64-tflite-opt.yml | 21 +++++++++++++++++++++ taskcluster/ios-build.sh | 17 +++++++++++++++++ taskcluster/ios-x86_64-tflite-opt.yml | 21 
+++++++++++++++++++++ 3 files changed, 59 insertions(+) create mode 100644 taskcluster/ios-arm64-tflite-opt.yml create mode 100755 taskcluster/ios-build.sh create mode 100644 taskcluster/ios-x86_64-tflite-opt.yml diff --git a/taskcluster/ios-arm64-tflite-opt.yml b/taskcluster/ios-arm64-tflite-opt.yml new file mode 100644 index 00000000..dab0d24b --- /dev/null +++ b/taskcluster/ios-arm64-tflite-opt.yml @@ -0,0 +1,21 @@ +build: + template_file: darwin-opt-base.tyml + dependencies: + - "swig-darwin-amd64" + - "node-gyp-cache" + - "homebrew_builds-darwin-amd64" + - "pyenv-darwin-amd64" + - "tf_ios-arm64-opt" + routes: + - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.ios_arm64-tflite" + - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.ios_arm64-tflite" + - "index.project.deepspeech.deepspeech.native_client.ios_arm64-tflite.${event.head.sha}" + tensorflow: ${system.tensorflow.ios_arm64.url} + scripts: + build: "taskcluster/ios-build.sh" + package: "taskcluster/package.sh" + nc_asset_name: "native_client.arm64.tflite.ios.tar.xz" + maxRunTime: 14400 + metadata: + name: "DeepSpeech iOS ARM64 TFLite" + description: "Building DeepSpeech for iOS ARM64, TFLite, optimized version" diff --git a/taskcluster/ios-build.sh b/taskcluster/ios-build.sh new file mode 100755 index 00000000..1b913130 --- /dev/null +++ b/taskcluster/ios-build.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -xe + +source $(dirname "$0")/tc-tests-utils.sh + +source ${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh + +BAZEL_TARGETS=" +//native_client:libdeepspeech.so +" + +BAZEL_BUILD_FLAGS="--config=ios_arm64 --define=runtime=tflite ${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS}" + +BAZEL_ENV_FLAGS="TF_NEED_CUDA=0" + +do_bazel_build diff --git a/taskcluster/ios-x86_64-tflite-opt.yml b/taskcluster/ios-x86_64-tflite-opt.yml new file mode 100644 index 00000000..144f0bd5 --- /dev/null +++ b/taskcluster/ios-x86_64-tflite-opt.yml @@ -0,0 +1,21 @@ +build: + 
template_file: darwin-opt-base.tyml + dependencies: + - "swig-darwin-amd64" + - "node-gyp-cache" + - "homebrew_builds-darwin-amd64" + - "pyenv-darwin-amd64" + - "tf_ios-x86_64-opt" + routes: + - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.ios_x86_64-tflite" + - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.ios_x86_64-tflite" + - "index.project.deepspeech.deepspeech.native_client.ios_x86_64-tflite.${event.head.sha}" + tensorflow: ${system.tensorflow.ios_x86_64.url} + scripts: + build: "taskcluster/ios-build.sh" + package: "taskcluster/package.sh" + nc_asset_name: "native_client.x86_64.tflite.ios.tar.xz" + maxRunTime: 14400 + metadata: + name: "DeepSpeech iOS x86_64 TFLite" + description: "Building DeepSpeech for iOS x86_64, TFLite, optimized version" From 4ca0f94d78658d4b57c784197046c669122c7b7d Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 9 Jun 2020 20:41:36 +0200 Subject: [PATCH 06/33] client.cc iOS build --- native_client/client.cc | 10 +++++++--- native_client/definitions.mk | 22 ++++++++++++++++++++-- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/native_client/client.cc b/native_client/client.cc index 1b335955..46a16115 100644 --- a/native_client/client.cc +++ b/native_client/client.cc @@ -12,7 +12,11 @@ #include #include -#if defined(__ANDROID__) || defined(_MSC_VER) +#ifdef __APPLE__ +#include +#endif + +#if defined(__ANDROID__) || defined(_MSC_VER) || TARGET_OS_IPHONE #define NO_SOX #endif @@ -244,7 +248,7 @@ GetAudioBuffer(const char* path, int desired_sample_rate) sox_false // Reverse endianness }; -#ifdef __APPLE__ +#if TARGET_OS_OSX // It would be preferable to use sox_open_memstream_write here, but OS-X // doesn't support POSIX 2008, which it requires. See Issue #461. // Instead, we write to a temporary file. 
@@ -348,7 +352,7 @@ GetAudioBuffer(const char* path, int desired_sample_rate) fclose(wave); #endif // NO_SOX -#ifdef __APPLE__ +#if TARGET_OS_OSX res.buffer_size = (size_t)(output->olength * 2); res.buffer = (char*)malloc(sizeof(char) * res.buffer_size); FILE* output_file = fopen(output_name, "rb"); diff --git a/native_client/definitions.mk b/native_client/definitions.mk index 2f6afbf4..0c8ab656 100644 --- a/native_client/definitions.mk +++ b/native_client/definitions.mk @@ -101,6 +101,20 @@ NODE_PLATFORM_TARGET := --target_arch=arm64 --target_platform=linux TOOLCHAIN_LDD_OPTS := --root $(RASPBIAN)/ endif # ($(TARGET),rpi3-armv8) +ifeq ($(TARGET),ios-simulator) +CFLAGS := -isysroot $(shell xcrun -sdk iphonesimulator13.5 -show-sdk-path) +SOX_CFLAGS := +SOX_LDFLAGS := +LDFLAGS := +endif + +ifeq ($(TARGET),ios-arm64) +CFLAGS := -target arm64-apple-ios -isysroot $(shell xcrun -sdk iphoneos13.5 -show-sdk-path) +SOX_CFLAGS := +SOX_LDFLAGS := +LDFLAGS := +endif + # -Wl,--no-as-needed is required to force linker not to evict libs it thinks we # dont need ; will fail the build on OSX because that option does not exists ifeq ($(OS),Linux) @@ -108,9 +122,13 @@ LDFLAGS_NEEDED := -Wl,--no-as-needed LDFLAGS_RPATH := -Wl,-rpath,\$$ORIGIN endif ifeq ($(OS),Darwin) -CXXFLAGS += -stdlib=libc++ -mmacosx-version-min=10.10 -LDFLAGS_NEEDED := -stdlib=libc++ -mmacosx-version-min=10.10 +CXXFLAGS += -stdlib=libc++ +LDFLAGS_NEEDED := -stdlib=libc++ LDFLAGS_RPATH := -Wl,-rpath,@executable_path +ifeq ($(TARGET),host) +CXXFLAGS += -mmacosx-version-min=10.10 +LDFLAGS_NEEDED += -mmacosx-version-min=10.10 +endif endif CFLAGS += $(EXTRA_CFLAGS) From a1aa873259a96d12969039fdc958d9a2d9243ec3 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Sun, 14 Jun 2020 11:46:12 +0200 Subject: [PATCH 07/33] Embed bitcode when linking --- native_client/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/native_client/BUILD b/native_client/BUILD index b38979e5..92eb788c 100644 --- a/native_client/BUILD +++ 
b/native_client/BUILD @@ -130,6 +130,7 @@ tf_cc_shared_object( }) + tflite_copts(), linkopts = select({ "//tensorflow:macos": [], + "//tensorflow:ios": ["-fembed-bitcode"], "//tensorflow:linux_x86_64": LINUX_LINKOPTS, "//tensorflow:rpi3": LINUX_LINKOPTS, "//tensorflow:rpi3-armv8": LINUX_LINKOPTS, From a274c26a89c7fb4334473b8f715325c8e1e5c30d Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Sun, 14 Jun 2020 12:05:45 +0200 Subject: [PATCH 08/33] Add Swift wrapper framework --- native_client/swift/.gitignore | 4 + .../deepspeech_ios.xcodeproj/project.pbxproj | 499 ++++++++++++++ .../contents.xcworkspacedata | 7 + .../xcshareddata/IDEWorkspaceChecks.plist | 8 + .../xcschemes/deepspeech_ios.xcscheme | 77 +++ .../contents.xcworkspacedata | 10 + .../xcshareddata/IDEWorkspaceChecks.plist | 8 + .../xcshareddata/WorkspaceSettings.xcsettings | 8 + .../swift/deepspeech_ios/DeepSpeech.swift | 301 +++++++++ native_client/swift/deepspeech_ios/Info.plist | 22 + .../swift/deepspeech_ios/deepspeech.h | 357 ++++++++++ .../swift/deepspeech_ios/deepspeech_ios.h | 13 + .../deepspeech_ios/deepspeech_ios.modulemap | 12 + .../project.pbxproj | 637 ++++++++++++++++++ .../contents.xcworkspacedata | 7 + .../xcshareddata/IDEWorkspaceChecks.plist | 8 + .../xcschemes/deepspeech_ios_test.xcscheme | 106 +++ .../deepspeech_ios_test/AppDelegate.swift | 255 +++++++ .../AppIcon.appiconset/Contents.json | 98 +++ .../Assets.xcassets/Contents.json | 6 + .../Base.lproj/LaunchScreen.storyboard | 25 + .../deepspeech_ios_test/ContentView.swift | 21 + .../deepspeech_ios_test/Info.plist | 60 ++ .../Preview Assets.xcassets/Contents.json | 6 + .../deepspeech_ios_test/SceneDelegate.swift | 64 ++ .../deepspeech_ios_testTests/Info.plist | 22 + .../deepspeech_ios_testTests.swift | 34 + .../deepspeech_ios_testUITests/Info.plist | 22 + .../deepspeech_ios_testUITests.swift | 43 ++ 29 files changed, 2740 insertions(+) create mode 100644 native_client/swift/.gitignore create mode 100644 
native_client/swift/deepspeech_ios.xcodeproj/project.pbxproj create mode 100644 native_client/swift/deepspeech_ios.xcodeproj/project.xcworkspace/contents.xcworkspacedata create mode 100644 native_client/swift/deepspeech_ios.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist create mode 100644 native_client/swift/deepspeech_ios.xcodeproj/xcshareddata/xcschemes/deepspeech_ios.xcscheme create mode 100644 native_client/swift/deepspeech_ios.xcworkspace/contents.xcworkspacedata create mode 100644 native_client/swift/deepspeech_ios.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist create mode 100644 native_client/swift/deepspeech_ios.xcworkspace/xcshareddata/WorkspaceSettings.xcsettings create mode 100644 native_client/swift/deepspeech_ios/DeepSpeech.swift create mode 100644 native_client/swift/deepspeech_ios/Info.plist create mode 100644 native_client/swift/deepspeech_ios/deepspeech.h create mode 100644 native_client/swift/deepspeech_ios/deepspeech_ios.h create mode 100644 native_client/swift/deepspeech_ios/deepspeech_ios.modulemap create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.pbxproj create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.xcworkspace/contents.xcworkspacedata create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/xcshareddata/xcschemes/deepspeech_ios_test.xcscheme create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test/AppDelegate.swift create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Assets.xcassets/AppIcon.appiconset/Contents.json create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Assets.xcassets/Contents.json create mode 100644 
native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Base.lproj/LaunchScreen.storyboard create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test/ContentView.swift create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Info.plist create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Preview Content/Preview Assets.xcassets/Contents.json create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test/SceneDelegate.swift create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_testTests/Info.plist create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_testTests/deepspeech_ios_testTests.swift create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_testUITests/Info.plist create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_testUITests/deepspeech_ios_testUITests.swift diff --git a/native_client/swift/.gitignore b/native_client/swift/.gitignore new file mode 100644 index 00000000..0351cff4 --- /dev/null +++ b/native_client/swift/.gitignore @@ -0,0 +1,4 @@ +.DS_Store +build/ +xcuserdata/ +/deepspeech_ios/libdeepspeech.dylib diff --git a/native_client/swift/deepspeech_ios.xcodeproj/project.pbxproj b/native_client/swift/deepspeech_ios.xcodeproj/project.pbxproj new file mode 100644 index 00000000..59927e9e --- /dev/null +++ b/native_client/swift/deepspeech_ios.xcodeproj/project.pbxproj @@ -0,0 +1,499 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 50; + objects = { + +/* Begin PBXBuildFile section */ + 505B136B24960D550007DADA /* deepspeech_ios.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 505B136124960D550007DADA /* deepspeech_ios.framework */; }; + 505B137224960D550007DADA /* deepspeech_ios.h in Headers */ = {isa = PBXBuildFile; fileRef = 505B136424960D550007DADA /* deepspeech_ios.h */; settings = {ATTRIBUTES = (Public, ); }; }; + 505B137D24961AF20007DADA /* deepspeech.h in Headers */ = {isa = PBXBuildFile; fileRef = 505B137C24961AF20007DADA /* deepspeech.h */; settings = {ATTRIBUTES = (Private, ); }; }; + 505B137F24961BA70007DADA /* DeepSpeech.swift in Sources */ = {isa = PBXBuildFile; fileRef = 505B137E24961BA70007DADA /* DeepSpeech.swift */; }; + 507CD39B24B61FA100409BBB /* libdeepspeech.so in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD39A24B61FA100409BBB /* libdeepspeech.so */; }; +/* End PBXBuildFile section */ + +/* Begin PBXContainerItemProxy section */ + 505B136C24960D550007DADA /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 505B135824960D550007DADA /* Project object */; + proxyType = 1; + remoteGlobalIDString = 505B136024960D550007DADA; + remoteInfo = deepspeech_ios; + }; +/* End PBXContainerItemProxy section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 505B138A249628290007DADA /* Embed Libraries */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 12; + dstPath = ""; + dstSubfolderSpec = 10; + files = ( + ); + name = "Embed Libraries"; + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ + 505B136124960D550007DADA /* deepspeech_ios.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = deepspeech_ios.framework; sourceTree = BUILT_PRODUCTS_DIR; }; + 505B136424960D550007DADA /* deepspeech_ios.h */ = {isa = PBXFileReference; lastKnownFileType = 
sourcecode.c.h; path = deepspeech_ios.h; sourceTree = ""; }; + 505B136524960D550007DADA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 505B136A24960D550007DADA /* deepspeech_iosTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = deepspeech_iosTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; + 505B137B249619C90007DADA /* deepspeech_ios.modulemap */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.module-map"; path = deepspeech_ios.modulemap; sourceTree = ""; }; + 505B137C24961AF20007DADA /* deepspeech.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = deepspeech.h; path = ../../deepspeech.h; sourceTree = ""; }; + 505B137E24961BA70007DADA /* DeepSpeech.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DeepSpeech.swift; sourceTree = ""; }; + 507CD39A24B61FA100409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; path = libdeepspeech.so; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 505B135E24960D550007DADA /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 507CD39B24B61FA100409BBB /* libdeepspeech.so in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 505B136724960D550007DADA /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 505B136B24960D550007DADA /* deepspeech_ios.framework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 505B135724960D550007DADA = { + isa = PBXGroup; + children = ( + 505B136324960D550007DADA /* deepspeech_ios */, + 505B136224960D550007DADA /* Products */, + 505B1380249620C60007DADA /* Frameworks */, + ); + sourceTree = ""; + }; + 
505B136224960D550007DADA /* Products */ = { + isa = PBXGroup; + children = ( + 505B136124960D550007DADA /* deepspeech_ios.framework */, + 505B136A24960D550007DADA /* deepspeech_iosTests.xctest */, + ); + name = Products; + sourceTree = ""; + }; + 505B136324960D550007DADA /* deepspeech_ios */ = { + isa = PBXGroup; + children = ( + 505B137C24961AF20007DADA /* deepspeech.h */, + 505B136424960D550007DADA /* deepspeech_ios.h */, + 505B137E24961BA70007DADA /* DeepSpeech.swift */, + 505B137B249619C90007DADA /* deepspeech_ios.modulemap */, + 505B136524960D550007DADA /* Info.plist */, + ); + path = deepspeech_ios; + sourceTree = ""; + }; + 505B1380249620C60007DADA /* Frameworks */ = { + isa = PBXGroup; + children = ( + 507CD39A24B61FA100409BBB /* libdeepspeech.so */, + ); + name = Frameworks; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXHeadersBuildPhase section */ + 505B135C24960D550007DADA /* Headers */ = { + isa = PBXHeadersBuildPhase; + buildActionMask = 2147483647; + files = ( + 505B137224960D550007DADA /* deepspeech_ios.h in Headers */, + 505B137D24961AF20007DADA /* deepspeech.h in Headers */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXHeadersBuildPhase section */ + +/* Begin PBXNativeTarget section */ + 505B136024960D550007DADA /* deepspeech_ios */ = { + isa = PBXNativeTarget; + buildConfigurationList = 505B137524960D550007DADA /* Build configuration list for PBXNativeTarget "deepspeech_ios" */; + buildPhases = ( + 505B135C24960D550007DADA /* Headers */, + 505B135D24960D550007DADA /* Sources */, + 505B135E24960D550007DADA /* Frameworks */, + 505B135F24960D550007DADA /* Resources */, + 505B138A249628290007DADA /* Embed Libraries */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = deepspeech_ios; + productName = deepspeech_ios; + productReference = 505B136124960D550007DADA /* deepspeech_ios.framework */; + productType = "com.apple.product-type.framework"; + }; + 505B136924960D550007DADA /* deepspeech_iosTests */ = { 
+ isa = PBXNativeTarget; + buildConfigurationList = 505B137824960D550007DADA /* Build configuration list for PBXNativeTarget "deepspeech_iosTests" */; + buildPhases = ( + 505B136624960D550007DADA /* Sources */, + 505B136724960D550007DADA /* Frameworks */, + 505B136824960D550007DADA /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + 505B136D24960D550007DADA /* PBXTargetDependency */, + ); + name = deepspeech_iosTests; + productName = deepspeech_iosTests; + productReference = 505B136A24960D550007DADA /* deepspeech_iosTests.xctest */; + productType = "com.apple.product-type.bundle.unit-test"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 505B135824960D550007DADA /* Project object */ = { + isa = PBXProject; + attributes = { + LastSwiftUpdateCheck = 1150; + LastUpgradeCheck = 1150; + ORGANIZATIONNAME = Mozilla; + TargetAttributes = { + 505B136024960D550007DADA = { + CreatedOnToolsVersion = 11.5; + LastSwiftMigration = 1150; + }; + 505B136924960D550007DADA = { + CreatedOnToolsVersion = 11.5; + }; + }; + }; + buildConfigurationList = 505B135B24960D550007DADA /* Build configuration list for PBXProject "deepspeech_ios" */; + compatibilityVersion = "Xcode 9.3"; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = 505B135724960D550007DADA; + productRefGroup = 505B136224960D550007DADA /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 505B136024960D550007DADA /* deepspeech_ios */, + 505B136924960D550007DADA /* deepspeech_iosTests */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + 505B135F24960D550007DADA /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 505B136824960D550007DADA /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; 
+ }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 505B135D24960D550007DADA /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 505B137F24961BA70007DADA /* DeepSpeech.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 505B136624960D550007DADA /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin PBXTargetDependency section */ + 505B136D24960D550007DADA /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 505B136024960D550007DADA /* deepspeech_ios */; + targetProxy = 505B136C24960D550007DADA /* PBXContainerItemProxy */; + }; +/* End PBXTargetDependency section */ + +/* Begin XCBuildConfiguration section */ + 505B137324960D550007DADA /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + 
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + CURRENT_PROJECT_VERSION = 1; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 13.5; + MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; + MTL_FAST_MATH = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = iphoneos; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + VERSIONING_SYSTEM = "apple-generic"; + VERSION_INFO_PREFIX = ""; + }; + name = Debug; + }; + 505B137424960D550007DADA /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + 
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + CURRENT_PROJECT_VERSION = 1; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 13.5; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + SDKROOT = iphoneos; + SWIFT_COMPILATION_MODE = wholemodule; + SWIFT_OPTIMIZATION_LEVEL = "-O"; + VALIDATE_PRODUCT = YES; + VERSIONING_SYSTEM = "apple-generic"; + VERSION_INFO_PREFIX = ""; + }; + name = Release; + }; + 505B137624960D550007DADA /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_ENABLE_MODULES = YES; + CODE_SIGN_IDENTITY = "Apple Development"; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "Apple Development"; + CODE_SIGN_STYLE = Automatic; + DEFINES_MODULE = YES; + DEVELOPMENT_TEAM = AWCG9S27P7; + DYLIB_COMPATIBILITY_VERSION = 1; + DYLIB_CURRENT_VERSION = 1; + DYLIB_INSTALL_NAME_BASE = "@rpath"; + INFOPLIST_FILE = deepspeech_ios/Info.plist; + INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + LIBRARY_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)/deepspeech_ios", + "$(PROJECT_DIR)", + ); + MODULEMAP_FILE = deepspeech_ios/deepspeech_ios.modulemap; + PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios"; + PRODUCT_NAME = 
"$(TARGET_NAME:c99extidentifier)"; + SKIP_INSTALL = YES; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Debug; + }; + 505B137724960D550007DADA /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_ENABLE_MODULES = YES; + CODE_SIGN_IDENTITY = "Apple Development"; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "Apple Development"; + CODE_SIGN_STYLE = Automatic; + DEFINES_MODULE = YES; + DEVELOPMENT_TEAM = AWCG9S27P7; + DYLIB_COMPATIBILITY_VERSION = 1; + DYLIB_CURRENT_VERSION = 1; + DYLIB_INSTALL_NAME_BASE = "@rpath"; + INFOPLIST_FILE = deepspeech_ios/Info.plist; + INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + LIBRARY_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)/deepspeech_ios", + "$(PROJECT_DIR)", + ); + MODULEMAP_FILE = deepspeech_ios/deepspeech_ios.modulemap; + PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios"; + PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; + SKIP_INSTALL = YES; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Release; + }; + 505B137924960D550007DADA /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; + CODE_SIGN_STYLE = Automatic; + INFOPLIST_FILE = deepspeech_iosTests/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-iosTests"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Debug; + }; + 505B137A24960D550007DADA /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; + CODE_SIGN_STYLE = Automatic; + INFOPLIST_FILE = deepspeech_iosTests/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + 
"@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-iosTests"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 505B135B24960D550007DADA /* Build configuration list for PBXProject "deepspeech_ios" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 505B137324960D550007DADA /* Debug */, + 505B137424960D550007DADA /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 505B137524960D550007DADA /* Build configuration list for PBXNativeTarget "deepspeech_ios" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 505B137624960D550007DADA /* Debug */, + 505B137724960D550007DADA /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 505B137824960D550007DADA /* Build configuration list for PBXNativeTarget "deepspeech_iosTests" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 505B137924960D550007DADA /* Debug */, + 505B137A24960D550007DADA /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 505B135824960D550007DADA /* Project object */; +} diff --git a/native_client/swift/deepspeech_ios.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/native_client/swift/deepspeech_ios.xcodeproj/project.xcworkspace/contents.xcworkspacedata new file mode 100644 index 00000000..e763e6ba --- /dev/null +++ b/native_client/swift/deepspeech_ios.xcodeproj/project.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,7 @@ + + + + + diff --git a/native_client/swift/deepspeech_ios.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist 
b/native_client/swift/deepspeech_ios.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist new file mode 100644 index 00000000..18d98100 --- /dev/null +++ b/native_client/swift/deepspeech_ios.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist @@ -0,0 +1,8 @@ + + + + + IDEDidComputeMac32BitWarning + + + diff --git a/native_client/swift/deepspeech_ios.xcodeproj/xcshareddata/xcschemes/deepspeech_ios.xcscheme b/native_client/swift/deepspeech_ios.xcodeproj/xcshareddata/xcschemes/deepspeech_ios.xcscheme new file mode 100644 index 00000000..b3ba3705 --- /dev/null +++ b/native_client/swift/deepspeech_ios.xcodeproj/xcshareddata/xcschemes/deepspeech_ios.xcscheme @@ -0,0 +1,77 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/native_client/swift/deepspeech_ios.xcworkspace/contents.xcworkspacedata b/native_client/swift/deepspeech_ios.xcworkspace/contents.xcworkspacedata new file mode 100644 index 00000000..73975e36 --- /dev/null +++ b/native_client/swift/deepspeech_ios.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,10 @@ + + + + + + + diff --git a/native_client/swift/deepspeech_ios.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/native_client/swift/deepspeech_ios.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist new file mode 100644 index 00000000..18d98100 --- /dev/null +++ b/native_client/swift/deepspeech_ios.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist @@ -0,0 +1,8 @@ + + + + + IDEDidComputeMac32BitWarning + + + diff --git a/native_client/swift/deepspeech_ios.xcworkspace/xcshareddata/WorkspaceSettings.xcsettings b/native_client/swift/deepspeech_ios.xcworkspace/xcshareddata/WorkspaceSettings.xcsettings new file mode 100644 index 00000000..f9b0d7c5 --- /dev/null +++ b/native_client/swift/deepspeech_ios.xcworkspace/xcshareddata/WorkspaceSettings.xcsettings @@ -0,0 +1,8 @@ + + + + + PreviewsEnabled + + + diff --git a/native_client/swift/deepspeech_ios/DeepSpeech.swift 
b/native_client/swift/deepspeech_ios/DeepSpeech.swift new file mode 100644 index 00000000..b694995b --- /dev/null +++ b/native_client/swift/deepspeech_ios/DeepSpeech.swift @@ -0,0 +1,301 @@ +// +// DeepSpeech.swift +// deepspeech_ios +// +// Created by Reuben Morais on 14.06.20. +// Copyright © 2020 Mozilla. All rights reserved. +// + +import deepspeech_ios.libdeepspeech_Private + +public enum DeepSpeechError: Error { + // Should be kept in sync with deepspeech.h + case noModel(errorCode: Int32) + case invalidAlphabet(errorCode: Int32) + case invalidShape(errorCode: Int32) + case invalidScorer(errorCode: Int32) + case modelIncompatible(errorCode: Int32) + case scorerNotEnabled(errorCode: Int32) + case scorerUnreadable(errorCode: Int32) + case scorerInvalidLm(errorCode: Int32) + case scorerNoTrie(errorCode: Int32) + case scorerInvalidTrie(errorCode: Int32) + case scorerVersionMismatch(errorCode: Int32) + case failInitMmap(errorCode: Int32) + case failInitSess(errorCode: Int32) + case failInterpreter(errorCode: Int32) + case failRunSess(errorCode: Int32) + case failCreateStream(errorCode: Int32) + case failReadProtobuf(errorCode: Int32) + case failCreateSess(errorCode: Int32) + case failCreateModel(errorCode: Int32) + + // Additional case for invalid error codes, should never happen unless the user has mixed header and binary versions + case invalidErrorCode(errorCode: Int32) +} + +extension DeepSpeechError : LocalizedError { + public var errorDescription: String? 
{ + switch self { + case .noModel(let errorCode), + .invalidAlphabet(let errorCode), + .invalidShape(let errorCode), + .invalidScorer(let errorCode), + .modelIncompatible(let errorCode), + .scorerNotEnabled(let errorCode), + .scorerUnreadable(let errorCode), + .scorerInvalidLm(let errorCode), + .scorerNoTrie(let errorCode), + .scorerInvalidTrie(let errorCode), + .scorerVersionMismatch(let errorCode), + .failInitMmap(let errorCode), + .failInitSess(let errorCode), + .failInterpreter(let errorCode), + .failRunSess(let errorCode), + .failCreateStream(let errorCode), + .failReadProtobuf(let errorCode), + .failCreateSess(let errorCode), + .failCreateModel(let errorCode), + .invalidErrorCode(let errorCode): + let result = DS_ErrorCodeToErrorMessage(errorCode) + defer { DS_FreeString(result) } + return String(cString: result!) + } + } +} + +private func errorCodeToEnum(errorCode: Int32) -> DeepSpeechError { + switch Int(errorCode) { + case Int(DS_ERR_NO_MODEL.rawValue): + return DeepSpeechError.noModel(errorCode: errorCode) + case Int(DS_ERR_INVALID_ALPHABET.rawValue): + return DeepSpeechError.invalidAlphabet(errorCode: errorCode) + case Int(DS_ERR_INVALID_SHAPE.rawValue): + return DeepSpeechError.invalidShape(errorCode: errorCode) + case Int(DS_ERR_INVALID_SCORER.rawValue): + return DeepSpeechError.invalidScorer(errorCode: errorCode) + case Int(DS_ERR_MODEL_INCOMPATIBLE.rawValue): + return DeepSpeechError.modelIncompatible(errorCode: errorCode) + case Int(DS_ERR_SCORER_NOT_ENABLED.rawValue): + return DeepSpeechError.scorerNotEnabled(errorCode: errorCode) + case Int(DS_ERR_SCORER_UNREADABLE.rawValue): + return DeepSpeechError.scorerUnreadable(errorCode: errorCode) + case Int(DS_ERR_SCORER_INVALID_LM.rawValue): + return DeepSpeechError.scorerInvalidLm(errorCode: errorCode) + case Int(DS_ERR_SCORER_NO_TRIE.rawValue): + return DeepSpeechError.scorerNoTrie(errorCode: errorCode) + case Int(DS_ERR_SCORER_INVALID_TRIE.rawValue): + return 
DeepSpeechError.scorerInvalidTrie(errorCode: errorCode) + case Int(DS_ERR_SCORER_VERSION_MISMATCH.rawValue): + return DeepSpeechError.scorerVersionMismatch(errorCode: errorCode) + case Int(DS_ERR_FAIL_INIT_MMAP.rawValue): + return DeepSpeechError.failInitMmap(errorCode: errorCode) + case Int(DS_ERR_FAIL_INIT_SESS.rawValue): + return DeepSpeechError.failInitSess(errorCode: errorCode) + case Int(DS_ERR_FAIL_INTERPRETER.rawValue): + return DeepSpeechError.failInterpreter(errorCode: errorCode) + case Int(DS_ERR_FAIL_RUN_SESS.rawValue): + return DeepSpeechError.failRunSess(errorCode: errorCode) + case Int(DS_ERR_FAIL_CREATE_STREAM.rawValue): + return DeepSpeechError.failCreateStream(errorCode: errorCode) + case Int(DS_ERR_FAIL_READ_PROTOBUF.rawValue): + return DeepSpeechError.failReadProtobuf(errorCode: errorCode) + case Int(DS_ERR_FAIL_CREATE_SESS.rawValue): + return DeepSpeechError.failCreateSess(errorCode: errorCode) + case Int(DS_ERR_FAIL_CREATE_MODEL.rawValue): + return DeepSpeechError.failCreateModel(errorCode: errorCode) + default: + return DeepSpeechError.invalidErrorCode(errorCode: errorCode) + } +} + +private func evaluateErrorCode(errorCode: Int32) throws { + if errorCode != Int32(DS_ERR_OK.rawValue) { + throw errorCodeToEnum(errorCode: errorCode) + } +} + +public struct DeepSpeechTokenMetadata { + let text: String + let timestep: Int + let startTime: Float + + internal init(fromInternal: TokenMetadata) { + text = String(cString: fromInternal.text) + timestep = Int(fromInternal.timestep) + startTime = fromInternal.start_time + } +} + +public struct DeepSpeechCandidateTranscript { + private(set) var tokens: [DeepSpeechTokenMetadata] = [] + let confidence: Double + + internal init(fromInternal: CandidateTranscript) { + let tokensBuffer = UnsafeBufferPointer(start: fromInternal.tokens, count: Int(fromInternal.num_tokens)) + for tok in tokensBuffer { + tokens.append(DeepSpeechTokenMetadata(fromInternal: tok)) + } + confidence = fromInternal.confidence + } +} + 
+public struct DeepSpeechMetadata { + private(set) var transcripts: [DeepSpeechCandidateTranscript] = [] + + internal init(fromInternal: UnsafeMutablePointer) { + let md = fromInternal.pointee + let transcriptsBuffer = UnsafeBufferPointer(start: md.transcripts, count: Int(md.num_transcripts)) + + for tr in transcriptsBuffer { + transcripts.append(DeepSpeechCandidateTranscript(fromInternal: tr)) + } + } +} + +public class DeepSpeechStream { + private var streamCtx: OpaquePointer! + + internal init(streamContext: OpaquePointer) { + streamCtx = streamContext + } + + deinit { + if streamCtx != nil { + DS_FreeStream(streamCtx) + streamCtx = nil + } + } + + public func feedAudioContent(buffer: Array) { + precondition(streamCtx != nil, "calling method on invalidated Stream") + + buffer.withUnsafeBufferPointer { unsafeBufferPointer in + feedAudioContent(buffer: unsafeBufferPointer) + } + } + + public func feedAudioContent(buffer: UnsafeBufferPointer) { + precondition(streamCtx != nil, "calling method on invalidated Stream") + + DS_FeedAudioContent(streamCtx, buffer.baseAddress, UInt32(buffer.count)) + } + + public func intermediateDecode() -> String { + precondition(streamCtx != nil, "calling method on invalidated Stream") + + let result = DS_IntermediateDecode(streamCtx) + defer { DS_FreeString(result) } + return String(cString: result!) + } + + public func intermediateDecodeWithMetadata(numResults: Int) -> DeepSpeechMetadata { + precondition(streamCtx != nil, "calling method on invalidated Stream") + let result = DS_IntermediateDecodeWithMetadata(streamCtx, UInt32(numResults))! + defer { DS_FreeMetadata(result) } + return DeepSpeechMetadata(fromInternal: result) + } + + public func finishStream() -> String { + precondition(streamCtx != nil, "calling method on invalidated Stream") + + let result = DS_FinishStream(streamCtx) + defer { + DS_FreeString(result) + streamCtx = nil + } + return String(cString: result!) 
+ } +} + +public class DeepSpeechModel { + private var modelCtx: OpaquePointer! + + public init(modelPath: String) throws { + let err = DS_CreateModel(modelPath, &modelCtx) + try evaluateErrorCode(errorCode: err) + } + + deinit { + DS_FreeModel(modelCtx) + modelCtx = nil + } + + public func getBeamWidth() -> Int { + return Int(DS_GetModelBeamWidth(modelCtx)) + } + + public func setBeamWidth(beamWidth: Int) throws { + let err = DS_SetModelBeamWidth(modelCtx, UInt32(beamWidth)) + try evaluateErrorCode(errorCode: err) + } + + public var sampleRate: Int { + get { + return Int(DS_GetModelSampleRate(modelCtx)) + } + } + + public func enableExternalScorer(scorerPath: String) throws { + let err = DS_EnableExternalScorer(modelCtx, scorerPath) + try evaluateErrorCode(errorCode: err) + } + + public func disableExternalScorer() throws { + let err = DS_DisableExternalScorer(modelCtx) + try evaluateErrorCode(errorCode: err) + } + + public func setScorerAlphaBeta(alpha: Float, beta: Float) throws { + let err = DS_SetScorerAlphaBeta(modelCtx, alpha, beta) + try evaluateErrorCode(errorCode: err) + } + + public func speechToText(buffer: Array) -> String { + return buffer.withUnsafeBufferPointer { unsafeBufferPointer -> String in + return speechToText(buffer: unsafeBufferPointer) + } + } + + public func speechToText(buffer: UnsafeBufferPointer) -> String { + let result = DS_SpeechToText(modelCtx, buffer.baseAddress, UInt32(buffer.count)) + defer { DS_FreeString(result) } + return String(cString: result!) + } + + public func speechToTextWithMetadata(buffer: Array, numResults: Int) -> DeepSpeechMetadata { + return buffer.withUnsafeBufferPointer { unsafeBufferPointer -> DeepSpeechMetadata in + let result = DS_SpeechToTextWithMetadata(modelCtx, unsafeBufferPointer.baseAddress, UInt32(buffer.count), UInt32(numResults))! 
+ defer { DS_FreeMetadata(result) } + return DeepSpeechMetadata(fromInternal: result) + } + } + + public func createStream() throws -> DeepSpeechStream { + var streamContext: OpaquePointer! + let err = DS_CreateStream(modelCtx, &streamContext) + try evaluateErrorCode(errorCode: err) + return DeepSpeechStream(streamContext: streamContext) + } + + public class func open(path: String, scorerPath: Optional = nil) -> OpaquePointer { + var fooOpaque: OpaquePointer! + DS_CreateModel(path, &fooOpaque) + if let scorerPath = scorerPath { + DS_EnableExternalScorer(fooOpaque, scorerPath) + } + return fooOpaque + } + + public class func createStream(modelState: OpaquePointer) -> OpaquePointer { + var fooOpaque: OpaquePointer! + DS_CreateStream(modelState, &fooOpaque) + return fooOpaque + } +} + +public func DeepSpeechVersion() -> String { + let result = DS_Version() + defer { DS_FreeString(result) } + return String(cString: result!) +} diff --git a/native_client/swift/deepspeech_ios/Info.plist b/native_client/swift/deepspeech_ios/Info.plist new file mode 100644 index 00000000..9bcb2444 --- /dev/null +++ b/native_client/swift/deepspeech_ios/Info.plist @@ -0,0 +1,22 @@ + + + + + CFBundleDevelopmentRegion + $(DEVELOPMENT_LANGUAGE) + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + $(PRODUCT_BUNDLE_PACKAGE_TYPE) + CFBundleShortVersionString + 1.0 + CFBundleVersion + $(CURRENT_PROJECT_VERSION) + + diff --git a/native_client/swift/deepspeech_ios/deepspeech.h b/native_client/swift/deepspeech_ios/deepspeech.h new file mode 100644 index 00000000..1df3cf2e --- /dev/null +++ b/native_client/swift/deepspeech_ios/deepspeech.h @@ -0,0 +1,357 @@ +#ifndef DEEPSPEECH_H +#define DEEPSPEECH_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef SWIG + #if defined _MSC_VER + #define DEEPSPEECH_EXPORT __declspec(dllexport) + #else + #define DEEPSPEECH_EXPORT 
__attribute__ ((visibility("default"))) + #endif /*End of _MSC_VER*/ +#else + #define DEEPSPEECH_EXPORT +#endif + +typedef struct ModelState ModelState; + +typedef struct StreamingState StreamingState; + +/** + * @brief Stores text of an individual token, along with its timing information + */ +typedef struct TokenMetadata { + /** The text corresponding to this token */ + const char* const text; + + /** Position of the token in units of 20ms */ + const unsigned int timestep; + + /** Position of the token in seconds */ + const float start_time; +} TokenMetadata; + +/** + * @brief A single transcript computed by the model, including a confidence + * value and the metadata for its constituent tokens. + */ +typedef struct CandidateTranscript { + /** Array of TokenMetadata objects */ + const TokenMetadata* const tokens; + /** Size of the tokens array */ + const unsigned int num_tokens; + /** Approximated confidence value for this transcript. This is roughly the + * sum of the acoustic model logit values for each timestep/character that + * contributed to the creation of this transcript. + */ + const double confidence; +} CandidateTranscript; + +/** + * @brief An array of CandidateTranscript objects computed by the model. + */ +typedef struct Metadata { + /** Array of CandidateTranscript objects */ + const CandidateTranscript* const transcripts; + /** Size of the transcripts array */ + const unsigned int num_transcripts; +} Metadata; + +// sphinx-doc: error_code_listing_start + +#define DS_FOR_EACH_ERROR(APPLY) \ + APPLY(DS_ERR_OK, 0x0000, "No error.") \ + APPLY(DS_ERR_NO_MODEL, 0x1000, "Missing model information.") \ + APPLY(DS_ERR_INVALID_ALPHABET, 0x2000, "Invalid alphabet embedded in model. 
(Data corruption?)") \ + APPLY(DS_ERR_INVALID_SHAPE, 0x2001, "Invalid model shape.") \ + APPLY(DS_ERR_INVALID_SCORER, 0x2002, "Invalid scorer file.") \ + APPLY(DS_ERR_MODEL_INCOMPATIBLE, 0x2003, "Incompatible model.") \ + APPLY(DS_ERR_SCORER_NOT_ENABLED, 0x2004, "External scorer is not enabled.") \ + APPLY(DS_ERR_SCORER_UNREADABLE, 0x2005, "Could not read scorer file.") \ + APPLY(DS_ERR_SCORER_INVALID_LM, 0x2006, "Could not recognize language model header in scorer.") \ + APPLY(DS_ERR_SCORER_NO_TRIE, 0x2007, "Reached end of scorer file before loading vocabulary trie.") \ + APPLY(DS_ERR_SCORER_INVALID_TRIE, 0x2008, "Invalid magic in trie header.") \ + APPLY(DS_ERR_SCORER_VERSION_MISMATCH, 0x2009, "Scorer file version does not match expected version.") \ + APPLY(DS_ERR_FAIL_INIT_MMAP, 0x3000, "Failed to initialize memory mapped model.") \ + APPLY(DS_ERR_FAIL_INIT_SESS, 0x3001, "Failed to initialize the session.") \ + APPLY(DS_ERR_FAIL_INTERPRETER, 0x3002, "Interpreter failed.") \ + APPLY(DS_ERR_FAIL_RUN_SESS, 0x3003, "Failed to run the session.") \ + APPLY(DS_ERR_FAIL_CREATE_STREAM, 0x3004, "Error creating the stream.") \ + APPLY(DS_ERR_FAIL_READ_PROTOBUF, 0x3005, "Error reading the proto buffer model file.") \ + APPLY(DS_ERR_FAIL_CREATE_SESS, 0x3006, "Failed to create session.") \ + APPLY(DS_ERR_FAIL_CREATE_MODEL, 0x3007, "Could not allocate model state.") + +// sphinx-doc: error_code_listing_end + +enum DeepSpeech_Error_Codes +{ +#define DEFINE(NAME, VALUE, DESC) NAME = VALUE, +DS_FOR_EACH_ERROR(DEFINE) +#undef DEFINE +}; + +/** + * @brief An object providing an interface to a trained DeepSpeech model. + * + * @param aModelPath The path to the frozen model graph. + * @param[out] retval a ModelState pointer + * + * @return Zero on success, non-zero on failure. + */ +DEEPSPEECH_EXPORT +int DS_CreateModel(const char* aModelPath, + ModelState** retval); + +/** + * @brief Get beam width value used by the model. 
If {@link DS_SetModelBeamWidth} + * was not called before, will return the default value loaded from the + * model file. + * + * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. + * + * @return Beam width value used by the model. + */ +DEEPSPEECH_EXPORT +unsigned int DS_GetModelBeamWidth(const ModelState* aCtx); + +/** + * @brief Set beam width value used by the model. + * + * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. + * @param aBeamWidth The beam width used by the model. A larger beam width value + * generates better results at the cost of decoding time. + * + * @return Zero on success, non-zero on failure. + */ +DEEPSPEECH_EXPORT +int DS_SetModelBeamWidth(ModelState* aCtx, + unsigned int aBeamWidth); + +/** + * @brief Return the sample rate expected by a model. + * + * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. + * + * @return Sample rate expected by the model for its input. + */ +DEEPSPEECH_EXPORT +int DS_GetModelSampleRate(const ModelState* aCtx); + +/** + * @brief Frees associated resources and destroys model object. + */ +DEEPSPEECH_EXPORT +void DS_FreeModel(ModelState* ctx); + +/** + * @brief Enable decoding using an external scorer. + * + * @param aCtx The ModelState pointer for the model being changed. + * @param aScorerPath The path to the external scorer file. + * + * @return Zero on success, non-zero on failure (invalid arguments). + */ +DEEPSPEECH_EXPORT +int DS_EnableExternalScorer(ModelState* aCtx, + const char* aScorerPath); + +/** + * @brief Disable decoding using an external scorer. + * + * @param aCtx The ModelState pointer for the model being changed. + * + * @return Zero on success, non-zero on failure. + */ +DEEPSPEECH_EXPORT +int DS_DisableExternalScorer(ModelState* aCtx); + +/** + * @brief Set hyperparameters alpha and beta of the external scorer. + * + * @param aCtx The ModelState pointer for the model being changed. 
+ * @param aAlpha The alpha hyperparameter of the decoder. Language model weight. + * @param aBeta The beta hyperparameter of the decoder. Word insertion weight. + * + * @return Zero on success, non-zero on failure. + */ +DEEPSPEECH_EXPORT +int DS_SetScorerAlphaBeta(ModelState* aCtx, + float aAlpha, + float aBeta); + +/** + * @brief Use the DeepSpeech model to convert speech to text. + * + * @param aCtx The ModelState pointer for the model to use. + * @param aBuffer A 16-bit, mono raw audio signal at the appropriate + * sample rate (matching what the model was trained on). + * @param aBufferSize The number of samples in the audio signal. + * + * @return The STT result. The user is responsible for freeing the string using + * {@link DS_FreeString()}. Returns NULL on error. + */ +DEEPSPEECH_EXPORT +char* DS_SpeechToText(ModelState* aCtx, + const short* aBuffer, + unsigned int aBufferSize); + +/** + * @brief Use the DeepSpeech model to convert speech to text and output results + * including metadata. + * + * @param aCtx The ModelState pointer for the model to use. + * @param aBuffer A 16-bit, mono raw audio signal at the appropriate + * sample rate (matching what the model was trained on). + * @param aBufferSize The number of samples in the audio signal. + * @param aNumResults The maximum number of CandidateTranscript structs to return. Returned value might be smaller than this. + * + * @return Metadata struct containing multiple CandidateTranscript structs. Each + * transcript has per-token metadata including timing information. The + * user is responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. + * Returns NULL on error. + */ +DEEPSPEECH_EXPORT +Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx, + const short* aBuffer, + unsigned int aBufferSize, + unsigned int aNumResults); + +/** + * @brief Create a new streaming inference state.
The streaming state returned + * by this function can then be passed to {@link DS_FeedAudioContent()} + * and {@link DS_FinishStream()}. + * + * @param aCtx The ModelState pointer for the model to use. + * @param[out] retval an opaque pointer that represents the streaming state. Can + * be NULL if an error occurs. + * + * @return Zero for success, non-zero on failure. + */ +DEEPSPEECH_EXPORT +int DS_CreateStream(ModelState* aCtx, + StreamingState** retval); + +/** + * @brief Feed audio samples to an ongoing streaming inference. + * + * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aBuffer An array of 16-bit, mono raw audio samples at the + * appropriate sample rate (matching what the model was trained on). + * @param aBufferSize The number of samples in @p aBuffer. + */ +DEEPSPEECH_EXPORT +void DS_FeedAudioContent(StreamingState* aSctx, + const short* aBuffer, + unsigned int aBufferSize); + +/** + * @brief Compute the intermediate decoding of an ongoing streaming inference. + * + * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * + * @return The STT intermediate result. The user is responsible for freeing the + * string using {@link DS_FreeString()}. + */ +DEEPSPEECH_EXPORT +char* DS_IntermediateDecode(const StreamingState* aSctx); + +/** + * @brief Compute the intermediate decoding of an ongoing streaming inference, + * return results including metadata. + * + * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aNumResults The number of candidate transcripts to return. + * + * @return Metadata struct containing multiple candidate transcripts. Each transcript + * has per-token metadata including timing information. The user is + * responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. + * Returns NULL on error. 
+ */ +DEEPSPEECH_EXPORT +Metadata* DS_IntermediateDecodeWithMetadata(const StreamingState* aSctx, + unsigned int aNumResults); + +/** + * @brief Compute the final decoding of an ongoing streaming inference and return + * the result. Signals the end of an ongoing streaming inference. + * + * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * + * @return The STT result. The user is responsible for freeing the string using + * {@link DS_FreeString()}. + * + * @note This method will free the state pointer (@p aSctx). + */ +DEEPSPEECH_EXPORT +char* DS_FinishStream(StreamingState* aSctx); + +/** + * @brief Compute the final decoding of an ongoing streaming inference and return + * results including metadata. Signals the end of an ongoing streaming + * inference. + * + * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aNumResults The number of candidate transcripts to return. + * + * @return Metadata struct containing multiple candidate transcripts. Each transcript + * has per-token metadata including timing information. The user is + * responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. + * Returns NULL on error. + * + * @note This method will free the state pointer (@p aSctx). + */ +DEEPSPEECH_EXPORT +Metadata* DS_FinishStreamWithMetadata(StreamingState* aSctx, + unsigned int aNumResults); + +/** + * @brief Destroy a streaming state without decoding the computed logits. This + * can be used if you no longer need the result of an ongoing streaming + * inference and don't want to perform a costly decode operation. + * + * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * + * @note This method will free the state pointer (@p aSctx). + */ +DEEPSPEECH_EXPORT +void DS_FreeStream(StreamingState* aSctx); + +/** + * @brief Free memory allocated for metadata information. 
+ */ +DEEPSPEECH_EXPORT +void DS_FreeMetadata(Metadata* m); + +/** + * @brief Free a char* string returned by the DeepSpeech API. + */ +DEEPSPEECH_EXPORT +void DS_FreeString(char* str); + +/** + * @brief Returns the version of this library. The returned version is a semantic + * version (SemVer 2.0.0). The string returned must be freed with {@link DS_FreeString()}. + * + * @return The version string. + */ +DEEPSPEECH_EXPORT +char* DS_Version(); + +/** + * @brief Returns a textual description corresponding to an error code. + * The string returned must be freed with {@link DS_FreeString()}. + * + * @return The error description. + */ +DEEPSPEECH_EXPORT +char* DS_ErrorCodeToErrorMessage(int aErrorCode); + +#undef DEEPSPEECH_EXPORT + +#ifdef __cplusplus +} +#endif + +#endif /* DEEPSPEECH_H */ diff --git a/native_client/swift/deepspeech_ios/deepspeech_ios.h b/native_client/swift/deepspeech_ios/deepspeech_ios.h new file mode 100644 index 00000000..a40fb954 --- /dev/null +++ b/native_client/swift/deepspeech_ios/deepspeech_ios.h @@ -0,0 +1,13 @@ +// +// deepspeech_ios.h +// deepspeech_ios +// +// Created by Reuben Morais on 14.06.20. +// Copyright © 2020 Mozilla. All rights reserved.
+// + +#import + +// In this header, you should import all the public headers of your framework using statements like #import + + diff --git a/native_client/swift/deepspeech_ios/deepspeech_ios.modulemap b/native_client/swift/deepspeech_ios/deepspeech_ios.modulemap new file mode 100644 index 00000000..078ac915 --- /dev/null +++ b/native_client/swift/deepspeech_ios/deepspeech_ios.modulemap @@ -0,0 +1,12 @@ +framework module deepspeech_ios { + umbrella header "deepspeech_ios.h" + + export * + module * { export * } + + explicit module libdeepspeech_Private { + header "deepspeech.h" + export * + link "deepspeech" + } +} diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.pbxproj b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.pbxproj new file mode 100644 index 00000000..e9a7d0a2 --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.pbxproj @@ -0,0 +1,637 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 50; + objects = { + +/* Begin PBXBuildFile section */ + 507CD3A124B61FE400409BBB /* deepspeech_ios.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */; }; + 507CD3A324B61FEB00409BBB /* libdeepspeech.so in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libdeepspeech.so */; }; + 507CD3A424B61FFC00409BBB /* libdeepspeech.so in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libdeepspeech.so */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; }; + 50F787F32497683900D52237 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50F787F22497683900D52237 /* AppDelegate.swift */; }; + 50F787F52497683900D52237 /* SceneDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50F787F42497683900D52237 /* SceneDelegate.swift */; }; + 50F787F72497683900D52237 /* ContentView.swift in Sources */ = {isa = 
PBXBuildFile; fileRef = 50F787F62497683900D52237 /* ContentView.swift */; }; + 50F787F92497683A00D52237 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 50F787F82497683A00D52237 /* Assets.xcassets */; }; + 50F787FC2497683A00D52237 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 50F787FB2497683A00D52237 /* Preview Assets.xcassets */; }; + 50F787FF2497683A00D52237 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 50F787FD2497683A00D52237 /* LaunchScreen.storyboard */; }; + 50F7880A2497683A00D52237 /* deepspeech_ios_testTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50F788092497683A00D52237 /* deepspeech_ios_testTests.swift */; }; + 50F788152497683A00D52237 /* deepspeech_ios_testUITests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50F788142497683A00D52237 /* deepspeech_ios_testUITests.swift */; }; +/* End PBXBuildFile section */ + +/* Begin PBXContainerItemProxy section */ + 50F788062497683A00D52237 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 50F787E72497683900D52237 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 50F787EE2497683900D52237; + remoteInfo = deepspeech_ios_test; + }; + 50F788112497683A00D52237 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 50F787E72497683900D52237 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 50F787EE2497683900D52237; + remoteInfo = deepspeech_ios_test; + }; +/* End PBXContainerItemProxy section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 50F2B10E2498EB59007CD876 /* Embed Frameworks */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = ""; + dstSubfolderSpec = 10; + files = ( + 507CD3A424B61FFC00409BBB /* libdeepspeech.so in Embed Frameworks */, + ); + name = "Embed Frameworks"; + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ 
+ 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = deepspeech_ios.framework; sourceTree = BUILT_PRODUCTS_DIR; }; + 507CD3A224B61FEA00409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; name = libdeepspeech.so; path = ../libdeepspeech.so; sourceTree = ""; }; + 50F787EF2497683900D52237 /* deepspeech_ios_test.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = deepspeech_ios_test.app; sourceTree = BUILT_PRODUCTS_DIR; }; + 50F787F22497683900D52237 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; + 50F787F42497683900D52237 /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = ""; }; + 50F787F62497683900D52237 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = ""; }; + 50F787F82497683A00D52237 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; + 50F787FB2497683A00D52237 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = ""; }; + 50F787FE2497683A00D52237 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = ""; }; + 50F788002497683A00D52237 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 50F788052497683A00D52237 /* deepspeech_ios_testTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = deepspeech_ios_testTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; + 
50F788092497683A00D52237 /* deepspeech_ios_testTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = deepspeech_ios_testTests.swift; sourceTree = ""; }; + 50F7880B2497683A00D52237 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 50F788102497683A00D52237 /* deepspeech_ios_testUITests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = deepspeech_ios_testUITests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; + 50F788142497683A00D52237 /* deepspeech_ios_testUITests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = deepspeech_ios_testUITests.swift; sourceTree = ""; }; + 50F788162497683A00D52237 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 50F787EC2497683900D52237 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 507CD3A324B61FEB00409BBB /* libdeepspeech.so in Frameworks */, + 507CD3A124B61FE400409BBB /* deepspeech_ios.framework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 50F788022497683A00D52237 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 50F7880D2497683A00D52237 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 50F2B0FC2498D6C7007CD876 /* Frameworks */ = { + isa = PBXGroup; + children = ( + 507CD3A224B61FEA00409BBB /* libdeepspeech.so */, + 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */, + ); + name = Frameworks; + sourceTree = ""; + }; + 50F787E62497683900D52237 = { + 
isa = PBXGroup; + children = ( + 50F787F12497683900D52237 /* deepspeech_ios_test */, + 50F788082497683A00D52237 /* deepspeech_ios_testTests */, + 50F788132497683A00D52237 /* deepspeech_ios_testUITests */, + 50F787F02497683900D52237 /* Products */, + 50F2B0FC2498D6C7007CD876 /* Frameworks */, + ); + sourceTree = ""; + }; + 50F787F02497683900D52237 /* Products */ = { + isa = PBXGroup; + children = ( + 50F787EF2497683900D52237 /* deepspeech_ios_test.app */, + 50F788052497683A00D52237 /* deepspeech_ios_testTests.xctest */, + 50F788102497683A00D52237 /* deepspeech_ios_testUITests.xctest */, + ); + name = Products; + sourceTree = ""; + }; + 50F787F12497683900D52237 /* deepspeech_ios_test */ = { + isa = PBXGroup; + children = ( + 50F787F22497683900D52237 /* AppDelegate.swift */, + 50F787F42497683900D52237 /* SceneDelegate.swift */, + 50F787F62497683900D52237 /* ContentView.swift */, + 50F787F82497683A00D52237 /* Assets.xcassets */, + 50F787FD2497683A00D52237 /* LaunchScreen.storyboard */, + 50F788002497683A00D52237 /* Info.plist */, + 50F787FA2497683A00D52237 /* Preview Content */, + ); + path = deepspeech_ios_test; + sourceTree = ""; + }; + 50F787FA2497683A00D52237 /* Preview Content */ = { + isa = PBXGroup; + children = ( + 50F787FB2497683A00D52237 /* Preview Assets.xcassets */, + ); + path = "Preview Content"; + sourceTree = ""; + }; + 50F788082497683A00D52237 /* deepspeech_ios_testTests */ = { + isa = PBXGroup; + children = ( + 50F788092497683A00D52237 /* deepspeech_ios_testTests.swift */, + 50F7880B2497683A00D52237 /* Info.plist */, + ); + path = deepspeech_ios_testTests; + sourceTree = ""; + }; + 50F788132497683A00D52237 /* deepspeech_ios_testUITests */ = { + isa = PBXGroup; + children = ( + 50F788142497683A00D52237 /* deepspeech_ios_testUITests.swift */, + 50F788162497683A00D52237 /* Info.plist */, + ); + path = deepspeech_ios_testUITests; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 50F787EE2497683900D52237 /* 
deepspeech_ios_test */ = { + isa = PBXNativeTarget; + buildConfigurationList = 50F788192497683A00D52237 /* Build configuration list for PBXNativeTarget "deepspeech_ios_test" */; + buildPhases = ( + 50F787EB2497683900D52237 /* Sources */, + 50F787EC2497683900D52237 /* Frameworks */, + 50F787ED2497683900D52237 /* Resources */, + 50F2B10E2498EB59007CD876 /* Embed Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = deepspeech_ios_test; + productName = deepspeech_ios_test; + productReference = 50F787EF2497683900D52237 /* deepspeech_ios_test.app */; + productType = "com.apple.product-type.application"; + }; + 50F788042497683A00D52237 /* deepspeech_ios_testTests */ = { + isa = PBXNativeTarget; + buildConfigurationList = 50F7881C2497683A00D52237 /* Build configuration list for PBXNativeTarget "deepspeech_ios_testTests" */; + buildPhases = ( + 50F788012497683A00D52237 /* Sources */, + 50F788022497683A00D52237 /* Frameworks */, + 50F788032497683A00D52237 /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + 50F788072497683A00D52237 /* PBXTargetDependency */, + ); + name = deepspeech_ios_testTests; + productName = deepspeech_ios_testTests; + productReference = 50F788052497683A00D52237 /* deepspeech_ios_testTests.xctest */; + productType = "com.apple.product-type.bundle.unit-test"; + }; + 50F7880F2497683A00D52237 /* deepspeech_ios_testUITests */ = { + isa = PBXNativeTarget; + buildConfigurationList = 50F7881F2497683A00D52237 /* Build configuration list for PBXNativeTarget "deepspeech_ios_testUITests" */; + buildPhases = ( + 50F7880C2497683A00D52237 /* Sources */, + 50F7880D2497683A00D52237 /* Frameworks */, + 50F7880E2497683A00D52237 /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + 50F788122497683A00D52237 /* PBXTargetDependency */, + ); + name = deepspeech_ios_testUITests; + productName = deepspeech_ios_testUITests; + productReference = 50F788102497683A00D52237 /* deepspeech_ios_testUITests.xctest */; + productType = 
"com.apple.product-type.bundle.ui-testing"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 50F787E72497683900D52237 /* Project object */ = { + isa = PBXProject; + attributes = { + LastSwiftUpdateCheck = 1150; + LastUpgradeCheck = 1150; + ORGANIZATIONNAME = Mozilla; + TargetAttributes = { + 50F787EE2497683900D52237 = { + CreatedOnToolsVersion = 11.5; + }; + 50F788042497683A00D52237 = { + CreatedOnToolsVersion = 11.5; + TestTargetID = 50F787EE2497683900D52237; + }; + 50F7880F2497683A00D52237 = { + CreatedOnToolsVersion = 11.5; + TestTargetID = 50F787EE2497683900D52237; + }; + }; + }; + buildConfigurationList = 50F787EA2497683900D52237 /* Build configuration list for PBXProject "deepspeech_ios_test" */; + compatibilityVersion = "Xcode 9.3"; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = 50F787E62497683900D52237; + productRefGroup = 50F787F02497683900D52237 /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 50F787EE2497683900D52237 /* deepspeech_ios_test */, + 50F788042497683A00D52237 /* deepspeech_ios_testTests */, + 50F7880F2497683A00D52237 /* deepspeech_ios_testUITests */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + 50F787ED2497683900D52237 /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 50F787FF2497683A00D52237 /* LaunchScreen.storyboard in Resources */, + 50F787FC2497683A00D52237 /* Preview Assets.xcassets in Resources */, + 50F787F92497683A00D52237 /* Assets.xcassets in Resources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 50F788032497683A00D52237 /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 50F7880E2497683A00D52237 /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + 
runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 50F787EB2497683900D52237 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 50F787F32497683900D52237 /* AppDelegate.swift in Sources */, + 50F787F52497683900D52237 /* SceneDelegate.swift in Sources */, + 50F787F72497683900D52237 /* ContentView.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 50F788012497683A00D52237 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 50F7880A2497683A00D52237 /* deepspeech_ios_testTests.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 50F7880C2497683A00D52237 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 50F788152497683A00D52237 /* deepspeech_ios_testUITests.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin PBXTargetDependency section */ + 50F788072497683A00D52237 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 50F787EE2497683900D52237 /* deepspeech_ios_test */; + targetProxy = 50F788062497683A00D52237 /* PBXContainerItemProxy */; + }; + 50F788122497683A00D52237 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 50F787EE2497683900D52237 /* deepspeech_ios_test */; + targetProxy = 50F788112497683A00D52237 /* PBXContainerItemProxy */; + }; +/* End PBXTargetDependency section */ + +/* Begin PBXVariantGroup section */ + 50F787FD2497683A00D52237 /* LaunchScreen.storyboard */ = { + isa = PBXVariantGroup; + children = ( + 50F787FE2497683A00D52237 /* Base */, + ); + name = LaunchScreen.storyboard; + sourceTree = ""; + }; +/* End PBXVariantGroup section */ + +/* Begin XCBuildConfiguration section */ + 50F788172497683A00D52237 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS 
= NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 13.5; + MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; + MTL_FAST_MATH = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = iphoneos; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + }; + name = Debug; + }; + 50F788182497683A00D52237 /* Release */ = { + isa 
= XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 13.5; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + SDKROOT = iphoneos; + SWIFT_COMPILATION_MODE = wholemodule; + SWIFT_OPTIMIZATION_LEVEL = "-O"; + VALIDATE_PRODUCT = YES; + }; + name = Release; + }; + 50F7881A2497683A00D52237 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + 
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + CODE_SIGN_STYLE = Automatic; + DEVELOPMENT_ASSET_PATHS = "\"deepspeech_ios_test/Preview Content\""; + DEVELOPMENT_TEAM = AWCG9S27P7; + ENABLE_PREVIEWS = YES; + FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)"; + INFOPLIST_FILE = deepspeech_ios_test/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + ); + LIBRARY_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios-test"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Debug; + }; + 50F7881B2497683A00D52237 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + CODE_SIGN_STYLE = Automatic; + DEVELOPMENT_ASSET_PATHS = "\"deepspeech_ios_test/Preview Content\""; + DEVELOPMENT_TEAM = AWCG9S27P7; + ENABLE_PREVIEWS = YES; + FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)"; + INFOPLIST_FILE = deepspeech_ios_test/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + ); + LIBRARY_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios-test"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Release; + }; + 50F7881D2497683A00D52237 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; + BUNDLE_LOADER = "$(TEST_HOST)"; + CODE_SIGN_STYLE = Automatic; + INFOPLIST_FILE = deepspeech_ios_testTests/Info.plist; + IPHONEOS_DEPLOYMENT_TARGET = 13.5; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios-testTests"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + TEST_HOST = 
"$(BUILT_PRODUCTS_DIR)/deepspeech_ios_test.app/deepspeech_ios_test"; + }; + name = Debug; + }; + 50F7881E2497683A00D52237 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; + BUNDLE_LOADER = "$(TEST_HOST)"; + CODE_SIGN_STYLE = Automatic; + INFOPLIST_FILE = deepspeech_ios_testTests/Info.plist; + IPHONEOS_DEPLOYMENT_TARGET = 13.5; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios-testTests"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + TEST_HOST = "$(BUILT_PRODUCTS_DIR)/deepspeech_ios_test.app/deepspeech_ios_test"; + }; + name = Release; + }; + 50F788202497683A00D52237 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; + CODE_SIGN_STYLE = Automatic; + INFOPLIST_FILE = deepspeech_ios_testUITests/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios-testUITests"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + TEST_TARGET_NAME = deepspeech_ios_test; + }; + name = Debug; + }; + 50F788212497683A00D52237 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; + CODE_SIGN_STYLE = Automatic; + INFOPLIST_FILE = deepspeech_ios_testUITests/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios-testUITests"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + TEST_TARGET_NAME = deepspeech_ios_test; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin 
XCConfigurationList section */ + 50F787EA2497683900D52237 /* Build configuration list for PBXProject "deepspeech_ios_test" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 50F788172497683A00D52237 /* Debug */, + 50F788182497683A00D52237 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 50F788192497683A00D52237 /* Build configuration list for PBXNativeTarget "deepspeech_ios_test" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 50F7881A2497683A00D52237 /* Debug */, + 50F7881B2497683A00D52237 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 50F7881C2497683A00D52237 /* Build configuration list for PBXNativeTarget "deepspeech_ios_testTests" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 50F7881D2497683A00D52237 /* Debug */, + 50F7881E2497683A00D52237 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 50F7881F2497683A00D52237 /* Build configuration list for PBXNativeTarget "deepspeech_ios_testUITests" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 50F788202497683A00D52237 /* Debug */, + 50F788212497683A00D52237 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 50F787E72497683900D52237 /* Project object */; +} diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.xcworkspace/contents.xcworkspacedata new file mode 100644 index 00000000..bb9f69fc --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,7 @@ + + + + + diff --git 
a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist new file mode 100644 index 00000000..18d98100 --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist @@ -0,0 +1,8 @@ + + + + + IDEDidComputeMac32BitWarning + + + diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/xcshareddata/xcschemes/deepspeech_ios_test.xcscheme b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/xcshareddata/xcschemes/deepspeech_ios_test.xcscheme new file mode 100644 index 00000000..c6adb9bb --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/xcshareddata/xcschemes/deepspeech_ios_test.xcscheme @@ -0,0 +1,106 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/AppDelegate.swift b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/AppDelegate.swift new file mode 100644 index 00000000..b589df39 --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/AppDelegate.swift @@ -0,0 +1,255 @@ +// +// AppDelegate.swift +// deepspeech_ios_test +// +// Created by Reuben Morais on 15.06.20. +// Copyright © 2020 Mozilla. All rights reserved. 
+// + +import UIKit +import Foundation +import AVFoundation +import AudioToolbox +import Accelerate + +import deepspeech_ios + +/// Holds audio information used for building waveforms +final class AudioContext { + + /// The audio asset URL used to load the context + public let audioURL: URL + + /// Total number of samples in loaded asset + public let totalSamples: Int + + /// Loaded asset + public let asset: AVAsset + + // Loaded assetTrack + public let assetTrack: AVAssetTrack + + private init(audioURL: URL, totalSamples: Int, asset: AVAsset, assetTrack: AVAssetTrack) { + self.audioURL = audioURL + self.totalSamples = totalSamples + self.asset = asset + self.assetTrack = assetTrack + } + + public static func load(fromAudioURL audioURL: URL, completionHandler: @escaping (_ audioContext: AudioContext?) -> ()) { + let asset = AVURLAsset(url: audioURL, options: [AVURLAssetPreferPreciseDurationAndTimingKey: NSNumber(value: true as Bool)]) + + guard let assetTrack = asset.tracks(withMediaType: AVMediaType.audio).first else { + fatalError("Couldn't load AVAssetTrack") + } + + asset.loadValuesAsynchronously(forKeys: ["duration"]) { + var error: NSError? + let status = asset.statusOfValue(forKey: "duration", error: &error) + switch status { + case .loaded: + guard + let formatDescriptions = assetTrack.formatDescriptions as? [CMAudioFormatDescription], + let audioFormatDesc = formatDescriptions.first, + let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(audioFormatDesc) + else { break } + + let totalSamples = Int((asbd.pointee.mSampleRate) * Float64(asset.duration.value) / Float64(asset.duration.timescale)) + let audioContext = AudioContext(audioURL: audioURL, totalSamples: totalSamples, asset: asset, assetTrack: assetTrack) + completionHandler(audioContext) + return + + case .failed, .cancelled, .loading, .unknown: + print("Couldn't load asset: \(error?.localizedDescription ?? 
"Unknown error")") + } + + completionHandler(nil) + } + } +} + +func render(audioContext: AudioContext?, stream: DeepSpeechStream) { + guard let audioContext = audioContext else { + fatalError("Couldn't create the audioContext") + } + + let sampleRange: CountableRange = 0..? + CMBlockBufferGetDataPointer(readBuffer, + atOffset: 0, + lengthAtOffsetOut: &readBufferLength, + totalLengthOut: nil, + dataPointerOut: &readBufferPointer) + sampleBuffer.append(UnsafeBufferPointer(start: readBufferPointer, count: readBufferLength)) + CMSampleBufferInvalidate(readSampleBuffer) + + let totalSamples = sampleBuffer.count / MemoryLayout.size + print("read \(totalSamples) samples") + + sampleBuffer.withUnsafeBytes { (samples: UnsafeRawBufferPointer) in + let unsafeBufferPointer = samples.bindMemory(to: Int16.self) + stream.feedAudioContent(buffer: unsafeBufferPointer) + } + + sampleBuffer.removeAll() + } + + // if (reader.status == AVAssetReaderStatusFailed || reader.status == AVAssetReaderStatusUnknown) + guard reader.status == .completed else { + fatalError("Couldn't read the audio file") + } +} + +func test(model: DeepSpeechModel, audioPath: String, completion: @escaping () -> ()) { + let url = URL(fileURLWithPath: audioPath) + + //var format = AudioStreamBasicDescription.init() + //format.mSampleRate = 16000; + //format.mFormatID = kAudioFormatLinearPCM; + //format.mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagsNativeEndian | kAudioFormatFlagIsPacked; + //format.mBitsPerChannel = 16; + //format.mChannelsPerFrame = 1; + //format.mBytesPerFrame = format.mChannelsPerFrame * format.mBitsPerChannel / 8; + //format.mFramesPerPacket = 1; + //format.mBytesPerPacket = format.mFramesPerPacket * format.mBytesPerFrame; + // + //var file = Optional.init(nilLiteral: ()); + //let status = ExtAudioFileCreateWithURL(url as CFURL, + // kAudioFileWAVEType, + // &format, + // nil, + // 0, + // &file) + //print("status: \(status)") + //let status2 = 
ExtAudioFileSetProperty(file!, + // kExtAudioFileProperty_ClientDataFormat, + // UInt32(MemoryLayout.size), + // &format) + //print("status: \(status2)") + // + //ExtAudioFileRead(file, <#T##ioNumberFrames: UnsafeMutablePointer##UnsafeMutablePointer#>, <#T##ioData: UnsafeMutablePointer##UnsafeMutablePointer#>) + + let stream = try! model.createStream() + print("\(audioPath)") + let start = CFAbsoluteTimeGetCurrent() + AudioContext.load(fromAudioURL: url, completionHandler: { audioContext in + guard let audioContext = audioContext else { + fatalError("Couldn't create the audioContext") + } + render(audioContext: audioContext, stream: stream) + let result = stream.finishStream() + let end = CFAbsoluteTimeGetCurrent() + print("\"\(audioPath)\": \(end - start) - \(result)") + completion() + }) + + //let file = try! AVAudioFile(forReading: url) + //print("file length \(file.length)") + //let format = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: 16000, channels: 1, interleaved: false)! + //let stream = createStream(modelState: modelState) + //while file.framePosition < file.length { + // let pcmBuf = AVAudioPCMBuffer.init(pcmFormat: format, frameCapacity: 8 * 1024)! // arbitrary frameCapacity + // try! file.read(into: pcmBuf) + // if pcmBuf.frameLength == 0 { + // break + // } + // print("read \(pcmBuf.frameLength) frames into buffer") + // let rawPtr = pcmBuf.audioBufferList.pointee.mBuffers.mData! + // let ptr = rawPtr.bindMemory(to: Int16.self, capacity: Int(pcmBuf.frameLength)) + // print("first few samples: \(ptr[0]) \(ptr[1]) \(ptr[2]) \(ptr[3]) ") + // DS_FeedAudioContent(stream, ptr, UInt32(pcmBuf.frameLength)) + //} + //let result = DS_FinishStream(stream) + //return String.init(cString: result!) +} + +@UIApplicationMain +class AppDelegate: UIResponder, UIApplicationDelegate { + func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool { + let model = try! 
DeepSpeechModel(modelPath: Bundle.main.path(forResource: "output_graph", ofType: "tflite")!) + try! model.enableExternalScorer(scorerPath: Bundle.main.path(forResource: "librispeech_en_utf8_nonpruned_o6", ofType: "scorer")!) + + let files = [ + "5639-40744-0008", + "1089-134686-0019", + "2094-142345-0053", + "8463-294825-0010", + "121-123852-0001", + "7021-79740-0008", + "6930-76324-0010", + "5105-28240-0001", + "1089-134691-0012", + "5142-33396-0027", + "260-123288-0004", + "6930-75918-0008", + "8463-294828-0005", + "61-70970-0002" + ] + + let serialQueue = DispatchQueue(label: "serialQueue") + let group = DispatchGroup() + group.enter() + serialQueue.async { + test(model: model, audioPath: Bundle.main.path(forResource: "1284-134647-0003", ofType: "wav")!) { + group.leave() + } + } + for path in files { + group.wait() + group.enter() + test(model: model, audioPath: Bundle.main.path(forResource: path, ofType: "wav")!) { + group.leave() + } + } + return true + } + + // MARK: UISceneSession Lifecycle + + func application(_ application: UIApplication, configurationForConnecting connectingSceneSession: UISceneSession, options: UIScene.ConnectionOptions) -> UISceneConfiguration { + // Called when a new scene session is being created. + // Use this method to select a configuration to create the new scene with. + return UISceneConfiguration(name: "Default Configuration", sessionRole: connectingSceneSession.role) + } + + func application(_ application: UIApplication, didDiscardSceneSessions sceneSessions: Set) { + // Called when the user discards a scene session. + // If any sessions were discarded while the application was not running, this will be called shortly after application:didFinishLaunchingWithOptions. + // Use this method to release any resources that were specific to the discarded scenes, as they will not return. 
+ } +} diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Assets.xcassets/AppIcon.appiconset/Contents.json b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Assets.xcassets/AppIcon.appiconset/Contents.json new file mode 100644 index 00000000..9221b9bb --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Assets.xcassets/AppIcon.appiconset/Contents.json @@ -0,0 +1,98 @@ +{ + "images" : [ + { + "idiom" : "iphone", + "scale" : "2x", + "size" : "20x20" + }, + { + "idiom" : "iphone", + "scale" : "3x", + "size" : "20x20" + }, + { + "idiom" : "iphone", + "scale" : "2x", + "size" : "29x29" + }, + { + "idiom" : "iphone", + "scale" : "3x", + "size" : "29x29" + }, + { + "idiom" : "iphone", + "scale" : "2x", + "size" : "40x40" + }, + { + "idiom" : "iphone", + "scale" : "3x", + "size" : "40x40" + }, + { + "idiom" : "iphone", + "scale" : "2x", + "size" : "60x60" + }, + { + "idiom" : "iphone", + "scale" : "3x", + "size" : "60x60" + }, + { + "idiom" : "ipad", + "scale" : "1x", + "size" : "20x20" + }, + { + "idiom" : "ipad", + "scale" : "2x", + "size" : "20x20" + }, + { + "idiom" : "ipad", + "scale" : "1x", + "size" : "29x29" + }, + { + "idiom" : "ipad", + "scale" : "2x", + "size" : "29x29" + }, + { + "idiom" : "ipad", + "scale" : "1x", + "size" : "40x40" + }, + { + "idiom" : "ipad", + "scale" : "2x", + "size" : "40x40" + }, + { + "idiom" : "ipad", + "scale" : "1x", + "size" : "76x76" + }, + { + "idiom" : "ipad", + "scale" : "2x", + "size" : "76x76" + }, + { + "idiom" : "ipad", + "scale" : "2x", + "size" : "83.5x83.5" + }, + { + "idiom" : "ios-marketing", + "scale" : "1x", + "size" : "1024x1024" + } + ], + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Assets.xcassets/Contents.json b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Assets.xcassets/Contents.json new file mode 100644 index 00000000..73c00596 --- /dev/null +++ 
b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Assets.xcassets/Contents.json @@ -0,0 +1,6 @@ +{ + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Base.lproj/LaunchScreen.storyboard b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Base.lproj/LaunchScreen.storyboard new file mode 100644 index 00000000..865e9329 --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Base.lproj/LaunchScreen.storyboard @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/ContentView.swift b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/ContentView.swift new file mode 100644 index 00000000..5f7442f9 --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/ContentView.swift @@ -0,0 +1,21 @@ +// +// ContentView.swift +// deepspeech_ios_test +// +// Created by Reuben Morais on 15.06.20. +// Copyright © 2020 Mozilla. All rights reserved. 
+// + +import SwiftUI + +struct ContentView: View { + var body: some View { + Text("Hello, World!") + } +} + +struct ContentView_Previews: PreviewProvider { + static var previews: some View { + ContentView() + } +} diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Info.plist b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Info.plist new file mode 100644 index 00000000..9742bf0f --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Info.plist @@ -0,0 +1,60 @@ + + + + + CFBundleDevelopmentRegion + $(DEVELOPMENT_LANGUAGE) + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + $(PRODUCT_BUNDLE_PACKAGE_TYPE) + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + LSRequiresIPhoneOS + + UIApplicationSceneManifest + + UIApplicationSupportsMultipleScenes + + UISceneConfigurations + + UIWindowSceneSessionRoleApplication + + + UISceneConfigurationName + Default Configuration + UISceneDelegateClassName + $(PRODUCT_MODULE_NAME).SceneDelegate + + + + + UILaunchStoryboardName + LaunchScreen + UIRequiredDeviceCapabilities + + armv7 + + UISupportedInterfaceOrientations + + UIInterfaceOrientationPortrait + UIInterfaceOrientationLandscapeLeft + UIInterfaceOrientationLandscapeRight + + UISupportedInterfaceOrientations~ipad + + UIInterfaceOrientationPortrait + UIInterfaceOrientationPortraitUpsideDown + UIInterfaceOrientationLandscapeLeft + UIInterfaceOrientationLandscapeRight + + + diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Preview Content/Preview Assets.xcassets/Contents.json b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Preview Content/Preview Assets.xcassets/Contents.json new file mode 100644 index 00000000..73c00596 --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Preview Content/Preview 
Assets.xcassets/Contents.json @@ -0,0 +1,6 @@ +{ + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/SceneDelegate.swift b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/SceneDelegate.swift new file mode 100644 index 00000000..40d85e4a --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/SceneDelegate.swift @@ -0,0 +1,64 @@ +// +// SceneDelegate.swift +// deepspeech_ios_test +// +// Created by Reuben Morais on 15.06.20. +// Copyright © 2020 Mozilla. All rights reserved. +// + +import UIKit +import SwiftUI + +class SceneDelegate: UIResponder, UIWindowSceneDelegate { + + var window: UIWindow? + + + func scene(_ scene: UIScene, willConnectTo session: UISceneSession, options connectionOptions: UIScene.ConnectionOptions) { + // Use this method to optionally configure and attach the UIWindow `window` to the provided UIWindowScene `scene`. + // If using a storyboard, the `window` property will automatically be initialized and attached to the scene. + // This delegate does not imply the connecting scene or session are new (see `application:configurationForConnectingSceneSession` instead). + + // Create the SwiftUI view that provides the window contents. + let contentView = ContentView() + + // Use a UIHostingController as window root view controller. + if let windowScene = scene as? UIWindowScene { + let window = UIWindow(windowScene: windowScene) + window.rootViewController = UIHostingController(rootView: contentView) + self.window = window + window.makeKeyAndVisible() + } + } + + func sceneDidDisconnect(_ scene: UIScene) { + // Called as the scene is being released by the system. + // This occurs shortly after the scene enters the background, or when its session is discarded. + // Release any resources associated with this scene that can be re-created the next time the scene connects. 
+ // The scene may re-connect later, as its session was not neccessarily discarded (see `application:didDiscardSceneSessions` instead). + } + + func sceneDidBecomeActive(_ scene: UIScene) { + // Called when the scene has moved from an inactive state to an active state. + // Use this method to restart any tasks that were paused (or not yet started) when the scene was inactive. + } + + func sceneWillResignActive(_ scene: UIScene) { + // Called when the scene will move from an active state to an inactive state. + // This may occur due to temporary interruptions (ex. an incoming phone call). + } + + func sceneWillEnterForeground(_ scene: UIScene) { + // Called as the scene transitions from the background to the foreground. + // Use this method to undo the changes made on entering the background. + } + + func sceneDidEnterBackground(_ scene: UIScene) { + // Called as the scene transitions from the foreground to the background. + // Use this method to save data, release shared resources, and store enough scene-specific state information + // to restore the scene back to its current state. 
+ } + + +} + diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_testTests/Info.plist b/native_client/swift/deepspeech_ios_test/deepspeech_ios_testTests/Info.plist new file mode 100644 index 00000000..64d65ca4 --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_testTests/Info.plist @@ -0,0 +1,22 @@ + + + + + CFBundleDevelopmentRegion + $(DEVELOPMENT_LANGUAGE) + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + $(PRODUCT_BUNDLE_PACKAGE_TYPE) + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + + diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_testTests/deepspeech_ios_testTests.swift b/native_client/swift/deepspeech_ios_test/deepspeech_ios_testTests/deepspeech_ios_testTests.swift new file mode 100644 index 00000000..0e5b449d --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_testTests/deepspeech_ios_testTests.swift @@ -0,0 +1,34 @@ +// +// deepspeech_ios_testTests.swift +// deepspeech_ios_testTests +// +// Created by Reuben Morais on 15.06.20. +// Copyright © 2020 Mozilla. All rights reserved. +// + +import XCTest +@testable import deepspeech_ios_test + +class deepspeech_ios_testTests: XCTestCase { + + override func setUpWithError() throws { + // Put setup code here. This method is called before the invocation of each test method in the class. + } + + override func tearDownWithError() throws { + // Put teardown code here. This method is called after the invocation of each test method in the class. + } + + func testExample() throws { + // This is an example of a functional test case. + // Use XCTAssert and related functions to verify your tests produce the correct results. + } + + func testPerformanceExample() throws { + // This is an example of a performance test case. + self.measure { + // Put the code you want to measure the time of here. 
+ } + } + +} diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_testUITests/Info.plist b/native_client/swift/deepspeech_ios_test/deepspeech_ios_testUITests/Info.plist new file mode 100644 index 00000000..64d65ca4 --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_testUITests/Info.plist @@ -0,0 +1,22 @@ + + + + + CFBundleDevelopmentRegion + $(DEVELOPMENT_LANGUAGE) + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + $(PRODUCT_BUNDLE_PACKAGE_TYPE) + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + + diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_testUITests/deepspeech_ios_testUITests.swift b/native_client/swift/deepspeech_ios_test/deepspeech_ios_testUITests/deepspeech_ios_testUITests.swift new file mode 100644 index 00000000..493a6b8d --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_testUITests/deepspeech_ios_testUITests.swift @@ -0,0 +1,43 @@ +// +// deepspeech_ios_testUITests.swift +// deepspeech_ios_testUITests +// +// Created by Reuben Morais on 15.06.20. +// Copyright © 2020 Mozilla. All rights reserved. +// + +import XCTest + +class deepspeech_ios_testUITests: XCTestCase { + + override func setUpWithError() throws { + // Put setup code here. This method is called before the invocation of each test method in the class. + + // In UI tests it is usually best to stop immediately when a failure occurs. + continueAfterFailure = false + + // In UI tests it’s important to set the initial state - such as interface orientation - required for your tests before they run. The setUp method is a good place to do this. + } + + override func tearDownWithError() throws { + // Put teardown code here. This method is called after the invocation of each test method in the class. 
+ } + + func testExample() throws { + // UI tests must launch the application that they test. + let app = XCUIApplication() + app.launch() + + // Use recording to get started writing UI tests. + // Use XCTAssert and related functions to verify your tests produce the correct results. + } + + func testLaunchPerformance() throws { + if #available(macOS 10.15, iOS 13.0, tvOS 13.0, *) { + // This measures how long it takes to launch your application. + measure(metrics: [XCTOSSignpostMetric.applicationLaunch]) { + XCUIApplication().launch() + } + } + } +} From f7c50663e1094631b1b6931aba5cb19cd695ac71 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 14 Jul 2020 16:05:00 +0200 Subject: [PATCH 09/33] Checkout fixed formulas commit in tf_tc-brew.sh --- taskcluster/darwin-opt-base.tyml | 7 +-- taskcluster/tf_tc-brew.sh | 76 ++++++++++++++++++-------------- 2 files changed, 46 insertions(+), 37 deletions(-) diff --git a/taskcluster/darwin-opt-base.tyml b/taskcluster/darwin-opt-base.tyml index ea7ba95d..7503fd52 100644 --- a/taskcluster/darwin-opt-base.tyml +++ b/taskcluster/darwin-opt-base.tyml @@ -35,11 +35,8 @@ payload: # There is no VM yet running tasks on OSX # so one should install by hand: # - brew - # - xcode (brew would install) - # - brew install gnu-tar - # - brew install git - # - brew install pixz - # - brew cask install java + # - Xcode 10.1 in /Applications/Xcode.app, then sudo chown -R root:wheel /Applications/Xcode.app + # - brew install gnu-tar git pixz wget coreutils pyenv-virtualenv # - sudo easy_install pip command: diff --git a/taskcluster/tf_tc-brew.sh b/taskcluster/tf_tc-brew.sh index 297b2bba..8d4128a2 100755 --- a/taskcluster/tf_tc-brew.sh +++ b/taskcluster/tf_tc-brew.sh @@ -2,43 +2,55 @@ set -ex -if [ -z "${TASKCLUSTER_TASK_DIR}" ]; then - echo "No TASKCLUSTER_TASK_DIR, aborting." - exit 1 -fi +# if [ -z "${TASKCLUSTER_TASK_DIR}" ]; then +# echo "No TASKCLUSTER_TASK_DIR, aborting." 
+# exit 1 +# fi -LOCAL_BREW="${TASKCLUSTER_TASK_DIR}/homebrew" -export PATH=${LOCAL_BREW}/bin:$PATH -export HOMEBREW_LOGS="${TASKCLUSTER_TASK_DIR}/homebrew.logs/" -export HOMEBREW_CACHE="${TASKCLUSTER_TASK_DIR}/homebrew.cache/" +# LOCAL_BREW="${TASKCLUSTER_TASK_DIR}/homebrew" +# export PATH=${LOCAL_BREW}/bin:$PATH +# export HOMEBREW_LOGS="${TASKCLUSTER_TASK_DIR}/homebrew.logs/" +# export HOMEBREW_CACHE="${TASKCLUSTER_TASK_DIR}/homebrew.cache/" +# export HOMEBREW_FORMULAS_COMMIT=93fe256e0168db3b1c70c26a01941be59ce76311 +# export HOMEBREW_NO_AUTO_UPDATE=1 -# Never fail on pre-existing homebrew/ directory -mkdir -p "${LOCAL_BREW}" || true -mkdir -p "${HOMEBREW_CACHE}" || true +# # Never fail on pre-existing homebrew/ directory +# mkdir -p "${LOCAL_BREW}" || true +# mkdir -p "${HOMEBREW_CACHE}" || true -# Make sure to verify there is a 'brew' binary there, otherwise install things. -if [ ! -x "${LOCAL_BREW}/bin/brew" ]; then - curl -L https://github.com/Homebrew/brew/tarball/2.2.17 | tar xz --strip 1 -C "${LOCAL_BREW}" -fi; +# # Make sure to verify there is a 'brew' binary there, otherwise install things. +# if [ ! -x "${LOCAL_BREW}/bin/brew" ]; then +# curl -L https://github.com/Homebrew/brew/tarball/2.2.17 | tar xz --strip 1 -C "${LOCAL_BREW}" +# fi; -echo "local brew list (should be empty) ..." -brew list +# echo "local brew list (should be empty) ..." +# brew list -echo "local brew prefix ..." -local_prefix=$(brew --prefix) -echo "${local_prefix}" +# echo "local brew prefix ..." 
+# local_prefix=$(brew --prefix) +# echo "${local_prefix}" -if [ "${LOCAL_BREW}" != "${local_prefix}" ]; then - echo "Weird state:" - echo "LOCAL_BREW=${LOCAL_BREW}" - echo "local_prefix=${local_prefix}" - exit 1 -fi; +# if [ "${LOCAL_BREW}" != "${local_prefix}" ]; then +# echo "Weird state:" +# echo "LOCAL_BREW=${LOCAL_BREW}" +# echo "local_prefix=${local_prefix}" +# exit 1 +# fi; -# coreutils, pyenv-virtualenv required for build of tensorflow -all_pkgs="coreutils pyenv-virtualenv" -for pkg in ${all_pkgs}; -do - (brew list --versions ${pkg} && brew upgrade ${pkg}) || brew install ${pkg} -done; +# # Then we force onto a specific well-known commit +# mkdir -p "$(brew --prefix)/Library/Taps/homebrew/homebrew-core" +# pushd "$(brew --prefix)/Library/Taps/homebrew/homebrew-core" +# git init +# git remote add origin https://github.com/Homebrew/homebrew-core.git +# git fetch origin +# git checkout ${HOMEBREW_FORMULAS_COMMIT} +# popd + +# # coreutils, pyenv-virtualenv required for build of tensorflow +# all_pkgs="coreutils pyenv-virtualenv" + +# for pkg in ${all_pkgs}; +# do +# (brew list --versions ${pkg} && brew upgrade ${pkg}) || brew install ${pkg} +# done; From e8d642bf44c9285f33efc0cba05e28d45e0cbad4 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 14 Jul 2020 22:33:48 +0200 Subject: [PATCH 10/33] Bump TensorFlow to remove usage of -z linker keyword on iOS --- taskcluster/.shared.yml | 44 ++++++++++++++++++++--------------------- tensorflow | 2 +- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index 9323c995..01d00a2a 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -142,38 +142,38 @@ system: namespace: "project.deepspeech.swig.win.amd64.b5fea54d39832d1d132d7dd921b69c0c2c9d5118" tensorflow: linux_amd64_cpu: - url: 
"https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.cpu/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.cpu" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.cpu/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.cpu" linux_amd64_cuda: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.cuda/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.cuda" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.cuda/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.cuda" linux_armv7: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.arm/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.arm" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm" linux_arm64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.arm64/artifacts/public/home.tar.xz" - namespace: 
"project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.arm64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm64" darwin_amd64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.osx/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.osx" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.osx/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.osx" android_arm64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.android-arm64/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.android-arm64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-arm64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-arm64" android_armv7: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.android-armv7/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.android-armv7" + url: 
"https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-armv7/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-armv7" win_amd64_cpu: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.win/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.win" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win" win_amd64_cuda: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.win-cuda/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.win-cuda" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win-cuda/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win-cuda" ios_arm64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.ios_arm64/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.ios_arm64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.ios_arm64/artifacts/public/home.tar.xz" + namespace: 
"project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.ios_arm64" ios_x86_64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.ios_x86_64/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.ios_x86_64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.ios_x86_64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.ios_x86_64" username: 'build-user' homedir: linux: '/home/build-user' diff --git a/tensorflow b/tensorflow index fc464111..0854bb51 160000 --- a/tensorflow +++ b/tensorflow @@ -1 +1 @@ -Subproject commit fc464111ac5c49791e44122e5946e521b25840bd +Subproject commit 0854bb5188a3150a4d75a1c71ee610b0d45cfcb1 From 6c38d569685a8209295ccc021607e408b60c0d05 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Wed, 15 Jul 2020 10:10:42 +0200 Subject: [PATCH 11/33] Use submodule TF tc-vars.sh --- taskcluster/ios-build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/taskcluster/ios-build.sh b/taskcluster/ios-build.sh index 1b913130..bb885d36 100755 --- a/taskcluster/ios-build.sh +++ b/taskcluster/ios-build.sh @@ -4,7 +4,7 @@ set -xe source $(dirname "$0")/tc-tests-utils.sh -source ${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh +source $(dirname "$0")/tf_tc-vars.sh BAZEL_TARGETS=" //native_client:libdeepspeech.so From 2f568e77858c8bea93c67362b0f16c458bf481f7 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Wed, 15 Jul 2020 14:38:56 +0200 Subject: [PATCH 12/33] Don't use BAZEL_OPT_FLAGS in iOS builds --- taskcluster/.shared.yml | 8 ++++---- taskcluster/ios-build.sh | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index 
01d00a2a..c64a5bc9 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -169,11 +169,11 @@ system: url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win-cuda/artifacts/public/home.tar.xz" namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win-cuda" ios_arm64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.ios_arm64/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.ios_arm64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.1.ios_arm64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.1.ios_arm64" ios_x86_64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.ios_x86_64/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.ios_x86_64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.1.ios_x86_64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.1.ios_x86_64" username: 'build-user' homedir: linux: '/home/build-user' diff --git a/taskcluster/ios-build.sh b/taskcluster/ios-build.sh index bb885d36..bc8860e6 100755 --- a/taskcluster/ios-build.sh +++ b/taskcluster/ios-build.sh @@ -10,7 +10,7 @@ BAZEL_TARGETS=" //native_client:libdeepspeech.so " -BAZEL_BUILD_FLAGS="--config=ios_arm64 --define=runtime=tflite ${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS}" 
+BAZEL_BUILD_FLAGS="--config=ios_arm64 --define=runtime=tflite ${BAZEL_EXTRA_FLAGS}" BAZEL_ENV_FLAGS="TF_NEED_CUDA=0" From aa8e9b064736c3c483391ccc62eacf61eb8bd472 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Thu, 16 Jul 2020 01:46:19 +0200 Subject: [PATCH 13/33] Use correct build flags for ARM64 vs x86_64 --- taskcluster/ios-arm64-tflite-opt.yml | 2 +- taskcluster/ios-build.sh | 8 +++++++- taskcluster/ios-x86_64-tflite-opt.yml | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/taskcluster/ios-arm64-tflite-opt.yml b/taskcluster/ios-arm64-tflite-opt.yml index dab0d24b..cd85ca7f 100644 --- a/taskcluster/ios-arm64-tflite-opt.yml +++ b/taskcluster/ios-arm64-tflite-opt.yml @@ -12,7 +12,7 @@ build: - "index.project.deepspeech.deepspeech.native_client.ios_arm64-tflite.${event.head.sha}" tensorflow: ${system.tensorflow.ios_arm64.url} scripts: - build: "taskcluster/ios-build.sh" + build: "taskcluster/ios-build.sh --arm64" package: "taskcluster/package.sh" nc_asset_name: "native_client.arm64.tflite.ios.tar.xz" maxRunTime: 14400 diff --git a/taskcluster/ios-build.sh b/taskcluster/ios-build.sh index bc8860e6..aa3e8ec6 100755 --- a/taskcluster/ios-build.sh +++ b/taskcluster/ios-build.sh @@ -2,6 +2,8 @@ set -xe +platform=$1 + source $(dirname "$0")/tc-tests-utils.sh source $(dirname "$0")/tf_tc-vars.sh @@ -10,7 +12,11 @@ BAZEL_TARGETS=" //native_client:libdeepspeech.so " -BAZEL_BUILD_FLAGS="--config=ios_arm64 --define=runtime=tflite ${BAZEL_EXTRA_FLAGS}" +if [ "${platform}" = "--arm64" ]; then + BAZEL_BUILD_FLAGS="${BAZEL_IOS_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS}" +else + BAZEL_BUILD_FLAGS="${BAZEL_IOS_X86_64_FLAGS} ${BAZEL_EXTRA_FLAGS}" +fi BAZEL_ENV_FLAGS="TF_NEED_CUDA=0" diff --git a/taskcluster/ios-x86_64-tflite-opt.yml b/taskcluster/ios-x86_64-tflite-opt.yml index 144f0bd5..b55cebe7 100644 --- a/taskcluster/ios-x86_64-tflite-opt.yml +++ b/taskcluster/ios-x86_64-tflite-opt.yml @@ -12,7 +12,7 @@ build: - 
"index.project.deepspeech.deepspeech.native_client.ios_x86_64-tflite.${event.head.sha}" tensorflow: ${system.tensorflow.ios_x86_64.url} scripts: - build: "taskcluster/ios-build.sh" + build: "taskcluster/ios-build.sh --x86_64" package: "taskcluster/package.sh" nc_asset_name: "native_client.x86_64.tflite.ios.tar.xz" maxRunTime: 14400 From f0f4b0ddc17317c4f809898265eb6f422dfc328f Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Thu, 16 Jul 2020 09:20:50 +0200 Subject: [PATCH 14/33] Remove even more bazel flags --- taskcluster/.shared.yml | 8 ++++---- taskcluster/ios-build.sh | 4 ++-- taskcluster/tf_tc-build.sh | 4 ++-- taskcluster/tf_tc-vars.sh | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index c64a5bc9..b8eef38b 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -169,11 +169,11 @@ system: url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win-cuda/artifacts/public/home.tar.xz" namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win-cuda" ios_arm64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.1.ios_arm64/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.1.ios_arm64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.3.ios_arm64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.3.ios_arm64" ios_x86_64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.1.ios_x86_64/artifacts/public/home.tar.xz" - 
namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.1.ios_x86_64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.3.ios_x86_64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.3.ios_x86_64" username: 'build-user' homedir: linux: '/home/build-user' diff --git a/taskcluster/ios-build.sh b/taskcluster/ios-build.sh index aa3e8ec6..ed99cb60 100755 --- a/taskcluster/ios-build.sh +++ b/taskcluster/ios-build.sh @@ -13,9 +13,9 @@ BAZEL_TARGETS=" " if [ "${platform}" = "--arm64" ]; then - BAZEL_BUILD_FLAGS="${BAZEL_IOS_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS}" + BAZEL_BUILD_FLAGS="${BAZEL_IOS_ARM64_FLAGS}" else - BAZEL_BUILD_FLAGS="${BAZEL_IOS_X86_64_FLAGS} ${BAZEL_EXTRA_FLAGS}" + BAZEL_BUILD_FLAGS="${BAZEL_IOS_X86_64_FLAGS}" fi BAZEL_ENV_FLAGS="TF_NEED_CUDA=0" diff --git a/taskcluster/tf_tc-build.sh b/taskcluster/tf_tc-build.sh index cb620906..ed3c306e 100755 --- a/taskcluster/tf_tc-build.sh +++ b/taskcluster/tf_tc-build.sh @@ -91,11 +91,11 @@ pushd ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/ fi; if [ "${build_ios_arm64}" = "yes" ]; then - echo "" | TF_NEED_CUDA=0 TF_CONFIGURE_IOS=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_IOS_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} + echo "" | TF_NEED_CUDA=0 TF_CONFIGURE_IOS=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_IOS_ARM64_FLAGS} ${BUILD_TARGET_LITE_LIB} fi; if [ "${build_ios_x86_64}" = "yes" ]; then - echo "" | TF_NEED_CUDA=0 TF_CONFIGURE_IOS=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_IOS_X86_64_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} + echo "" | TF_NEED_CUDA=0 TF_CONFIGURE_IOS=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_IOS_X86_64_FLAGS} ${BUILD_TARGET_LITE_LIB} fi; if [ $? 
-ne 0 ]; then diff --git a/taskcluster/tf_tc-vars.sh b/taskcluster/tf_tc-vars.sh index 8c30ea2a..b768ad4b 100755 --- a/taskcluster/tf_tc-vars.sh +++ b/taskcluster/tf_tc-vars.sh @@ -171,8 +171,8 @@ BAZEL_ARM64_FLAGS="--config=rpi3-armv8 --config=rpi3-armv8_opt --copt=-DTFLITE_W BAZEL_ANDROID_ARM_FLAGS="--config=android --config=android_arm --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 --copt=-DTFLITE_WITH_RUY_GEMV" BAZEL_ANDROID_ARM64_FLAGS="--config=android --config=android_arm64 --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 --copt=-DTFLITE_WITH_RUY_GEMV" BAZEL_CUDA_FLAGS="--config=cuda" -BAZEL_IOS_ARM64_FLAGS="--config=ios_arm64 --copt=-DTFLITE_WITH_RUY_GEMV" -BAZEL_IOS_X86_64_FLAGS="--config=ios_x86_64 --copt=-DTFLITE_WITH_RUY_GEMV" +BAZEL_IOS_ARM64_FLAGS="--config=ios_arm64 --define=runtime=tflite --copt=-DTFLITE_WITH_RUY_GEMV" +BAZEL_IOS_X86_64_FLAGS="--config=ios_x86_64 --define=runtime=tflite --copt=-DTFLITE_WITH_RUY_GEMV" if [ "${OS}" = "${TC_MSYS_VERSION}" ]; then # Somehow, even with Python being in the PATH, Bazel on windows struggles From be43b3fdc1ba0636e9a716a796d2ebb117485957 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Thu, 16 Jul 2020 22:22:03 +0200 Subject: [PATCH 15/33] Bump caches for artifacts rebuilt on new worker --- taskcluster/.shared.yml | 4 ++-- taskcluster/tc-build-utils.sh | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index b8eef38b..d80ef2f7 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -154,8 +154,8 @@ system: url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm64/artifacts/public/home.tar.xz" namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm64" darwin_amd64: - url: 
"https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.osx/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.osx" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.3.osx/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.3.osx" android_arm64: url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-arm64/artifacts/public/home.tar.xz" namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-arm64" diff --git a/taskcluster/tc-build-utils.sh b/taskcluster/tc-build-utils.sh index f4042fbd..75645582 100755 --- a/taskcluster/tc-build-utils.sh +++ b/taskcluster/tc-build-utils.sh @@ -33,6 +33,8 @@ do_deepspeech_python_build() virtualenv_activate "${pyalias}" "deepspeech" python --version + pip --version + pip3 --version which pip which pip3 From de7a249fcd9c7e5f3eebdee4357626d8098569d9 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Sun, 19 Jul 2020 15:04:04 +0200 Subject: [PATCH 16/33] Fix linker issues during tests with new workers --- taskcluster/.build.yml | 2 +- taskcluster/homebrew-build.sh | 9 +++++---- taskcluster/homebrew_builds-darwin-amd64.yml | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/taskcluster/.build.yml b/taskcluster/.build.yml index 76fc9703..61fadf17 100644 --- a/taskcluster/.build.yml +++ b/taskcluster/.build.yml @@ -25,7 +25,7 @@ build: nc_asset_name: 'native_client.tar.xz' args: tests_cmdline: '' - tensorflow_git_desc: 'TensorFlow: v2.2.0-16-gfc46411' + tensorflow_git_desc: 'TensorFlow: v2.2.0-17-g0854bb5' test_model_task: '' homebrew: url: '' diff --git 
a/taskcluster/homebrew-build.sh b/taskcluster/homebrew-build.sh index 6cdda80a..4ddb9ad8 100755 --- a/taskcluster/homebrew-build.sh +++ b/taskcluster/homebrew-build.sh @@ -42,12 +42,13 @@ do_prepare_homebrew() check_homebrew "${_brew_instance}" - # Force an upgrade to fetch formulae - brew search openssl - # Then we force onto a specific well-known commit + mkdir -p "$(brew --prefix)/Library/Taps/homebrew/homebrew-core" pushd "$(brew --prefix)/Library/Taps/homebrew/homebrew-core" - git fetch origin && git checkout ${BREW_FORMULAS_COMMIT} + git init + git remote add origin https://github.com/Homebrew/homebrew-core.git + git fetch origin + git checkout ${BREW_FORMULAS_COMMIT} popd } diff --git a/taskcluster/homebrew_builds-darwin-amd64.yml b/taskcluster/homebrew_builds-darwin-amd64.yml index d2fe03f5..f5d60902 100644 --- a/taskcluster/homebrew_builds-darwin-amd64.yml +++ b/taskcluster/homebrew_builds-darwin-amd64.yml @@ -9,4 +9,4 @@ build: package: "taskcluster/homebrew-package.sh --builds" metadata: name: "Builds Homebrew macOS AMD64" - description: "Setup a buildsl Homebrew for macOS/AMD64" + description: "Setup a builds Homebrew for macOS/AMD64" From 5e5db17371f167c57ad8255e8324629acf1dea7d Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Mon, 20 Jul 2020 11:16:57 +0200 Subject: [PATCH 17/33] Address review comments --- .../swift/deepspeech_ios/deepspeech.h | 357 ------------------ .../deepspeech_ios_test/AppDelegate.swift | 45 --- taskcluster/.shared.yml | 2 - taskcluster/tf_darwin-amd64-opt.yml | 3 - taskcluster/tf_ios-arm64-opt.yml | 3 - taskcluster/tf_ios-x86_64-opt.yml | 3 - taskcluster/tf_tc-brew.sh | 56 --- 7 files changed, 469 deletions(-) delete mode 100644 native_client/swift/deepspeech_ios/deepspeech.h delete mode 100755 taskcluster/tf_tc-brew.sh diff --git a/native_client/swift/deepspeech_ios/deepspeech.h b/native_client/swift/deepspeech_ios/deepspeech.h deleted file mode 100644 index 1df3cf2e..00000000 --- 
a/native_client/swift/deepspeech_ios/deepspeech.h +++ /dev/null @@ -1,357 +0,0 @@ -#ifndef DEEPSPEECH_H -#define DEEPSPEECH_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef SWIG - #if defined _MSC_VER - #define DEEPSPEECH_EXPORT __declspec(dllexport) - #else - #define DEEPSPEECH_EXPORT __attribute__ ((visibility("default"))) - #endif /*End of _MSC_VER*/ -#else - #define DEEPSPEECH_EXPORT -#endif - -typedef struct ModelState ModelState; - -typedef struct StreamingState StreamingState; - -/** - * @brief Stores text of an individual token, along with its timing information - */ -typedef struct TokenMetadata { - /** The text corresponding to this token */ - const char* const text; - - /** Position of the token in units of 20ms */ - const unsigned int timestep; - - /** Position of the token in seconds */ - const float start_time; -} TokenMetadata; - -/** - * @brief A single transcript computed by the model, including a confidence - * value and the metadata for its constituent tokens. - */ -typedef struct CandidateTranscript { - /** Array of TokenMetadata objects */ - const TokenMetadata* const tokens; - /** Size of the tokens array */ - const unsigned int num_tokens; - /** Approximated confidence value for this transcript. This is roughly the - * sum of the acoustic model logit values for each timestep/character that - * contributed to the creation of this transcript. - */ - const double confidence; -} CandidateTranscript; - -/** - * @brief An array of CandidateTranscript objects computed by the model. 
- */ -typedef struct Metadata { - /** Array of CandidateTranscript objects */ - const CandidateTranscript* const transcripts; - /** Size of the transcripts array */ - const unsigned int num_transcripts; -} Metadata; - -// sphinx-doc: error_code_listing_start - -#define DS_FOR_EACH_ERROR(APPLY) \ - APPLY(DS_ERR_OK, 0x0000, "No error.") \ - APPLY(DS_ERR_NO_MODEL, 0x1000, "Missing model information.") \ - APPLY(DS_ERR_INVALID_ALPHABET, 0x2000, "Invalid alphabet embedded in model. (Data corruption?)") \ - APPLY(DS_ERR_INVALID_SHAPE, 0x2001, "Invalid model shape.") \ - APPLY(DS_ERR_INVALID_SCORER, 0x2002, "Invalid scorer file.") \ - APPLY(DS_ERR_MODEL_INCOMPATIBLE, 0x2003, "Incompatible model.") \ - APPLY(DS_ERR_SCORER_NOT_ENABLED, 0x2004, "External scorer is not enabled.") \ - APPLY(DS_ERR_SCORER_UNREADABLE, 0x2005, "Could not read scorer file.") \ - APPLY(DS_ERR_SCORER_INVALID_LM, 0x2006, "Could not recognize language model header in scorer.") \ - APPLY(DS_ERR_SCORER_NO_TRIE, 0x2007, "Reached end of scorer file before loading vocabulary trie.") \ - APPLY(DS_ERR_SCORER_INVALID_TRIE, 0x2008, "Invalid magic in trie header.") \ - APPLY(DS_ERR_SCORER_VERSION_MISMATCH, 0x2009, "Scorer file version does not match expected version.") \ - APPLY(DS_ERR_FAIL_INIT_MMAP, 0x3000, "Failed to initialize memory mapped model.") \ - APPLY(DS_ERR_FAIL_INIT_SESS, 0x3001, "Failed to initialize the session.") \ - APPLY(DS_ERR_FAIL_INTERPRETER, 0x3002, "Interpreter failed.") \ - APPLY(DS_ERR_FAIL_RUN_SESS, 0x3003, "Failed to run the session.") \ - APPLY(DS_ERR_FAIL_CREATE_STREAM, 0x3004, "Error creating the stream.") \ - APPLY(DS_ERR_FAIL_READ_PROTOBUF, 0x3005, "Error reading the proto buffer model file.") \ - APPLY(DS_ERR_FAIL_CREATE_SESS, 0x3006, "Failed to create session.") \ - APPLY(DS_ERR_FAIL_CREATE_MODEL, 0x3007, "Could not allocate model state.") - -// sphinx-doc: error_code_listing_end - -enum DeepSpeech_Error_Codes -{ -#define DEFINE(NAME, VALUE, DESC) NAME = VALUE, 
-DS_FOR_EACH_ERROR(DEFINE) -#undef DEFINE -}; - -/** - * @brief An object providing an interface to a trained DeepSpeech model. - * - * @param aModelPath The path to the frozen model graph. - * @param[out] retval a ModelState pointer - * - * @return Zero on success, non-zero on failure. - */ -DEEPSPEECH_EXPORT -int DS_CreateModel(const char* aModelPath, - ModelState** retval); - -/** - * @brief Get beam width value used by the model. If {@link DS_SetModelBeamWidth} - * was not called before, will return the default value loaded from the - * model file. - * - * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. - * - * @return Beam width value used by the model. - */ -DEEPSPEECH_EXPORT -unsigned int DS_GetModelBeamWidth(const ModelState* aCtx); - -/** - * @brief Set beam width value used by the model. - * - * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. - * @param aBeamWidth The beam width used by the model. A larger beam width value - * generates better results at the cost of decoding time. - * - * @return Zero on success, non-zero on failure. - */ -DEEPSPEECH_EXPORT -int DS_SetModelBeamWidth(ModelState* aCtx, - unsigned int aBeamWidth); - -/** - * @brief Return the sample rate expected by a model. - * - * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. - * - * @return Sample rate expected by the model for its input. - */ -DEEPSPEECH_EXPORT -int DS_GetModelSampleRate(const ModelState* aCtx); - -/** - * @brief Frees associated resources and destroys model object. - */ -DEEPSPEECH_EXPORT -void DS_FreeModel(ModelState* ctx); - -/** - * @brief Enable decoding using an external scorer. - * - * @param aCtx The ModelState pointer for the model being changed. - * @param aScorerPath The path to the external scorer file. - * - * @return Zero on success, non-zero on failure (invalid arguments). 
- */ -DEEPSPEECH_EXPORT -int DS_EnableExternalScorer(ModelState* aCtx, - const char* aScorerPath); - -/** - * @brief Disable decoding using an external scorer. - * - * @param aCtx The ModelState pointer for the model being changed. - * - * @return Zero on success, non-zero on failure. - */ -DEEPSPEECH_EXPORT -int DS_DisableExternalScorer(ModelState* aCtx); - -/** - * @brief Set hyperparameters alpha and beta of the external scorer. - * - * @param aCtx The ModelState pointer for the model being changed. - * @param aAlpha The alpha hyperparameter of the decoder. Language model weight. - * @param aLMBeta The beta hyperparameter of the decoder. Word insertion weight. - * - * @return Zero on success, non-zero on failure. - */ -DEEPSPEECH_EXPORT -int DS_SetScorerAlphaBeta(ModelState* aCtx, - float aAlpha, - float aBeta); - -/** - * @brief Use the DeepSpeech model to convert speech to text. - * - * @param aCtx The ModelState pointer for the model to use. - * @param aBuffer A 16-bit, mono raw audio signal at the appropriate - * sample rate (matching what the model was trained on). - * @param aBufferSize The number of samples in the audio signal. - * - * @return The STT result. The user is responsible for freeing the string using - * {@link DS_FreeString()}. Returns NULL on error. - */ -DEEPSPEECH_EXPORT -char* DS_SpeechToText(ModelState* aCtx, - const short* aBuffer, - unsigned int aBufferSize); - -/** - * @brief Use the DeepSpeech model to convert speech to text and output results - * including metadata. - * - * @param aCtx The ModelState pointer for the model to use. - * @param aBuffer A 16-bit, mono raw audio signal at the appropriate - * sample rate (matching what the model was trained on). - * @param aBufferSize The number of samples in the audio signal. - * @param aNumResults The maximum number of CandidateTranscript structs to return. Returned value might be smaller than this. - * - * @return Metadata struct containing multiple CandidateTranscript structs. 
Each - * transcript has per-token metadata including timing information. The - * user is responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. - * Returns NULL on error. - */ -DEEPSPEECH_EXPORT -Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx, - const short* aBuffer, - unsigned int aBufferSize, - unsigned int aNumResults); - -/** - * @brief Create a new streaming inference state. The streaming state returned - * by this function can then be passed to {@link DS_FeedAudioContent()} - * and {@link DS_FinishStream()}. - * - * @param aCtx The ModelState pointer for the model to use. - * @param[out] retval an opaque pointer that represents the streaming state. Can - * be NULL if an error occurs. - * - * @return Zero for success, non-zero on failure. - */ -DEEPSPEECH_EXPORT -int DS_CreateStream(ModelState* aCtx, - StreamingState** retval); - -/** - * @brief Feed audio samples to an ongoing streaming inference. - * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. - * @param aBuffer An array of 16-bit, mono raw audio samples at the - * appropriate sample rate (matching what the model was trained on). - * @param aBufferSize The number of samples in @p aBuffer. - */ -DEEPSPEECH_EXPORT -void DS_FeedAudioContent(StreamingState* aSctx, - const short* aBuffer, - unsigned int aBufferSize); - -/** - * @brief Compute the intermediate decoding of an ongoing streaming inference. - * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. - * - * @return The STT intermediate result. The user is responsible for freeing the - * string using {@link DS_FreeString()}. - */ -DEEPSPEECH_EXPORT -char* DS_IntermediateDecode(const StreamingState* aSctx); - -/** - * @brief Compute the intermediate decoding of an ongoing streaming inference, - * return results including metadata. - * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. 
- * @param aNumResults The number of candidate transcripts to return. - * - * @return Metadata struct containing multiple candidate transcripts. Each transcript - * has per-token metadata including timing information. The user is - * responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. - * Returns NULL on error. - */ -DEEPSPEECH_EXPORT -Metadata* DS_IntermediateDecodeWithMetadata(const StreamingState* aSctx, - unsigned int aNumResults); - -/** - * @brief Compute the final decoding of an ongoing streaming inference and return - * the result. Signals the end of an ongoing streaming inference. - * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. - * - * @return The STT result. The user is responsible for freeing the string using - * {@link DS_FreeString()}. - * - * @note This method will free the state pointer (@p aSctx). - */ -DEEPSPEECH_EXPORT -char* DS_FinishStream(StreamingState* aSctx); - -/** - * @brief Compute the final decoding of an ongoing streaming inference and return - * results including metadata. Signals the end of an ongoing streaming - * inference. - * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. - * @param aNumResults The number of candidate transcripts to return. - * - * @return Metadata struct containing multiple candidate transcripts. Each transcript - * has per-token metadata including timing information. The user is - * responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. - * Returns NULL on error. - * - * @note This method will free the state pointer (@p aSctx). - */ -DEEPSPEECH_EXPORT -Metadata* DS_FinishStreamWithMetadata(StreamingState* aSctx, - unsigned int aNumResults); - -/** - * @brief Destroy a streaming state without decoding the computed logits. This - * can be used if you no longer need the result of an ongoing streaming - * inference and don't want to perform a costly decode operation. 
- * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. - * - * @note This method will free the state pointer (@p aSctx). - */ -DEEPSPEECH_EXPORT -void DS_FreeStream(StreamingState* aSctx); - -/** - * @brief Free memory allocated for metadata information. - */ -DEEPSPEECH_EXPORT -void DS_FreeMetadata(Metadata* m); - -/** - * @brief Free a char* string returned by the DeepSpeech API. - */ -DEEPSPEECH_EXPORT -void DS_FreeString(char* str); - -/** - * @brief Returns the version of this library. The returned version is a semantic - * version (SemVer 2.0.0). The string returned must be freed with {@link DS_FreeString()}. - * - * @return The version string. - */ -DEEPSPEECH_EXPORT -char* DS_Version(); - -/** - * @brief Returns a textual description corresponding to an error code. - * The string returned must be freed with @{link DS_FreeString()}. - * - * @return The error description. - */ -DEEPSPEECH_EXPORT -char* DS_ErrorCodeToErrorMessage(int aErrorCode); - -#undef DEEPSPEECH_EXPORT - -#ifdef __cplusplus -} -#endif - -#endif /* DEEPSPEECH_H */ diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/AppDelegate.swift b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/AppDelegate.swift index b589df39..a2dcb427 100644 --- a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/AppDelegate.swift +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/AppDelegate.swift @@ -138,32 +138,6 @@ func render(audioContext: AudioContext?, stream: DeepSpeechStream) { func test(model: DeepSpeechModel, audioPath: String, completion: @escaping () -> ()) { let url = URL(fileURLWithPath: audioPath) - //var format = AudioStreamBasicDescription.init() - //format.mSampleRate = 16000; - //format.mFormatID = kAudioFormatLinearPCM; - //format.mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagsNativeEndian | kAudioFormatFlagIsPacked; - //format.mBitsPerChannel = 16; - //format.mChannelsPerFrame = 1; - 
//format.mBytesPerFrame = format.mChannelsPerFrame * format.mBitsPerChannel / 8; - //format.mFramesPerPacket = 1; - //format.mBytesPerPacket = format.mFramesPerPacket * format.mBytesPerFrame; - // - //var file = Optional.init(nilLiteral: ()); - //let status = ExtAudioFileCreateWithURL(url as CFURL, - // kAudioFileWAVEType, - // &format, - // nil, - // 0, - // &file) - //print("status: \(status)") - //let status2 = ExtAudioFileSetProperty(file!, - // kExtAudioFileProperty_ClientDataFormat, - // UInt32(MemoryLayout.size), - // &format) - //print("status: \(status2)") - // - //ExtAudioFileRead(file, <#T##ioNumberFrames: UnsafeMutablePointer##UnsafeMutablePointer#>, <#T##ioData: UnsafeMutablePointer##UnsafeMutablePointer#>) - let stream = try! model.createStream() print("\(audioPath)") let start = CFAbsoluteTimeGetCurrent() @@ -177,25 +151,6 @@ func test(model: DeepSpeechModel, audioPath: String, completion: @escaping () -> print("\"\(audioPath)\": \(end - start) - \(result)") completion() }) - - //let file = try! AVAudioFile(forReading: url) - //print("file length \(file.length)") - //let format = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: 16000, channels: 1, interleaved: false)! - //let stream = createStream(modelState: modelState) - //while file.framePosition < file.length { - // let pcmBuf = AVAudioPCMBuffer.init(pcmFormat: format, frameCapacity: 8 * 1024)! // arbitrary frameCapacity - // try! file.read(into: pcmBuf) - // if pcmBuf.frameLength == 0 { - // break - // } - // print("read \(pcmBuf.frameLength) frames into buffer") - // let rawPtr = pcmBuf.audioBufferList.pointee.mBuffers.mData! - // let ptr = rawPtr.bindMemory(to: Int16.self, capacity: Int(pcmBuf.frameLength)) - // print("first few samples: \(ptr[0]) \(ptr[1]) \(ptr[2]) \(ptr[3]) ") - // DS_FeedAudioContent(stream, ptr, UInt32(pcmBuf.frameLength)) - //} - //let result = DS_FinishStream(stream) - //return String.init(cString: result!) 
} @UIApplicationMain diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index d80ef2f7..03bdd3fd 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -14,8 +14,6 @@ deepspeech: tensorflow: packages_xenial: apt: 'apt-get -qq update && apt-get -qq -y install realpath build-essential python-virtualenv python-dev python-pip libblas-dev liblapack-dev gfortran wget software-properties-common pixz zip zlib1g-dev unzip' - packages_macos: - brew: '$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/taskcluster/tf_tc-brew.sh' packages_win: pacman: 'pacman --noconfirm -S patch unzip tar' msys64: 'ln -s $USERPROFILE/msys64 $TASKCLUSTER_TASK_DIR/msys64' diff --git a/taskcluster/tf_darwin-amd64-opt.yml b/taskcluster/tf_darwin-amd64-opt.yml index 365e1700..64674b1f 100644 --- a/taskcluster/tf_darwin-amd64-opt.yml +++ b/taskcluster/tf_darwin-amd64-opt.yml @@ -5,9 +5,6 @@ build: artifact_namespace: ${system.tensorflow.darwin_amd64.namespace} generic: workerType: "ds-macos-heavy" - system_config: - > - ${tensorflow.packages_macos.brew} scripts: setup: "taskcluster/tf_tc-setup.sh" build: "taskcluster/tf_tc-build.sh --cpu" diff --git a/taskcluster/tf_ios-arm64-opt.yml b/taskcluster/tf_ios-arm64-opt.yml index 9f253b3f..edb3eb2b 100644 --- a/taskcluster/tf_ios-arm64-opt.yml +++ b/taskcluster/tf_ios-arm64-opt.yml @@ -5,9 +5,6 @@ build: artifact_namespace: ${system.tensorflow.ios_arm64.namespace} generic: workerType: "ds-macos-heavy" - system_config: - > - ${tensorflow.packages_macos.brew} scripts: setup: "taskcluster/tf_tc-setup.sh" build: "taskcluster/tf_tc-build.sh --ios-arm64" diff --git a/taskcluster/tf_ios-x86_64-opt.yml b/taskcluster/tf_ios-x86_64-opt.yml index c56ad3ca..8f82cb95 100644 --- a/taskcluster/tf_ios-x86_64-opt.yml +++ b/taskcluster/tf_ios-x86_64-opt.yml @@ -5,9 +5,6 @@ build: artifact_namespace: ${system.tensorflow.ios_x86_64.namespace} generic: workerType: "ds-macos-heavy" - system_config: - > - ${tensorflow.packages_macos.brew} scripts: setup: 
"taskcluster/tf_tc-setup.sh" build: "taskcluster/tf_tc-build.sh --ios-x86_64" diff --git a/taskcluster/tf_tc-brew.sh b/taskcluster/tf_tc-brew.sh deleted file mode 100755 index 8d4128a2..00000000 --- a/taskcluster/tf_tc-brew.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash - -set -ex - -# if [ -z "${TASKCLUSTER_TASK_DIR}" ]; then -# echo "No TASKCLUSTER_TASK_DIR, aborting." -# exit 1 -# fi - -# LOCAL_BREW="${TASKCLUSTER_TASK_DIR}/homebrew" -# export PATH=${LOCAL_BREW}/bin:$PATH -# export HOMEBREW_LOGS="${TASKCLUSTER_TASK_DIR}/homebrew.logs/" -# export HOMEBREW_CACHE="${TASKCLUSTER_TASK_DIR}/homebrew.cache/" -# export HOMEBREW_FORMULAS_COMMIT=93fe256e0168db3b1c70c26a01941be59ce76311 -# export HOMEBREW_NO_AUTO_UPDATE=1 - -# # Never fail on pre-existing homebrew/ directory -# mkdir -p "${LOCAL_BREW}" || true -# mkdir -p "${HOMEBREW_CACHE}" || true - -# # Make sure to verify there is a 'brew' binary there, otherwise install things. -# if [ ! -x "${LOCAL_BREW}/bin/brew" ]; then -# curl -L https://github.com/Homebrew/brew/tarball/2.2.17 | tar xz --strip 1 -C "${LOCAL_BREW}" -# fi; - -# echo "local brew list (should be empty) ..." -# brew list - -# echo "local brew prefix ..." 
-# local_prefix=$(brew --prefix) -# echo "${local_prefix}" - -# if [ "${LOCAL_BREW}" != "${local_prefix}" ]; then -# echo "Weird state:" -# echo "LOCAL_BREW=${LOCAL_BREW}" -# echo "local_prefix=${local_prefix}" -# exit 1 -# fi; - - -# # Then we force onto a specific well-known commit -# mkdir -p "$(brew --prefix)/Library/Taps/homebrew/homebrew-core" -# pushd "$(brew --prefix)/Library/Taps/homebrew/homebrew-core" -# git init -# git remote add origin https://github.com/Homebrew/homebrew-core.git -# git fetch origin -# git checkout ${HOMEBREW_FORMULAS_COMMIT} -# popd - -# # coreutils, pyenv-virtualenv required for build of tensorflow -# all_pkgs="coreutils pyenv-virtualenv" - -# for pkg in ${all_pkgs}; -# do -# (brew list --versions ${pkg} && brew upgrade ${pkg}) || brew install ${pkg} -# done; From d9dac13343b634996e0875770b7cd70259e60060 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Mon, 20 Jul 2020 11:17:05 +0200 Subject: [PATCH 18/33] Clean up tf_tc-build.sh --- taskcluster/tf_tc-build.sh | 107 ++++++++----------------------------- 1 file changed, 22 insertions(+), 85 deletions(-) diff --git a/taskcluster/tf_tc-build.sh b/taskcluster/tf_tc-build.sh index ed3c306e..6ac03120 100755 --- a/taskcluster/tf_tc-build.sh +++ b/taskcluster/tf_tc-build.sh @@ -4,50 +4,6 @@ set -ex source $(dirname $0)/tf_tc-vars.sh -build_amd64=no -build_gpu=no -build_android_arm=no -build_android_arm64=no -build_linux_arm=no -build_linux_arm64=no -build_ios_arm64=no -build_ios_x86_64=no - -if [ "$1" = "--cpu" ]; then - build_amd64=yes -fi - -if [ "$1" = "--gpu" ]; then - build_amd64=yes - build_gpu=yes -fi - -if [ "$1" = "--arm" ]; then - build_amd64=yes - build_linux_arm=yes -fi - -if [ "$1" = "--arm64" ]; then - build_amd64=yes - build_linux_arm64=yes -fi - -if [ "$1" = "--android-armv7" ]; then - build_android_arm=yes -fi - -if [ "$1" = "--android-arm64" ]; then - build_android_arm64=yes -fi - -if [ "$1" = "--ios-arm64" ]; then - build_ios_arm64=yes -fi - -if [ "$1" = "--ios-x86_64" 
]; then - build_ios_x86_64=yes -fi - pushd ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/ BAZEL_BUILD="bazel ${BAZEL_OUTPUT_USER_ROOT} build -s --explain bazel_monolithic_tf.log --verbose_explanations --experimental_strict_action_env --config=monolithic" @@ -58,51 +14,32 @@ pushd ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/ # Force toolchain sync (useful on macOS ?) bazel ${BAZEL_OUTPUT_USER_ROOT} sync --configure - if [ "${build_amd64}" = "yes" ]; then - # Pure amd64 CPU-only build - if [ "${OS}" = "${TC_MSYS_VERSION}" -a "${build_gpu}" = "no" ]; then - echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LIB_CPP_API} ${BUILD_TARGET_LITE_LIB} - elif [ "${build_gpu}" = "no" -a "${build_linux_arm}" = "no" -a "${build_linux_arm64}" = "no" ]; then - echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LIB_CPP_API} ${BUILD_TARGET_LITE_LIB} - fi - - # Cross RPi3 CPU-only build - if [ "${build_linux_arm}" = "yes" ]; then - echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_ARM_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} - fi - - # Cross ARM64 Cortex-A53 build - if [ "${build_linux_arm64}" = "yes" ]; then - echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} - fi - - # Pure amd64 GPU-enabled build - if [ "${build_gpu}" = "yes" ]; then - eval "export ${TF_CUDA_FLAGS}" && (echo "" | TF_NEED_CUDA=1 ./configure) && ${BAZEL_BUILD} -c opt ${BAZEL_CUDA_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BAZEL_OPT_FLAGS} ${BUILD_TARGET_LIB_CPP_API} - fi - fi - - if [ "${build_android_arm}" = "yes" ]; then + case "$1" in + "--cpu") + echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LIB_CPP_API} ${BUILD_TARGET_LITE_LIB} + ;; + "--gpu") + eval "export ${TF_CUDA_FLAGS}" && (echo "" | TF_NEED_CUDA=1 ./configure) && 
${BAZEL_BUILD} -c opt ${BAZEL_CUDA_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BAZEL_OPT_FLAGS} ${BUILD_TARGET_LIB_CPP_API} + ;; + "--arm") + echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_ARM_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} + ;; + "--arm64") + echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} + ;; + "--android-armv7") echo "" | TF_SET_ANDROID_WORKSPACE=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_ANDROID_ARM_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} - fi; - - if [ "${build_android_arm64}" = "yes" ]; then + ;; + "--android-arm64") echo "" | TF_SET_ANDROID_WORKSPACE=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_ANDROID_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} - fi; - - if [ "${build_ios_arm64}" = "yes" ]; then + ;; + "--ios-arm64") echo "" | TF_NEED_CUDA=0 TF_CONFIGURE_IOS=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_IOS_ARM64_FLAGS} ${BUILD_TARGET_LITE_LIB} - fi; - - if [ "${build_ios_x86_64}" = "yes" ]; then + ;; + "--ios-x86_64") echo "" | TF_NEED_CUDA=0 TF_CONFIGURE_IOS=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_IOS_X86_64_FLAGS} ${BUILD_TARGET_LITE_LIB} - fi; - - if [ $? -ne 0 ]; then - # There was a failure, just account for it. - echo "Build failure, please check the output above. Exit code was: $?" 
- return 1 - fi + ;; + esac bazel ${BAZEL_OUTPUT_USER_ROOT} shutdown popd From 267287861867d581f500cfc748c30ea3501a9608 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Mon, 20 Jul 2020 11:52:35 +0200 Subject: [PATCH 19/33] Add docs to Swift bindings and missing methods --- .../swift/deepspeech_ios/DeepSpeech.swift | 193 ++++++++++++++++-- 1 file changed, 173 insertions(+), 20 deletions(-) diff --git a/native_client/swift/deepspeech_ios/DeepSpeech.swift b/native_client/swift/deepspeech_ios/DeepSpeech.swift index b694995b..50c32553 100644 --- a/native_client/swift/deepspeech_ios/DeepSpeech.swift +++ b/native_client/swift/deepspeech_ios/DeepSpeech.swift @@ -30,7 +30,8 @@ public enum DeepSpeechError: Error { case failCreateSess(errorCode: Int32) case failCreateModel(errorCode: Int32) - // Additional case for invalid error codes, should never happen unless the user has mixed header and binary versions + // Additional case for invalid error codes, should never happen unless the + // user has mixed header and binary versions. case invalidErrorCode(errorCode: Int32) } @@ -115,9 +116,15 @@ private func evaluateErrorCode(errorCode: Int32) throws { } } +/// Stores text of an individual token, along with its timing information public struct DeepSpeechTokenMetadata { + /// The text corresponding to this token let text: String + + /// Position of the token in units of 20ms let timestep: Int + + /// Position of the token in seconds let startTime: Float internal init(fromInternal: TokenMetadata) { @@ -127,8 +134,17 @@ public struct DeepSpeechTokenMetadata { } } +/** A single transcript computed by the model, including a confidence value and + the metadata for its constituent tokens +*/ public struct DeepSpeechCandidateTranscript { + /// Array of DeepSpeechTokenMetadata objects private(set) var tokens: [DeepSpeechTokenMetadata] = [] + + /** Approximated confidence value for this transcript. 
This corresponds to + both acoustic model and language model scores that contributed to the + creation of this transcript. + */ let confidence: Double internal init(fromInternal: CandidateTranscript) { @@ -140,12 +156,16 @@ public struct DeepSpeechCandidateTranscript { } } +/// An array of DeepSpeechCandidateTranscript objects computed by the model public struct DeepSpeechMetadata { + /// Array of DeepSpeechCandidateTranscript objects private(set) var transcripts: [DeepSpeechCandidateTranscript] = [] internal init(fromInternal: UnsafeMutablePointer) { let md = fromInternal.pointee - let transcriptsBuffer = UnsafeBufferPointer(start: md.transcripts, count: Int(md.num_transcripts)) + let transcriptsBuffer = UnsafeBufferPointer( + start: md.transcripts, + count: Int(md.num_transcripts)) for tr in transcriptsBuffer { transcripts.append(DeepSpeechCandidateTranscript(fromInternal: tr)) @@ -167,6 +187,13 @@ public class DeepSpeechStream { } } + /** Feed audio samples to an ongoing streaming inference. + + - Parameter buffer: A 16-bit, mono raw audio signal at the appropriate + sample rate (matching what the model was trained on). + + - Precondition: `finishStream()` has not been called on this stream. + */ public func feedAudioContent(buffer: Array) { precondition(streamCtx != nil, "calling method on invalidated Stream") @@ -175,12 +202,25 @@ public class DeepSpeechStream { } } + /** Feed audio samples to an ongoing streaming inference. + + - Parameter buffer: A 16-bit, mono raw audio signal at the appropriate + sample rate (matching what the model was trained on). + + - Precondition: `finishStream()` has not been called on this stream. + */ public func feedAudioContent(buffer: UnsafeBufferPointer) { precondition(streamCtx != nil, "calling method on invalidated Stream") DS_FeedAudioContent(streamCtx, buffer.baseAddress, UInt32(buffer.count)) } + /** Compute the intermediate decoding of an ongoing streaming inference. 
+ + - Precondition: `finishStream()` has not been called on this stream. + + - Returns: The STT intermediate result. + */ public func intermediateDecode() -> String { precondition(streamCtx != nil, "calling method on invalidated Stream") @@ -189,6 +229,16 @@ public class DeepSpeechStream { return String(cString: result!) } + /** Compute the intermediate decoding of an ongoing streaming inference, + return results including metadata. + + - Parameter numResults: The number of candidate transcripts to return. + + - Precondition: `finishStream()` has not been called on this stream. + + - Returns: Metadata struct containing multiple CandidateTranscript structs. + Each transcript has per-token metadata including timing information. + */ public func intermediateDecodeWithMetadata(numResults: Int) -> DeepSpeechMetadata { precondition(streamCtx != nil, "calling method on invalidated Stream") let result = DS_IntermediateDecodeWithMetadata(streamCtx, UInt32(numResults))! @@ -196,6 +246,15 @@ public class DeepSpeechStream { return DeepSpeechMetadata(fromInternal: result) } + /** Compute the final decoding of an ongoing streaming inference and return + the result. Signals the end of an ongoing streaming inference. + + - Precondition: `finishStream()` has not been called on this stream. + + - Returns: The STT result. + + - Postcondition: This method will invalidate this streaming context. + */ public func finishStream() -> String { precondition(streamCtx != nil, "calling method on invalidated Stream") @@ -206,11 +265,38 @@ public class DeepSpeechStream { } return String(cString: result!) } + + /** Compute the final decoding of an ongoing streaming inference and return + results including metadata. Signals the end of an ongoing streaming + inference. + + - Parameter numResults: The number of candidate transcripts to return. + + - Precondition: `finishStream()` has not been called on this stream. + + - Returns: Metadata struct containing multiple CandidateTranscript structs. 
+ Each transcript has per-token metadata including timing information. + + - Postcondition: This method will invalidate this streaming context. + */ + public func finishStreamWithMetadata(numResults: Int) -> DeepSpeechMetadata { + precondition(streamCtx != nil, "calling method on invalidated Stream") + + let result = DS_FinishStreamWithMetadata(streamCtx, UInt32(numResults))! + defer { DS_FreeMetadata(result) } + return DeepSpeechMetadata(fromInternal: result) + } } +/// An object providing an interface to a trained DeepSpeech model. public class DeepSpeechModel { private var modelCtx: OpaquePointer! + /** + - Parameter modelPath: The path to the model file. + + - Throws: `DeepSpeechError` on failure. + */ public init(modelPath: String) throws { let err = DS_CreateModel(modelPath, &modelCtx) try evaluateErrorCode(errorCode: err) @@ -221,77 +307,144 @@ public class DeepSpeechModel { modelCtx = nil } + /** Get beam width value used by the model. If {@link DS_SetModelBeamWidth} + was not called before, will return the default value loaded from the + model file. + + - Returns: Beam width value used by the model. + */ public func getBeamWidth() -> Int { return Int(DS_GetModelBeamWidth(modelCtx)) } + /** Set beam width value used by the model. + + - Parameter beamWidth: The beam width used by the model. A larger beam + width value generates better results at the cost + of decoding time. + + - Throws: `DeepSpeechError` on failure. + */ public func setBeamWidth(beamWidth: Int) throws { let err = DS_SetModelBeamWidth(modelCtx, UInt32(beamWidth)) try evaluateErrorCode(errorCode: err) } + // The sample rate expected by the model. public var sampleRate: Int { get { return Int(DS_GetModelSampleRate(modelCtx)) } } + /** Enable decoding using an external scorer. + + - Parameter scorerPath: The path to the external scorer file. + + - Throws: `DeepSpeechError` on failure. 
+ */ public func enableExternalScorer(scorerPath: String) throws { let err = DS_EnableExternalScorer(modelCtx, scorerPath) try evaluateErrorCode(errorCode: err) } + /** Disable decoding using an external scorer. + + - Throws: `DeepSpeechError` on failure. + */ public func disableExternalScorer() throws { let err = DS_DisableExternalScorer(modelCtx) try evaluateErrorCode(errorCode: err) } + /** Set hyperparameters alpha and beta of the external scorer. + + - Parameter alpha: The alpha hyperparameter of the decoder. Language model weight. + - Parameter beta: The beta hyperparameter of the decoder. Word insertion weight. + + - Throws: `DeepSpeechError` on failure. + */ public func setScorerAlphaBeta(alpha: Float, beta: Float) throws { let err = DS_SetScorerAlphaBeta(modelCtx, alpha, beta) try evaluateErrorCode(errorCode: err) } + /** Use the DeepSpeech model to convert speech to text. + + - Parameter buffer: A 16-bit, mono raw audio signal at the appropriate + sample rate (matching what the model was trained on). + + - Returns: The STT result. + */ public func speechToText(buffer: Array) -> String { return buffer.withUnsafeBufferPointer { unsafeBufferPointer -> String in return speechToText(buffer: unsafeBufferPointer) } } + /** Use the DeepSpeech model to convert speech to text. + + - Parameter buffer: A 16-bit, mono raw audio signal at the appropriate + sample rate (matching what the model was trained on). + + - Returns: The STT result. + */ public func speechToText(buffer: UnsafeBufferPointer) -> String { let result = DS_SpeechToText(modelCtx, buffer.baseAddress, UInt32(buffer.count)) defer { DS_FreeString(result) } return String(cString: result!) } + /** Use the DeepSpeech model to convert speech to text and output results + including metadata. + + - Parameter buffer: A 16-bit, mono raw audio signal at the appropriate + sample rate (matching what the model was trained on). 
+ - Parameter numResults: The maximum number of DeepSpeechCandidateTranscript + structs to return. Returned value might be smaller than this. + + - Returns: Metadata struct containing multiple CandidateTranscript structs. + Each transcript has per-token metadata including timing information. + */ public func speechToTextWithMetadata(buffer: Array, numResults: Int) -> DeepSpeechMetadata { return buffer.withUnsafeBufferPointer { unsafeBufferPointer -> DeepSpeechMetadata in - let result = DS_SpeechToTextWithMetadata(modelCtx, unsafeBufferPointer.baseAddress, UInt32(buffer.count), UInt32(numResults))! - defer { DS_FreeMetadata(result) } - return DeepSpeechMetadata(fromInternal: result) + return speechToTextWithMetadata(buffer: unsafeBufferPointer, numResults: numResults) } } + /** Use the DeepSpeech model to convert speech to text and output results + including metadata. + + - Parameter buffer: A 16-bit, mono raw audio signal at the appropriate + sample rate (matching what the model was trained on). + - Parameter numResults: The maximum number of DeepSpeechCandidateTranscript + structs to return. Returned value might be smaller than this. + + - Returns: Metadata struct containing multiple CandidateTranscript structs. + Each transcript has per-token metadata including timing information. + */ + public func speechToTextWithMetadata(buffer: UnsafeBufferPointer, numResults: Int) -> DeepSpeechMetadata { + let result = DS_SpeechToTextWithMetadata( + modelCtx, + buffer.baseAddress, + UInt32(buffer.count), + UInt32(numResults))! + defer { DS_FreeMetadata(result) } + return DeepSpeechMetadata(fromInternal: result) + } + + /** Create a new streaming inference state. + + - Returns: DeepSpeechStream object representing the streaming state. + + - Throws: `DeepSpeechError` on failure. + */ public func createStream() throws -> DeepSpeechStream { var streamContext: OpaquePointer! 
let err = DS_CreateStream(modelCtx, &streamContext) try evaluateErrorCode(errorCode: err) return DeepSpeechStream(streamContext: streamContext) } - - public class func open(path: String, scorerPath: Optional = nil) -> OpaquePointer { - var fooOpaque: OpaquePointer! - DS_CreateModel(path, &fooOpaque) - if let scorerPath = scorerPath { - DS_EnableExternalScorer(fooOpaque, scorerPath) - } - return fooOpaque - } - - public class func createStream(modelState: OpaquePointer) -> OpaquePointer { - var fooOpaque: OpaquePointer! - DS_CreateStream(modelState, &fooOpaque) - return fooOpaque - } } public func DeepSpeechVersion() -> String { From a982a61d838b80787b3486b8be314e40304e8584 Mon Sep 17 00:00:00 2001 From: Tilman Kamp <5991088+tilmankamp@users.noreply.github.com> Date: Thu, 9 Jul 2020 17:51:17 +0200 Subject: [PATCH 20/33] Resolves #3146 - Let build_sdb.py also output CSV files and rename it accordingly --- bin/build_sdb.py | 92 --------------- bin/data_set_tool.py | 111 ++++++++++++++++++ bin/play.py | 2 +- bin/run-tc-ldc93s1_checkpoint_sdb.sh | 2 +- bin/run-tc-ldc93s1_new_sdb.sh | 2 +- bin/run-tc-ldc93s1_new_sdb_csv.sh | 2 +- doc/TRAINING.rst | 11 +- .../util/sample_collections.py | 74 +++++++++++- 8 files changed, 198 insertions(+), 98 deletions(-) delete mode 100755 bin/build_sdb.py create mode 100755 bin/data_set_tool.py diff --git a/bin/build_sdb.py b/bin/build_sdb.py deleted file mode 100755 index ac7be8af..00000000 --- a/bin/build_sdb.py +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env python -""" -Tool for building Sample Databases (SDB files) from DeepSpeech CSV files and other SDB files -Use "python3 build_sdb.py -h" for help -""" -import argparse - -import progressbar - -from deepspeech_training.util.audio import ( - AUDIO_TYPE_PCM, - AUDIO_TYPE_OPUS, - AUDIO_TYPE_WAV, - change_audio_types, -) -from deepspeech_training.util.downloader import SIMPLE_BAR -from deepspeech_training.util.sample_collections import ( - DirectSDBWriter, - samples_from_sources, -) 
-from deepspeech_training.util.augmentations import ( - parse_augmentations, - apply_sample_augmentations, - SampleAugmentation -) - -AUDIO_TYPE_LOOKUP = {"wav": AUDIO_TYPE_WAV, "opus": AUDIO_TYPE_OPUS} - - -def build_sdb(): - audio_type = AUDIO_TYPE_LOOKUP[CLI_ARGS.audio_type] - augmentations = parse_augmentations(CLI_ARGS.augment) - if any(not isinstance(a, SampleAugmentation) for a in augmentations): - print("Warning: Some of the augmentations cannot be applied by this command.") - with DirectSDBWriter( - CLI_ARGS.target, audio_type=audio_type, labeled=not CLI_ARGS.unlabeled - ) as sdb_writer: - samples = samples_from_sources(CLI_ARGS.sources, labeled=not CLI_ARGS.unlabeled) - num_samples = len(samples) - if augmentations: - samples = apply_sample_augmentations(samples, audio_type=AUDIO_TYPE_PCM, augmentations=augmentations) - bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR) - for sample in bar( - change_audio_types(samples, audio_type=audio_type, bitrate=CLI_ARGS.bitrate, processes=CLI_ARGS.workers) - ): - sdb_writer.add(sample) - - -def handle_args(): - parser = argparse.ArgumentParser( - description="Tool for building Sample Databases (SDB files) " - "from DeepSpeech CSV files and other SDB files" - ) - parser.add_argument( - "sources", - nargs="+", - help="Source CSV and/or SDB files - " - "Note: For getting a correctly ordered target SDB, source SDBs have to have their samples " - "already ordered from shortest to longest.", - ) - parser.add_argument("target", help="SDB file to create") - parser.add_argument( - "--audio-type", - default="opus", - choices=AUDIO_TYPE_LOOKUP.keys(), - help="Audio representation inside target SDB", - ) - parser.add_argument( - "--bitrate", - type=int, - help="Bitrate for lossy compressed SDB samples like in case of --audio-type opus", - ) - parser.add_argument( - "--workers", type=int, default=None, help="Number of encoding SDB workers" - ) - parser.add_argument( - "--unlabeled", - action="store_true", - 
help="If to build an SDB with unlabeled (audio only) samples - " - "typically used for building noise augmentation corpora", - ) - parser.add_argument( - "--augment", - action='append', - help="Add an augmentation operation", - ) - return parser.parse_args() - - -if __name__ == "__main__": - CLI_ARGS = handle_args() - build_sdb() diff --git a/bin/data_set_tool.py b/bin/data_set_tool.py new file mode 100755 index 00000000..589b4585 --- /dev/null +++ b/bin/data_set_tool.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python +''' +Tool for building a combined SDB or CSV sample-set from other sets +Use 'python3 data_set_tool.py -h' for help +''' +import sys +import argparse +import progressbar +from pathlib import Path + +from deepspeech_training.util.audio import ( + AUDIO_TYPE_PCM, + AUDIO_TYPE_OPUS, + AUDIO_TYPE_WAV, + change_audio_types, +) +from deepspeech_training.util.downloader import SIMPLE_BAR +from deepspeech_training.util.sample_collections import ( + CSVWriter, + DirectSDBWriter, + samples_from_sources, +) +from deepspeech_training.util.augmentations import ( + parse_augmentations, + apply_sample_augmentations, + SampleAugmentation +) + +AUDIO_TYPE_LOOKUP = {'wav': AUDIO_TYPE_WAV, 'opus': AUDIO_TYPE_OPUS} + + +def build_data_set(): + audio_type = AUDIO_TYPE_LOOKUP[CLI_ARGS.audio_type] + augmentations = parse_augmentations(CLI_ARGS.augment) + if any(not isinstance(a, SampleAugmentation) for a in augmentations): + print('Warning: Some of the specified augmentations will not get applied, as this tool only supports ' + 'overlay, codec, reverb, resample and volume.') + extension = Path(CLI_ARGS.target).suffix.lower() + labeled = not CLI_ARGS.unlabeled + if extension == '.csv': + writer = CSVWriter(CLI_ARGS.target, absolute_paths=CLI_ARGS.absolute_paths, labeled=labeled) + elif extension == '.sdb': + writer = DirectSDBWriter(CLI_ARGS.target, audio_type=audio_type, labeled=labeled) + else: + print('Unknown extension of target file - has to be either .csv or .sdb') + 
sys.exit(1) + with writer: + samples = samples_from_sources(CLI_ARGS.sources, labeled=not CLI_ARGS.unlabeled) + num_samples = len(samples) + if augmentations: + samples = apply_sample_augmentations(samples, audio_type=AUDIO_TYPE_PCM, augmentations=augmentations) + bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR) + for sample in bar(change_audio_types( + samples, + audio_type=audio_type, + bitrate=CLI_ARGS.bitrate, + processes=CLI_ARGS.workers)): + writer.add(sample) + + +def handle_args(): + parser = argparse.ArgumentParser( + description='Tool for building a combined SDB or CSV sample-set from other sets' + ) + parser.add_argument( + 'sources', + nargs='+', + help='Source CSV and/or SDB files - ' + 'Note: For getting a correctly ordered target set, source SDBs have to have their samples ' + 'already ordered from shortest to longest.', + ) + parser.add_argument( + 'target', + help='SDB or CSV file to create' + ) + parser.add_argument( + '--audio-type', + default='opus', + choices=AUDIO_TYPE_LOOKUP.keys(), + help='Audio representation inside target SDB', + ) + parser.add_argument( + '--bitrate', + type=int, + help='Bitrate for lossy compressed SDB samples like in case of --audio-type opus', + ) + parser.add_argument( + '--workers', type=int, default=None, help='Number of encoding SDB workers' + ) + parser.add_argument( + '--unlabeled', + action='store_true', + help='If to build an SDB with unlabeled (audio only) samples - ' + 'typically used for building noise augmentation corpora', + ) + parser.add_argument( + '--absolute-paths', + action='store_true', + help='If to reference samples by their absolute paths when writing CSV files', + ) + parser.add_argument( + '--augment', + action='append', + help='Add an augmentation operation', + ) + return parser.parse_args() + + +if __name__ == '__main__': + CLI_ARGS = handle_args() + build_data_set() diff --git a/bin/play.py b/bin/play.py index e9348c8e..1e8c59ca 100755 --- a/bin/play.py +++ 
b/bin/play.py @@ -1,7 +1,7 @@ #!/usr/bin/env python """ Tool for playing (and augmenting) single samples or samples from Sample Databases (SDB files) and DeepSpeech CSV files -Use "python3 build_sdb.py -h" for help +Use "python3 play.py -h" for help """ import os diff --git a/bin/run-tc-ldc93s1_checkpoint_sdb.sh b/bin/run-tc-ldc93s1_checkpoint_sdb.sh index 6f5c307f..c811f984 100755 --- a/bin/run-tc-ldc93s1_checkpoint_sdb.sh +++ b/bin/run-tc-ldc93s1_checkpoint_sdb.sh @@ -13,7 +13,7 @@ fi; if [ ! -f "${ldc93s1_dir}/ldc93s1.sdb" ]; then echo "Converting LDC93S1 example data, saving to ${ldc93s1_sdb}." - python -u bin/build_sdb.py ${ldc93s1_csv} ${ldc93s1_sdb} + python -u bin/data_set_tool.py ${ldc93s1_csv} ${ldc93s1_sdb} fi; # Force only one visible device because we have a single-sample dataset diff --git a/bin/run-tc-ldc93s1_new_sdb.sh b/bin/run-tc-ldc93s1_new_sdb.sh index 76032aa2..6cd4a450 100755 --- a/bin/run-tc-ldc93s1_new_sdb.sh +++ b/bin/run-tc-ldc93s1_new_sdb.sh @@ -16,7 +16,7 @@ fi; if [ ! -f "${ldc93s1_dir}/ldc93s1.sdb" ]; then echo "Converting LDC93S1 example data, saving to ${ldc93s1_sdb}." - python -u bin/build_sdb.py ${ldc93s1_csv} ${ldc93s1_sdb} + python -u bin/data_set_tool.py ${ldc93s1_csv} ${ldc93s1_sdb} fi; # Force only one visible device because we have a single-sample dataset diff --git a/bin/run-tc-ldc93s1_new_sdb_csv.sh b/bin/run-tc-ldc93s1_new_sdb_csv.sh index 1b0f6d3d..ec3e7774 100755 --- a/bin/run-tc-ldc93s1_new_sdb_csv.sh +++ b/bin/run-tc-ldc93s1_new_sdb_csv.sh @@ -16,7 +16,7 @@ fi; if [ ! -f "${ldc93s1_dir}/ldc93s1.sdb" ]; then echo "Converting LDC93S1 example data, saving to ${ldc93s1_sdb}." 
- python -u bin/build_sdb.py ${ldc93s1_csv} ${ldc93s1_sdb} + python -u bin/data_set_tool.py ${ldc93s1_csv} ${ldc93s1_sdb} fi; # Force only one visible device because we have a single-sample dataset diff --git a/doc/TRAINING.rst b/doc/TRAINING.rst index 764088b5..68007457 100644 --- a/doc/TRAINING.rst +++ b/doc/TRAINING.rst @@ -496,7 +496,7 @@ Example training with all augmentations: [...] -The ``bin/play.py`` tool also supports ``--augment`` parameters (for sample domain augmentations) and can be used for experimenting with different configurations. +The ``bin/play.py`` and ``bin/data_set_tool.py`` tools also support ``--augment`` parameters (for sample domain augmentations) and can be used for experimenting with different configurations or creating augmented data sets. Example of playing all samples with reverberation and maximized volume: @@ -510,3 +510,12 @@ Example simulation of the codec augmentation of a wav-file first at the beginnin bin/play.py --augment codec[p=0.1,bitrate=48000:16000] --clock 0.0 test.wav bin/play.py --augment codec[p=0.1,bitrate=48000:16000] --clock 1.0 test.wav + +Example of creating a pre-augmented test set: + +.. 
code-block:: bash + + bin/data_set_tool.py \ + --augment overlay[source=noise.sdb,layers=1,snr=20~10] \ + --augment resample[rate=12000:8000~4000] \ + test.sdb test-augmented.sdb diff --git a/training/deepspeech_training/util/sample_collections.py b/training/deepspeech_training/util/sample_collections.py index 37210659..b220e1b3 100644 --- a/training/deepspeech_training/util/sample_collections.py +++ b/training/deepspeech_training/util/sample_collections.py @@ -7,7 +7,15 @@ from pathlib import Path from functools import partial from .helpers import MEGABYTE, GIGABYTE, Interleaved -from .audio import Sample, DEFAULT_FORMAT, AUDIO_TYPE_OPUS, SERIALIZABLE_AUDIO_TYPES, get_audio_type_from_extension +from .audio import ( + Sample, + DEFAULT_FORMAT, + AUDIO_TYPE_PCM, + AUDIO_TYPE_OPUS, + SERIALIZABLE_AUDIO_TYPES, + get_audio_type_from_extension, + write_wav +) BIG_ENDIAN = 'big' INT_SIZE = 4 @@ -297,6 +305,70 @@ class SDB: # pylint: disable=too-many-instance-attributes self.close() +class CSVWriter: # pylint: disable=too-many-instance-attributes + """Sample collection writer for writing a CSV data-set and all its referenced WAV samples""" + def __init__(self, + csv_filename, + absolute_paths=False, + labeled=True): + """ + Parameters + ---------- + csv_filename : str + Path to the CSV file to write. + Will create a directory (CSV-filename without extension) next to it and fail if it already exists. + absolute_paths : bool + If paths in CSV file should be absolute instead of relative to the CSV file's parent directory. + labeled : bool or None + If True: Writes labeled samples (util.sample_collections.LabeledSample) only. + If False: Ignores transcripts (if available) and writes (unlabeled) util.audio.Sample instances. 
+ """ + self.csv_filename = Path(csv_filename) + self.csv_base_dir = self.csv_filename.parent.resolve().absolute() + self.set_name = self.csv_filename.stem + self.csv_dir = self.csv_base_dir / self.set_name + if self.csv_dir.exists(): + raise RuntimeError('"{}" already existing'.format(self.csv_dir)) + os.mkdir(str(self.csv_dir)) + self.absolute_paths = absolute_paths + fieldnames = ['wav_filename', 'wav_filesize'] + self.labeled = labeled + if labeled: + fieldnames.append('transcript') + self.csv_file = open(csv_filename, 'w', encoding='utf-8', newline='') + self.csv_writer = csv.DictWriter(self.csv_file, fieldnames=fieldnames) + self.csv_writer.writeheader() + self.counter = 0 + + def __enter__(self): + return self + + def add(self, sample): + sample_filename = self.csv_dir / 'sample{0:08d}.wav'.format(self.counter) + self.counter += 1 + sample.change_audio_type(AUDIO_TYPE_PCM) + write_wav(str(sample_filename), sample.audio, audio_format=sample.audio_format) + sample.sample_id = str(sample_filename.relative_to(self.csv_base_dir)) + row = { + 'wav_filename': str(sample_filename.absolute()) if self.absolute_paths else sample.sample_id, + 'wav_filesize': sample_filename.stat().st_size + } + if self.labeled: + row['transcript'] = sample.transcript + self.csv_writer.writerow(row) + return sample.sample_id + + def close(self): + if self.csv_file: + self.csv_file.close() + + def __len__(self): + return self.counter + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + class SampleList: """Sample collection base class with samples loaded from a list of in-memory paths.""" def __init__(self, samples, labeled=True): From 48fb43c3eb22175db0c5a9f9337efefc906cea84 Mon Sep 17 00:00:00 2001 From: Carlos Fonseca M Date: Tue, 21 Jul 2020 10:31:22 -0600 Subject: [PATCH 21/33] Add UWP Nuget packing support --- native_client/dotnet/DeepSpeech.sln | 6 +- .../DeepSpeechClient/DeepSpeechClient.csproj | 87 +++++++------------ .../Properties/AssemblyInfo.cs | 36 -------- 
taskcluster/.shared.yml | 8 +- taskcluster/tc-build-utils.sh | 18 +++- taskcluster/win-opt-base.tyml | 2 +- taskcluster/worker.cyml | 4 +- 7 files changed, 57 insertions(+), 104 deletions(-) delete mode 100644 native_client/dotnet/DeepSpeechClient/Properties/AssemblyInfo.cs diff --git a/native_client/dotnet/DeepSpeech.sln b/native_client/dotnet/DeepSpeech.sln index 091f2062..78afe7db 100644 --- a/native_client/dotnet/DeepSpeech.sln +++ b/native_client/dotnet/DeepSpeech.sln @@ -1,8 +1,8 @@ Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.28307.136 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.30204.135 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechClient", "DeepSpeechClient\DeepSpeechClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeepSpeechClient", "DeepSpeechClient\DeepSpeechClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}" EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechConsole", "DeepSpeechConsole\DeepSpeechConsole.csproj", "{312965E5-C4F6-4D95-BA64-79906B8BC7AC}" EndProject diff --git a/native_client/dotnet/DeepSpeechClient/DeepSpeechClient.csproj b/native_client/dotnet/DeepSpeechClient/DeepSpeechClient.csproj index 0139b3e8..33a94115 100644 --- a/native_client/dotnet/DeepSpeechClient/DeepSpeechClient.csproj +++ b/native_client/dotnet/DeepSpeechClient/DeepSpeechClient.csproj @@ -1,63 +1,42 @@ - - - + - Debug - AnyCPU - {56DE4091-BBBE-47E4-852D-7268B33B971F} - Library - Properties - DeepSpeechClient - DeepSpeechClient - v4.6.2 - 512 - true + Library + net452;net46;net47;uap10.0 + x64 - - true - bin\x64\Debug\ - DEBUG;TRACE - true - full + x64 - prompt - MinimumRecommendedRules.ruleset - - bin\x64\Release\ - TRACE - true - pdbonly - x64 - prompt - MinimumRecommendedRules.ruleset + true - - - - - - - - - + + true + x64 + + + + false + 
UAP,Version=v10.0 + UAP + 10.0.19041.0 + 10.0.10240.0 + .NETCore + v5.0 + $(DefineConstants);WINDOWS_UWP + $(MSBuildExtensionsPath)\Microsoft\WindowsXaml\v$(VisualStudioVersion)\Microsoft.Windows.UI.Xaml.CSharp.targets + None + + + + - - - - - - - - - - - - - + - - - \ No newline at end of file + + + + $(DefineConstants);NO_HTTPS + + diff --git a/native_client/dotnet/DeepSpeechClient/Properties/AssemblyInfo.cs b/native_client/dotnet/DeepSpeechClient/Properties/AssemblyInfo.cs deleted file mode 100644 index 96e05580..00000000 --- a/native_client/dotnet/DeepSpeechClient/Properties/AssemblyInfo.cs +++ /dev/null @@ -1,36 +0,0 @@ -using System.Reflection; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; - -// General Information about an assembly is controlled through the following -// set of attributes. Change these attribute values to modify the information -// associated with an assembly. -[assembly: AssemblyTitle("DeepSpeechClient")] -[assembly: AssemblyDescription("")] -[assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("DeepSpeechClient")] -[assembly: AssemblyCopyright("Copyright © 2018")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] - -// Setting ComVisible to false makes the types in this assembly not visible -// to COM components. If you need to access a type in this assembly from -// COM, set the ComVisible attribute to true on that type. 
-[assembly: ComVisible(false)] - -// The following GUID is for the ID of the typelib if this project is exposed to COM -[assembly: Guid("56de4091-bbbe-47e4-852d-7268b33b971f")] - -// Version information for an assembly consists of the following four values: -// -// Major Version -// Minor Version -// Build Number -// Revision -// -// You can specify all the values or you can default the Build and Revision Numbers -// by using the '*' as shown below: -// [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("1.0.0.0")] -[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index 03bdd3fd..3b9ad525 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -161,11 +161,11 @@ system: url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-armv7/artifacts/public/home.tar.xz" namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-armv7" win_amd64_cpu: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.2.win/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.2.win" win_amd64_cuda: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win-cuda/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win-cuda" + url: 
"https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.2.win-cuda/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d.2.win-cuda" ios_arm64: url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.3.ios_arm64/artifacts/public/home.tar.xz" namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.3.ios_arm64" diff --git a/taskcluster/tc-build-utils.sh b/taskcluster/tc-build-utils.sh index 75645582..4088a7ce 100755 --- a/taskcluster/tc-build-utils.sh +++ b/taskcluster/tc-build-utils.sh @@ -231,7 +231,7 @@ do_deepspeech_netframework_build() cd ${DS_DSDIR}/native_client/dotnet # Setup dependencies - nuget install DeepSpeechConsole/packages.config -OutputDirectory packages/ + nuget restore DeepSpeech.sln MSBUILD="$(cygpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\MSBuild\Current\Bin\MSBuild.exe')" @@ -242,23 +242,30 @@ do_deepspeech_netframework_build() DeepSpeechClient/DeepSpeechClient.csproj \ /p:Configuration=Release \ /p:Platform=x64 \ - /p:TargetFrameworkVersion="v4.5.2" \ + /p:TargetFramework="net452" \ /p:OutputPath=bin/nuget/x64/v4.5 MSYS2_ARG_CONV_EXCL='/' "${MSBUILD}" \ DeepSpeechClient/DeepSpeechClient.csproj \ /p:Configuration=Release \ /p:Platform=x64 \ - /p:TargetFrameworkVersion="v4.6" \ + /p:TargetFramework="net46" \ /p:OutputPath=bin/nuget/x64/v4.6 MSYS2_ARG_CONV_EXCL='/' "${MSBUILD}" \ DeepSpeechClient/DeepSpeechClient.csproj \ /p:Configuration=Release \ /p:Platform=x64 \ - /p:TargetFrameworkVersion="v4.7" \ + /p:TargetFramework="net47" \ /p:OutputPath=bin/nuget/x64/v4.7 + MSYS2_ARG_CONV_EXCL='/' "${MSBUILD}" \ + DeepSpeechClient/DeepSpeechClient.csproj \ + /p:Configuration=Release \ + /p:Platform=x64 \ + /p:TargetFramework="uap10.0" \ + 
/p:OutputPath=bin/nuget/x64/uap10.0 + MSYS2_ARG_CONV_EXCL='/' "${MSBUILD}" \ DeepSpeechConsole/DeepSpeechConsole.csproj \ /p:Configuration=Release \ @@ -307,6 +314,9 @@ do_nuget_build() mkdir -p nupkg/lib/net47/ cp DeepSpeechClient/bin/nuget/x64/v4.7/DeepSpeechClient.dll nupkg/lib/net47/ + mkdir -p nupkg/lib/uap10.0/ + cp DeepSpeechClient/bin/nuget/x64/uap10.0/DeepSpeechClient.dll nupkg/lib/uap10.0/ + PROJECT_VERSION=$(strip "${DS_VERSION}") sed \ -e "s/\$NUPKG_ID/${PROJECT_NAME}/" \ diff --git a/taskcluster/win-opt-base.tyml b/taskcluster/win-opt-base.tyml index b402e4b2..db232e24 100644 --- a/taskcluster/win-opt-base.tyml +++ b/taskcluster/win-opt-base.tyml @@ -43,7 +43,7 @@ payload: - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" - echo .\msys64\usr\bin\bash.exe --login -cxe " export LC_ALL=C && - export PATH=\"/c/builds/tc-workdir/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:/c/Program Files (x86)/Windows Kits/10/bin/x64/:$PATH\" && + export PATH=\"/c/builds/tc-workdir/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:/c/Program Files (x86)/Windows Kits/10/bin/x64/:/c/Program Files/dotnet/:$PATH\" && export TASKCLUSTER_ARTIFACTS=\"$USERPROFILE/public\" && export TASKCLUSTER_TASK_DIR=\"/c/builds/tc-workdir/\" && (mkdir $TASKCLUSTER_TASK_DIR || rm -fr $TASKCLUSTER_TASK_DIR/*) && cd $TASKCLUSTER_TASK_DIR && diff --git a/taskcluster/worker.cyml b/taskcluster/worker.cyml index 65c5c895..9ef5a85e 100644 --- a/taskcluster/worker.cyml +++ b/taskcluster/worker.cyml @@ -4,8 +4,8 @@ taskcluster: provisionerId: proj-deepspeech workerType: ci workerTypeKvm: kvm - workerTypeWin: win - workerTypeCuda: win-gpu-b + workerTypeWin: win-b + workerTypeCuda: win-gpu dockerrpi3: provisionerId: proj-deepspeech workerType: ds-rpi3 From 9bdce0a30584fa1a3c66260d251a042243976541 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 21 Jul 2020 19:09:04 +0200 Subject: [PATCH 22/33] Move deepspeech_ios_test 
projects to same level as deepspeech_ios --- .../swift/deepspeech_ios.xcworkspace/contents.xcworkspacedata | 2 +- .../deepspeech_ios_test.xcodeproj/project.pbxproj | 0 .../project.xcworkspace/contents.xcworkspacedata | 0 .../project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist | 0 .../xcshareddata/xcschemes/deepspeech_ios_test.xcscheme | 0 .../{deepspeech_ios_test => }/AppDelegate.swift | 0 .../Assets.xcassets/AppIcon.appiconset/Contents.json | 0 .../{deepspeech_ios_test => }/Assets.xcassets/Contents.json | 0 .../Base.lproj/LaunchScreen.storyboard | 0 .../{deepspeech_ios_test => }/ContentView.swift | 0 .../deepspeech_ios_test/{deepspeech_ios_test => }/Info.plist | 0 .../Preview Content/Preview Assets.xcassets/Contents.json | 0 .../{deepspeech_ios_test => }/SceneDelegate.swift | 0 .../deepspeech_ios_testTests/Info.plist | 0 .../deepspeech_ios_testTests/deepspeech_ios_testTests.swift | 0 .../deepspeech_ios_testUITests/Info.plist | 0 .../deepspeech_ios_testUITests/deepspeech_ios_testUITests.swift | 0 17 files changed, 1 insertion(+), 1 deletion(-) rename native_client/swift/{deepspeech_ios_test => }/deepspeech_ios_test.xcodeproj/project.pbxproj (100%) rename native_client/swift/{deepspeech_ios_test => }/deepspeech_ios_test.xcodeproj/project.xcworkspace/contents.xcworkspacedata (100%) rename native_client/swift/{deepspeech_ios_test => }/deepspeech_ios_test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist (100%) rename native_client/swift/{deepspeech_ios_test => }/deepspeech_ios_test.xcodeproj/xcshareddata/xcschemes/deepspeech_ios_test.xcscheme (100%) rename native_client/swift/deepspeech_ios_test/{deepspeech_ios_test => }/AppDelegate.swift (100%) rename native_client/swift/deepspeech_ios_test/{deepspeech_ios_test => }/Assets.xcassets/AppIcon.appiconset/Contents.json (100%) rename native_client/swift/deepspeech_ios_test/{deepspeech_ios_test => }/Assets.xcassets/Contents.json (100%) rename 
native_client/swift/deepspeech_ios_test/{deepspeech_ios_test => }/Base.lproj/LaunchScreen.storyboard (100%) rename native_client/swift/deepspeech_ios_test/{deepspeech_ios_test => }/ContentView.swift (100%) rename native_client/swift/deepspeech_ios_test/{deepspeech_ios_test => }/Info.plist (100%) rename native_client/swift/deepspeech_ios_test/{deepspeech_ios_test => }/Preview Content/Preview Assets.xcassets/Contents.json (100%) rename native_client/swift/deepspeech_ios_test/{deepspeech_ios_test => }/SceneDelegate.swift (100%) rename native_client/swift/{deepspeech_ios_test => }/deepspeech_ios_testTests/Info.plist (100%) rename native_client/swift/{deepspeech_ios_test => }/deepspeech_ios_testTests/deepspeech_ios_testTests.swift (100%) rename native_client/swift/{deepspeech_ios_test => }/deepspeech_ios_testUITests/Info.plist (100%) rename native_client/swift/{deepspeech_ios_test => }/deepspeech_ios_testUITests/deepspeech_ios_testUITests.swift (100%) diff --git a/native_client/swift/deepspeech_ios.xcworkspace/contents.xcworkspacedata b/native_client/swift/deepspeech_ios.xcworkspace/contents.xcworkspacedata index 73975e36..8aec54f0 100644 --- a/native_client/swift/deepspeech_ios.xcworkspace/contents.xcworkspacedata +++ b/native_client/swift/deepspeech_ios.xcworkspace/contents.xcworkspacedata @@ -5,6 +5,6 @@ location = "group:deepspeech_ios.xcodeproj"> + location = "group:deepspeech_ios_test.xcodeproj"> diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.pbxproj b/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj similarity index 100% rename from native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.pbxproj rename to native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.xcworkspace/contents.xcworkspacedata 
b/native_client/swift/deepspeech_ios_test.xcodeproj/project.xcworkspace/contents.xcworkspacedata similarity index 100% rename from native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.xcworkspace/contents.xcworkspacedata rename to native_client/swift/deepspeech_ios_test.xcodeproj/project.xcworkspace/contents.xcworkspacedata diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/native_client/swift/deepspeech_ios_test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist similarity index 100% rename from native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist rename to native_client/swift/deepspeech_ios_test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/xcshareddata/xcschemes/deepspeech_ios_test.xcscheme b/native_client/swift/deepspeech_ios_test.xcodeproj/xcshareddata/xcschemes/deepspeech_ios_test.xcscheme similarity index 100% rename from native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/xcshareddata/xcschemes/deepspeech_ios_test.xcscheme rename to native_client/swift/deepspeech_ios_test.xcodeproj/xcshareddata/xcschemes/deepspeech_ios_test.xcscheme diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/AppDelegate.swift b/native_client/swift/deepspeech_ios_test/AppDelegate.swift similarity index 100% rename from native_client/swift/deepspeech_ios_test/deepspeech_ios_test/AppDelegate.swift rename to native_client/swift/deepspeech_ios_test/AppDelegate.swift diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Assets.xcassets/AppIcon.appiconset/Contents.json b/native_client/swift/deepspeech_ios_test/Assets.xcassets/AppIcon.appiconset/Contents.json similarity index 100% rename from 
native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Assets.xcassets/AppIcon.appiconset/Contents.json rename to native_client/swift/deepspeech_ios_test/Assets.xcassets/AppIcon.appiconset/Contents.json diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Assets.xcassets/Contents.json b/native_client/swift/deepspeech_ios_test/Assets.xcassets/Contents.json similarity index 100% rename from native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Assets.xcassets/Contents.json rename to native_client/swift/deepspeech_ios_test/Assets.xcassets/Contents.json diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Base.lproj/LaunchScreen.storyboard b/native_client/swift/deepspeech_ios_test/Base.lproj/LaunchScreen.storyboard similarity index 100% rename from native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Base.lproj/LaunchScreen.storyboard rename to native_client/swift/deepspeech_ios_test/Base.lproj/LaunchScreen.storyboard diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/ContentView.swift b/native_client/swift/deepspeech_ios_test/ContentView.swift similarity index 100% rename from native_client/swift/deepspeech_ios_test/deepspeech_ios_test/ContentView.swift rename to native_client/swift/deepspeech_ios_test/ContentView.swift diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Info.plist b/native_client/swift/deepspeech_ios_test/Info.plist similarity index 100% rename from native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Info.plist rename to native_client/swift/deepspeech_ios_test/Info.plist diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Preview Content/Preview Assets.xcassets/Contents.json b/native_client/swift/deepspeech_ios_test/Preview Content/Preview Assets.xcassets/Contents.json similarity index 100% rename from native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Preview Content/Preview Assets.xcassets/Contents.json rename 
to native_client/swift/deepspeech_ios_test/Preview Content/Preview Assets.xcassets/Contents.json diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/SceneDelegate.swift b/native_client/swift/deepspeech_ios_test/SceneDelegate.swift similarity index 100% rename from native_client/swift/deepspeech_ios_test/deepspeech_ios_test/SceneDelegate.swift rename to native_client/swift/deepspeech_ios_test/SceneDelegate.swift diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_testTests/Info.plist b/native_client/swift/deepspeech_ios_testTests/Info.plist similarity index 100% rename from native_client/swift/deepspeech_ios_test/deepspeech_ios_testTests/Info.plist rename to native_client/swift/deepspeech_ios_testTests/Info.plist diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_testTests/deepspeech_ios_testTests.swift b/native_client/swift/deepspeech_ios_testTests/deepspeech_ios_testTests.swift similarity index 100% rename from native_client/swift/deepspeech_ios_test/deepspeech_ios_testTests/deepspeech_ios_testTests.swift rename to native_client/swift/deepspeech_ios_testTests/deepspeech_ios_testTests.swift diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_testUITests/Info.plist b/native_client/swift/deepspeech_ios_testUITests/Info.plist similarity index 100% rename from native_client/swift/deepspeech_ios_test/deepspeech_ios_testUITests/Info.plist rename to native_client/swift/deepspeech_ios_testUITests/Info.plist diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_testUITests/deepspeech_ios_testUITests.swift b/native_client/swift/deepspeech_ios_testUITests/deepspeech_ios_testUITests.swift similarity index 100% rename from native_client/swift/deepspeech_ios_test/deepspeech_ios_testUITests/deepspeech_ios_testUITests.swift rename to native_client/swift/deepspeech_ios_testUITests/deepspeech_ios_testUITests.swift From ce0ef4fd1ed7da9fe45169cfc83698de8b3e1d7c Mon Sep 17 00:00:00 2001 From: Reuben 
Morais Date: Tue, 21 Jul 2020 19:57:07 +0200 Subject: [PATCH 23/33] Build and publish deepspeech_ios.framework --- taskcluster/ios-arm64-tflite-opt.yml | 2 +- taskcluster/ios-build.sh | 6 +++-- taskcluster/ios-package.sh | 28 ++++++++++++++++++++++++ taskcluster/ios-x86_64-tflite-opt.yml | 2 +- taskcluster/scriptworker-task-github.yml | 5 +++++ taskcluster/tc-build-utils.sh | 15 +++++++++++++ 6 files changed, 54 insertions(+), 4 deletions(-) create mode 100755 taskcluster/ios-package.sh diff --git a/taskcluster/ios-arm64-tflite-opt.yml b/taskcluster/ios-arm64-tflite-opt.yml index cd85ca7f..0d8da19b 100644 --- a/taskcluster/ios-arm64-tflite-opt.yml +++ b/taskcluster/ios-arm64-tflite-opt.yml @@ -13,7 +13,7 @@ build: tensorflow: ${system.tensorflow.ios_arm64.url} scripts: build: "taskcluster/ios-build.sh --arm64" - package: "taskcluster/package.sh" + package: "taskcluster/ios-package.sh --arm64" nc_asset_name: "native_client.arm64.tflite.ios.tar.xz" maxRunTime: 14400 metadata: diff --git a/taskcluster/ios-build.sh b/taskcluster/ios-build.sh index ed99cb60..282f8c32 100755 --- a/taskcluster/ios-build.sh +++ b/taskcluster/ios-build.sh @@ -2,7 +2,7 @@ set -xe -platform=$1 +arch=$1 source $(dirname "$0")/tc-tests-utils.sh @@ -12,7 +12,7 @@ BAZEL_TARGETS=" //native_client:libdeepspeech.so " -if [ "${platform}" = "--arm64" ]; then +if [ "${arch}" = "--arm64" ]; then BAZEL_BUILD_FLAGS="${BAZEL_IOS_ARM64_FLAGS}" else BAZEL_BUILD_FLAGS="${BAZEL_IOS_X86_64_FLAGS}" @@ -21,3 +21,5 @@ fi BAZEL_ENV_FLAGS="TF_NEED_CUDA=0" do_bazel_build + +do_deepspeech_ios_framework_build "${arch}" diff --git a/taskcluster/ios-package.sh b/taskcluster/ios-package.sh new file mode 100755 index 00000000..eb608054 --- /dev/null +++ b/taskcluster/ios-package.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +set -xe + +arch=$1 + +source $(dirname "$0")/tc-tests-utils.sh + +mkdir -p ${TASKCLUSTER_ARTIFACTS} || true + +cp ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/bazel*.log ${TASKCLUSTER_ARTIFACTS}/ + 
+package_native_client "native_client.tar.xz" + +package_libdeepspeech_as_zip "libdeepspeech.zip" + +case $arch in +"--x86_64") + ${TAR} -cf - \ + -C ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/swift/DerivedData/Build/Products/Release-iphonesimulator/deepspeech_ios.framework \ + | ${XZ} > "${TASKCLUSTER_ARTIFACTS}/ deepspeech_ios.framework.x86_64.tar.xz" + ;; +"--arm64") + ${TAR} -cf - \ + -C ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/swift/DerivedData/Build/Products/Release-iphoneos/deepspeech_ios.framework \ + | ${XZ} > "${TASKCLUSTER_ARTIFACTS}/ deepspeech_ios.framework.arm64.tar.xz" +;; +esac diff --git a/taskcluster/ios-x86_64-tflite-opt.yml b/taskcluster/ios-x86_64-tflite-opt.yml index b55cebe7..22acb435 100644 --- a/taskcluster/ios-x86_64-tflite-opt.yml +++ b/taskcluster/ios-x86_64-tflite-opt.yml @@ -13,7 +13,7 @@ build: tensorflow: ${system.tensorflow.ios_x86_64.url} scripts: build: "taskcluster/ios-build.sh --x86_64" - package: "taskcluster/package.sh" + package: "taskcluster/ios-package.sh --x86_64" nc_asset_name: "native_client.x86_64.tflite.ios.tar.xz" maxRunTime: 14400 metadata: diff --git a/taskcluster/scriptworker-task-github.yml b/taskcluster/scriptworker-task-github.yml index 3003baad..f740a39d 100644 --- a/taskcluster/scriptworker-task-github.yml +++ b/taskcluster/scriptworker-task-github.yml @@ -20,6 +20,8 @@ build: - "win-amd64-cpu-opt" - "win-amd64-gpu-opt" - "win-amd64-ctc-opt" + - "ios-x86_64-tflite-opt" + - "ios-arm64-tflite-opt" allowed: - "tag" ref_match: "refs/tags/" @@ -66,6 +68,9 @@ build: - "win-amd64-cpu-opt" - "win-amd64-gpu-opt" - "win-amd64-tflite-opt" + ios: + - "ios-x86_64-tflite-opt" + - "ios-arm64-tflite-opt" metadata: name: "DeepSpeech GitHub Packages" description: "Trigger Uploading of DeepSpeech Packages to GitHub release page" diff --git a/taskcluster/tc-build-utils.sh b/taskcluster/tc-build-utils.sh index 75645582..349205f5 100755 --- a/taskcluster/tc-build-utils.sh +++ b/taskcluster/tc-build-utils.sh @@ -315,3 +315,18 @@ 
do_nuget_build() nuget pack nupkg/deepspeech.nuspec } + +do_deepspeech_ios_framework_build() +{ + arch=$1 + cp ${DS_TFDIR}/bazel-bin/native_client/libdeepspeech.so ${DS_DSDIR}/native_client/swift/libdeepspeech.so + cd ${DS_DSDIR}/native_client/swift + case $arch in + "--x86_64") + xcodebuild -workspace deepspeech_ios.xcworkspace -scheme deepspeech_ios_test -configuration Release -arch x86_64 -sdk "iphonesimulator" -derivedDataPath DerivedData + ;; + "--arm64") + xcodebuild -workspace deepspeech_ios.xcworkspace -scheme deepspeech_ios_test -configuration Release -arch arm64 -sdk "iphoneos" -derivedDataPath DerivedData + ;; + esac +} From 2fd1474e6992a873aa72856ca606913dbe765581 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 21 Jul 2020 20:12:17 +0200 Subject: [PATCH 24/33] Fix deepspeech_ios_project reference after folder move --- .../swift/deepspeech_ios_test.xcodeproj/project.pbxproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj b/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj index e9a7d0a2..eadc4fae 100644 --- a/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj +++ b/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj @@ -53,7 +53,7 @@ /* Begin PBXFileReference section */ 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = deepspeech_ios.framework; sourceTree = BUILT_PRODUCTS_DIR; }; - 507CD3A224B61FEA00409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; name = libdeepspeech.so; path = ../libdeepspeech.so; sourceTree = ""; }; + 507CD3A224B61FEA00409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; name = libdeepspeech.so; path = libdeepspeech.so; sourceTree = ""; }; 50F787EF2497683900D52237 /* deepspeech_ios_test.app */ = {isa = PBXFileReference; 
explicitFileType = wrapper.application; includeInIndex = 0; path = deepspeech_ios_test.app; sourceTree = BUILT_PRODUCTS_DIR; }; 50F787F22497683900D52237 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; 50F787F42497683900D52237 /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = ""; }; From 47685f059f8d81fc0e44ecfd8409b5683b109813 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 21 Jul 2020 23:52:29 +0200 Subject: [PATCH 25/33] Disable code signing in CI builds --- taskcluster/tc-build-utils.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/taskcluster/tc-build-utils.sh b/taskcluster/tc-build-utils.sh index 349205f5..ac0bd39e 100755 --- a/taskcluster/tc-build-utils.sh +++ b/taskcluster/tc-build-utils.sh @@ -323,10 +323,10 @@ do_deepspeech_ios_framework_build() cd ${DS_DSDIR}/native_client/swift case $arch in "--x86_64") - xcodebuild -workspace deepspeech_ios.xcworkspace -scheme deepspeech_ios_test -configuration Release -arch x86_64 -sdk "iphonesimulator" -derivedDataPath DerivedData + xcodebuild -workspace deepspeech_ios.xcworkspace -scheme deepspeech_ios_test -configuration Release -arch x86_64 -sdk "iphonesimulator" -derivedDataPath DerivedData CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO ;; "--arm64") - xcodebuild -workspace deepspeech_ios.xcworkspace -scheme deepspeech_ios_test -configuration Release -arch arm64 -sdk "iphoneos" -derivedDataPath DerivedData + xcodebuild -workspace deepspeech_ios.xcworkspace -scheme deepspeech_ios_test -configuration Release -arch arm64 -sdk "iphoneos" -derivedDataPath DerivedData CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO ;; esac } From 509d06d474c9c1f55ea7335bbf0223b38bf52996 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Wed, 22 Jul 2020 09:49:19 +0200 Subject: [PATCH 26/33] 
Fix typo in ios-package.sh --- taskcluster/ios-package.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/taskcluster/ios-package.sh b/taskcluster/ios-package.sh index eb608054..21f59de1 100755 --- a/taskcluster/ios-package.sh +++ b/taskcluster/ios-package.sh @@ -17,12 +17,12 @@ package_libdeepspeech_as_zip "libdeepspeech.zip" case $arch in "--x86_64") ${TAR} -cf - \ - -C ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/swift/DerivedData/Build/Products/Release-iphonesimulator/deepspeech_ios.framework \ - | ${XZ} > "${TASKCLUSTER_ARTIFACTS}/ deepspeech_ios.framework.x86_64.tar.xz" + -C ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/swift/DerivedData/Build/Products/Release-iphonesimulator/ deepspeech_ios.framework \ + | ${XZ} > "${TASKCLUSTER_ARTIFACTS}/deepspeech_ios.framework.x86_64.tar.xz" ;; "--arm64") ${TAR} -cf - \ - -C ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/swift/DerivedData/Build/Products/Release-iphoneos/deepspeech_ios.framework \ - | ${XZ} > "${TASKCLUSTER_ARTIFACTS}/ deepspeech_ios.framework.arm64.tar.xz" + -C ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/swift/DerivedData/Build/Products/Release-iphoneos/ deepspeech_ios.framework \ + | ${XZ} > "${TASKCLUSTER_ARTIFACTS}/deepspeech_ios.framework.arm64.tar.xz" ;; esac From 844b375e7d6af74223017392c697f917526e1e40 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Wed, 22 Jul 2020 10:23:35 +0200 Subject: [PATCH 27/33] Address review comments --- taskcluster/ios-package.sh | 14 ++++++++------ taskcluster/tc-build-utils.sh | 7 +++++-- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/taskcluster/ios-package.sh b/taskcluster/ios-package.sh index 21f59de1..16cc9f96 100755 --- a/taskcluster/ios-package.sh +++ b/taskcluster/ios-package.sh @@ -16,13 +16,15 @@ package_libdeepspeech_as_zip "libdeepspeech.zip" case $arch in "--x86_64") - ${TAR} -cf - \ - -C ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/swift/DerivedData/Build/Products/Release-iphonesimulator/ deepspeech_ios.framework \ - 
| ${XZ} > "${TASKCLUSTER_ARTIFACTS}/deepspeech_ios.framework.x86_64.tar.xz" + release_folder="Release-iphonesimulator" + artifact_name="deepspeech_ios.framework.x86_64.tar.xz" ;; "--arm64") - ${TAR} -cf - \ - -C ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/swift/DerivedData/Build/Products/Release-iphoneos/ deepspeech_ios.framework \ - | ${XZ} > "${TASKCLUSTER_ARTIFACTS}/deepspeech_ios.framework.arm64.tar.xz" + release_folder="Release-iphoneos" + artifact_name="deepspeech_ios.framework.arm64.tar.xz" ;; esac + +${TAR} -cf - \ + -C ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/swift/DerivedData/Build/Products/${release_folder}/ deepspeech_ios.framework \ + | ${XZ} > "${TASKCLUSTER_ARTIFACTS}/${artifact_name}" \ No newline at end of file diff --git a/taskcluster/tc-build-utils.sh b/taskcluster/tc-build-utils.sh index ac0bd39e..d4386ff5 100755 --- a/taskcluster/tc-build-utils.sh +++ b/taskcluster/tc-build-utils.sh @@ -323,10 +323,13 @@ do_deepspeech_ios_framework_build() cd ${DS_DSDIR}/native_client/swift case $arch in "--x86_64") - xcodebuild -workspace deepspeech_ios.xcworkspace -scheme deepspeech_ios_test -configuration Release -arch x86_64 -sdk "iphonesimulator" -derivedDataPath DerivedData CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO + iosSDK="iphonesimulator" + xcodeArch="x86_64" ;; "--arm64") - xcodebuild -workspace deepspeech_ios.xcworkspace -scheme deepspeech_ios_test -configuration Release -arch arm64 -sdk "iphoneos" -derivedDataPath DerivedData CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO + iosSDK="iphoneos" + xcodeArch="arm64" ;; esac + xcodebuild -workspace deepspeech_ios.xcworkspace -scheme deepspeech_ios_test -configuration Release -arch "${xcodeArch}" -sdk "${iosSDK}" -derivedDataPath DerivedData CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO } From eb33fc171932c0779a4f7e06bec5a2a961546bf7 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Thu, 23 Jul 2020 13:00:10 +0200 Subject: 
[PATCH 28/33] Document Alphabet methods in Python binding as well --- native_client/ctcdecode/__init__.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/native_client/ctcdecode/__init__.py b/native_client/ctcdecode/__init__.py index 18f402a7..2dc2be56 100644 --- a/native_client/ctcdecode/__init__.py +++ b/native_client/ctcdecode/__init__.py @@ -48,15 +48,33 @@ class Alphabet(swigwrapper.Alphabet): raise ValueError('Alphabet initialization failed with error code 0x{:X}'.format(err)) def CanEncodeSingle(self, input): + ''' + Returns true if the single character/output class has a corresponding label + in the alphabet. + ''' return super(Alphabet, self).CanEncodeSingle(input.encode('utf-8')) def CanEncode(self, input): + ''' + Returns true if the entire string can be encoded into labels in this + alphabet. + ''' return super(Alphabet, self).CanEncode(input.encode('utf-8')) def EncodeSingle(self, input): + ''' + Encode a single character/output class into a label. Character must be in + the alphabet, this method will assert that. Use `CanEncodeSingle` to test. + ''' return super(Alphabet, self).EncodeSingle(input.encode('utf-8')) def Encode(self, input): + ''' + Encode a sequence of character/output classes into a sequence of labels. + Characters are assumed to always take a single Unicode codepoint. + Characters must be in the alphabet, this method will assert that. Use + `CanEncode` and `CanEncodeSingle` to test. 
+ ''' # Convert SWIG's UnsignedIntVec to a Python list res = super(Alphabet, self).Encode(input.encode('utf-8')) return [el for el in res] @@ -66,6 +84,7 @@ class Alphabet(swigwrapper.Alphabet): return res.decode('utf-8') def Decode(self, input): + '''Decode a sequence of labels into a string.''' res = super(Alphabet, self).Decode(input) return res.decode('utf-8') From 2cdc228db48fe62330381214863d0a8e4e405d2f Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Thu, 23 Jul 2020 13:16:12 +0200 Subject: [PATCH 29/33] Use Alphabet.CanEncode in text_to_char_array --- training/deepspeech_training/util/text.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/training/deepspeech_training/util/text.py b/training/deepspeech_training/util/text.py index e1c2e981..198bd96e 100644 --- a/training/deepspeech_training/util/text.py +++ b/training/deepspeech_training/util/text.py @@ -9,16 +9,20 @@ def text_to_char_array(transcript, alphabet, context=''): integers and return a numpy array representing the processed string. Use a string in `context` for adding text to raised exceptions. """ - try: - transcript = alphabet.Encode(transcript) - if len(transcript) == 0: - raise ValueError('While processing {}: Found an empty transcript! ' - 'You must include a transcript for all training data.' - .format(context)) - return transcript - except KeyError as e: + if not alphabet.CanEncode(transcript): # Provide the row context (especially wav_filename) for alphabet errors - raise ValueError('While processing: {}\n{}'.format(context, e)) + raise ValueError( + 'Alphabet cannot encode transcript "{}" while processing sample "{}", ' + 'check that your alphabet contains all characters in the training corpus. ' + 'Missing characters are: {}.' 
+ .format(transcript, context, list(ch for ch in transcript if not alphabet.CanEncodeSingle(ch)))) + + encoded = alphabet.Encode(transcript) + if len(encoded) == 0: + raise ValueError('While processing {}: Found an empty transcript! ' + 'You must include a transcript for all training data.' + .format(context)) + return encoded # The following code is from: http://hetland.org/coding/python/levenshtein.py From 9a5d19d7c50bccb963c0a05aa1b7199fe173ae22 Mon Sep 17 00:00:00 2001 From: Tilman Kamp <5991088+tilmankamp@users.noreply.github.com> Date: Thu, 23 Jul 2020 16:59:12 +0200 Subject: [PATCH 30/33] Resolves #1565 - Limiting and reversing data-sets --- training/deepspeech_training/evaluate.py | 6 ++- training/deepspeech_training/train.py | 6 +++ training/deepspeech_training/util/feeding.py | 8 +++- training/deepspeech_training/util/flags.py | 10 +++- training/deepspeech_training/util/helpers.py | 5 +- .../util/sample_collections.py | 47 +++++++++++++------ 6 files changed, 61 insertions(+), 21 deletions(-) diff --git a/training/deepspeech_training/evaluate.py b/training/deepspeech_training/evaluate.py index 00eac8c7..965b3370 100755 --- a/training/deepspeech_training/evaluate.py +++ b/training/deepspeech_training/evaluate.py @@ -50,7 +50,11 @@ def evaluate(test_csvs, create_model): else: scorer = None - test_sets = [create_dataset([csv], batch_size=FLAGS.test_batch_size, train_phase=False) for csv in test_csvs] + test_sets = [create_dataset([csv], + batch_size=FLAGS.test_batch_size, + train_phase=False, + reverse=FLAGS.reverse_test, + limit=FLAGS.limit_test) for csv in test_csvs] iterator = tfv1.data.Iterator.from_structure(tfv1.data.get_output_types(test_sets[0]), tfv1.data.get_output_shapes(test_sets[0]), output_classes=tfv1.data.get_output_classes(test_sets[0])) diff --git a/training/deepspeech_training/train.py b/training/deepspeech_training/train.py index 93d0c727..47052a07 100644 --- a/training/deepspeech_training/train.py +++ 
b/training/deepspeech_training/train.py @@ -417,6 +417,8 @@ def train(): train_phase=True, exception_box=exception_box, process_ahead=len(Config.available_devices) * FLAGS.train_batch_size * 2, + reverse=FLAGS.reverse_train, + limit=FLAGS.limit_train, buffering=FLAGS.read_buffer) iterator = tfv1.data.Iterator.from_structure(tfv1.data.get_output_types(train_set), @@ -433,6 +435,8 @@ def train(): train_phase=False, exception_box=exception_box, process_ahead=len(Config.available_devices) * FLAGS.dev_batch_size * 2, + reverse=FLAGS.reverse_dev, + limit=FLAGS.limit_dev, buffering=FLAGS.read_buffer) for source in dev_sources] dev_init_ops = [iterator.make_initializer(dev_set) for dev_set in dev_sets] @@ -443,6 +447,8 @@ def train(): train_phase=False, exception_box=exception_box, process_ahead=len(Config.available_devices) * FLAGS.dev_batch_size * 2, + reverse=FLAGS.reverse_dev, + limit=FLAGS.limit_dev, buffering=FLAGS.read_buffer) for source in metrics_sources] metrics_init_ops = [iterator.make_initializer(metrics_set) for metrics_set in metrics_sets] diff --git a/training/deepspeech_training/util/feeding.py b/training/deepspeech_training/util/feeding.py index 4c9b681d..9a26215c 100644 --- a/training/deepspeech_training/util/feeding.py +++ b/training/deepspeech_training/util/feeding.py @@ -90,6 +90,8 @@ def create_dataset(sources, augmentations=None, cache_path=None, train_phase=False, + reverse=False, + limit=0, exception_box=None, process_ahead=None, buffering=1 * MEGABYTE): @@ -99,8 +101,10 @@ def create_dataset(sources, epoch = epoch_counter['epoch'] if train_phase: epoch_counter['epoch'] += 1 - samples = samples_from_sources(sources, buffering=buffering, labeled=True) + samples = samples_from_sources(sources, buffering=buffering, labeled=True, reverse=reverse) num_samples = len(samples) + if limit > 0: + num_samples = min(limit, num_samples) samples = apply_sample_augmentations(samples, augmentations, buffering=buffering, @@ -108,6 +112,8 @@ def 
create_dataset(sources, clock=epoch / epochs, final_clock=(epoch + 1) / epochs) for sample_index, sample in enumerate(samples): + if sample_index >= num_samples: + break clock = (epoch * num_samples + sample_index) / (epochs * num_samples) if train_phase and epochs > 0 else 0.0 transcript = text_to_char_array(sample.transcript, Config.alphabet, context=sample.sample_id) transcript = to_sparse_tuple(transcript) diff --git a/training/deepspeech_training/util/flags.py b/training/deepspeech_training/util/flags.py index 6bf64251..128441fd 100644 --- a/training/deepspeech_training/util/flags.py +++ b/training/deepspeech_training/util/flags.py @@ -71,8 +71,14 @@ def create_flags(): # Sample limits f.DEFINE_integer('limit_train', 0, 'maximum number of elements to use from train set - 0 means no limit') - f.DEFINE_integer('limit_dev', 0, 'maximum number of elements to use from validation set- 0 means no limit') - f.DEFINE_integer('limit_test', 0, 'maximum number of elements to use from test set- 0 means no limit') + f.DEFINE_integer('limit_dev', 0, 'maximum number of elements to use from validation set - 0 means no limit') + f.DEFINE_integer('limit_test', 0, 'maximum number of elements to use from test set - 0 means no limit') + + # Sample order + + f.DEFINE_boolean('reverse_train', False, 'if to reverse sample order of the train set') + f.DEFINE_boolean('reverse_dev', False, 'if to reverse sample order of the dev set') + f.DEFINE_boolean('reverse_test', False, 'if to reverse sample order of the test set') # Checkpointing diff --git a/training/deepspeech_training/util/helpers.py b/training/deepspeech_training/util/helpers.py index 32116f3f..195c117e 100644 --- a/training/deepspeech_training/util/helpers.py +++ b/training/deepspeech_training/util/helpers.py @@ -65,13 +65,14 @@ class Interleaved: """Collection that lazily combines sorted collections in an interleaving fashion. During iteration the next smallest element from all the sorted collections is always picked. 
The collections must support iter() and len().""" - def __init__(self, *iterables, key=lambda obj: obj): + def __init__(self, *iterables, key=lambda obj: obj, reverse=False): self.iterables = iterables self.key = key + self.reverse = reverse self.len = sum(map(len, iterables)) def __iter__(self): - return heapq.merge(*self.iterables, key=self.key) + return heapq.merge(*self.iterables, key=self.key, reverse=self.reverse) def __len__(self): return self.len diff --git a/training/deepspeech_training/util/sample_collections.py b/training/deepspeech_training/util/sample_collections.py index b220e1b3..15c97f97 100644 --- a/training/deepspeech_training/util/sample_collections.py +++ b/training/deepspeech_training/util/sample_collections.py @@ -6,7 +6,7 @@ import json from pathlib import Path from functools import partial -from .helpers import MEGABYTE, GIGABYTE, Interleaved +from .helpers import KILOBYTE, MEGABYTE, GIGABYTE, Interleaved from .audio import ( Sample, DEFAULT_FORMAT, @@ -23,6 +23,7 @@ BIGINT_SIZE = 2 * INT_SIZE MAGIC = b'SAMPLEDB' BUFFER_SIZE = 1 * MEGABYTE +REVERSE_BUFFER_SIZE = 16 * KILOBYTE CACHE_SIZE = 1 * GIGABYTE SCHEMA_KEY = 'schema' @@ -189,14 +190,19 @@ class DirectSDBWriter: class SDB: # pylint: disable=too-many-instance-attributes """Sample collection reader for reading a Sample DB (SDB) file""" - def __init__(self, sdb_filename, buffering=BUFFER_SIZE, id_prefix=None, labeled=True): + def __init__(self, + sdb_filename, + buffering=BUFFER_SIZE, + id_prefix=None, + labeled=True, + reverse=False): """ Parameters ---------- sdb_filename : str Path to the SDB file to read samples from buffering : int - Read-buffer size to use while reading the SDB file + Read-ahead buffer size to use while reading the SDB file in normal order. Fixed to 16kB if in reverse-mode. 
id_prefix : str Prefix for IDs of read samples - defaults to sdb_filename labeled : bool or None @@ -207,7 +213,7 @@ class SDB: # pylint: disable=too-many-instance-attributes """ self.sdb_filename = sdb_filename self.id_prefix = sdb_filename if id_prefix is None else id_prefix - self.sdb_file = open(sdb_filename, 'rb', buffering=buffering) + self.sdb_file = open(sdb_filename, 'rb', buffering=REVERSE_BUFFER_SIZE if reverse else buffering) self.offsets = [] if self.sdb_file.read(len(MAGIC)) != MAGIC: raise RuntimeError('No Sample Database') @@ -237,6 +243,8 @@ class SDB: # pylint: disable=too-many-instance-attributes num_samples = self.read_big_int() for _ in range(num_samples): self.offsets.append(self.read_big_int()) + if reverse: + self.offsets.reverse() def read_int(self): return int.from_bytes(self.sdb_file.read(INT_SIZE), BIG_ENDIAN) @@ -371,7 +379,7 @@ class CSVWriter: # pylint: disable=too-many-instance-attributes class SampleList: """Sample collection base class with samples loaded from a list of in-memory paths.""" - def __init__(self, samples, labeled=True): + def __init__(self, samples, labeled=True, reverse=False): """ Parameters ---------- @@ -380,10 +388,12 @@ class SampleList: labeled : bool or None If True: Reads LabeledSample instances. If False: Ignores transcripts (if available) and reads (unlabeled) util.audio.Sample instances. 
+ reverse : bool + If the order of the samples should be reversed """ self.labeled = labeled self.samples = list(samples) - self.samples.sort(key=lambda r: r[1]) + self.samples.sort(key=lambda r: r[1], reverse=reverse) def __getitem__(self, i): sample_spec = self.samples[i] @@ -396,7 +406,7 @@ class SampleList: class CSV(SampleList): """Sample collection reader for reading a DeepSpeech CSV file Automatically orders samples by CSV column wav_filesize (if available).""" - def __init__(self, csv_filename, labeled=None): + def __init__(self, csv_filename, labeled=None, reverse=False): """ Parameters ---------- @@ -407,6 +417,8 @@ class CSV(SampleList): If False: Ignores transcripts (if available) and reads (unlabeled) util.audio.Sample instances. If None: Automatically determines if CSV file has a transcript column (reading util.sample_collections.LabeledSample instances) or not (reading util.audio.Sample instances). + reverse : bool + If the order of the samples should be reversed """ rows = [] csv_dir = Path(csv_filename).parent @@ -427,10 +439,10 @@ class CSV(SampleList): rows.append((wav_filename, wav_filesize, row['transcript'])) else: rows.append((wav_filename, wav_filesize)) - super(CSV, self).__init__(rows, labeled=labeled) + super(CSV, self).__init__(rows, labeled=labeled, reverse=reverse) -def samples_from_source(sample_source, buffering=BUFFER_SIZE, labeled=None): +def samples_from_source(sample_source, buffering=BUFFER_SIZE, labeled=None, reverse=False): """ Loads samples from a sample source file. @@ -445,6 +457,8 @@ def samples_from_source(sample_source, buffering=BUFFER_SIZE, labeled=None): If False: Ignores transcripts (if available) and reads (unlabeled) util.audio.Sample instances. If None: Automatically determines if source provides transcripts (reading util.sample_collections.LabeledSample instances) or not (reading util.audio.Sample instances). 
+ reverse : bool + If the order of the samples should be reversed Returns ------- @@ -452,13 +466,13 @@ def samples_from_source(sample_source, buffering=BUFFER_SIZE, labeled=None): """ ext = os.path.splitext(sample_source)[1].lower() if ext == '.sdb': - return SDB(sample_source, buffering=buffering, labeled=labeled) + return SDB(sample_source, buffering=buffering, labeled=labeled, reverse=reverse) if ext == '.csv': - return CSV(sample_source, labeled=labeled) + return CSV(sample_source, labeled=labeled, reverse=reverse) raise ValueError('Unknown file type: "{}"'.format(ext)) -def samples_from_sources(sample_sources, buffering=BUFFER_SIZE, labeled=None): +def samples_from_sources(sample_sources, buffering=BUFFER_SIZE, labeled=None, reverse=False): """ Loads and combines samples from a list of source files. Sources are combined in an interleaving way to keep default sample order from shortest to longest. @@ -474,6 +488,8 @@ def samples_from_sources(sample_sources, buffering=BUFFER_SIZE, labeled=None): If False: Ignores transcripts (if available) and always reads (unlabeled) util.audio.Sample instances. If None: Reads util.sample_collections.LabeledSample instances from sources with transcripts and util.audio.Sample instances from sources with no transcripts. 
+ reverse : bool + If the order of the samples should be reversed Returns ------- @@ -483,6 +499,7 @@ def samples_from_sources(sample_sources, buffering=BUFFER_SIZE, labeled=None): if len(sample_sources) == 0: raise ValueError('No files') if len(sample_sources) == 1: - return samples_from_source(sample_sources[0], buffering=buffering, labeled=labeled) - cols = list(map(partial(samples_from_source, buffering=buffering, labeled=labeled), sample_sources)) - return Interleaved(*cols, key=lambda s: s.duration) + return samples_from_source(sample_sources[0], buffering=buffering, labeled=labeled, reverse=reverse) + cols = [samples_from_source(source, buffering=buffering, labeled=labeled, reverse=reverse) + for source in sample_sources] + return Interleaved(*cols, key=lambda s: s.duration, reverse=reverse) From ecbdf46940d2497307d1c616b37f63a1e14d81ef Mon Sep 17 00:00:00 2001 From: Tilman Kamp <5991088+tilmankamp@users.noreply.github.com> Date: Thu, 23 Jul 2020 17:18:40 +0200 Subject: [PATCH 31/33] Fixes #3178 - Librosa requires 1-dimensional array for mono samples --- training/deepspeech_training/util/augmentations.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/training/deepspeech_training/util/augmentations.py b/training/deepspeech_training/util/augmentations.py index 7ac52c41..941c17f2 100644 --- a/training/deepspeech_training/util/augmentations.py +++ b/training/deepspeech_training/util/augmentations.py @@ -349,8 +349,13 @@ class Resample(SampleAugmentation): audio = sample.audio orig_len = len(audio) audio = np.swapaxes(audio, 0, 1) - audio = resample(audio, sample.audio_format.rate, rate) - audio = resample(audio, rate, sample.audio_format.rate) + if audio.shape[0] < 2: + # since v0.8 librosa enforces a shape of (samples,) instead of (channels, samples) for mono samples + resampled = resample(audio[0], sample.audio_format.rate, rate) + audio[0] = resample(resampled, rate, sample.audio_format.rate)[:orig_len] + else: + audio = 
resample(audio, sample.audio_format.rate, rate) + audio = resample(audio, rate, sample.audio_format.rate) audio = np.swapaxes(audio, 0, 1)[0:orig_len] sample.audio = audio From 8629573587e5e1296473cf61eded560f6bf4f6a9 Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Mon, 27 Jul 2020 10:23:43 +0200 Subject: [PATCH 32/33] Fix #3182: document rebuild of generate_scorer_package X-DeepSpeech: NOBUILD --- doc/BUILDING.rst | 14 ++++++++++++++ doc/Scorer.rst | 3 +++ 2 files changed, 17 insertions(+) diff --git a/doc/BUILDING.rst b/doc/BUILDING.rst index 16c5734a..bcc4d374 100644 --- a/doc/BUILDING.rst +++ b/doc/BUILDING.rst @@ -77,6 +77,20 @@ You can now use Bazel to build the main DeepSpeech library, ``libdeepspeech.so`` The generated binaries will be saved to ``bazel-bin/native_client/``. +.. _build-generate-scorer-package: + +Compile ``generate_scorer_package`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Following the same setup as for ``libdeepspeech.so`` above, you can rebuild the ``generate_scorer_package`` binary by adding its target to the command line: ``//native_client:generate_scorer_package``. +Using the example from above you can build the library and that binary at the same time: + +.. code-block:: + + bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-fvisibility=hidden //native_client:libdeepspeech.so //native_client:generate_scorer_package + +The generated binaries will be saved to ``bazel-bin/native_client/``. 
+ Compile Language Bindings ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/Scorer.rst b/doc/Scorer.rst index 04ce2d68..1f374604 100644 --- a/doc/Scorer.rst +++ b/doc/Scorer.rst @@ -49,6 +49,9 @@ Afterwards you can use ``generate_scorer_package`` to generate the scorer packag ./generate_scorer_package --alphabet ../alphabet.txt --lm lm.binary --vocab vocab-500000.txt \ --package kenlm.scorer --default_alpha 0.931289039105002 --default_beta 1.1834137581510284 +The ``generate_scorer_package`` binary is part of the released ``native_client.tar.xz``. If for some reason you need to rebuild it, +please refer to how to :ref:`build-generate-scorer-package`. + Building your own scorer ------------------------ From 9e3c4209b9b6665b137306af6d0061fa9774d420 Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Mon, 27 Jul 2020 10:27:07 +0200 Subject: [PATCH 33/33] Fix #3184: add missing label for data augmentation doc X-DeepSpeech: NOBUILD --- doc/TRAINING.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/TRAINING.rst b/doc/TRAINING.rst index 68007457..0463ba26 100644 --- a/doc/TRAINING.rst +++ b/doc/TRAINING.rst @@ -287,6 +287,8 @@ UTF-8 mode DeepSpeech includes a UTF-8 operating mode which can be useful to model languages with very large alphabets, such as Chinese Mandarin. For details on how it works and how to use it, see :ref:`decoder-docs`. +.. _training-data-augmentation: + Augmentation ^^^^^^^^^^^^