From 3ce07afae045ce9bf014a36719a0dfec75662187 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 30 Jun 2020 13:01:11 +0200 Subject: [PATCH 01/16] Add TensorFlow iOS tasks --- taskcluster/.build.yml | 2 +- taskcluster/.shared.yml | 42 +++++++++++++---------- taskcluster/tf_darwin-amd64-opt.yml | 2 +- taskcluster/tf_ios-arm64-opt.yml | 18 ++++++++++ taskcluster/tf_ios-x86_64-opt.yml | 18 ++++++++++ taskcluster/tf_linux-amd64-cpu-opt.yml | 2 +- taskcluster/tf_tc-build.sh | 46 +++++++++++++------------- taskcluster/tf_tc-vars.sh | 2 ++ taskcluster/tf_win-amd64-cpu-opt.yml | 2 +- tensorflow | 2 +- 10 files changed, 90 insertions(+), 46 deletions(-) create mode 100644 taskcluster/tf_ios-arm64-opt.yml create mode 100644 taskcluster/tf_ios-x86_64-opt.yml diff --git a/taskcluster/.build.yml b/taskcluster/.build.yml index fd261359..76fc9703 100644 --- a/taskcluster/.build.yml +++ b/taskcluster/.build.yml @@ -25,7 +25,7 @@ build: nc_asset_name: 'native_client.tar.xz' args: tests_cmdline: '' - tensorflow_git_desc: 'TensorFlow: v2.2.0-15-g518c1d0' + tensorflow_git_desc: 'TensorFlow: v2.2.0-16-gfc46411' test_model_task: '' homebrew: url: '' diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index 7ec3f366..9323c995 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -142,32 +142,38 @@ system: namespace: "project.deepspeech.swig.win.amd64.b5fea54d39832d1d132d7dd921b69c0c2c9d5118" tensorflow: linux_amd64_cpu: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.cpu/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.cpu" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.cpu/artifacts/public/home.tar.xz" + namespace: 
"project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.cpu" linux_amd64_cuda: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.cuda/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.cuda" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.cuda/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.cuda" linux_armv7: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.arm/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.arm" linux_arm64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm64/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.arm64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.arm64" darwin_amd64: - url: 
"https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.3.osx/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.3.osx" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.osx/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.osx" android_arm64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-arm64/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-arm64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.android-arm64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.android-arm64" android_armv7: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-armv7/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-armv7" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.android-armv7/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.android-armv7" win_amd64_cpu: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win/artifacts/public/home.tar.xz" - 
namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.win/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.win" win_amd64_cuda: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win-cuda/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win-cuda" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.win-cuda/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.win-cuda" + ios_arm64: + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.ios_arm64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.ios_arm64" + ios_x86_64: + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.ios_x86_64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.ios_x86_64" username: 'build-user' homedir: linux: '/home/build-user' diff --git a/taskcluster/tf_darwin-amd64-opt.yml b/taskcluster/tf_darwin-amd64-opt.yml index 21c19021..365e1700 100644 --- a/taskcluster/tf_darwin-amd64-opt.yml +++ b/taskcluster/tf_darwin-amd64-opt.yml @@ -10,7 +10,7 @@ build: ${tensorflow.packages_macos.brew} scripts: setup: "taskcluster/tf_tc-setup.sh" - build: "taskcluster/tf_tc-build.sh --osx" 
+ build: "taskcluster/tf_tc-build.sh --cpu" package: "taskcluster/tf_tc-package.sh" maxRunTime: 28800 metadata: diff --git a/taskcluster/tf_ios-arm64-opt.yml b/taskcluster/tf_ios-arm64-opt.yml new file mode 100644 index 00000000..9f253b3f --- /dev/null +++ b/taskcluster/tf_ios-arm64-opt.yml @@ -0,0 +1,18 @@ +build: + template_file: generic_tc_caching-darwin-opt-base.tyml + cache: + artifact_url: ${system.tensorflow.ios_arm64.url} + artifact_namespace: ${system.tensorflow.ios_arm64.namespace} + generic: + workerType: "ds-macos-heavy" + system_config: + > + ${tensorflow.packages_macos.brew} + scripts: + setup: "taskcluster/tf_tc-setup.sh" + build: "taskcluster/tf_tc-build.sh --ios-arm64" + package: "taskcluster/tf_tc-package.sh" + maxRunTime: 28800 + metadata: + name: "TensorFlow iOS ARM64 TFLite" + description: "Building TensorFlow for iOS ARM64, TFLite, optimized version" diff --git a/taskcluster/tf_ios-x86_64-opt.yml b/taskcluster/tf_ios-x86_64-opt.yml new file mode 100644 index 00000000..c56ad3ca --- /dev/null +++ b/taskcluster/tf_ios-x86_64-opt.yml @@ -0,0 +1,18 @@ +build: + template_file: generic_tc_caching-darwin-opt-base.tyml + cache: + artifact_url: ${system.tensorflow.ios_x86_64.url} + artifact_namespace: ${system.tensorflow.ios_x86_64.namespace} + generic: + workerType: "ds-macos-heavy" + system_config: + > + ${tensorflow.packages_macos.brew} + scripts: + setup: "taskcluster/tf_tc-setup.sh" + build: "taskcluster/tf_tc-build.sh --ios-x86_64" + package: "taskcluster/tf_tc-package.sh" + maxRunTime: 28800 + metadata: + name: "TensorFlow iOS x86_64 TFLite" + description: "Building TensorFlow for iOS x86_64, TFLite, optimized version" diff --git a/taskcluster/tf_linux-amd64-cpu-opt.yml b/taskcluster/tf_linux-amd64-cpu-opt.yml index d869d280..36b051b1 100644 --- a/taskcluster/tf_linux-amd64-cpu-opt.yml +++ b/taskcluster/tf_linux-amd64-cpu-opt.yml @@ -8,7 +8,7 @@ build: ${tensorflow.packages_xenial.apt} && ${java.packages_xenial.apt} scripts: setup: 
"taskcluster/tf_tc-setup.sh" - build: "taskcluster/tf_tc-build.sh" + build: "taskcluster/tf_tc-build.sh --cpu" package: "taskcluster/tf_tc-package.sh" maxRunTime: 14400 metadata: diff --git a/taskcluster/tf_tc-build.sh b/taskcluster/tf_tc-build.sh index ad8085ee..cb620906 100755 --- a/taskcluster/tf_tc-build.sh +++ b/taskcluster/tf_tc-build.sh @@ -4,56 +4,48 @@ set -ex source $(dirname $0)/tf_tc-vars.sh -build_amd64=yes +build_amd64=no build_gpu=no build_android_arm=no build_android_arm64=no build_linux_arm=no build_linux_arm64=no +build_ios_arm64=no +build_ios_x86_64=no + +if [ "$1" = "--cpu" ]; then + build_amd64=yes +fi if [ "$1" = "--gpu" ]; then build_amd64=yes build_gpu=yes - build_android_arm=no - build_android_arm64=no - build_linux_arm=no - build_linux_arm64=no fi if [ "$1" = "--arm" ]; then build_amd64=yes - build_gpu=no - build_android_arm=no - build_android_arm64=no build_linux_arm=yes - build_linux_arm64=no fi if [ "$1" = "--arm64" ]; then build_amd64=yes - build_gpu=no - build_android_arm=no - build_android_arm64=no - build_linux_arm=no build_linux_arm64=yes fi if [ "$1" = "--android-armv7" ]; then - build_amd64=no - build_gpu=no build_android_arm=yes - build_android_arm64=no - build_linux_arm=no - build_linux_arm64=no fi if [ "$1" = "--android-arm64" ]; then - build_amd64=no - build_gpu=no - build_android_arm=no build_android_arm64=yes - build_linux_arm=no - build_linux_arm64=no +fi + +if [ "$1" = "--ios-arm64" ]; then + build_ios_arm64=yes +fi + +if [ "$1" = "--ios-x86_64" ]; then + build_ios_x86_64=yes fi pushd ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/ @@ -98,6 +90,14 @@ pushd ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/ echo "" | TF_SET_ANDROID_WORKSPACE=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_ANDROID_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} fi; + if [ "${build_ios_arm64}" = "yes" ]; then + echo "" | TF_NEED_CUDA=0 TF_CONFIGURE_IOS=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_IOS_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS} 
${BUILD_TARGET_LITE_LIB} + fi; + + if [ "${build_ios_x86_64}" = "yes" ]; then + echo "" | TF_NEED_CUDA=0 TF_CONFIGURE_IOS=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_IOS_X86_64_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} + fi; + if [ $? -ne 0 ]; then # There was a failure, just account for it. echo "Build failure, please check the output above. Exit code was: $?" diff --git a/taskcluster/tf_tc-vars.sh b/taskcluster/tf_tc-vars.sh index 8150bb8d..8c30ea2a 100755 --- a/taskcluster/tf_tc-vars.sh +++ b/taskcluster/tf_tc-vars.sh @@ -171,6 +171,8 @@ BAZEL_ARM64_FLAGS="--config=rpi3-armv8 --config=rpi3-armv8_opt --copt=-DTFLITE_W BAZEL_ANDROID_ARM_FLAGS="--config=android --config=android_arm --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 --copt=-DTFLITE_WITH_RUY_GEMV" BAZEL_ANDROID_ARM64_FLAGS="--config=android --config=android_arm64 --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 --copt=-DTFLITE_WITH_RUY_GEMV" BAZEL_CUDA_FLAGS="--config=cuda" +BAZEL_IOS_ARM64_FLAGS="--config=ios_arm64 --copt=-DTFLITE_WITH_RUY_GEMV" +BAZEL_IOS_X86_64_FLAGS="--config=ios_x86_64 --copt=-DTFLITE_WITH_RUY_GEMV" if [ "${OS}" = "${TC_MSYS_VERSION}" ]; then # Somehow, even with Python being in the PATH, Bazel on windows struggles diff --git a/taskcluster/tf_win-amd64-cpu-opt.yml b/taskcluster/tf_win-amd64-cpu-opt.yml index 4b4d1105..99b4d8a3 100644 --- a/taskcluster/tf_win-amd64-cpu-opt.yml +++ b/taskcluster/tf_win-amd64-cpu-opt.yml @@ -8,7 +8,7 @@ build: ${tensorflow.packages_win.pacman} && ${tensorflow.packages_win.msys64} scripts: setup: "taskcluster/tf_tc-setup.sh" - build: "taskcluster/tf_tc-build.sh" + build: "taskcluster/tf_tc-build.sh --cpu" package: "taskcluster/tf_tc-package.sh" maxRunTime: 14400 metadata: diff --git a/tensorflow b/tensorflow index 518c1d04..fc464111 160000 --- a/tensorflow +++ b/tensorflow @@ -1 +1 @@ -Subproject commit 518c1d04bf55d362bb11e973b8f5d0aa3e5bf44d +Subproject commit 
fc464111ac5c49791e44122e5946e521b25840bd From c85f95f781ee8c007502bcfc4ab2e24fc3e3e24c Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 9 Jun 2020 19:58:04 +0200 Subject: [PATCH 02/16] Add DeepSpeech iOS tasks --- taskcluster/ios-arm64-tflite-opt.yml | 21 +++++++++++++++++++++ taskcluster/ios-build.sh | 17 +++++++++++++++++ taskcluster/ios-x86_64-tflite-opt.yml | 21 +++++++++++++++++++++ 3 files changed, 59 insertions(+) create mode 100644 taskcluster/ios-arm64-tflite-opt.yml create mode 100755 taskcluster/ios-build.sh create mode 100644 taskcluster/ios-x86_64-tflite-opt.yml diff --git a/taskcluster/ios-arm64-tflite-opt.yml b/taskcluster/ios-arm64-tflite-opt.yml new file mode 100644 index 00000000..dab0d24b --- /dev/null +++ b/taskcluster/ios-arm64-tflite-opt.yml @@ -0,0 +1,21 @@ +build: + template_file: darwin-opt-base.tyml + dependencies: + - "swig-darwin-amd64" + - "node-gyp-cache" + - "homebrew_builds-darwin-amd64" + - "pyenv-darwin-amd64" + - "tf_ios-arm64-opt" + routes: + - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.ios_arm64-tflite" + - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.ios_arm64-tflite" + - "index.project.deepspeech.deepspeech.native_client.ios_arm64-tflite.${event.head.sha}" + tensorflow: ${system.tensorflow.ios_arm64.url} + scripts: + build: "taskcluster/ios-build.sh" + package: "taskcluster/package.sh" + nc_asset_name: "native_client.arm64.tflite.ios.tar.xz" + maxRunTime: 14400 + metadata: + name: "DeepSpeech iOS ARM64 TFLite" + description: "Building DeepSpeech for iOS ARM64, TFLite, optimized version" diff --git a/taskcluster/ios-build.sh b/taskcluster/ios-build.sh new file mode 100755 index 00000000..1b913130 --- /dev/null +++ b/taskcluster/ios-build.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -xe + +source $(dirname "$0")/tc-tests-utils.sh + +source ${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh + +BAZEL_TARGETS=" +//native_client:libdeepspeech.so +" + 
+BAZEL_BUILD_FLAGS="--config=ios_arm64 --define=runtime=tflite ${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS}" + +BAZEL_ENV_FLAGS="TF_NEED_CUDA=0" + +do_bazel_build diff --git a/taskcluster/ios-x86_64-tflite-opt.yml b/taskcluster/ios-x86_64-tflite-opt.yml new file mode 100644 index 00000000..144f0bd5 --- /dev/null +++ b/taskcluster/ios-x86_64-tflite-opt.yml @@ -0,0 +1,21 @@ +build: + template_file: darwin-opt-base.tyml + dependencies: + - "swig-darwin-amd64" + - "node-gyp-cache" + - "homebrew_builds-darwin-amd64" + - "pyenv-darwin-amd64" + - "tf_ios-x86_64-opt" + routes: + - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.ios_x86_64-tflite" + - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.ios_x86_64-tflite" + - "index.project.deepspeech.deepspeech.native_client.ios_x86_64-tflite.${event.head.sha}" + tensorflow: ${system.tensorflow.ios_x86_64.url} + scripts: + build: "taskcluster/ios-build.sh" + package: "taskcluster/package.sh" + nc_asset_name: "native_client.x86_64.tflite.ios.tar.xz" + maxRunTime: 14400 + metadata: + name: "DeepSpeech iOS x86_64 TFLite" + description: "Building DeepSpeech for iOS x86_64, TFLite, optimized version" From 4ca0f94d78658d4b57c784197046c669122c7b7d Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 9 Jun 2020 20:41:36 +0200 Subject: [PATCH 03/16] client.cc iOS build --- native_client/client.cc | 10 +++++++--- native_client/definitions.mk | 22 ++++++++++++++++++++-- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/native_client/client.cc b/native_client/client.cc index 1b335955..46a16115 100644 --- a/native_client/client.cc +++ b/native_client/client.cc @@ -12,7 +12,11 @@ #include #include -#if defined(__ANDROID__) || defined(_MSC_VER) +#ifdef __APPLE__ +#include +#endif + +#if defined(__ANDROID__) || defined(_MSC_VER) || TARGET_OS_IPHONE #define NO_SOX #endif @@ -244,7 +248,7 @@ GetAudioBuffer(const char* path, int desired_sample_rate) sox_false 
// Reverse endianness }; -#ifdef __APPLE__ +#if TARGET_OS_OSX // It would be preferable to use sox_open_memstream_write here, but OS-X // doesn't support POSIX 2008, which it requires. See Issue #461. // Instead, we write to a temporary file. @@ -348,7 +352,7 @@ GetAudioBuffer(const char* path, int desired_sample_rate) fclose(wave); #endif // NO_SOX -#ifdef __APPLE__ +#if TARGET_OS_OSX res.buffer_size = (size_t)(output->olength * 2); res.buffer = (char*)malloc(sizeof(char) * res.buffer_size); FILE* output_file = fopen(output_name, "rb"); diff --git a/native_client/definitions.mk b/native_client/definitions.mk index 2f6afbf4..0c8ab656 100644 --- a/native_client/definitions.mk +++ b/native_client/definitions.mk @@ -101,6 +101,20 @@ NODE_PLATFORM_TARGET := --target_arch=arm64 --target_platform=linux TOOLCHAIN_LDD_OPTS := --root $(RASPBIAN)/ endif # ($(TARGET),rpi3-armv8) +ifeq ($(TARGET),ios-simulator) +CFLAGS := -isysroot $(shell xcrun -sdk iphonesimulator13.5 -show-sdk-path) +SOX_CFLAGS := +SOX_LDFLAGS := +LDFLAGS := +endif + +ifeq ($(TARGET),ios-arm64) +CFLAGS := -target arm64-apple-ios -isysroot $(shell xcrun -sdk iphoneos13.5 -show-sdk-path) +SOX_CFLAGS := +SOX_LDFLAGS := +LDFLAGS := +endif + # -Wl,--no-as-needed is required to force linker not to evict libs it thinks we # dont need ; will fail the build on OSX because that option does not exists ifeq ($(OS),Linux) @@ -108,9 +122,13 @@ LDFLAGS_NEEDED := -Wl,--no-as-needed LDFLAGS_RPATH := -Wl,-rpath,\$$ORIGIN endif ifeq ($(OS),Darwin) -CXXFLAGS += -stdlib=libc++ -mmacosx-version-min=10.10 -LDFLAGS_NEEDED := -stdlib=libc++ -mmacosx-version-min=10.10 +CXXFLAGS += -stdlib=libc++ +LDFLAGS_NEEDED := -stdlib=libc++ LDFLAGS_RPATH := -Wl,-rpath,@executable_path +ifeq ($(TARGET),host) +CXXFLAGS += -mmacosx-version-min=10.10 +LDFLAGS_NEEDED += -mmacosx-version-min=10.10 +endif endif CFLAGS += $(EXTRA_CFLAGS) From a1aa873259a96d12969039fdc958d9a2d9243ec3 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Sun, 14 Jun 2020 
11:46:12 +0200 Subject: [PATCH 04/16] Embed bitcode when linking --- native_client/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/native_client/BUILD b/native_client/BUILD index b38979e5..92eb788c 100644 --- a/native_client/BUILD +++ b/native_client/BUILD @@ -130,6 +130,7 @@ tf_cc_shared_object( }) + tflite_copts(), linkopts = select({ "//tensorflow:macos": [], + "//tensorflow:ios": ["-fembed-bitcode"], "//tensorflow:linux_x86_64": LINUX_LINKOPTS, "//tensorflow:rpi3": LINUX_LINKOPTS, "//tensorflow:rpi3-armv8": LINUX_LINKOPTS, From a274c26a89c7fb4334473b8f715325c8e1e5c30d Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Sun, 14 Jun 2020 12:05:45 +0200 Subject: [PATCH 05/16] Add Swift wrapper framework --- native_client/swift/.gitignore | 4 + .../deepspeech_ios.xcodeproj/project.pbxproj | 499 ++++++++++++++ .../contents.xcworkspacedata | 7 + .../xcshareddata/IDEWorkspaceChecks.plist | 8 + .../xcschemes/deepspeech_ios.xcscheme | 77 +++ .../contents.xcworkspacedata | 10 + .../xcshareddata/IDEWorkspaceChecks.plist | 8 + .../xcshareddata/WorkspaceSettings.xcsettings | 8 + .../swift/deepspeech_ios/DeepSpeech.swift | 301 +++++++++ native_client/swift/deepspeech_ios/Info.plist | 22 + .../swift/deepspeech_ios/deepspeech.h | 357 ++++++++++ .../swift/deepspeech_ios/deepspeech_ios.h | 13 + .../deepspeech_ios/deepspeech_ios.modulemap | 12 + .../project.pbxproj | 637 ++++++++++++++++++ .../contents.xcworkspacedata | 7 + .../xcshareddata/IDEWorkspaceChecks.plist | 8 + .../xcschemes/deepspeech_ios_test.xcscheme | 106 +++ .../deepspeech_ios_test/AppDelegate.swift | 255 +++++++ .../AppIcon.appiconset/Contents.json | 98 +++ .../Assets.xcassets/Contents.json | 6 + .../Base.lproj/LaunchScreen.storyboard | 25 + .../deepspeech_ios_test/ContentView.swift | 21 + .../deepspeech_ios_test/Info.plist | 60 ++ .../Preview Assets.xcassets/Contents.json | 6 + .../deepspeech_ios_test/SceneDelegate.swift | 64 ++ .../deepspeech_ios_testTests/Info.plist | 22 + 
.../deepspeech_ios_testTests.swift | 34 + .../deepspeech_ios_testUITests/Info.plist | 22 + .../deepspeech_ios_testUITests.swift | 43 ++ 29 files changed, 2740 insertions(+) create mode 100644 native_client/swift/.gitignore create mode 100644 native_client/swift/deepspeech_ios.xcodeproj/project.pbxproj create mode 100644 native_client/swift/deepspeech_ios.xcodeproj/project.xcworkspace/contents.xcworkspacedata create mode 100644 native_client/swift/deepspeech_ios.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist create mode 100644 native_client/swift/deepspeech_ios.xcodeproj/xcshareddata/xcschemes/deepspeech_ios.xcscheme create mode 100644 native_client/swift/deepspeech_ios.xcworkspace/contents.xcworkspacedata create mode 100644 native_client/swift/deepspeech_ios.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist create mode 100644 native_client/swift/deepspeech_ios.xcworkspace/xcshareddata/WorkspaceSettings.xcsettings create mode 100644 native_client/swift/deepspeech_ios/DeepSpeech.swift create mode 100644 native_client/swift/deepspeech_ios/Info.plist create mode 100644 native_client/swift/deepspeech_ios/deepspeech.h create mode 100644 native_client/swift/deepspeech_ios/deepspeech_ios.h create mode 100644 native_client/swift/deepspeech_ios/deepspeech_ios.modulemap create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.pbxproj create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.xcworkspace/contents.xcworkspacedata create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/xcshareddata/xcschemes/deepspeech_ios_test.xcscheme create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test/AppDelegate.swift create mode 100644 
native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Assets.xcassets/AppIcon.appiconset/Contents.json create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Assets.xcassets/Contents.json create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Base.lproj/LaunchScreen.storyboard create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test/ContentView.swift create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Info.plist create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Preview Content/Preview Assets.xcassets/Contents.json create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_test/SceneDelegate.swift create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_testTests/Info.plist create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_testTests/deepspeech_ios_testTests.swift create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_testUITests/Info.plist create mode 100644 native_client/swift/deepspeech_ios_test/deepspeech_ios_testUITests/deepspeech_ios_testUITests.swift diff --git a/native_client/swift/.gitignore b/native_client/swift/.gitignore new file mode 100644 index 00000000..0351cff4 --- /dev/null +++ b/native_client/swift/.gitignore @@ -0,0 +1,4 @@ +.DS_Store +build/ +xcuserdata/ +/deepspeech_ios/libdeepspeech.dylib diff --git a/native_client/swift/deepspeech_ios.xcodeproj/project.pbxproj b/native_client/swift/deepspeech_ios.xcodeproj/project.pbxproj new file mode 100644 index 00000000..59927e9e --- /dev/null +++ b/native_client/swift/deepspeech_ios.xcodeproj/project.pbxproj @@ -0,0 +1,499 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 50; + objects = { + +/* Begin PBXBuildFile section */ + 505B136B24960D550007DADA /* deepspeech_ios.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 505B136124960D550007DADA /* deepspeech_ios.framework */; }; + 505B137224960D550007DADA /* deepspeech_ios.h in Headers */ = {isa = PBXBuildFile; fileRef = 505B136424960D550007DADA /* deepspeech_ios.h */; settings = {ATTRIBUTES = (Public, ); }; }; + 505B137D24961AF20007DADA /* deepspeech.h in Headers */ = {isa = PBXBuildFile; fileRef = 505B137C24961AF20007DADA /* deepspeech.h */; settings = {ATTRIBUTES = (Private, ); }; }; + 505B137F24961BA70007DADA /* DeepSpeech.swift in Sources */ = {isa = PBXBuildFile; fileRef = 505B137E24961BA70007DADA /* DeepSpeech.swift */; }; + 507CD39B24B61FA100409BBB /* libdeepspeech.so in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD39A24B61FA100409BBB /* libdeepspeech.so */; }; +/* End PBXBuildFile section */ + +/* Begin PBXContainerItemProxy section */ + 505B136C24960D550007DADA /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 505B135824960D550007DADA /* Project object */; + proxyType = 1; + remoteGlobalIDString = 505B136024960D550007DADA; + remoteInfo = deepspeech_ios; + }; +/* End PBXContainerItemProxy section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 505B138A249628290007DADA /* Embed Libraries */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 12; + dstPath = ""; + dstSubfolderSpec = 10; + files = ( + ); + name = "Embed Libraries"; + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ + 505B136124960D550007DADA /* deepspeech_ios.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = deepspeech_ios.framework; sourceTree = BUILT_PRODUCTS_DIR; }; + 505B136424960D550007DADA /* deepspeech_ios.h */ = {isa = PBXFileReference; lastKnownFileType = 
sourcecode.c.h; path = deepspeech_ios.h; sourceTree = ""; }; + 505B136524960D550007DADA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 505B136A24960D550007DADA /* deepspeech_iosTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = deepspeech_iosTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; + 505B137B249619C90007DADA /* deepspeech_ios.modulemap */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.module-map"; path = deepspeech_ios.modulemap; sourceTree = ""; }; + 505B137C24961AF20007DADA /* deepspeech.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = deepspeech.h; path = ../../deepspeech.h; sourceTree = ""; }; + 505B137E24961BA70007DADA /* DeepSpeech.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DeepSpeech.swift; sourceTree = ""; }; + 507CD39A24B61FA100409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; path = libdeepspeech.so; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 505B135E24960D550007DADA /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 507CD39B24B61FA100409BBB /* libdeepspeech.so in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 505B136724960D550007DADA /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 505B136B24960D550007DADA /* deepspeech_ios.framework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 505B135724960D550007DADA = { + isa = PBXGroup; + children = ( + 505B136324960D550007DADA /* deepspeech_ios */, + 505B136224960D550007DADA /* Products */, + 505B1380249620C60007DADA /* Frameworks */, + ); + sourceTree = ""; + }; + 
505B136224960D550007DADA /* Products */ = { + isa = PBXGroup; + children = ( + 505B136124960D550007DADA /* deepspeech_ios.framework */, + 505B136A24960D550007DADA /* deepspeech_iosTests.xctest */, + ); + name = Products; + sourceTree = ""; + }; + 505B136324960D550007DADA /* deepspeech_ios */ = { + isa = PBXGroup; + children = ( + 505B137C24961AF20007DADA /* deepspeech.h */, + 505B136424960D550007DADA /* deepspeech_ios.h */, + 505B137E24961BA70007DADA /* DeepSpeech.swift */, + 505B137B249619C90007DADA /* deepspeech_ios.modulemap */, + 505B136524960D550007DADA /* Info.plist */, + ); + path = deepspeech_ios; + sourceTree = ""; + }; + 505B1380249620C60007DADA /* Frameworks */ = { + isa = PBXGroup; + children = ( + 507CD39A24B61FA100409BBB /* libdeepspeech.so */, + ); + name = Frameworks; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXHeadersBuildPhase section */ + 505B135C24960D550007DADA /* Headers */ = { + isa = PBXHeadersBuildPhase; + buildActionMask = 2147483647; + files = ( + 505B137224960D550007DADA /* deepspeech_ios.h in Headers */, + 505B137D24961AF20007DADA /* deepspeech.h in Headers */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXHeadersBuildPhase section */ + +/* Begin PBXNativeTarget section */ + 505B136024960D550007DADA /* deepspeech_ios */ = { + isa = PBXNativeTarget; + buildConfigurationList = 505B137524960D550007DADA /* Build configuration list for PBXNativeTarget "deepspeech_ios" */; + buildPhases = ( + 505B135C24960D550007DADA /* Headers */, + 505B135D24960D550007DADA /* Sources */, + 505B135E24960D550007DADA /* Frameworks */, + 505B135F24960D550007DADA /* Resources */, + 505B138A249628290007DADA /* Embed Libraries */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = deepspeech_ios; + productName = deepspeech_ios; + productReference = 505B136124960D550007DADA /* deepspeech_ios.framework */; + productType = "com.apple.product-type.framework"; + }; + 505B136924960D550007DADA /* deepspeech_iosTests */ = { 
+ isa = PBXNativeTarget; + buildConfigurationList = 505B137824960D550007DADA /* Build configuration list for PBXNativeTarget "deepspeech_iosTests" */; + buildPhases = ( + 505B136624960D550007DADA /* Sources */, + 505B136724960D550007DADA /* Frameworks */, + 505B136824960D550007DADA /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + 505B136D24960D550007DADA /* PBXTargetDependency */, + ); + name = deepspeech_iosTests; + productName = deepspeech_iosTests; + productReference = 505B136A24960D550007DADA /* deepspeech_iosTests.xctest */; + productType = "com.apple.product-type.bundle.unit-test"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 505B135824960D550007DADA /* Project object */ = { + isa = PBXProject; + attributes = { + LastSwiftUpdateCheck = 1150; + LastUpgradeCheck = 1150; + ORGANIZATIONNAME = Mozilla; + TargetAttributes = { + 505B136024960D550007DADA = { + CreatedOnToolsVersion = 11.5; + LastSwiftMigration = 1150; + }; + 505B136924960D550007DADA = { + CreatedOnToolsVersion = 11.5; + }; + }; + }; + buildConfigurationList = 505B135B24960D550007DADA /* Build configuration list for PBXProject "deepspeech_ios" */; + compatibilityVersion = "Xcode 9.3"; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = 505B135724960D550007DADA; + productRefGroup = 505B136224960D550007DADA /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 505B136024960D550007DADA /* deepspeech_ios */, + 505B136924960D550007DADA /* deepspeech_iosTests */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + 505B135F24960D550007DADA /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 505B136824960D550007DADA /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; 
+ }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 505B135D24960D550007DADA /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 505B137F24961BA70007DADA /* DeepSpeech.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 505B136624960D550007DADA /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin PBXTargetDependency section */ + 505B136D24960D550007DADA /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 505B136024960D550007DADA /* deepspeech_ios */; + targetProxy = 505B136C24960D550007DADA /* PBXContainerItemProxy */; + }; +/* End PBXTargetDependency section */ + +/* Begin XCBuildConfiguration section */ + 505B137324960D550007DADA /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + 
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + CURRENT_PROJECT_VERSION = 1; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 13.5; + MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; + MTL_FAST_MATH = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = iphoneos; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + VERSIONING_SYSTEM = "apple-generic"; + VERSION_INFO_PREFIX = ""; + }; + name = Debug; + }; + 505B137424960D550007DADA /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + 
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + CURRENT_PROJECT_VERSION = 1; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 13.5; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + SDKROOT = iphoneos; + SWIFT_COMPILATION_MODE = wholemodule; + SWIFT_OPTIMIZATION_LEVEL = "-O"; + VALIDATE_PRODUCT = YES; + VERSIONING_SYSTEM = "apple-generic"; + VERSION_INFO_PREFIX = ""; + }; + name = Release; + }; + 505B137624960D550007DADA /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_ENABLE_MODULES = YES; + CODE_SIGN_IDENTITY = "Apple Development"; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "Apple Development"; + CODE_SIGN_STYLE = Automatic; + DEFINES_MODULE = YES; + DEVELOPMENT_TEAM = AWCG9S27P7; + DYLIB_COMPATIBILITY_VERSION = 1; + DYLIB_CURRENT_VERSION = 1; + DYLIB_INSTALL_NAME_BASE = "@rpath"; + INFOPLIST_FILE = deepspeech_ios/Info.plist; + INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + LIBRARY_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)/deepspeech_ios", + "$(PROJECT_DIR)", + ); + MODULEMAP_FILE = deepspeech_ios/deepspeech_ios.modulemap; + PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios"; + PRODUCT_NAME = 
"$(TARGET_NAME:c99extidentifier)"; + SKIP_INSTALL = YES; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Debug; + }; + 505B137724960D550007DADA /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_ENABLE_MODULES = YES; + CODE_SIGN_IDENTITY = "Apple Development"; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "Apple Development"; + CODE_SIGN_STYLE = Automatic; + DEFINES_MODULE = YES; + DEVELOPMENT_TEAM = AWCG9S27P7; + DYLIB_COMPATIBILITY_VERSION = 1; + DYLIB_CURRENT_VERSION = 1; + DYLIB_INSTALL_NAME_BASE = "@rpath"; + INFOPLIST_FILE = deepspeech_ios/Info.plist; + INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + LIBRARY_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)/deepspeech_ios", + "$(PROJECT_DIR)", + ); + MODULEMAP_FILE = deepspeech_ios/deepspeech_ios.modulemap; + PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios"; + PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; + SKIP_INSTALL = YES; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Release; + }; + 505B137924960D550007DADA /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; + CODE_SIGN_STYLE = Automatic; + INFOPLIST_FILE = deepspeech_iosTests/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-iosTests"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Debug; + }; + 505B137A24960D550007DADA /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; + CODE_SIGN_STYLE = Automatic; + INFOPLIST_FILE = deepspeech_iosTests/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + 
"@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-iosTests"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 505B135B24960D550007DADA /* Build configuration list for PBXProject "deepspeech_ios" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 505B137324960D550007DADA /* Debug */, + 505B137424960D550007DADA /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 505B137524960D550007DADA /* Build configuration list for PBXNativeTarget "deepspeech_ios" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 505B137624960D550007DADA /* Debug */, + 505B137724960D550007DADA /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 505B137824960D550007DADA /* Build configuration list for PBXNativeTarget "deepspeech_iosTests" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 505B137924960D550007DADA /* Debug */, + 505B137A24960D550007DADA /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 505B135824960D550007DADA /* Project object */; +} diff --git a/native_client/swift/deepspeech_ios.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/native_client/swift/deepspeech_ios.xcodeproj/project.xcworkspace/contents.xcworkspacedata new file mode 100644 index 00000000..e763e6ba --- /dev/null +++ b/native_client/swift/deepspeech_ios.xcodeproj/project.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,7 @@ + + + + + diff --git a/native_client/swift/deepspeech_ios.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist 
b/native_client/swift/deepspeech_ios.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist new file mode 100644 index 00000000..18d98100 --- /dev/null +++ b/native_client/swift/deepspeech_ios.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist @@ -0,0 +1,8 @@ + + + + + IDEDidComputeMac32BitWarning + + + diff --git a/native_client/swift/deepspeech_ios.xcodeproj/xcshareddata/xcschemes/deepspeech_ios.xcscheme b/native_client/swift/deepspeech_ios.xcodeproj/xcshareddata/xcschemes/deepspeech_ios.xcscheme new file mode 100644 index 00000000..b3ba3705 --- /dev/null +++ b/native_client/swift/deepspeech_ios.xcodeproj/xcshareddata/xcschemes/deepspeech_ios.xcscheme @@ -0,0 +1,77 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/native_client/swift/deepspeech_ios.xcworkspace/contents.xcworkspacedata b/native_client/swift/deepspeech_ios.xcworkspace/contents.xcworkspacedata new file mode 100644 index 00000000..73975e36 --- /dev/null +++ b/native_client/swift/deepspeech_ios.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,10 @@ + + + + + + + diff --git a/native_client/swift/deepspeech_ios.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/native_client/swift/deepspeech_ios.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist new file mode 100644 index 00000000..18d98100 --- /dev/null +++ b/native_client/swift/deepspeech_ios.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist @@ -0,0 +1,8 @@ + + + + + IDEDidComputeMac32BitWarning + + + diff --git a/native_client/swift/deepspeech_ios.xcworkspace/xcshareddata/WorkspaceSettings.xcsettings b/native_client/swift/deepspeech_ios.xcworkspace/xcshareddata/WorkspaceSettings.xcsettings new file mode 100644 index 00000000..f9b0d7c5 --- /dev/null +++ b/native_client/swift/deepspeech_ios.xcworkspace/xcshareddata/WorkspaceSettings.xcsettings @@ -0,0 +1,8 @@ + + + + + PreviewsEnabled + + + diff --git a/native_client/swift/deepspeech_ios/DeepSpeech.swift 
b/native_client/swift/deepspeech_ios/DeepSpeech.swift new file mode 100644 index 00000000..b694995b --- /dev/null +++ b/native_client/swift/deepspeech_ios/DeepSpeech.swift @@ -0,0 +1,301 @@ +// +// DeepSpeech.swift +// deepspeech_ios +// +// Created by Reuben Morais on 14.06.20. +// Copyright © 2020 Mozilla. All rights reserved. +// + +import deepspeech_ios.libdeepspeech_Private + +public enum DeepSpeechError: Error { + // Should be kept in sync with deepspeech.h + case noModel(errorCode: Int32) + case invalidAlphabet(errorCode: Int32) + case invalidShape(errorCode: Int32) + case invalidScorer(errorCode: Int32) + case modelIncompatible(errorCode: Int32) + case scorerNotEnabled(errorCode: Int32) + case scorerUnreadable(errorCode: Int32) + case scorerInvalidLm(errorCode: Int32) + case scorerNoTrie(errorCode: Int32) + case scorerInvalidTrie(errorCode: Int32) + case scorerVersionMismatch(errorCode: Int32) + case failInitMmap(errorCode: Int32) + case failInitSess(errorCode: Int32) + case failInterpreter(errorCode: Int32) + case failRunSess(errorCode: Int32) + case failCreateStream(errorCode: Int32) + case failReadProtobuf(errorCode: Int32) + case failCreateSess(errorCode: Int32) + case failCreateModel(errorCode: Int32) + + // Additional case for invalid error codes, should never happen unless the user has mixed header and binary versions + case invalidErrorCode(errorCode: Int32) +} + +extension DeepSpeechError : LocalizedError { + public var errorDescription: String? 
{ + switch self { + case .noModel(let errorCode), + .invalidAlphabet(let errorCode), + .invalidShape(let errorCode), + .invalidScorer(let errorCode), + .modelIncompatible(let errorCode), + .scorerNotEnabled(let errorCode), + .scorerUnreadable(let errorCode), + .scorerInvalidLm(let errorCode), + .scorerNoTrie(let errorCode), + .scorerInvalidTrie(let errorCode), + .scorerVersionMismatch(let errorCode), + .failInitMmap(let errorCode), + .failInitSess(let errorCode), + .failInterpreter(let errorCode), + .failRunSess(let errorCode), + .failCreateStream(let errorCode), + .failReadProtobuf(let errorCode), + .failCreateSess(let errorCode), + .failCreateModel(let errorCode), + .invalidErrorCode(let errorCode): + let result = DS_ErrorCodeToErrorMessage(errorCode) + defer { DS_FreeString(result) } + return String(cString: result!) + } + } +} + +private func errorCodeToEnum(errorCode: Int32) -> DeepSpeechError { + switch Int(errorCode) { + case Int(DS_ERR_NO_MODEL.rawValue): + return DeepSpeechError.noModel(errorCode: errorCode) + case Int(DS_ERR_INVALID_ALPHABET.rawValue): + return DeepSpeechError.invalidAlphabet(errorCode: errorCode) + case Int(DS_ERR_INVALID_SHAPE.rawValue): + return DeepSpeechError.invalidShape(errorCode: errorCode) + case Int(DS_ERR_INVALID_SCORER.rawValue): + return DeepSpeechError.invalidScorer(errorCode: errorCode) + case Int(DS_ERR_MODEL_INCOMPATIBLE.rawValue): + return DeepSpeechError.modelIncompatible(errorCode: errorCode) + case Int(DS_ERR_SCORER_NOT_ENABLED.rawValue): + return DeepSpeechError.scorerNotEnabled(errorCode: errorCode) + case Int(DS_ERR_SCORER_UNREADABLE.rawValue): + return DeepSpeechError.scorerUnreadable(errorCode: errorCode) + case Int(DS_ERR_SCORER_INVALID_LM.rawValue): + return DeepSpeechError.scorerInvalidLm(errorCode: errorCode) + case Int(DS_ERR_SCORER_NO_TRIE.rawValue): + return DeepSpeechError.scorerNoTrie(errorCode: errorCode) + case Int(DS_ERR_SCORER_INVALID_TRIE.rawValue): + return 
DeepSpeechError.scorerInvalidTrie(errorCode: errorCode) + case Int(DS_ERR_SCORER_VERSION_MISMATCH.rawValue): + return DeepSpeechError.scorerVersionMismatch(errorCode: errorCode) + case Int(DS_ERR_FAIL_INIT_MMAP.rawValue): + return DeepSpeechError.failInitMmap(errorCode: errorCode) + case Int(DS_ERR_FAIL_INIT_SESS.rawValue): + return DeepSpeechError.failInitSess(errorCode: errorCode) + case Int(DS_ERR_FAIL_INTERPRETER.rawValue): + return DeepSpeechError.failInterpreter(errorCode: errorCode) + case Int(DS_ERR_FAIL_RUN_SESS.rawValue): + return DeepSpeechError.failRunSess(errorCode: errorCode) + case Int(DS_ERR_FAIL_CREATE_STREAM.rawValue): + return DeepSpeechError.failCreateStream(errorCode: errorCode) + case Int(DS_ERR_FAIL_READ_PROTOBUF.rawValue): + return DeepSpeechError.failReadProtobuf(errorCode: errorCode) + case Int(DS_ERR_FAIL_CREATE_SESS.rawValue): + return DeepSpeechError.failCreateSess(errorCode: errorCode) + case Int(DS_ERR_FAIL_CREATE_MODEL.rawValue): + return DeepSpeechError.failCreateModel(errorCode: errorCode) + default: + return DeepSpeechError.invalidErrorCode(errorCode: errorCode) + } +} + +private func evaluateErrorCode(errorCode: Int32) throws { + if errorCode != Int32(DS_ERR_OK.rawValue) { + throw errorCodeToEnum(errorCode: errorCode) + } +} + +public struct DeepSpeechTokenMetadata { + let text: String + let timestep: Int + let startTime: Float + + internal init(fromInternal: TokenMetadata) { + text = String(cString: fromInternal.text) + timestep = Int(fromInternal.timestep) + startTime = fromInternal.start_time + } +} + +public struct DeepSpeechCandidateTranscript { + private(set) var tokens: [DeepSpeechTokenMetadata] = [] + let confidence: Double + + internal init(fromInternal: CandidateTranscript) { + let tokensBuffer = UnsafeBufferPointer(start: fromInternal.tokens, count: Int(fromInternal.num_tokens)) + for tok in tokensBuffer { + tokens.append(DeepSpeechTokenMetadata(fromInternal: tok)) + } + confidence = fromInternal.confidence + } +} + 
+public struct DeepSpeechMetadata { + private(set) var transcripts: [DeepSpeechCandidateTranscript] = [] + + internal init(fromInternal: UnsafeMutablePointer) { + let md = fromInternal.pointee + let transcriptsBuffer = UnsafeBufferPointer(start: md.transcripts, count: Int(md.num_transcripts)) + + for tr in transcriptsBuffer { + transcripts.append(DeepSpeechCandidateTranscript(fromInternal: tr)) + } + } +} + +public class DeepSpeechStream { + private var streamCtx: OpaquePointer! + + internal init(streamContext: OpaquePointer) { + streamCtx = streamContext + } + + deinit { + if streamCtx != nil { + DS_FreeStream(streamCtx) + streamCtx = nil + } + } + + public func feedAudioContent(buffer: Array) { + precondition(streamCtx != nil, "calling method on invalidated Stream") + + buffer.withUnsafeBufferPointer { unsafeBufferPointer in + feedAudioContent(buffer: unsafeBufferPointer) + } + } + + public func feedAudioContent(buffer: UnsafeBufferPointer) { + precondition(streamCtx != nil, "calling method on invalidated Stream") + + DS_FeedAudioContent(streamCtx, buffer.baseAddress, UInt32(buffer.count)) + } + + public func intermediateDecode() -> String { + precondition(streamCtx != nil, "calling method on invalidated Stream") + + let result = DS_IntermediateDecode(streamCtx) + defer { DS_FreeString(result) } + return String(cString: result!) + } + + public func intermediateDecodeWithMetadata(numResults: Int) -> DeepSpeechMetadata { + precondition(streamCtx != nil, "calling method on invalidated Stream") + let result = DS_IntermediateDecodeWithMetadata(streamCtx, UInt32(numResults))! + defer { DS_FreeMetadata(result) } + return DeepSpeechMetadata(fromInternal: result) + } + + public func finishStream() -> String { + precondition(streamCtx != nil, "calling method on invalidated Stream") + + let result = DS_FinishStream(streamCtx) + defer { + DS_FreeString(result) + streamCtx = nil + } + return String(cString: result!) 
+ } +} + +public class DeepSpeechModel { + private var modelCtx: OpaquePointer! + + public init(modelPath: String) throws { + let err = DS_CreateModel(modelPath, &modelCtx) + try evaluateErrorCode(errorCode: err) + } + + deinit { + DS_FreeModel(modelCtx) + modelCtx = nil + } + + public func getBeamWidth() -> Int { + return Int(DS_GetModelBeamWidth(modelCtx)) + } + + public func setBeamWidth(beamWidth: Int) throws { + let err = DS_SetModelBeamWidth(modelCtx, UInt32(beamWidth)) + try evaluateErrorCode(errorCode: err) + } + + public var sampleRate: Int { + get { + return Int(DS_GetModelSampleRate(modelCtx)) + } + } + + public func enableExternalScorer(scorerPath: String) throws { + let err = DS_EnableExternalScorer(modelCtx, scorerPath) + try evaluateErrorCode(errorCode: err) + } + + public func disableExternalScorer() throws { + let err = DS_DisableExternalScorer(modelCtx) + try evaluateErrorCode(errorCode: err) + } + + public func setScorerAlphaBeta(alpha: Float, beta: Float) throws { + let err = DS_SetScorerAlphaBeta(modelCtx, alpha, beta) + try evaluateErrorCode(errorCode: err) + } + + public func speechToText(buffer: Array) -> String { + return buffer.withUnsafeBufferPointer { unsafeBufferPointer -> String in + return speechToText(buffer: unsafeBufferPointer) + } + } + + public func speechToText(buffer: UnsafeBufferPointer) -> String { + let result = DS_SpeechToText(modelCtx, buffer.baseAddress, UInt32(buffer.count)) + defer { DS_FreeString(result) } + return String(cString: result!) + } + + public func speechToTextWithMetadata(buffer: Array, numResults: Int) -> DeepSpeechMetadata { + return buffer.withUnsafeBufferPointer { unsafeBufferPointer -> DeepSpeechMetadata in + let result = DS_SpeechToTextWithMetadata(modelCtx, unsafeBufferPointer.baseAddress, UInt32(buffer.count), UInt32(numResults))! 
+ defer { DS_FreeMetadata(result) } + return DeepSpeechMetadata(fromInternal: result) + } + } + + public func createStream() throws -> DeepSpeechStream { + var streamContext: OpaquePointer! + let err = DS_CreateStream(modelCtx, &streamContext) + try evaluateErrorCode(errorCode: err) + return DeepSpeechStream(streamContext: streamContext) + } + + public class func open(path: String, scorerPath: Optional = nil) -> OpaquePointer { + var fooOpaque: OpaquePointer! + DS_CreateModel(path, &fooOpaque) + if let scorerPath = scorerPath { + DS_EnableExternalScorer(fooOpaque, scorerPath) + } + return fooOpaque + } + + public class func createStream(modelState: OpaquePointer) -> OpaquePointer { + var fooOpaque: OpaquePointer! + DS_CreateStream(modelState, &fooOpaque) + return fooOpaque + } +} + +public func DeepSpeechVersion() -> String { + let result = DS_Version() + defer { DS_FreeString(result) } + return String(cString: result!) +} diff --git a/native_client/swift/deepspeech_ios/Info.plist b/native_client/swift/deepspeech_ios/Info.plist new file mode 100644 index 00000000..9bcb2444 --- /dev/null +++ b/native_client/swift/deepspeech_ios/Info.plist @@ -0,0 +1,22 @@ + + + + + CFBundleDevelopmentRegion + $(DEVELOPMENT_LANGUAGE) + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + $(PRODUCT_BUNDLE_PACKAGE_TYPE) + CFBundleShortVersionString + 1.0 + CFBundleVersion + $(CURRENT_PROJECT_VERSION) + + diff --git a/native_client/swift/deepspeech_ios/deepspeech.h b/native_client/swift/deepspeech_ios/deepspeech.h new file mode 100644 index 00000000..1df3cf2e --- /dev/null +++ b/native_client/swift/deepspeech_ios/deepspeech.h @@ -0,0 +1,357 @@ +#ifndef DEEPSPEECH_H +#define DEEPSPEECH_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef SWIG + #if defined _MSC_VER + #define DEEPSPEECH_EXPORT __declspec(dllexport) + #else + #define DEEPSPEECH_EXPORT 
__attribute__ ((visibility("default"))) + #endif /*End of _MSC_VER*/ +#else + #define DEEPSPEECH_EXPORT +#endif + +typedef struct ModelState ModelState; + +typedef struct StreamingState StreamingState; + +/** + * @brief Stores text of an individual token, along with its timing information + */ +typedef struct TokenMetadata { + /** The text corresponding to this token */ + const char* const text; + + /** Position of the token in units of 20ms */ + const unsigned int timestep; + + /** Position of the token in seconds */ + const float start_time; +} TokenMetadata; + +/** + * @brief A single transcript computed by the model, including a confidence + * value and the metadata for its constituent tokens. + */ +typedef struct CandidateTranscript { + /** Array of TokenMetadata objects */ + const TokenMetadata* const tokens; + /** Size of the tokens array */ + const unsigned int num_tokens; + /** Approximated confidence value for this transcript. This is roughly the + * sum of the acoustic model logit values for each timestep/character that + * contributed to the creation of this transcript. + */ + const double confidence; +} CandidateTranscript; + +/** + * @brief An array of CandidateTranscript objects computed by the model. + */ +typedef struct Metadata { + /** Array of CandidateTranscript objects */ + const CandidateTranscript* const transcripts; + /** Size of the transcripts array */ + const unsigned int num_transcripts; +} Metadata; + +// sphinx-doc: error_code_listing_start + +#define DS_FOR_EACH_ERROR(APPLY) \ + APPLY(DS_ERR_OK, 0x0000, "No error.") \ + APPLY(DS_ERR_NO_MODEL, 0x1000, "Missing model information.") \ + APPLY(DS_ERR_INVALID_ALPHABET, 0x2000, "Invalid alphabet embedded in model. 
(Data corruption?)") \ + APPLY(DS_ERR_INVALID_SHAPE, 0x2001, "Invalid model shape.") \ + APPLY(DS_ERR_INVALID_SCORER, 0x2002, "Invalid scorer file.") \ + APPLY(DS_ERR_MODEL_INCOMPATIBLE, 0x2003, "Incompatible model.") \ + APPLY(DS_ERR_SCORER_NOT_ENABLED, 0x2004, "External scorer is not enabled.") \ + APPLY(DS_ERR_SCORER_UNREADABLE, 0x2005, "Could not read scorer file.") \ + APPLY(DS_ERR_SCORER_INVALID_LM, 0x2006, "Could not recognize language model header in scorer.") \ + APPLY(DS_ERR_SCORER_NO_TRIE, 0x2007, "Reached end of scorer file before loading vocabulary trie.") \ + APPLY(DS_ERR_SCORER_INVALID_TRIE, 0x2008, "Invalid magic in trie header.") \ + APPLY(DS_ERR_SCORER_VERSION_MISMATCH, 0x2009, "Scorer file version does not match expected version.") \ + APPLY(DS_ERR_FAIL_INIT_MMAP, 0x3000, "Failed to initialize memory mapped model.") \ + APPLY(DS_ERR_FAIL_INIT_SESS, 0x3001, "Failed to initialize the session.") \ + APPLY(DS_ERR_FAIL_INTERPRETER, 0x3002, "Interpreter failed.") \ + APPLY(DS_ERR_FAIL_RUN_SESS, 0x3003, "Failed to run the session.") \ + APPLY(DS_ERR_FAIL_CREATE_STREAM, 0x3004, "Error creating the stream.") \ + APPLY(DS_ERR_FAIL_READ_PROTOBUF, 0x3005, "Error reading the proto buffer model file.") \ + APPLY(DS_ERR_FAIL_CREATE_SESS, 0x3006, "Failed to create session.") \ + APPLY(DS_ERR_FAIL_CREATE_MODEL, 0x3007, "Could not allocate model state.") + +// sphinx-doc: error_code_listing_end + +enum DeepSpeech_Error_Codes +{ +#define DEFINE(NAME, VALUE, DESC) NAME = VALUE, +DS_FOR_EACH_ERROR(DEFINE) +#undef DEFINE +}; + +/** + * @brief An object providing an interface to a trained DeepSpeech model. + * + * @param aModelPath The path to the frozen model graph. + * @param[out] retval a ModelState pointer + * + * @return Zero on success, non-zero on failure. + */ +DEEPSPEECH_EXPORT +int DS_CreateModel(const char* aModelPath, + ModelState** retval); + +/** + * @brief Get beam width value used by the model. 
If {@link DS_SetModelBeamWidth} + * was not called before, will return the default value loaded from the + * model file. + * + * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. + * + * @return Beam width value used by the model. + */ +DEEPSPEECH_EXPORT +unsigned int DS_GetModelBeamWidth(const ModelState* aCtx); + +/** + * @brief Set beam width value used by the model. + * + * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. + * @param aBeamWidth The beam width used by the model. A larger beam width value + * generates better results at the cost of decoding time. + * + * @return Zero on success, non-zero on failure. + */ +DEEPSPEECH_EXPORT +int DS_SetModelBeamWidth(ModelState* aCtx, + unsigned int aBeamWidth); + +/** + * @brief Return the sample rate expected by a model. + * + * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. + * + * @return Sample rate expected by the model for its input. + */ +DEEPSPEECH_EXPORT +int DS_GetModelSampleRate(const ModelState* aCtx); + +/** + * @brief Frees associated resources and destroys model object. + */ +DEEPSPEECH_EXPORT +void DS_FreeModel(ModelState* ctx); + +/** + * @brief Enable decoding using an external scorer. + * + * @param aCtx The ModelState pointer for the model being changed. + * @param aScorerPath The path to the external scorer file. + * + * @return Zero on success, non-zero on failure (invalid arguments). + */ +DEEPSPEECH_EXPORT +int DS_EnableExternalScorer(ModelState* aCtx, + const char* aScorerPath); + +/** + * @brief Disable decoding using an external scorer. + * + * @param aCtx The ModelState pointer for the model being changed. + * + * @return Zero on success, non-zero on failure. + */ +DEEPSPEECH_EXPORT +int DS_DisableExternalScorer(ModelState* aCtx); + +/** + * @brief Set hyperparameters alpha and beta of the external scorer. + * + * @param aCtx The ModelState pointer for the model being changed. 
+ * @param aAlpha The alpha hyperparameter of the decoder. Language model weight. + * @param aBeta The beta hyperparameter of the decoder. Word insertion weight. + * + * @return Zero on success, non-zero on failure. + */ +DEEPSPEECH_EXPORT +int DS_SetScorerAlphaBeta(ModelState* aCtx, + float aAlpha, + float aBeta); + +/** + * @brief Use the DeepSpeech model to convert speech to text. + * + * @param aCtx The ModelState pointer for the model to use. + * @param aBuffer A 16-bit, mono raw audio signal at the appropriate + * sample rate (matching what the model was trained on). + * @param aBufferSize The number of samples in the audio signal. + * + * @return The STT result. The user is responsible for freeing the string using + * {@link DS_FreeString()}. Returns NULL on error. + */ +DEEPSPEECH_EXPORT +char* DS_SpeechToText(ModelState* aCtx, + const short* aBuffer, + unsigned int aBufferSize); + +/** + * @brief Use the DeepSpeech model to convert speech to text and output results + * including metadata. + * + * @param aCtx The ModelState pointer for the model to use. + * @param aBuffer A 16-bit, mono raw audio signal at the appropriate + * sample rate (matching what the model was trained on). + * @param aBufferSize The number of samples in the audio signal. + * @param aNumResults The maximum number of CandidateTranscript structs to return. Returned value might be smaller than this. + * + * @return Metadata struct containing multiple CandidateTranscript structs. Each + * transcript has per-token metadata including timing information. The + * user is responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. + * Returns NULL on error. + */ +DEEPSPEECH_EXPORT +Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx, + const short* aBuffer, + unsigned int aBufferSize, + unsigned int aNumResults); + +/** + * @brief Create a new streaming inference state.
The streaming state returned + * by this function can then be passed to {@link DS_FeedAudioContent()} + * and {@link DS_FinishStream()}. + * + * @param aCtx The ModelState pointer for the model to use. + * @param[out] retval an opaque pointer that represents the streaming state. Can + * be NULL if an error occurs. + * + * @return Zero for success, non-zero on failure. + */ +DEEPSPEECH_EXPORT +int DS_CreateStream(ModelState* aCtx, + StreamingState** retval); + +/** + * @brief Feed audio samples to an ongoing streaming inference. + * + * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aBuffer An array of 16-bit, mono raw audio samples at the + * appropriate sample rate (matching what the model was trained on). + * @param aBufferSize The number of samples in @p aBuffer. + */ +DEEPSPEECH_EXPORT +void DS_FeedAudioContent(StreamingState* aSctx, + const short* aBuffer, + unsigned int aBufferSize); + +/** + * @brief Compute the intermediate decoding of an ongoing streaming inference. + * + * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * + * @return The STT intermediate result. The user is responsible for freeing the + * string using {@link DS_FreeString()}. + */ +DEEPSPEECH_EXPORT +char* DS_IntermediateDecode(const StreamingState* aSctx); + +/** + * @brief Compute the intermediate decoding of an ongoing streaming inference, + * return results including metadata. + * + * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aNumResults The number of candidate transcripts to return. + * + * @return Metadata struct containing multiple candidate transcripts. Each transcript + * has per-token metadata including timing information. The user is + * responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. + * Returns NULL on error. 
+ */ +DEEPSPEECH_EXPORT +Metadata* DS_IntermediateDecodeWithMetadata(const StreamingState* aSctx, + unsigned int aNumResults); + +/** + * @brief Compute the final decoding of an ongoing streaming inference and return + * the result. Signals the end of an ongoing streaming inference. + * + * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * + * @return The STT result. The user is responsible for freeing the string using + * {@link DS_FreeString()}. + * + * @note This method will free the state pointer (@p aSctx). + */ +DEEPSPEECH_EXPORT +char* DS_FinishStream(StreamingState* aSctx); + +/** + * @brief Compute the final decoding of an ongoing streaming inference and return + * results including metadata. Signals the end of an ongoing streaming + * inference. + * + * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aNumResults The number of candidate transcripts to return. + * + * @return Metadata struct containing multiple candidate transcripts. Each transcript + * has per-token metadata including timing information. The user is + * responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. + * Returns NULL on error. + * + * @note This method will free the state pointer (@p aSctx). + */ +DEEPSPEECH_EXPORT +Metadata* DS_FinishStreamWithMetadata(StreamingState* aSctx, + unsigned int aNumResults); + +/** + * @brief Destroy a streaming state without decoding the computed logits. This + * can be used if you no longer need the result of an ongoing streaming + * inference and don't want to perform a costly decode operation. + * + * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * + * @note This method will free the state pointer (@p aSctx). + */ +DEEPSPEECH_EXPORT +void DS_FreeStream(StreamingState* aSctx); + +/** + * @brief Free memory allocated for metadata information. 
+ */ +DEEPSPEECH_EXPORT +void DS_FreeMetadata(Metadata* m); + +/** + * @brief Free a char* string returned by the DeepSpeech API. + */ +DEEPSPEECH_EXPORT +void DS_FreeString(char* str); + +/** + * @brief Returns the version of this library. The returned version is a semantic + * version (SemVer 2.0.0). The string returned must be freed with {@link DS_FreeString()}. + * + * @return The version string. + */ +DEEPSPEECH_EXPORT +char* DS_Version(); + +/** + * @brief Returns a textual description corresponding to an error code. + * The string returned must be freed with {@link DS_FreeString()}. + * + * @return The error description. + */ +DEEPSPEECH_EXPORT +char* DS_ErrorCodeToErrorMessage(int aErrorCode); + +#undef DEEPSPEECH_EXPORT + +#ifdef __cplusplus +} +#endif + +#endif /* DEEPSPEECH_H */ diff --git a/native_client/swift/deepspeech_ios/deepspeech_ios.h b/native_client/swift/deepspeech_ios/deepspeech_ios.h new file mode 100644 index 00000000..a40fb954 --- /dev/null +++ b/native_client/swift/deepspeech_ios/deepspeech_ios.h @@ -0,0 +1,13 @@ +// +// deepspeech_ios.h +// deepspeech_ios +// +// Created by Reuben Morais on 14.06.20. +// Copyright © 2020 Mozilla. All rights reserved.
+// + +#import <Foundation/Foundation.h> + +// In this header, you should import all the public headers of your framework using statements like #import <deepspeech_ios/PublicHeader.h> + + diff --git a/native_client/swift/deepspeech_ios/deepspeech_ios.modulemap b/native_client/swift/deepspeech_ios/deepspeech_ios.modulemap new file mode 100644 index 00000000..078ac915 --- /dev/null +++ b/native_client/swift/deepspeech_ios/deepspeech_ios.modulemap @@ -0,0 +1,12 @@ +framework module deepspeech_ios { + umbrella header "deepspeech_ios.h" + + export * + module * { export * } + + explicit module libdeepspeech_Private { + header "deepspeech.h" + export * + link "deepspeech" + } +} diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.pbxproj b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.pbxproj new file mode 100644 index 00000000..e9a7d0a2 --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.pbxproj @@ -0,0 +1,637 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 50; + objects = { + +/* Begin PBXBuildFile section */ + 507CD3A124B61FE400409BBB /* deepspeech_ios.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */; }; + 507CD3A324B61FEB00409BBB /* libdeepspeech.so in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libdeepspeech.so */; }; + 507CD3A424B61FFC00409BBB /* libdeepspeech.so in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libdeepspeech.so */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; }; + 50F787F32497683900D52237 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50F787F22497683900D52237 /* AppDelegate.swift */; }; + 50F787F52497683900D52237 /* SceneDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50F787F42497683900D52237 /* SceneDelegate.swift */; }; + 50F787F72497683900D52237 /* ContentView.swift in Sources */ = {isa =
PBXBuildFile; fileRef = 50F787F62497683900D52237 /* ContentView.swift */; }; + 50F787F92497683A00D52237 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 50F787F82497683A00D52237 /* Assets.xcassets */; }; + 50F787FC2497683A00D52237 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 50F787FB2497683A00D52237 /* Preview Assets.xcassets */; }; + 50F787FF2497683A00D52237 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 50F787FD2497683A00D52237 /* LaunchScreen.storyboard */; }; + 50F7880A2497683A00D52237 /* deepspeech_ios_testTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50F788092497683A00D52237 /* deepspeech_ios_testTests.swift */; }; + 50F788152497683A00D52237 /* deepspeech_ios_testUITests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50F788142497683A00D52237 /* deepspeech_ios_testUITests.swift */; }; +/* End PBXBuildFile section */ + +/* Begin PBXContainerItemProxy section */ + 50F788062497683A00D52237 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 50F787E72497683900D52237 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 50F787EE2497683900D52237; + remoteInfo = deepspeech_ios_test; + }; + 50F788112497683A00D52237 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 50F787E72497683900D52237 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 50F787EE2497683900D52237; + remoteInfo = deepspeech_ios_test; + }; +/* End PBXContainerItemProxy section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 50F2B10E2498EB59007CD876 /* Embed Frameworks */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = ""; + dstSubfolderSpec = 10; + files = ( + 507CD3A424B61FFC00409BBB /* libdeepspeech.so in Embed Frameworks */, + ); + name = "Embed Frameworks"; + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ 
+ 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = deepspeech_ios.framework; sourceTree = BUILT_PRODUCTS_DIR; }; + 507CD3A224B61FEA00409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; name = libdeepspeech.so; path = ../libdeepspeech.so; sourceTree = ""; }; + 50F787EF2497683900D52237 /* deepspeech_ios_test.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = deepspeech_ios_test.app; sourceTree = BUILT_PRODUCTS_DIR; }; + 50F787F22497683900D52237 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; + 50F787F42497683900D52237 /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = ""; }; + 50F787F62497683900D52237 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = ""; }; + 50F787F82497683A00D52237 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; + 50F787FB2497683A00D52237 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = ""; }; + 50F787FE2497683A00D52237 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = ""; }; + 50F788002497683A00D52237 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 50F788052497683A00D52237 /* deepspeech_ios_testTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = deepspeech_ios_testTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; + 
50F788092497683A00D52237 /* deepspeech_ios_testTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = deepspeech_ios_testTests.swift; sourceTree = ""; }; + 50F7880B2497683A00D52237 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 50F788102497683A00D52237 /* deepspeech_ios_testUITests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = deepspeech_ios_testUITests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; + 50F788142497683A00D52237 /* deepspeech_ios_testUITests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = deepspeech_ios_testUITests.swift; sourceTree = ""; }; + 50F788162497683A00D52237 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 50F787EC2497683900D52237 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 507CD3A324B61FEB00409BBB /* libdeepspeech.so in Frameworks */, + 507CD3A124B61FE400409BBB /* deepspeech_ios.framework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 50F788022497683A00D52237 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 50F7880D2497683A00D52237 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 50F2B0FC2498D6C7007CD876 /* Frameworks */ = { + isa = PBXGroup; + children = ( + 507CD3A224B61FEA00409BBB /* libdeepspeech.so */, + 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */, + ); + name = Frameworks; + sourceTree = ""; + }; + 50F787E62497683900D52237 = { + 
isa = PBXGroup; + children = ( + 50F787F12497683900D52237 /* deepspeech_ios_test */, + 50F788082497683A00D52237 /* deepspeech_ios_testTests */, + 50F788132497683A00D52237 /* deepspeech_ios_testUITests */, + 50F787F02497683900D52237 /* Products */, + 50F2B0FC2498D6C7007CD876 /* Frameworks */, + ); + sourceTree = ""; + }; + 50F787F02497683900D52237 /* Products */ = { + isa = PBXGroup; + children = ( + 50F787EF2497683900D52237 /* deepspeech_ios_test.app */, + 50F788052497683A00D52237 /* deepspeech_ios_testTests.xctest */, + 50F788102497683A00D52237 /* deepspeech_ios_testUITests.xctest */, + ); + name = Products; + sourceTree = ""; + }; + 50F787F12497683900D52237 /* deepspeech_ios_test */ = { + isa = PBXGroup; + children = ( + 50F787F22497683900D52237 /* AppDelegate.swift */, + 50F787F42497683900D52237 /* SceneDelegate.swift */, + 50F787F62497683900D52237 /* ContentView.swift */, + 50F787F82497683A00D52237 /* Assets.xcassets */, + 50F787FD2497683A00D52237 /* LaunchScreen.storyboard */, + 50F788002497683A00D52237 /* Info.plist */, + 50F787FA2497683A00D52237 /* Preview Content */, + ); + path = deepspeech_ios_test; + sourceTree = ""; + }; + 50F787FA2497683A00D52237 /* Preview Content */ = { + isa = PBXGroup; + children = ( + 50F787FB2497683A00D52237 /* Preview Assets.xcassets */, + ); + path = "Preview Content"; + sourceTree = ""; + }; + 50F788082497683A00D52237 /* deepspeech_ios_testTests */ = { + isa = PBXGroup; + children = ( + 50F788092497683A00D52237 /* deepspeech_ios_testTests.swift */, + 50F7880B2497683A00D52237 /* Info.plist */, + ); + path = deepspeech_ios_testTests; + sourceTree = ""; + }; + 50F788132497683A00D52237 /* deepspeech_ios_testUITests */ = { + isa = PBXGroup; + children = ( + 50F788142497683A00D52237 /* deepspeech_ios_testUITests.swift */, + 50F788162497683A00D52237 /* Info.plist */, + ); + path = deepspeech_ios_testUITests; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 50F787EE2497683900D52237 /* 
deepspeech_ios_test */ = { + isa = PBXNativeTarget; + buildConfigurationList = 50F788192497683A00D52237 /* Build configuration list for PBXNativeTarget "deepspeech_ios_test" */; + buildPhases = ( + 50F787EB2497683900D52237 /* Sources */, + 50F787EC2497683900D52237 /* Frameworks */, + 50F787ED2497683900D52237 /* Resources */, + 50F2B10E2498EB59007CD876 /* Embed Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = deepspeech_ios_test; + productName = deepspeech_ios_test; + productReference = 50F787EF2497683900D52237 /* deepspeech_ios_test.app */; + productType = "com.apple.product-type.application"; + }; + 50F788042497683A00D52237 /* deepspeech_ios_testTests */ = { + isa = PBXNativeTarget; + buildConfigurationList = 50F7881C2497683A00D52237 /* Build configuration list for PBXNativeTarget "deepspeech_ios_testTests" */; + buildPhases = ( + 50F788012497683A00D52237 /* Sources */, + 50F788022497683A00D52237 /* Frameworks */, + 50F788032497683A00D52237 /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + 50F788072497683A00D52237 /* PBXTargetDependency */, + ); + name = deepspeech_ios_testTests; + productName = deepspeech_ios_testTests; + productReference = 50F788052497683A00D52237 /* deepspeech_ios_testTests.xctest */; + productType = "com.apple.product-type.bundle.unit-test"; + }; + 50F7880F2497683A00D52237 /* deepspeech_ios_testUITests */ = { + isa = PBXNativeTarget; + buildConfigurationList = 50F7881F2497683A00D52237 /* Build configuration list for PBXNativeTarget "deepspeech_ios_testUITests" */; + buildPhases = ( + 50F7880C2497683A00D52237 /* Sources */, + 50F7880D2497683A00D52237 /* Frameworks */, + 50F7880E2497683A00D52237 /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + 50F788122497683A00D52237 /* PBXTargetDependency */, + ); + name = deepspeech_ios_testUITests; + productName = deepspeech_ios_testUITests; + productReference = 50F788102497683A00D52237 /* deepspeech_ios_testUITests.xctest */; + productType = 
"com.apple.product-type.bundle.ui-testing"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 50F787E72497683900D52237 /* Project object */ = { + isa = PBXProject; + attributes = { + LastSwiftUpdateCheck = 1150; + LastUpgradeCheck = 1150; + ORGANIZATIONNAME = Mozilla; + TargetAttributes = { + 50F787EE2497683900D52237 = { + CreatedOnToolsVersion = 11.5; + }; + 50F788042497683A00D52237 = { + CreatedOnToolsVersion = 11.5; + TestTargetID = 50F787EE2497683900D52237; + }; + 50F7880F2497683A00D52237 = { + CreatedOnToolsVersion = 11.5; + TestTargetID = 50F787EE2497683900D52237; + }; + }; + }; + buildConfigurationList = 50F787EA2497683900D52237 /* Build configuration list for PBXProject "deepspeech_ios_test" */; + compatibilityVersion = "Xcode 9.3"; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = 50F787E62497683900D52237; + productRefGroup = 50F787F02497683900D52237 /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 50F787EE2497683900D52237 /* deepspeech_ios_test */, + 50F788042497683A00D52237 /* deepspeech_ios_testTests */, + 50F7880F2497683A00D52237 /* deepspeech_ios_testUITests */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + 50F787ED2497683900D52237 /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 50F787FF2497683A00D52237 /* LaunchScreen.storyboard in Resources */, + 50F787FC2497683A00D52237 /* Preview Assets.xcassets in Resources */, + 50F787F92497683A00D52237 /* Assets.xcassets in Resources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 50F788032497683A00D52237 /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 50F7880E2497683A00D52237 /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + 
runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 50F787EB2497683900D52237 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 50F787F32497683900D52237 /* AppDelegate.swift in Sources */, + 50F787F52497683900D52237 /* SceneDelegate.swift in Sources */, + 50F787F72497683900D52237 /* ContentView.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 50F788012497683A00D52237 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 50F7880A2497683A00D52237 /* deepspeech_ios_testTests.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 50F7880C2497683A00D52237 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 50F788152497683A00D52237 /* deepspeech_ios_testUITests.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin PBXTargetDependency section */ + 50F788072497683A00D52237 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 50F787EE2497683900D52237 /* deepspeech_ios_test */; + targetProxy = 50F788062497683A00D52237 /* PBXContainerItemProxy */; + }; + 50F788122497683A00D52237 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 50F787EE2497683900D52237 /* deepspeech_ios_test */; + targetProxy = 50F788112497683A00D52237 /* PBXContainerItemProxy */; + }; +/* End PBXTargetDependency section */ + +/* Begin PBXVariantGroup section */ + 50F787FD2497683A00D52237 /* LaunchScreen.storyboard */ = { + isa = PBXVariantGroup; + children = ( + 50F787FE2497683A00D52237 /* Base */, + ); + name = LaunchScreen.storyboard; + sourceTree = ""; + }; +/* End PBXVariantGroup section */ + +/* Begin XCBuildConfiguration section */ + 50F788172497683A00D52237 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS 
= NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 13.5; + MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; + MTL_FAST_MATH = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = iphoneos; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + }; + name = Debug; + }; + 50F788182497683A00D52237 /* Release */ = { + isa 
= XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 13.5; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + SDKROOT = iphoneos; + SWIFT_COMPILATION_MODE = wholemodule; + SWIFT_OPTIMIZATION_LEVEL = "-O"; + VALIDATE_PRODUCT = YES; + }; + name = Release; + }; + 50F7881A2497683A00D52237 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + 
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + CODE_SIGN_STYLE = Automatic; + DEVELOPMENT_ASSET_PATHS = "\"deepspeech_ios_test/Preview Content\""; + DEVELOPMENT_TEAM = AWCG9S27P7; + ENABLE_PREVIEWS = YES; + FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)"; + INFOPLIST_FILE = deepspeech_ios_test/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + ); + LIBRARY_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios-test"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Debug; + }; + 50F7881B2497683A00D52237 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + CODE_SIGN_STYLE = Automatic; + DEVELOPMENT_ASSET_PATHS = "\"deepspeech_ios_test/Preview Content\""; + DEVELOPMENT_TEAM = AWCG9S27P7; + ENABLE_PREVIEWS = YES; + FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)"; + INFOPLIST_FILE = deepspeech_ios_test/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + ); + LIBRARY_SEARCH_PATHS = ( + "$(inherited)", + "$(PROJECT_DIR)", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios-test"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Release; + }; + 50F7881D2497683A00D52237 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; + BUNDLE_LOADER = "$(TEST_HOST)"; + CODE_SIGN_STYLE = Automatic; + INFOPLIST_FILE = deepspeech_ios_testTests/Info.plist; + IPHONEOS_DEPLOYMENT_TARGET = 13.5; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios-testTests"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + TEST_HOST = 
"$(BUILT_PRODUCTS_DIR)/deepspeech_ios_test.app/deepspeech_ios_test"; + }; + name = Debug; + }; + 50F7881E2497683A00D52237 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; + BUNDLE_LOADER = "$(TEST_HOST)"; + CODE_SIGN_STYLE = Automatic; + INFOPLIST_FILE = deepspeech_ios_testTests/Info.plist; + IPHONEOS_DEPLOYMENT_TARGET = 13.5; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios-testTests"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + TEST_HOST = "$(BUILT_PRODUCTS_DIR)/deepspeech_ios_test.app/deepspeech_ios_test"; + }; + name = Release; + }; + 50F788202497683A00D52237 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; + CODE_SIGN_STYLE = Automatic; + INFOPLIST_FILE = deepspeech_ios_testUITests/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios-testUITests"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + TEST_TARGET_NAME = deepspeech_ios_test; + }; + name = Debug; + }; + 50F788212497683A00D52237 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; + CODE_SIGN_STYLE = Automatic; + INFOPLIST_FILE = deepspeech_ios_testUITests/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + "@loader_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.mozilla.deepspeech-ios-testUITests"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + TEST_TARGET_NAME = deepspeech_ios_test; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin 
XCConfigurationList section */ + 50F787EA2497683900D52237 /* Build configuration list for PBXProject "deepspeech_ios_test" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 50F788172497683A00D52237 /* Debug */, + 50F788182497683A00D52237 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 50F788192497683A00D52237 /* Build configuration list for PBXNativeTarget "deepspeech_ios_test" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 50F7881A2497683A00D52237 /* Debug */, + 50F7881B2497683A00D52237 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 50F7881C2497683A00D52237 /* Build configuration list for PBXNativeTarget "deepspeech_ios_testTests" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 50F7881D2497683A00D52237 /* Debug */, + 50F7881E2497683A00D52237 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 50F7881F2497683A00D52237 /* Build configuration list for PBXNativeTarget "deepspeech_ios_testUITests" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 50F788202497683A00D52237 /* Debug */, + 50F788212497683A00D52237 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 50F787E72497683900D52237 /* Project object */; +} diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.xcworkspace/contents.xcworkspacedata new file mode 100644 index 00000000..bb9f69fc --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,7 @@ + + + + + diff --git 
a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist new file mode 100644 index 00000000..18d98100 --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist @@ -0,0 +1,8 @@ + + + + + IDEDidComputeMac32BitWarning + + + diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/xcshareddata/xcschemes/deepspeech_ios_test.xcscheme b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/xcshareddata/xcschemes/deepspeech_ios_test.xcscheme new file mode 100644 index 00000000..c6adb9bb --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test.xcodeproj/xcshareddata/xcschemes/deepspeech_ios_test.xcscheme @@ -0,0 +1,106 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/AppDelegate.swift b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/AppDelegate.swift new file mode 100644 index 00000000..b589df39 --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/AppDelegate.swift @@ -0,0 +1,255 @@ +// +// AppDelegate.swift +// deepspeech_ios_test +// +// Created by Reuben Morais on 15.06.20. +// Copyright © 2020 Mozilla. All rights reserved. 
+// + +import UIKit +import Foundation +import AVFoundation +import AudioToolbox +import Accelerate + +import deepspeech_ios + +/// Holds audio information used for building waveforms +final class AudioContext { + + /// The audio asset URL used to load the context + public let audioURL: URL + + /// Total number of samples in loaded asset + public let totalSamples: Int + + /// Loaded asset + public let asset: AVAsset + + // Loaded assetTrack + public let assetTrack: AVAssetTrack + + private init(audioURL: URL, totalSamples: Int, asset: AVAsset, assetTrack: AVAssetTrack) { + self.audioURL = audioURL + self.totalSamples = totalSamples + self.asset = asset + self.assetTrack = assetTrack + } + + public static func load(fromAudioURL audioURL: URL, completionHandler: @escaping (_ audioContext: AudioContext?) -> ()) { + let asset = AVURLAsset(url: audioURL, options: [AVURLAssetPreferPreciseDurationAndTimingKey: NSNumber(value: true as Bool)]) + + guard let assetTrack = asset.tracks(withMediaType: AVMediaType.audio).first else { + fatalError("Couldn't load AVAssetTrack") + } + + asset.loadValuesAsynchronously(forKeys: ["duration"]) { + var error: NSError? + let status = asset.statusOfValue(forKey: "duration", error: &error) + switch status { + case .loaded: + guard + let formatDescriptions = assetTrack.formatDescriptions as? [CMAudioFormatDescription], + let audioFormatDesc = formatDescriptions.first, + let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(audioFormatDesc) + else { break } + + let totalSamples = Int((asbd.pointee.mSampleRate) * Float64(asset.duration.value) / Float64(asset.duration.timescale)) + let audioContext = AudioContext(audioURL: audioURL, totalSamples: totalSamples, asset: asset, assetTrack: assetTrack) + completionHandler(audioContext) + return + + case .failed, .cancelled, .loading, .unknown: + print("Couldn't load asset: \(error?.localizedDescription ?? 
"Unknown error")") + } + + completionHandler(nil) + } + } +} + +func render(audioContext: AudioContext?, stream: DeepSpeechStream) { + guard let audioContext = audioContext else { + fatalError("Couldn't create the audioContext") + } + + let sampleRange: CountableRange = 0..? + CMBlockBufferGetDataPointer(readBuffer, + atOffset: 0, + lengthAtOffsetOut: &readBufferLength, + totalLengthOut: nil, + dataPointerOut: &readBufferPointer) + sampleBuffer.append(UnsafeBufferPointer(start: readBufferPointer, count: readBufferLength)) + CMSampleBufferInvalidate(readSampleBuffer) + + let totalSamples = sampleBuffer.count / MemoryLayout.size + print("read \(totalSamples) samples") + + sampleBuffer.withUnsafeBytes { (samples: UnsafeRawBufferPointer) in + let unsafeBufferPointer = samples.bindMemory(to: Int16.self) + stream.feedAudioContent(buffer: unsafeBufferPointer) + } + + sampleBuffer.removeAll() + } + + // if (reader.status == AVAssetReaderStatusFailed || reader.status == AVAssetReaderStatusUnknown) + guard reader.status == .completed else { + fatalError("Couldn't read the audio file") + } +} + +func test(model: DeepSpeechModel, audioPath: String, completion: @escaping () -> ()) { + let url = URL(fileURLWithPath: audioPath) + + //var format = AudioStreamBasicDescription.init() + //format.mSampleRate = 16000; + //format.mFormatID = kAudioFormatLinearPCM; + //format.mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagsNativeEndian | kAudioFormatFlagIsPacked; + //format.mBitsPerChannel = 16; + //format.mChannelsPerFrame = 1; + //format.mBytesPerFrame = format.mChannelsPerFrame * format.mBitsPerChannel / 8; + //format.mFramesPerPacket = 1; + //format.mBytesPerPacket = format.mFramesPerPacket * format.mBytesPerFrame; + // + //var file = Optional.init(nilLiteral: ()); + //let status = ExtAudioFileCreateWithURL(url as CFURL, + // kAudioFileWAVEType, + // &format, + // nil, + // 0, + // &file) + //print("status: \(status)") + //let status2 = 
ExtAudioFileSetProperty(file!, + // kExtAudioFileProperty_ClientDataFormat, + // UInt32(MemoryLayout.size), + // &format) + //print("status: \(status2)") + // + //ExtAudioFileRead(file, <#T##ioNumberFrames: UnsafeMutablePointer##UnsafeMutablePointer#>, <#T##ioData: UnsafeMutablePointer##UnsafeMutablePointer#>) + + let stream = try! model.createStream() + print("\(audioPath)") + let start = CFAbsoluteTimeGetCurrent() + AudioContext.load(fromAudioURL: url, completionHandler: { audioContext in + guard let audioContext = audioContext else { + fatalError("Couldn't create the audioContext") + } + render(audioContext: audioContext, stream: stream) + let result = stream.finishStream() + let end = CFAbsoluteTimeGetCurrent() + print("\"\(audioPath)\": \(end - start) - \(result)") + completion() + }) + + //let file = try! AVAudioFile(forReading: url) + //print("file length \(file.length)") + //let format = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: 16000, channels: 1, interleaved: false)! + //let stream = createStream(modelState: modelState) + //while file.framePosition < file.length { + // let pcmBuf = AVAudioPCMBuffer.init(pcmFormat: format, frameCapacity: 8 * 1024)! // arbitrary frameCapacity + // try! file.read(into: pcmBuf) + // if pcmBuf.frameLength == 0 { + // break + // } + // print("read \(pcmBuf.frameLength) frames into buffer") + // let rawPtr = pcmBuf.audioBufferList.pointee.mBuffers.mData! + // let ptr = rawPtr.bindMemory(to: Int16.self, capacity: Int(pcmBuf.frameLength)) + // print("first few samples: \(ptr[0]) \(ptr[1]) \(ptr[2]) \(ptr[3]) ") + // DS_FeedAudioContent(stream, ptr, UInt32(pcmBuf.frameLength)) + //} + //let result = DS_FinishStream(stream) + //return String.init(cString: result!) +} + +@UIApplicationMain +class AppDelegate: UIResponder, UIApplicationDelegate { + func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool { + let model = try! 
DeepSpeechModel(modelPath: Bundle.main.path(forResource: "output_graph", ofType: "tflite")!) + try! model.enableExternalScorer(scorerPath: Bundle.main.path(forResource: "librispeech_en_utf8_nonpruned_o6", ofType: "scorer")!) + + let files = [ + "5639-40744-0008", + "1089-134686-0019", + "2094-142345-0053", + "8463-294825-0010", + "121-123852-0001", + "7021-79740-0008", + "6930-76324-0010", + "5105-28240-0001", + "1089-134691-0012", + "5142-33396-0027", + "260-123288-0004", + "6930-75918-0008", + "8463-294828-0005", + "61-70970-0002" + ] + + let serialQueue = DispatchQueue(label: "serialQueue") + let group = DispatchGroup() + group.enter() + serialQueue.async { + test(model: model, audioPath: Bundle.main.path(forResource: "1284-134647-0003", ofType: "wav")!) { + group.leave() + } + } + for path in files { + group.wait() + group.enter() + test(model: model, audioPath: Bundle.main.path(forResource: path, ofType: "wav")!) { + group.leave() + } + } + return true + } + + // MARK: UISceneSession Lifecycle + + func application(_ application: UIApplication, configurationForConnecting connectingSceneSession: UISceneSession, options: UIScene.ConnectionOptions) -> UISceneConfiguration { + // Called when a new scene session is being created. + // Use this method to select a configuration to create the new scene with. + return UISceneConfiguration(name: "Default Configuration", sessionRole: connectingSceneSession.role) + } + + func application(_ application: UIApplication, didDiscardSceneSessions sceneSessions: Set) { + // Called when the user discards a scene session. + // If any sessions were discarded while the application was not running, this will be called shortly after application:didFinishLaunchingWithOptions. + // Use this method to release any resources that were specific to the discarded scenes, as they will not return. 
+ } +} diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Assets.xcassets/AppIcon.appiconset/Contents.json b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Assets.xcassets/AppIcon.appiconset/Contents.json new file mode 100644 index 00000000..9221b9bb --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Assets.xcassets/AppIcon.appiconset/Contents.json @@ -0,0 +1,98 @@ +{ + "images" : [ + { + "idiom" : "iphone", + "scale" : "2x", + "size" : "20x20" + }, + { + "idiom" : "iphone", + "scale" : "3x", + "size" : "20x20" + }, + { + "idiom" : "iphone", + "scale" : "2x", + "size" : "29x29" + }, + { + "idiom" : "iphone", + "scale" : "3x", + "size" : "29x29" + }, + { + "idiom" : "iphone", + "scale" : "2x", + "size" : "40x40" + }, + { + "idiom" : "iphone", + "scale" : "3x", + "size" : "40x40" + }, + { + "idiom" : "iphone", + "scale" : "2x", + "size" : "60x60" + }, + { + "idiom" : "iphone", + "scale" : "3x", + "size" : "60x60" + }, + { + "idiom" : "ipad", + "scale" : "1x", + "size" : "20x20" + }, + { + "idiom" : "ipad", + "scale" : "2x", + "size" : "20x20" + }, + { + "idiom" : "ipad", + "scale" : "1x", + "size" : "29x29" + }, + { + "idiom" : "ipad", + "scale" : "2x", + "size" : "29x29" + }, + { + "idiom" : "ipad", + "scale" : "1x", + "size" : "40x40" + }, + { + "idiom" : "ipad", + "scale" : "2x", + "size" : "40x40" + }, + { + "idiom" : "ipad", + "scale" : "1x", + "size" : "76x76" + }, + { + "idiom" : "ipad", + "scale" : "2x", + "size" : "76x76" + }, + { + "idiom" : "ipad", + "scale" : "2x", + "size" : "83.5x83.5" + }, + { + "idiom" : "ios-marketing", + "scale" : "1x", + "size" : "1024x1024" + } + ], + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Assets.xcassets/Contents.json b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Assets.xcassets/Contents.json new file mode 100644 index 00000000..73c00596 --- /dev/null +++ 
b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Assets.xcassets/Contents.json @@ -0,0 +1,6 @@ +{ + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Base.lproj/LaunchScreen.storyboard b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Base.lproj/LaunchScreen.storyboard new file mode 100644 index 00000000..865e9329 --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Base.lproj/LaunchScreen.storyboard @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/ContentView.swift b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/ContentView.swift new file mode 100644 index 00000000..5f7442f9 --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/ContentView.swift @@ -0,0 +1,21 @@ +// +// ContentView.swift +// deepspeech_ios_test +// +// Created by Reuben Morais on 15.06.20. +// Copyright © 2020 Mozilla. All rights reserved. 
+// + +import SwiftUI + +struct ContentView: View { + var body: some View { + Text("Hello, World!") + } +} + +struct ContentView_Previews: PreviewProvider { + static var previews: some View { + ContentView() + } +} diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Info.plist b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Info.plist new file mode 100644 index 00000000..9742bf0f --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Info.plist @@ -0,0 +1,60 @@ + + + + + CFBundleDevelopmentRegion + $(DEVELOPMENT_LANGUAGE) + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + $(PRODUCT_BUNDLE_PACKAGE_TYPE) + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + LSRequiresIPhoneOS + + UIApplicationSceneManifest + + UIApplicationSupportsMultipleScenes + + UISceneConfigurations + + UIWindowSceneSessionRoleApplication + + + UISceneConfigurationName + Default Configuration + UISceneDelegateClassName + $(PRODUCT_MODULE_NAME).SceneDelegate + + + + + UILaunchStoryboardName + LaunchScreen + UIRequiredDeviceCapabilities + + armv7 + + UISupportedInterfaceOrientations + + UIInterfaceOrientationPortrait + UIInterfaceOrientationLandscapeLeft + UIInterfaceOrientationLandscapeRight + + UISupportedInterfaceOrientations~ipad + + UIInterfaceOrientationPortrait + UIInterfaceOrientationPortraitUpsideDown + UIInterfaceOrientationLandscapeLeft + UIInterfaceOrientationLandscapeRight + + + diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Preview Content/Preview Assets.xcassets/Contents.json b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Preview Content/Preview Assets.xcassets/Contents.json new file mode 100644 index 00000000..73c00596 --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/Preview Content/Preview 
Assets.xcassets/Contents.json @@ -0,0 +1,6 @@ +{ + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/SceneDelegate.swift b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/SceneDelegate.swift new file mode 100644 index 00000000..40d85e4a --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/SceneDelegate.swift @@ -0,0 +1,64 @@ +// +// SceneDelegate.swift +// deepspeech_ios_test +// +// Created by Reuben Morais on 15.06.20. +// Copyright © 2020 Mozilla. All rights reserved. +// + +import UIKit +import SwiftUI + +class SceneDelegate: UIResponder, UIWindowSceneDelegate { + + var window: UIWindow? + + + func scene(_ scene: UIScene, willConnectTo session: UISceneSession, options connectionOptions: UIScene.ConnectionOptions) { + // Use this method to optionally configure and attach the UIWindow `window` to the provided UIWindowScene `scene`. + // If using a storyboard, the `window` property will automatically be initialized and attached to the scene. + // This delegate does not imply the connecting scene or session are new (see `application:configurationForConnectingSceneSession` instead). + + // Create the SwiftUI view that provides the window contents. + let contentView = ContentView() + + // Use a UIHostingController as window root view controller. + if let windowScene = scene as? UIWindowScene { + let window = UIWindow(windowScene: windowScene) + window.rootViewController = UIHostingController(rootView: contentView) + self.window = window + window.makeKeyAndVisible() + } + } + + func sceneDidDisconnect(_ scene: UIScene) { + // Called as the scene is being released by the system. + // This occurs shortly after the scene enters the background, or when its session is discarded. + // Release any resources associated with this scene that can be re-created the next time the scene connects. 
+ // The scene may re-connect later, as its session was not neccessarily discarded (see `application:didDiscardSceneSessions` instead). + } + + func sceneDidBecomeActive(_ scene: UIScene) { + // Called when the scene has moved from an inactive state to an active state. + // Use this method to restart any tasks that were paused (or not yet started) when the scene was inactive. + } + + func sceneWillResignActive(_ scene: UIScene) { + // Called when the scene will move from an active state to an inactive state. + // This may occur due to temporary interruptions (ex. an incoming phone call). + } + + func sceneWillEnterForeground(_ scene: UIScene) { + // Called as the scene transitions from the background to the foreground. + // Use this method to undo the changes made on entering the background. + } + + func sceneDidEnterBackground(_ scene: UIScene) { + // Called as the scene transitions from the foreground to the background. + // Use this method to save data, release shared resources, and store enough scene-specific state information + // to restore the scene back to its current state. 
+ } + + +} + diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_testTests/Info.plist b/native_client/swift/deepspeech_ios_test/deepspeech_ios_testTests/Info.plist new file mode 100644 index 00000000..64d65ca4 --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_testTests/Info.plist @@ -0,0 +1,22 @@ + + + + + CFBundleDevelopmentRegion + $(DEVELOPMENT_LANGUAGE) + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + $(PRODUCT_BUNDLE_PACKAGE_TYPE) + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + + diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_testTests/deepspeech_ios_testTests.swift b/native_client/swift/deepspeech_ios_test/deepspeech_ios_testTests/deepspeech_ios_testTests.swift new file mode 100644 index 00000000..0e5b449d --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_testTests/deepspeech_ios_testTests.swift @@ -0,0 +1,34 @@ +// +// deepspeech_ios_testTests.swift +// deepspeech_ios_testTests +// +// Created by Reuben Morais on 15.06.20. +// Copyright © 2020 Mozilla. All rights reserved. +// + +import XCTest +@testable import deepspeech_ios_test + +class deepspeech_ios_testTests: XCTestCase { + + override func setUpWithError() throws { + // Put setup code here. This method is called before the invocation of each test method in the class. + } + + override func tearDownWithError() throws { + // Put teardown code here. This method is called after the invocation of each test method in the class. + } + + func testExample() throws { + // This is an example of a functional test case. + // Use XCTAssert and related functions to verify your tests produce the correct results. + } + + func testPerformanceExample() throws { + // This is an example of a performance test case. + self.measure { + // Put the code you want to measure the time of here. 
+ } + } + +} diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_testUITests/Info.plist b/native_client/swift/deepspeech_ios_test/deepspeech_ios_testUITests/Info.plist new file mode 100644 index 00000000..64d65ca4 --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_testUITests/Info.plist @@ -0,0 +1,22 @@ + + + + + CFBundleDevelopmentRegion + $(DEVELOPMENT_LANGUAGE) + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + $(PRODUCT_BUNDLE_PACKAGE_TYPE) + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + + diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_testUITests/deepspeech_ios_testUITests.swift b/native_client/swift/deepspeech_ios_test/deepspeech_ios_testUITests/deepspeech_ios_testUITests.swift new file mode 100644 index 00000000..493a6b8d --- /dev/null +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_testUITests/deepspeech_ios_testUITests.swift @@ -0,0 +1,43 @@ +// +// deepspeech_ios_testUITests.swift +// deepspeech_ios_testUITests +// +// Created by Reuben Morais on 15.06.20. +// Copyright © 2020 Mozilla. All rights reserved. +// + +import XCTest + +class deepspeech_ios_testUITests: XCTestCase { + + override func setUpWithError() throws { + // Put setup code here. This method is called before the invocation of each test method in the class. + + // In UI tests it is usually best to stop immediately when a failure occurs. + continueAfterFailure = false + + // In UI tests it’s important to set the initial state - such as interface orientation - required for your tests before they run. The setUp method is a good place to do this. + } + + override func tearDownWithError() throws { + // Put teardown code here. This method is called after the invocation of each test method in the class. 
+ } + + func testExample() throws { + // UI tests must launch the application that they test. + let app = XCUIApplication() + app.launch() + + // Use recording to get started writing UI tests. + // Use XCTAssert and related functions to verify your tests produce the correct results. + } + + func testLaunchPerformance() throws { + if #available(macOS 10.15, iOS 13.0, tvOS 13.0, *) { + // This measures how long it takes to launch your application. + measure(metrics: [XCTOSSignpostMetric.applicationLaunch]) { + XCUIApplication().launch() + } + } + } +} From f7c50663e1094631b1b6931aba5cb19cd695ac71 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 14 Jul 2020 16:05:00 +0200 Subject: [PATCH 06/16] Checkout fixed formulas commit in tf_tc-brew.sh --- taskcluster/darwin-opt-base.tyml | 7 +-- taskcluster/tf_tc-brew.sh | 76 ++++++++++++++++++-------------- 2 files changed, 46 insertions(+), 37 deletions(-) diff --git a/taskcluster/darwin-opt-base.tyml b/taskcluster/darwin-opt-base.tyml index ea7ba95d..7503fd52 100644 --- a/taskcluster/darwin-opt-base.tyml +++ b/taskcluster/darwin-opt-base.tyml @@ -35,11 +35,8 @@ payload: # There is no VM yet running tasks on OSX # so one should install by hand: # - brew - # - xcode (brew would install) - # - brew install gnu-tar - # - brew install git - # - brew install pixz - # - brew cask install java + # - Xcode 10.1 in /Applications/Xcode.app, then sudo chown -R root:wheel /Applications/Xcode.app + # - brew install gnu-tar git pixz wget coreutils pyenv-virtualenv # - sudo easy_install pip command: diff --git a/taskcluster/tf_tc-brew.sh b/taskcluster/tf_tc-brew.sh index 297b2bba..8d4128a2 100755 --- a/taskcluster/tf_tc-brew.sh +++ b/taskcluster/tf_tc-brew.sh @@ -2,43 +2,55 @@ set -ex -if [ -z "${TASKCLUSTER_TASK_DIR}" ]; then - echo "No TASKCLUSTER_TASK_DIR, aborting." - exit 1 -fi +# if [ -z "${TASKCLUSTER_TASK_DIR}" ]; then +# echo "No TASKCLUSTER_TASK_DIR, aborting." 
+# exit 1 +# fi -LOCAL_BREW="${TASKCLUSTER_TASK_DIR}/homebrew" -export PATH=${LOCAL_BREW}/bin:$PATH -export HOMEBREW_LOGS="${TASKCLUSTER_TASK_DIR}/homebrew.logs/" -export HOMEBREW_CACHE="${TASKCLUSTER_TASK_DIR}/homebrew.cache/" +# LOCAL_BREW="${TASKCLUSTER_TASK_DIR}/homebrew" +# export PATH=${LOCAL_BREW}/bin:$PATH +# export HOMEBREW_LOGS="${TASKCLUSTER_TASK_DIR}/homebrew.logs/" +# export HOMEBREW_CACHE="${TASKCLUSTER_TASK_DIR}/homebrew.cache/" +# export HOMEBREW_FORMULAS_COMMIT=93fe256e0168db3b1c70c26a01941be59ce76311 +# export HOMEBREW_NO_AUTO_UPDATE=1 -# Never fail on pre-existing homebrew/ directory -mkdir -p "${LOCAL_BREW}" || true -mkdir -p "${HOMEBREW_CACHE}" || true +# # Never fail on pre-existing homebrew/ directory +# mkdir -p "${LOCAL_BREW}" || true +# mkdir -p "${HOMEBREW_CACHE}" || true -# Make sure to verify there is a 'brew' binary there, otherwise install things. -if [ ! -x "${LOCAL_BREW}/bin/brew" ]; then - curl -L https://github.com/Homebrew/brew/tarball/2.2.17 | tar xz --strip 1 -C "${LOCAL_BREW}" -fi; +# # Make sure to verify there is a 'brew' binary there, otherwise install things. +# if [ ! -x "${LOCAL_BREW}/bin/brew" ]; then +# curl -L https://github.com/Homebrew/brew/tarball/2.2.17 | tar xz --strip 1 -C "${LOCAL_BREW}" +# fi; -echo "local brew list (should be empty) ..." -brew list +# echo "local brew list (should be empty) ..." +# brew list -echo "local brew prefix ..." -local_prefix=$(brew --prefix) -echo "${local_prefix}" +# echo "local brew prefix ..." 
+# local_prefix=$(brew --prefix) +# echo "${local_prefix}" -if [ "${LOCAL_BREW}" != "${local_prefix}" ]; then - echo "Weird state:" - echo "LOCAL_BREW=${LOCAL_BREW}" - echo "local_prefix=${local_prefix}" - exit 1 -fi; +# if [ "${LOCAL_BREW}" != "${local_prefix}" ]; then +# echo "Weird state:" +# echo "LOCAL_BREW=${LOCAL_BREW}" +# echo "local_prefix=${local_prefix}" +# exit 1 +# fi; -# coreutils, pyenv-virtualenv required for build of tensorflow -all_pkgs="coreutils pyenv-virtualenv" -for pkg in ${all_pkgs}; -do - (brew list --versions ${pkg} && brew upgrade ${pkg}) || brew install ${pkg} -done; +# # Then we force onto a specific well-known commit +# mkdir -p "$(brew --prefix)/Library/Taps/homebrew/homebrew-core" +# pushd "$(brew --prefix)/Library/Taps/homebrew/homebrew-core" +# git init +# git remote add origin https://github.com/Homebrew/homebrew-core.git +# git fetch origin +# git checkout ${HOMEBREW_FORMULAS_COMMIT} +# popd + +# # coreutils, pyenv-virtualenv required for build of tensorflow +# all_pkgs="coreutils pyenv-virtualenv" + +# for pkg in ${all_pkgs}; +# do +# (brew list --versions ${pkg} && brew upgrade ${pkg}) || brew install ${pkg} +# done; From e8d642bf44c9285f33efc0cba05e28d45e0cbad4 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 14 Jul 2020 22:33:48 +0200 Subject: [PATCH 07/16] Bump TensorFlow to remove usage of -z linker keyword on iOS --- taskcluster/.shared.yml | 44 ++++++++++++++++++++--------------------- tensorflow | 2 +- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index 9323c995..01d00a2a 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -142,38 +142,38 @@ system: namespace: "project.deepspeech.swig.win.amd64.b5fea54d39832d1d132d7dd921b69c0c2c9d5118" tensorflow: linux_amd64_cpu: - url: 
"https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.cpu/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.cpu" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.cpu/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.cpu" linux_amd64_cuda: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.cuda/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.cuda" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.cuda/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.cuda" linux_armv7: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.arm/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.arm" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm" linux_arm64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.arm64/artifacts/public/home.tar.xz" - namespace: 
"project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.arm64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm64" darwin_amd64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.osx/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.osx" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.osx/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.osx" android_arm64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.android-arm64/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.android-arm64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-arm64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-arm64" android_armv7: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.android-armv7/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.android-armv7" + url: 
"https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-armv7/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-armv7" win_amd64_cpu: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.win/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.win" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win" win_amd64_cuda: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.win-cuda/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.win-cuda" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win-cuda/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win-cuda" ios_arm64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.ios_arm64/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.ios_arm64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.ios_arm64/artifacts/public/home.tar.xz" + namespace: 
"project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.ios_arm64" ios_x86_64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.ios_x86_64/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.fc464111ac5c49791e44122e5946e521b25840bd.ios_x86_64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.ios_x86_64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.ios_x86_64" username: 'build-user' homedir: linux: '/home/build-user' diff --git a/tensorflow b/tensorflow index fc464111..0854bb51 160000 --- a/tensorflow +++ b/tensorflow @@ -1 +1 @@ -Subproject commit fc464111ac5c49791e44122e5946e521b25840bd +Subproject commit 0854bb5188a3150a4d75a1c71ee610b0d45cfcb1 From 6c38d569685a8209295ccc021607e408b60c0d05 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Wed, 15 Jul 2020 10:10:42 +0200 Subject: [PATCH 08/16] Use submodule TF tc-vars.sh --- taskcluster/ios-build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/taskcluster/ios-build.sh b/taskcluster/ios-build.sh index 1b913130..bb885d36 100755 --- a/taskcluster/ios-build.sh +++ b/taskcluster/ios-build.sh @@ -4,7 +4,7 @@ set -xe source $(dirname "$0")/tc-tests-utils.sh -source ${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh +source $(dirname "$0")/tf_tc-vars.sh BAZEL_TARGETS=" //native_client:libdeepspeech.so From 2f568e77858c8bea93c67362b0f16c458bf481f7 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Wed, 15 Jul 2020 14:38:56 +0200 Subject: [PATCH 09/16] Don't use BAZEL_OPT_FLAGS in iOS builds --- taskcluster/.shared.yml | 8 ++++---- taskcluster/ios-build.sh | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index 
01d00a2a..c64a5bc9 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -169,11 +169,11 @@ system: url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win-cuda/artifacts/public/home.tar.xz" namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win-cuda" ios_arm64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.ios_arm64/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.ios_arm64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.1.ios_arm64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.1.ios_arm64" ios_x86_64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.ios_x86_64/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.ios_x86_64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.1.ios_x86_64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.1.ios_x86_64" username: 'build-user' homedir: linux: '/home/build-user' diff --git a/taskcluster/ios-build.sh b/taskcluster/ios-build.sh index bb885d36..bc8860e6 100755 --- a/taskcluster/ios-build.sh +++ b/taskcluster/ios-build.sh @@ -10,7 +10,7 @@ BAZEL_TARGETS=" //native_client:libdeepspeech.so " -BAZEL_BUILD_FLAGS="--config=ios_arm64 --define=runtime=tflite ${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS}" 
+BAZEL_BUILD_FLAGS="--config=ios_arm64 --define=runtime=tflite ${BAZEL_EXTRA_FLAGS}" BAZEL_ENV_FLAGS="TF_NEED_CUDA=0" From aa8e9b064736c3c483391ccc62eacf61eb8bd472 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Thu, 16 Jul 2020 01:46:19 +0200 Subject: [PATCH 10/16] Use correct build flags for ARM64 vs x86_64 --- taskcluster/ios-arm64-tflite-opt.yml | 2 +- taskcluster/ios-build.sh | 8 +++++++- taskcluster/ios-x86_64-tflite-opt.yml | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/taskcluster/ios-arm64-tflite-opt.yml b/taskcluster/ios-arm64-tflite-opt.yml index dab0d24b..cd85ca7f 100644 --- a/taskcluster/ios-arm64-tflite-opt.yml +++ b/taskcluster/ios-arm64-tflite-opt.yml @@ -12,7 +12,7 @@ build: - "index.project.deepspeech.deepspeech.native_client.ios_arm64-tflite.${event.head.sha}" tensorflow: ${system.tensorflow.ios_arm64.url} scripts: - build: "taskcluster/ios-build.sh" + build: "taskcluster/ios-build.sh --arm64" package: "taskcluster/package.sh" nc_asset_name: "native_client.arm64.tflite.ios.tar.xz" maxRunTime: 14400 diff --git a/taskcluster/ios-build.sh b/taskcluster/ios-build.sh index bc8860e6..aa3e8ec6 100755 --- a/taskcluster/ios-build.sh +++ b/taskcluster/ios-build.sh @@ -2,6 +2,8 @@ set -xe +platform=$1 + source $(dirname "$0")/tc-tests-utils.sh source $(dirname "$0")/tf_tc-vars.sh @@ -10,7 +12,11 @@ BAZEL_TARGETS=" //native_client:libdeepspeech.so " -BAZEL_BUILD_FLAGS="--config=ios_arm64 --define=runtime=tflite ${BAZEL_EXTRA_FLAGS}" +if [ "${platform}" = "--arm64" ]; then + BAZEL_BUILD_FLAGS="${BAZEL_IOS_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS}" +else + BAZEL_BUILD_FLAGS="${BAZEL_IOS_X86_64_FLAGS} ${BAZEL_EXTRA_FLAGS}" +fi BAZEL_ENV_FLAGS="TF_NEED_CUDA=0" diff --git a/taskcluster/ios-x86_64-tflite-opt.yml b/taskcluster/ios-x86_64-tflite-opt.yml index 144f0bd5..b55cebe7 100644 --- a/taskcluster/ios-x86_64-tflite-opt.yml +++ b/taskcluster/ios-x86_64-tflite-opt.yml @@ -12,7 +12,7 @@ build: - 
"index.project.deepspeech.deepspeech.native_client.ios_x86_64-tflite.${event.head.sha}" tensorflow: ${system.tensorflow.ios_x86_64.url} scripts: - build: "taskcluster/ios-build.sh" + build: "taskcluster/ios-build.sh --x86_64" package: "taskcluster/package.sh" nc_asset_name: "native_client.x86_64.tflite.ios.tar.xz" maxRunTime: 14400 From f0f4b0ddc17317c4f809898265eb6f422dfc328f Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Thu, 16 Jul 2020 09:20:50 +0200 Subject: [PATCH 11/16] Remove even more bazel flags --- taskcluster/.shared.yml | 8 ++++---- taskcluster/ios-build.sh | 4 ++-- taskcluster/tf_tc-build.sh | 4 ++-- taskcluster/tf_tc-vars.sh | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index c64a5bc9..b8eef38b 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -169,11 +169,11 @@ system: url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win-cuda/artifacts/public/home.tar.xz" namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.win-cuda" ios_arm64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.1.ios_arm64/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.1.ios_arm64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.3.ios_arm64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.3.ios_arm64" ios_x86_64: - url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.1.ios_x86_64/artifacts/public/home.tar.xz" - 
namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.1.ios_x86_64" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.3.ios_x86_64/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.3.ios_x86_64" username: 'build-user' homedir: linux: '/home/build-user' diff --git a/taskcluster/ios-build.sh b/taskcluster/ios-build.sh index aa3e8ec6..ed99cb60 100755 --- a/taskcluster/ios-build.sh +++ b/taskcluster/ios-build.sh @@ -13,9 +13,9 @@ BAZEL_TARGETS=" " if [ "${platform}" = "--arm64" ]; then - BAZEL_BUILD_FLAGS="${BAZEL_IOS_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS}" + BAZEL_BUILD_FLAGS="${BAZEL_IOS_ARM64_FLAGS}" else - BAZEL_BUILD_FLAGS="${BAZEL_IOS_X86_64_FLAGS} ${BAZEL_EXTRA_FLAGS}" + BAZEL_BUILD_FLAGS="${BAZEL_IOS_X86_64_FLAGS}" fi BAZEL_ENV_FLAGS="TF_NEED_CUDA=0" diff --git a/taskcluster/tf_tc-build.sh b/taskcluster/tf_tc-build.sh index cb620906..ed3c306e 100755 --- a/taskcluster/tf_tc-build.sh +++ b/taskcluster/tf_tc-build.sh @@ -91,11 +91,11 @@ pushd ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/ fi; if [ "${build_ios_arm64}" = "yes" ]; then - echo "" | TF_NEED_CUDA=0 TF_CONFIGURE_IOS=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_IOS_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} + echo "" | TF_NEED_CUDA=0 TF_CONFIGURE_IOS=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_IOS_ARM64_FLAGS} ${BUILD_TARGET_LITE_LIB} fi; if [ "${build_ios_x86_64}" = "yes" ]; then - echo "" | TF_NEED_CUDA=0 TF_CONFIGURE_IOS=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_IOS_X86_64_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} + echo "" | TF_NEED_CUDA=0 TF_CONFIGURE_IOS=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_IOS_X86_64_FLAGS} ${BUILD_TARGET_LITE_LIB} fi; if [ $? 
-ne 0 ]; then diff --git a/taskcluster/tf_tc-vars.sh b/taskcluster/tf_tc-vars.sh index 8c30ea2a..b768ad4b 100755 --- a/taskcluster/tf_tc-vars.sh +++ b/taskcluster/tf_tc-vars.sh @@ -171,8 +171,8 @@ BAZEL_ARM64_FLAGS="--config=rpi3-armv8 --config=rpi3-armv8_opt --copt=-DTFLITE_W BAZEL_ANDROID_ARM_FLAGS="--config=android --config=android_arm --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 --copt=-DTFLITE_WITH_RUY_GEMV" BAZEL_ANDROID_ARM64_FLAGS="--config=android --config=android_arm64 --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 --copt=-DTFLITE_WITH_RUY_GEMV" BAZEL_CUDA_FLAGS="--config=cuda" -BAZEL_IOS_ARM64_FLAGS="--config=ios_arm64 --copt=-DTFLITE_WITH_RUY_GEMV" -BAZEL_IOS_X86_64_FLAGS="--config=ios_x86_64 --copt=-DTFLITE_WITH_RUY_GEMV" +BAZEL_IOS_ARM64_FLAGS="--config=ios_arm64 --define=runtime=tflite --copt=-DTFLITE_WITH_RUY_GEMV" +BAZEL_IOS_X86_64_FLAGS="--config=ios_x86_64 --define=runtime=tflite --copt=-DTFLITE_WITH_RUY_GEMV" if [ "${OS}" = "${TC_MSYS_VERSION}" ]; then # Somehow, even with Python being in the PATH, Bazel on windows struggles From be43b3fdc1ba0636e9a716a796d2ebb117485957 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Thu, 16 Jul 2020 22:22:03 +0200 Subject: [PATCH 12/16] Bump caches for artifacts rebuilt on new worker --- taskcluster/.shared.yml | 4 ++-- taskcluster/tc-build-utils.sh | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index b8eef38b..d80ef2f7 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -154,8 +154,8 @@ system: url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm64/artifacts/public/home.tar.xz" namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.arm64" darwin_amd64: - url: 
"https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.osx/artifacts/public/home.tar.xz" - namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.osx" + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.3.osx/artifacts/public/home.tar.xz" + namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.3.osx" android_arm64: url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-arm64/artifacts/public/home.tar.xz" namespace: "project.deepspeech.tensorflow.pip.r2.2.0854bb5188a3150a4d75a1c71ee610b0d45cfcb1.android-arm64" diff --git a/taskcluster/tc-build-utils.sh b/taskcluster/tc-build-utils.sh index f4042fbd..75645582 100755 --- a/taskcluster/tc-build-utils.sh +++ b/taskcluster/tc-build-utils.sh @@ -33,6 +33,8 @@ do_deepspeech_python_build() virtualenv_activate "${pyalias}" "deepspeech" python --version + pip --version + pip3 --version which pip which pip3 From de7a249fcd9c7e5f3eebdee4357626d8098569d9 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Sun, 19 Jul 2020 15:04:04 +0200 Subject: [PATCH 13/16] Fix linker issues during tests with new workers --- taskcluster/.build.yml | 2 +- taskcluster/homebrew-build.sh | 9 +++++---- taskcluster/homebrew_builds-darwin-amd64.yml | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/taskcluster/.build.yml b/taskcluster/.build.yml index 76fc9703..61fadf17 100644 --- a/taskcluster/.build.yml +++ b/taskcluster/.build.yml @@ -25,7 +25,7 @@ build: nc_asset_name: 'native_client.tar.xz' args: tests_cmdline: '' - tensorflow_git_desc: 'TensorFlow: v2.2.0-16-gfc46411' + tensorflow_git_desc: 'TensorFlow: v2.2.0-17-g0854bb5' test_model_task: '' homebrew: url: '' diff --git 
a/taskcluster/homebrew-build.sh b/taskcluster/homebrew-build.sh index 6cdda80a..4ddb9ad8 100755 --- a/taskcluster/homebrew-build.sh +++ b/taskcluster/homebrew-build.sh @@ -42,12 +42,13 @@ do_prepare_homebrew() check_homebrew "${_brew_instance}" - # Force an upgrade to fetch formulae - brew search openssl - # Then we force onto a specific well-known commit + mkdir -p "$(brew --prefix)/Library/Taps/homebrew/homebrew-core" pushd "$(brew --prefix)/Library/Taps/homebrew/homebrew-core" - git fetch origin && git checkout ${BREW_FORMULAS_COMMIT} + git init + git remote add origin https://github.com/Homebrew/homebrew-core.git + git fetch origin + git checkout ${BREW_FORMULAS_COMMIT} popd } diff --git a/taskcluster/homebrew_builds-darwin-amd64.yml b/taskcluster/homebrew_builds-darwin-amd64.yml index d2fe03f5..f5d60902 100644 --- a/taskcluster/homebrew_builds-darwin-amd64.yml +++ b/taskcluster/homebrew_builds-darwin-amd64.yml @@ -9,4 +9,4 @@ build: package: "taskcluster/homebrew-package.sh --builds" metadata: name: "Builds Homebrew macOS AMD64" - description: "Setup a buildsl Homebrew for macOS/AMD64" + description: "Setup a builds Homebrew for macOS/AMD64" From 5e5db17371f167c57ad8255e8324629acf1dea7d Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Mon, 20 Jul 2020 11:16:57 +0200 Subject: [PATCH 14/16] Address review comments --- .../swift/deepspeech_ios/deepspeech.h | 357 ------------------ .../deepspeech_ios_test/AppDelegate.swift | 45 --- taskcluster/.shared.yml | 2 - taskcluster/tf_darwin-amd64-opt.yml | 3 - taskcluster/tf_ios-arm64-opt.yml | 3 - taskcluster/tf_ios-x86_64-opt.yml | 3 - taskcluster/tf_tc-brew.sh | 56 --- 7 files changed, 469 deletions(-) delete mode 100644 native_client/swift/deepspeech_ios/deepspeech.h delete mode 100755 taskcluster/tf_tc-brew.sh diff --git a/native_client/swift/deepspeech_ios/deepspeech.h b/native_client/swift/deepspeech_ios/deepspeech.h deleted file mode 100644 index 1df3cf2e..00000000 --- 
a/native_client/swift/deepspeech_ios/deepspeech.h +++ /dev/null @@ -1,357 +0,0 @@ -#ifndef DEEPSPEECH_H -#define DEEPSPEECH_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef SWIG - #if defined _MSC_VER - #define DEEPSPEECH_EXPORT __declspec(dllexport) - #else - #define DEEPSPEECH_EXPORT __attribute__ ((visibility("default"))) - #endif /*End of _MSC_VER*/ -#else - #define DEEPSPEECH_EXPORT -#endif - -typedef struct ModelState ModelState; - -typedef struct StreamingState StreamingState; - -/** - * @brief Stores text of an individual token, along with its timing information - */ -typedef struct TokenMetadata { - /** The text corresponding to this token */ - const char* const text; - - /** Position of the token in units of 20ms */ - const unsigned int timestep; - - /** Position of the token in seconds */ - const float start_time; -} TokenMetadata; - -/** - * @brief A single transcript computed by the model, including a confidence - * value and the metadata for its constituent tokens. - */ -typedef struct CandidateTranscript { - /** Array of TokenMetadata objects */ - const TokenMetadata* const tokens; - /** Size of the tokens array */ - const unsigned int num_tokens; - /** Approximated confidence value for this transcript. This is roughly the - * sum of the acoustic model logit values for each timestep/character that - * contributed to the creation of this transcript. - */ - const double confidence; -} CandidateTranscript; - -/** - * @brief An array of CandidateTranscript objects computed by the model. 
- */ -typedef struct Metadata { - /** Array of CandidateTranscript objects */ - const CandidateTranscript* const transcripts; - /** Size of the transcripts array */ - const unsigned int num_transcripts; -} Metadata; - -// sphinx-doc: error_code_listing_start - -#define DS_FOR_EACH_ERROR(APPLY) \ - APPLY(DS_ERR_OK, 0x0000, "No error.") \ - APPLY(DS_ERR_NO_MODEL, 0x1000, "Missing model information.") \ - APPLY(DS_ERR_INVALID_ALPHABET, 0x2000, "Invalid alphabet embedded in model. (Data corruption?)") \ - APPLY(DS_ERR_INVALID_SHAPE, 0x2001, "Invalid model shape.") \ - APPLY(DS_ERR_INVALID_SCORER, 0x2002, "Invalid scorer file.") \ - APPLY(DS_ERR_MODEL_INCOMPATIBLE, 0x2003, "Incompatible model.") \ - APPLY(DS_ERR_SCORER_NOT_ENABLED, 0x2004, "External scorer is not enabled.") \ - APPLY(DS_ERR_SCORER_UNREADABLE, 0x2005, "Could not read scorer file.") \ - APPLY(DS_ERR_SCORER_INVALID_LM, 0x2006, "Could not recognize language model header in scorer.") \ - APPLY(DS_ERR_SCORER_NO_TRIE, 0x2007, "Reached end of scorer file before loading vocabulary trie.") \ - APPLY(DS_ERR_SCORER_INVALID_TRIE, 0x2008, "Invalid magic in trie header.") \ - APPLY(DS_ERR_SCORER_VERSION_MISMATCH, 0x2009, "Scorer file version does not match expected version.") \ - APPLY(DS_ERR_FAIL_INIT_MMAP, 0x3000, "Failed to initialize memory mapped model.") \ - APPLY(DS_ERR_FAIL_INIT_SESS, 0x3001, "Failed to initialize the session.") \ - APPLY(DS_ERR_FAIL_INTERPRETER, 0x3002, "Interpreter failed.") \ - APPLY(DS_ERR_FAIL_RUN_SESS, 0x3003, "Failed to run the session.") \ - APPLY(DS_ERR_FAIL_CREATE_STREAM, 0x3004, "Error creating the stream.") \ - APPLY(DS_ERR_FAIL_READ_PROTOBUF, 0x3005, "Error reading the proto buffer model file.") \ - APPLY(DS_ERR_FAIL_CREATE_SESS, 0x3006, "Failed to create session.") \ - APPLY(DS_ERR_FAIL_CREATE_MODEL, 0x3007, "Could not allocate model state.") - -// sphinx-doc: error_code_listing_end - -enum DeepSpeech_Error_Codes -{ -#define DEFINE(NAME, VALUE, DESC) NAME = VALUE, 
-DS_FOR_EACH_ERROR(DEFINE) -#undef DEFINE -}; - -/** - * @brief An object providing an interface to a trained DeepSpeech model. - * - * @param aModelPath The path to the frozen model graph. - * @param[out] retval a ModelState pointer - * - * @return Zero on success, non-zero on failure. - */ -DEEPSPEECH_EXPORT -int DS_CreateModel(const char* aModelPath, - ModelState** retval); - -/** - * @brief Get beam width value used by the model. If {@link DS_SetModelBeamWidth} - * was not called before, will return the default value loaded from the - * model file. - * - * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. - * - * @return Beam width value used by the model. - */ -DEEPSPEECH_EXPORT -unsigned int DS_GetModelBeamWidth(const ModelState* aCtx); - -/** - * @brief Set beam width value used by the model. - * - * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. - * @param aBeamWidth The beam width used by the model. A larger beam width value - * generates better results at the cost of decoding time. - * - * @return Zero on success, non-zero on failure. - */ -DEEPSPEECH_EXPORT -int DS_SetModelBeamWidth(ModelState* aCtx, - unsigned int aBeamWidth); - -/** - * @brief Return the sample rate expected by a model. - * - * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. - * - * @return Sample rate expected by the model for its input. - */ -DEEPSPEECH_EXPORT -int DS_GetModelSampleRate(const ModelState* aCtx); - -/** - * @brief Frees associated resources and destroys model object. - */ -DEEPSPEECH_EXPORT -void DS_FreeModel(ModelState* ctx); - -/** - * @brief Enable decoding using an external scorer. - * - * @param aCtx The ModelState pointer for the model being changed. - * @param aScorerPath The path to the external scorer file. - * - * @return Zero on success, non-zero on failure (invalid arguments). 
- */ -DEEPSPEECH_EXPORT -int DS_EnableExternalScorer(ModelState* aCtx, - const char* aScorerPath); - -/** - * @brief Disable decoding using an external scorer. - * - * @param aCtx The ModelState pointer for the model being changed. - * - * @return Zero on success, non-zero on failure. - */ -DEEPSPEECH_EXPORT -int DS_DisableExternalScorer(ModelState* aCtx); - -/** - * @brief Set hyperparameters alpha and beta of the external scorer. - * - * @param aCtx The ModelState pointer for the model being changed. - * @param aAlpha The alpha hyperparameter of the decoder. Language model weight. - * @param aLMBeta The beta hyperparameter of the decoder. Word insertion weight. - * - * @return Zero on success, non-zero on failure. - */ -DEEPSPEECH_EXPORT -int DS_SetScorerAlphaBeta(ModelState* aCtx, - float aAlpha, - float aBeta); - -/** - * @brief Use the DeepSpeech model to convert speech to text. - * - * @param aCtx The ModelState pointer for the model to use. - * @param aBuffer A 16-bit, mono raw audio signal at the appropriate - * sample rate (matching what the model was trained on). - * @param aBufferSize The number of samples in the audio signal. - * - * @return The STT result. The user is responsible for freeing the string using - * {@link DS_FreeString()}. Returns NULL on error. - */ -DEEPSPEECH_EXPORT -char* DS_SpeechToText(ModelState* aCtx, - const short* aBuffer, - unsigned int aBufferSize); - -/** - * @brief Use the DeepSpeech model to convert speech to text and output results - * including metadata. - * - * @param aCtx The ModelState pointer for the model to use. - * @param aBuffer A 16-bit, mono raw audio signal at the appropriate - * sample rate (matching what the model was trained on). - * @param aBufferSize The number of samples in the audio signal. - * @param aNumResults The maximum number of CandidateTranscript structs to return. Returned value might be smaller than this. - * - * @return Metadata struct containing multiple CandidateTranscript structs. 
Each - * transcript has per-token metadata including timing information. The - * user is responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. - * Returns NULL on error. - */ -DEEPSPEECH_EXPORT -Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx, - const short* aBuffer, - unsigned int aBufferSize, - unsigned int aNumResults); - -/** - * @brief Create a new streaming inference state. The streaming state returned - * by this function can then be passed to {@link DS_FeedAudioContent()} - * and {@link DS_FinishStream()}. - * - * @param aCtx The ModelState pointer for the model to use. - * @param[out] retval an opaque pointer that represents the streaming state. Can - * be NULL if an error occurs. - * - * @return Zero for success, non-zero on failure. - */ -DEEPSPEECH_EXPORT -int DS_CreateStream(ModelState* aCtx, - StreamingState** retval); - -/** - * @brief Feed audio samples to an ongoing streaming inference. - * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. - * @param aBuffer An array of 16-bit, mono raw audio samples at the - * appropriate sample rate (matching what the model was trained on). - * @param aBufferSize The number of samples in @p aBuffer. - */ -DEEPSPEECH_EXPORT -void DS_FeedAudioContent(StreamingState* aSctx, - const short* aBuffer, - unsigned int aBufferSize); - -/** - * @brief Compute the intermediate decoding of an ongoing streaming inference. - * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. - * - * @return The STT intermediate result. The user is responsible for freeing the - * string using {@link DS_FreeString()}. - */ -DEEPSPEECH_EXPORT -char* DS_IntermediateDecode(const StreamingState* aSctx); - -/** - * @brief Compute the intermediate decoding of an ongoing streaming inference, - * return results including metadata. - * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. 
- * @param aNumResults The number of candidate transcripts to return. - * - * @return Metadata struct containing multiple candidate transcripts. Each transcript - * has per-token metadata including timing information. The user is - * responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. - * Returns NULL on error. - */ -DEEPSPEECH_EXPORT -Metadata* DS_IntermediateDecodeWithMetadata(const StreamingState* aSctx, - unsigned int aNumResults); - -/** - * @brief Compute the final decoding of an ongoing streaming inference and return - * the result. Signals the end of an ongoing streaming inference. - * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. - * - * @return The STT result. The user is responsible for freeing the string using - * {@link DS_FreeString()}. - * - * @note This method will free the state pointer (@p aSctx). - */ -DEEPSPEECH_EXPORT -char* DS_FinishStream(StreamingState* aSctx); - -/** - * @brief Compute the final decoding of an ongoing streaming inference and return - * results including metadata. Signals the end of an ongoing streaming - * inference. - * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. - * @param aNumResults The number of candidate transcripts to return. - * - * @return Metadata struct containing multiple candidate transcripts. Each transcript - * has per-token metadata including timing information. The user is - * responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. - * Returns NULL on error. - * - * @note This method will free the state pointer (@p aSctx). - */ -DEEPSPEECH_EXPORT -Metadata* DS_FinishStreamWithMetadata(StreamingState* aSctx, - unsigned int aNumResults); - -/** - * @brief Destroy a streaming state without decoding the computed logits. This - * can be used if you no longer need the result of an ongoing streaming - * inference and don't want to perform a costly decode operation. 
- * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. - * - * @note This method will free the state pointer (@p aSctx). - */ -DEEPSPEECH_EXPORT -void DS_FreeStream(StreamingState* aSctx); - -/** - * @brief Free memory allocated for metadata information. - */ -DEEPSPEECH_EXPORT -void DS_FreeMetadata(Metadata* m); - -/** - * @brief Free a char* string returned by the DeepSpeech API. - */ -DEEPSPEECH_EXPORT -void DS_FreeString(char* str); - -/** - * @brief Returns the version of this library. The returned version is a semantic - * version (SemVer 2.0.0). The string returned must be freed with {@link DS_FreeString()}. - * - * @return The version string. - */ -DEEPSPEECH_EXPORT -char* DS_Version(); - -/** - * @brief Returns a textual description corresponding to an error code. - * The string returned must be freed with @{link DS_FreeString()}. - * - * @return The error description. - */ -DEEPSPEECH_EXPORT -char* DS_ErrorCodeToErrorMessage(int aErrorCode); - -#undef DEEPSPEECH_EXPORT - -#ifdef __cplusplus -} -#endif - -#endif /* DEEPSPEECH_H */ diff --git a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/AppDelegate.swift b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/AppDelegate.swift index b589df39..a2dcb427 100644 --- a/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/AppDelegate.swift +++ b/native_client/swift/deepspeech_ios_test/deepspeech_ios_test/AppDelegate.swift @@ -138,32 +138,6 @@ func render(audioContext: AudioContext?, stream: DeepSpeechStream) { func test(model: DeepSpeechModel, audioPath: String, completion: @escaping () -> ()) { let url = URL(fileURLWithPath: audioPath) - //var format = AudioStreamBasicDescription.init() - //format.mSampleRate = 16000; - //format.mFormatID = kAudioFormatLinearPCM; - //format.mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagsNativeEndian | kAudioFormatFlagIsPacked; - //format.mBitsPerChannel = 16; - //format.mChannelsPerFrame = 1; - 
//format.mBytesPerFrame = format.mChannelsPerFrame * format.mBitsPerChannel / 8; - //format.mFramesPerPacket = 1; - //format.mBytesPerPacket = format.mFramesPerPacket * format.mBytesPerFrame; - // - //var file = Optional.init(nilLiteral: ()); - //let status = ExtAudioFileCreateWithURL(url as CFURL, - // kAudioFileWAVEType, - // &format, - // nil, - // 0, - // &file) - //print("status: \(status)") - //let status2 = ExtAudioFileSetProperty(file!, - // kExtAudioFileProperty_ClientDataFormat, - // UInt32(MemoryLayout.size), - // &format) - //print("status: \(status2)") - // - //ExtAudioFileRead(file, <#T##ioNumberFrames: UnsafeMutablePointer##UnsafeMutablePointer#>, <#T##ioData: UnsafeMutablePointer##UnsafeMutablePointer#>) - let stream = try! model.createStream() print("\(audioPath)") let start = CFAbsoluteTimeGetCurrent() @@ -177,25 +151,6 @@ func test(model: DeepSpeechModel, audioPath: String, completion: @escaping () -> print("\"\(audioPath)\": \(end - start) - \(result)") completion() }) - - //let file = try! AVAudioFile(forReading: url) - //print("file length \(file.length)") - //let format = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: 16000, channels: 1, interleaved: false)! - //let stream = createStream(modelState: modelState) - //while file.framePosition < file.length { - // let pcmBuf = AVAudioPCMBuffer.init(pcmFormat: format, frameCapacity: 8 * 1024)! // arbitrary frameCapacity - // try! file.read(into: pcmBuf) - // if pcmBuf.frameLength == 0 { - // break - // } - // print("read \(pcmBuf.frameLength) frames into buffer") - // let rawPtr = pcmBuf.audioBufferList.pointee.mBuffers.mData! - // let ptr = rawPtr.bindMemory(to: Int16.self, capacity: Int(pcmBuf.frameLength)) - // print("first few samples: \(ptr[0]) \(ptr[1]) \(ptr[2]) \(ptr[3]) ") - // DS_FeedAudioContent(stream, ptr, UInt32(pcmBuf.frameLength)) - //} - //let result = DS_FinishStream(stream) - //return String.init(cString: result!) 
} @UIApplicationMain diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index d80ef2f7..03bdd3fd 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -14,8 +14,6 @@ deepspeech: tensorflow: packages_xenial: apt: 'apt-get -qq update && apt-get -qq -y install realpath build-essential python-virtualenv python-dev python-pip libblas-dev liblapack-dev gfortran wget software-properties-common pixz zip zlib1g-dev unzip' - packages_macos: - brew: '$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/taskcluster/tf_tc-brew.sh' packages_win: pacman: 'pacman --noconfirm -S patch unzip tar' msys64: 'ln -s $USERPROFILE/msys64 $TASKCLUSTER_TASK_DIR/msys64' diff --git a/taskcluster/tf_darwin-amd64-opt.yml b/taskcluster/tf_darwin-amd64-opt.yml index 365e1700..64674b1f 100644 --- a/taskcluster/tf_darwin-amd64-opt.yml +++ b/taskcluster/tf_darwin-amd64-opt.yml @@ -5,9 +5,6 @@ build: artifact_namespace: ${system.tensorflow.darwin_amd64.namespace} generic: workerType: "ds-macos-heavy" - system_config: - > - ${tensorflow.packages_macos.brew} scripts: setup: "taskcluster/tf_tc-setup.sh" build: "taskcluster/tf_tc-build.sh --cpu" diff --git a/taskcluster/tf_ios-arm64-opt.yml b/taskcluster/tf_ios-arm64-opt.yml index 9f253b3f..edb3eb2b 100644 --- a/taskcluster/tf_ios-arm64-opt.yml +++ b/taskcluster/tf_ios-arm64-opt.yml @@ -5,9 +5,6 @@ build: artifact_namespace: ${system.tensorflow.ios_arm64.namespace} generic: workerType: "ds-macos-heavy" - system_config: - > - ${tensorflow.packages_macos.brew} scripts: setup: "taskcluster/tf_tc-setup.sh" build: "taskcluster/tf_tc-build.sh --ios-arm64" diff --git a/taskcluster/tf_ios-x86_64-opt.yml b/taskcluster/tf_ios-x86_64-opt.yml index c56ad3ca..8f82cb95 100644 --- a/taskcluster/tf_ios-x86_64-opt.yml +++ b/taskcluster/tf_ios-x86_64-opt.yml @@ -5,9 +5,6 @@ build: artifact_namespace: ${system.tensorflow.ios_x86_64.namespace} generic: workerType: "ds-macos-heavy" - system_config: - > - ${tensorflow.packages_macos.brew} scripts: setup: 
"taskcluster/tf_tc-setup.sh" build: "taskcluster/tf_tc-build.sh --ios-x86_64" diff --git a/taskcluster/tf_tc-brew.sh b/taskcluster/tf_tc-brew.sh deleted file mode 100755 index 8d4128a2..00000000 --- a/taskcluster/tf_tc-brew.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash - -set -ex - -# if [ -z "${TASKCLUSTER_TASK_DIR}" ]; then -# echo "No TASKCLUSTER_TASK_DIR, aborting." -# exit 1 -# fi - -# LOCAL_BREW="${TASKCLUSTER_TASK_DIR}/homebrew" -# export PATH=${LOCAL_BREW}/bin:$PATH -# export HOMEBREW_LOGS="${TASKCLUSTER_TASK_DIR}/homebrew.logs/" -# export HOMEBREW_CACHE="${TASKCLUSTER_TASK_DIR}/homebrew.cache/" -# export HOMEBREW_FORMULAS_COMMIT=93fe256e0168db3b1c70c26a01941be59ce76311 -# export HOMEBREW_NO_AUTO_UPDATE=1 - -# # Never fail on pre-existing homebrew/ directory -# mkdir -p "${LOCAL_BREW}" || true -# mkdir -p "${HOMEBREW_CACHE}" || true - -# # Make sure to verify there is a 'brew' binary there, otherwise install things. -# if [ ! -x "${LOCAL_BREW}/bin/brew" ]; then -# curl -L https://github.com/Homebrew/brew/tarball/2.2.17 | tar xz --strip 1 -C "${LOCAL_BREW}" -# fi; - -# echo "local brew list (should be empty) ..." -# brew list - -# echo "local brew prefix ..." 
-# local_prefix=$(brew --prefix) -# echo "${local_prefix}" - -# if [ "${LOCAL_BREW}" != "${local_prefix}" ]; then -# echo "Weird state:" -# echo "LOCAL_BREW=${LOCAL_BREW}" -# echo "local_prefix=${local_prefix}" -# exit 1 -# fi; - - -# # Then we force onto a specific well-known commit -# mkdir -p "$(brew --prefix)/Library/Taps/homebrew/homebrew-core" -# pushd "$(brew --prefix)/Library/Taps/homebrew/homebrew-core" -# git init -# git remote add origin https://github.com/Homebrew/homebrew-core.git -# git fetch origin -# git checkout ${HOMEBREW_FORMULAS_COMMIT} -# popd - -# # coreutils, pyenv-virtualenv required for build of tensorflow -# all_pkgs="coreutils pyenv-virtualenv" - -# for pkg in ${all_pkgs}; -# do -# (brew list --versions ${pkg} && brew upgrade ${pkg}) || brew install ${pkg} -# done; From d9dac13343b634996e0875770b7cd70259e60060 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Mon, 20 Jul 2020 11:17:05 +0200 Subject: [PATCH 15/16] Clean up tf_tc-build.sh --- taskcluster/tf_tc-build.sh | 107 ++++++++----------------------------- 1 file changed, 22 insertions(+), 85 deletions(-) diff --git a/taskcluster/tf_tc-build.sh b/taskcluster/tf_tc-build.sh index ed3c306e..6ac03120 100755 --- a/taskcluster/tf_tc-build.sh +++ b/taskcluster/tf_tc-build.sh @@ -4,50 +4,6 @@ set -ex source $(dirname $0)/tf_tc-vars.sh -build_amd64=no -build_gpu=no -build_android_arm=no -build_android_arm64=no -build_linux_arm=no -build_linux_arm64=no -build_ios_arm64=no -build_ios_x86_64=no - -if [ "$1" = "--cpu" ]; then - build_amd64=yes -fi - -if [ "$1" = "--gpu" ]; then - build_amd64=yes - build_gpu=yes -fi - -if [ "$1" = "--arm" ]; then - build_amd64=yes - build_linux_arm=yes -fi - -if [ "$1" = "--arm64" ]; then - build_amd64=yes - build_linux_arm64=yes -fi - -if [ "$1" = "--android-armv7" ]; then - build_android_arm=yes -fi - -if [ "$1" = "--android-arm64" ]; then - build_android_arm64=yes -fi - -if [ "$1" = "--ios-arm64" ]; then - build_ios_arm64=yes -fi - -if [ "$1" = "--ios-x86_64" 
]; then - build_ios_x86_64=yes -fi - pushd ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/ BAZEL_BUILD="bazel ${BAZEL_OUTPUT_USER_ROOT} build -s --explain bazel_monolithic_tf.log --verbose_explanations --experimental_strict_action_env --config=monolithic" @@ -58,51 +14,32 @@ pushd ${DS_ROOT_TASK}/DeepSpeech/ds/tensorflow/ # Force toolchain sync (useful on macOS ?) bazel ${BAZEL_OUTPUT_USER_ROOT} sync --configure - if [ "${build_amd64}" = "yes" ]; then - # Pure amd64 CPU-only build - if [ "${OS}" = "${TC_MSYS_VERSION}" -a "${build_gpu}" = "no" ]; then - echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LIB_CPP_API} ${BUILD_TARGET_LITE_LIB} - elif [ "${build_gpu}" = "no" -a "${build_linux_arm}" = "no" -a "${build_linux_arm64}" = "no" ]; then - echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LIB_CPP_API} ${BUILD_TARGET_LITE_LIB} - fi - - # Cross RPi3 CPU-only build - if [ "${build_linux_arm}" = "yes" ]; then - echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_ARM_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} - fi - - # Cross ARM64 Cortex-A53 build - if [ "${build_linux_arm64}" = "yes" ]; then - echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} - fi - - # Pure amd64 GPU-enabled build - if [ "${build_gpu}" = "yes" ]; then - eval "export ${TF_CUDA_FLAGS}" && (echo "" | TF_NEED_CUDA=1 ./configure) && ${BAZEL_BUILD} -c opt ${BAZEL_CUDA_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BAZEL_OPT_FLAGS} ${BUILD_TARGET_LIB_CPP_API} - fi - fi - - if [ "${build_android_arm}" = "yes" ]; then + case "$1" in + "--cpu") + echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_OPT_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LIB_CPP_API} ${BUILD_TARGET_LITE_LIB} + ;; + "--gpu") + eval "export ${TF_CUDA_FLAGS}" && (echo "" | TF_NEED_CUDA=1 ./configure) && 
${BAZEL_BUILD} -c opt ${BAZEL_CUDA_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BAZEL_OPT_FLAGS} ${BUILD_TARGET_LIB_CPP_API} + ;; + "--arm") + echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_ARM_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} + ;; + "--arm64") + echo "" | TF_NEED_CUDA=0 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} + ;; + "--android-armv7") echo "" | TF_SET_ANDROID_WORKSPACE=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_ANDROID_ARM_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} - fi; - - if [ "${build_android_arm64}" = "yes" ]; then + ;; + "--android-arm64") echo "" | TF_SET_ANDROID_WORKSPACE=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_ANDROID_ARM64_FLAGS} ${BAZEL_EXTRA_FLAGS} ${BUILD_TARGET_LITE_LIB} - fi; - - if [ "${build_ios_arm64}" = "yes" ]; then + ;; + "--ios-arm64") echo "" | TF_NEED_CUDA=0 TF_CONFIGURE_IOS=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_IOS_ARM64_FLAGS} ${BUILD_TARGET_LITE_LIB} - fi; - - if [ "${build_ios_x86_64}" = "yes" ]; then + ;; + "--ios-x86_64") echo "" | TF_NEED_CUDA=0 TF_CONFIGURE_IOS=1 ./configure && ${BAZEL_BUILD} -c opt ${BAZEL_IOS_X86_64_FLAGS} ${BUILD_TARGET_LITE_LIB} - fi; - - if [ $? -ne 0 ]; then - # There was a failure, just account for it. - echo "Build failure, please check the output above. Exit code was: $?" 
- return 1 - fi + ;; + esac bazel ${BAZEL_OUTPUT_USER_ROOT} shutdown popd From 267287861867d581f500cfc748c30ea3501a9608 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Mon, 20 Jul 2020 11:52:35 +0200 Subject: [PATCH 16/16] Add docs to Swift bindings and missing methods --- .../swift/deepspeech_ios/DeepSpeech.swift | 193 ++++++++++++++++-- 1 file changed, 173 insertions(+), 20 deletions(-) diff --git a/native_client/swift/deepspeech_ios/DeepSpeech.swift b/native_client/swift/deepspeech_ios/DeepSpeech.swift index b694995b..50c32553 100644 --- a/native_client/swift/deepspeech_ios/DeepSpeech.swift +++ b/native_client/swift/deepspeech_ios/DeepSpeech.swift @@ -30,7 +30,8 @@ public enum DeepSpeechError: Error { case failCreateSess(errorCode: Int32) case failCreateModel(errorCode: Int32) - // Additional case for invalid error codes, should never happen unless the user has mixed header and binary versions + // Additional case for invalid error codes, should never happen unless the + // user has mixed header and binary versions. case invalidErrorCode(errorCode: Int32) } @@ -115,9 +116,15 @@ private func evaluateErrorCode(errorCode: Int32) throws { } } +/// Stores text of an individual token, along with its timing information public struct DeepSpeechTokenMetadata { + /// The text corresponding to this token let text: String + + /// Position of the token in units of 20ms let timestep: Int + + /// Position of the token in seconds let startTime: Float internal init(fromInternal: TokenMetadata) { @@ -127,8 +134,17 @@ public struct DeepSpeechTokenMetadata { } } +/** A single transcript computed by the model, including a confidence value and + the metadata for its constituent tokens +*/ public struct DeepSpeechCandidateTranscript { + /// Array of DeepSpeechTokenMetadata objects private(set) var tokens: [DeepSpeechTokenMetadata] = [] + + /** Approximated confidence value for this transcript. 
This corresponds to + both acoustic model and language model scores that contributed to the + creation of this transcript. + */ let confidence: Double internal init(fromInternal: CandidateTranscript) { @@ -140,12 +156,16 @@ public struct DeepSpeechCandidateTranscript { } } +/// An array of DeepSpeechCandidateTranscript objects computed by the model public struct DeepSpeechMetadata { + /// Array of DeepSpeechCandidateTranscript objects private(set) var transcripts: [DeepSpeechCandidateTranscript] = [] internal init(fromInternal: UnsafeMutablePointer) { let md = fromInternal.pointee - let transcriptsBuffer = UnsafeBufferPointer(start: md.transcripts, count: Int(md.num_transcripts)) + let transcriptsBuffer = UnsafeBufferPointer( + start: md.transcripts, + count: Int(md.num_transcripts)) for tr in transcriptsBuffer { transcripts.append(DeepSpeechCandidateTranscript(fromInternal: tr)) @@ -167,6 +187,13 @@ public class DeepSpeechStream { } } + /** Feed audio samples to an ongoing streaming inference. + + - Parameter buffer: A 16-bit, mono raw audio signal at the appropriate + sample rate (matching what the model was trained on). + + - Precondition: `finishStream()` has not been called on this stream. + */ public func feedAudioContent(buffer: Array) { precondition(streamCtx != nil, "calling method on invalidated Stream") @@ -175,12 +202,25 @@ public class DeepSpeechStream { } } + /** Feed audio samples to an ongoing streaming inference. + + - Parameter buffer: A 16-bit, mono raw audio signal at the appropriate + sample rate (matching what the model was trained on). + + - Precondition: `finishStream()` has not been called on this stream. + */ public func feedAudioContent(buffer: UnsafeBufferPointer) { precondition(streamCtx != nil, "calling method on invalidated Stream") DS_FeedAudioContent(streamCtx, buffer.baseAddress, UInt32(buffer.count)) } + /** Compute the intermediate decoding of an ongoing streaming inference. 
+ + - Precondition: `finishStream()` has not been called on this stream. + + - Returns: The STT intermediate result. + */ public func intermediateDecode() -> String { precondition(streamCtx != nil, "calling method on invalidated Stream") @@ -189,6 +229,16 @@ public class DeepSpeechStream { return String(cString: result!) } + /** Compute the intermediate decoding of an ongoing streaming inference, + return results including metadata. + + - Parameter numResults: The number of candidate transcripts to return. + + - Precondition: `finishStream()` has not been called on this stream. + + - Returns: Metadata struct containing multiple CandidateTranscript structs. + Each transcript has per-token metadata including timing information. + */ public func intermediateDecodeWithMetadata(numResults: Int) -> DeepSpeechMetadata { precondition(streamCtx != nil, "calling method on invalidated Stream") let result = DS_IntermediateDecodeWithMetadata(streamCtx, UInt32(numResults))! @@ -196,6 +246,15 @@ public class DeepSpeechStream { return DeepSpeechMetadata(fromInternal: result) } + /** Compute the final decoding of an ongoing streaming inference and return + the result. Signals the end of an ongoing streaming inference. + + - Precondition: `finishStream()` has not been called on this stream. + + - Returns: The STT result. + + - Postcondition: This method will invalidate this streaming context. + */ public func finishStream() -> String { precondition(streamCtx != nil, "calling method on invalidated Stream") @@ -206,11 +265,38 @@ public class DeepSpeechStream { } return String(cString: result!) } + + /** Compute the final decoding of an ongoing streaming inference and return + results including metadata. Signals the end of an ongoing streaming + inference. + + - Parameter numResults: The number of candidate transcripts to return. + + - Precondition: `finishStream()` has not been called on this stream. + + - Returns: Metadata struct containing multiple CandidateTranscript structs. 
+ Each transcript has per-token metadata including timing information. + + - Postcondition: This method will invalidate this streaming context. + */ + public func finishStreamWithMetadata(numResults: Int) -> DeepSpeechMetadata { + precondition(streamCtx != nil, "calling method on invalidated Stream") + + let result = DS_FinishStreamWithMetadata(streamCtx, UInt32(numResults))! + defer { DS_FreeMetadata(result) } + return DeepSpeechMetadata(fromInternal: result) + } } +/// An object providing an interface to a trained DeepSpeech model. public class DeepSpeechModel { private var modelCtx: OpaquePointer! + /** + - Parameter modelPath: The path to the model file. + + - Throws: `DeepSpeechError` on failure. + */ public init(modelPath: String) throws { let err = DS_CreateModel(modelPath, &modelCtx) try evaluateErrorCode(errorCode: err) @@ -221,77 +307,144 @@ public class DeepSpeechModel { modelCtx = nil } + /** Get beam width value used by the model. If `setBeamWidth(beamWidth:)` + was not called before, will return the default value loaded from the + model file. + + - Returns: Beam width value used by the model. + */ public func getBeamWidth() -> Int { return Int(DS_GetModelBeamWidth(modelCtx)) } + /** Set beam width value used by the model. + + - Parameter beamWidth: The beam width used by the model. A larger beam + width value generates better results at the cost + of decoding time. + + - Throws: `DeepSpeechError` on failure. + */ public func setBeamWidth(beamWidth: Int) throws { let err = DS_SetModelBeamWidth(modelCtx, UInt32(beamWidth)) try evaluateErrorCode(errorCode: err) } + /// The sample rate expected by the model. public var sampleRate: Int { get { return Int(DS_GetModelSampleRate(modelCtx)) } } + /** Enable decoding using an external scorer. + + - Parameter scorerPath: The path to the external scorer file. + + - Throws: `DeepSpeechError` on failure. 
+ */ public func enableExternalScorer(scorerPath: String) throws { let err = DS_EnableExternalScorer(modelCtx, scorerPath) try evaluateErrorCode(errorCode: err) } + /** Disable decoding using an external scorer. + + - Throws: `DeepSpeechError` on failure. + */ public func disableExternalScorer() throws { let err = DS_DisableExternalScorer(modelCtx) try evaluateErrorCode(errorCode: err) } + /** Set hyperparameters alpha and beta of the external scorer. + + - Parameter alpha: The alpha hyperparameter of the decoder. Language model weight. + - Parameter beta: The beta hyperparameter of the decoder. Word insertion weight. + + - Throws: `DeepSpeechError` on failure. + */ public func setScorerAlphaBeta(alpha: Float, beta: Float) throws { let err = DS_SetScorerAlphaBeta(modelCtx, alpha, beta) try evaluateErrorCode(errorCode: err) } + /** Use the DeepSpeech model to convert speech to text. + + - Parameter buffer: A 16-bit, mono raw audio signal at the appropriate + sample rate (matching what the model was trained on). + + - Returns: The STT result. + */ public func speechToText(buffer: Array) -> String { return buffer.withUnsafeBufferPointer { unsafeBufferPointer -> String in return speechToText(buffer: unsafeBufferPointer) } } + /** Use the DeepSpeech model to convert speech to text. + + - Parameter buffer: A 16-bit, mono raw audio signal at the appropriate + sample rate (matching what the model was trained on). + + - Returns: The STT result. + */ public func speechToText(buffer: UnsafeBufferPointer) -> String { let result = DS_SpeechToText(modelCtx, buffer.baseAddress, UInt32(buffer.count)) defer { DS_FreeString(result) } return String(cString: result!) } + /** Use the DeepSpeech model to convert speech to text and output results + including metadata. + + - Parameter buffer: A 16-bit, mono raw audio signal at the appropriate + sample rate (matching what the model was trained on). 
+ - Parameter numResults: The maximum number of DeepSpeechCandidateTranscript + structs to return. Returned value might be smaller than this. + + - Returns: Metadata struct containing multiple CandidateTranscript structs. + Each transcript has per-token metadata including timing information. + */ public func speechToTextWithMetadata(buffer: Array, numResults: Int) -> DeepSpeechMetadata { return buffer.withUnsafeBufferPointer { unsafeBufferPointer -> DeepSpeechMetadata in - let result = DS_SpeechToTextWithMetadata(modelCtx, unsafeBufferPointer.baseAddress, UInt32(buffer.count), UInt32(numResults))! - defer { DS_FreeMetadata(result) } - return DeepSpeechMetadata(fromInternal: result) + return speechToTextWithMetadata(buffer: unsafeBufferPointer, numResults: numResults) } } + /** Use the DeepSpeech model to convert speech to text and output results + including metadata. + + - Parameter buffer: A 16-bit, mono raw audio signal at the appropriate + sample rate (matching what the model was trained on). + - Parameter numResults: The maximum number of DeepSpeechCandidateTranscript + structs to return. Returned value might be smaller than this. + + - Returns: Metadata struct containing multiple CandidateTranscript structs. + Each transcript has per-token metadata including timing information. + */ + public func speechToTextWithMetadata(buffer: UnsafeBufferPointer, numResults: Int) -> DeepSpeechMetadata { + let result = DS_SpeechToTextWithMetadata( + modelCtx, + buffer.baseAddress, + UInt32(buffer.count), + UInt32(numResults))! + defer { DS_FreeMetadata(result) } + return DeepSpeechMetadata(fromInternal: result) + } + + /** Create a new streaming inference state. + + - Returns: DeepSpeechStream object representing the streaming state. + + - Throws: `DeepSpeechError` on failure. + */ public func createStream() throws -> DeepSpeechStream { var streamContext: OpaquePointer! 
let err = DS_CreateStream(modelCtx, &streamContext) try evaluateErrorCode(errorCode: err) return DeepSpeechStream(streamContext: streamContext) } - - public class func open(path: String, scorerPath: Optional = nil) -> OpaquePointer { - var fooOpaque: OpaquePointer! - DS_CreateModel(path, &fooOpaque) - if let scorerPath = scorerPath { - DS_EnableExternalScorer(fooOpaque, scorerPath) - } - return fooOpaque - } - - public class func createStream(modelState: OpaquePointer) -> OpaquePointer { - var fooOpaque: OpaquePointer! - DS_CreateStream(modelState, &fooOpaque) - return fooOpaque - } } public func DeepSpeechVersion() -> String {