From 6d4d1a71531a3369fa5225770cd81c2c011c0d9e Mon Sep 17 00:00:00 2001 From: Kelly Davis Date: Sun, 7 Mar 2021 14:29:02 +0100 Subject: [PATCH] More rebranding, API names, iOS, .NET --- .taskcluster.yml => .taskcluster.yml.disabled | 0 bin/run-ldc93s1.sh | 2 +- doc/BUILDING.rst | 14 +- doc/C-API.rst | 84 +++---- doc/DotNet-API.rst | 34 +-- doc/DotNet-Examples.rst | 8 +- doc/Java-API.rst | 18 +- doc/Java-Examples.rst | 8 +- doc/SUPPORTED_PLATFORMS.rst | 28 +-- doc/Structs.rst | 6 +- doc/conf.py | 6 +- ds_lib.supp | 2 +- ds_openfst.supp | 4 +- native_client/args.h | 8 +- native_client/client.cc | 58 ++--- native_client/coqui-stt.h | 140 +++++------ native_client/ctcdecode/__init__.py | 2 +- native_client/ctcdecode/scorer.cpp | 12 +- .../DeepSpeechClient/Enums/ErrorCodes.cs | 33 --- .../dotnet/{DeepSpeech.sln => STT.sln} | 4 +- .../dotnet/STTClient/Enums/ErrorCodes.cs | 33 +++ .../Extensions/NativeExtensions.cs | 8 +- .../Interfaces/ISTT.cs} | 26 +- .../Models/CandidateTranscript.cs | 2 +- .../Models/Metadata.cs | 2 +- .../Models/Stream.cs} | 8 +- .../Models/TokenMetadata.cs | 2 +- .../NativeImp.cs | 54 ++--- .../DeepSpeech.cs => STTClient/STT.cs} | 92 ++++---- .../STTClient.csproj} | 0 .../Structs/CandidateTranscript.cs | 2 +- .../Structs/Metadata.cs | 2 +- .../Structs/TokenMetadata.cs | 2 +- .../App.config | 0 .../Program.cs | 8 +- .../Properties/AssemblyInfo.cs | 8 +- .../STTConsole.csproj} | 8 +- .../arctic_a0024.wav | Bin .../packages.config | 0 .../{DeepSpeechWPF => STTWPF}/.gitignore | 0 .../{DeepSpeechWPF => STTWPF}/App.config | 0 .../dotnet/{DeepSpeechWPF => STTWPF}/App.xaml | 4 +- .../{DeepSpeechWPF => STTWPF}/App.xaml.cs | 18 +- .../{DeepSpeechWPF => STTWPF}/MainWindow.xaml | 4 +- .../MainWindow.xaml.cs | 4 +- .../Properties/AssemblyInfo.cs | 8 +- .../Properties/Resources.Designer.cs | 4 +- .../Properties/Resources.resx | 0 .../Properties/Settings.Designer.cs | 2 +- .../Properties/Settings.settings | 0 .../STT.WPF.csproj} | 8 +- .../DeepSpeech.WPF.sln => 
STTWPF/STT.WPF.sln} | 4 +- .../ViewModels/BindableBase.cs | 2 +- .../ViewModels/MainWindowViewModel.cs | 16 +- .../{DeepSpeechWPF => STTWPF}/packages.config | 0 .../nupkg/{stt.nuspec.in => STT.spec.in} | 0 .../build/{DeepSpeech.targets => STT.targets} | 0 native_client/generate_scorer_package.cpp | 4 +- .../java/app/src/main/AndroidManifest.xml | 2 +- ...epSpeechActivity.java => STTActivity.java} | 10 +- ...ivity_deep_speech.xml => activity_stt.xml} | 2 +- native_client/java/jni/stt.i | 16 +- .../java/ai/coqui/libstt/test/BasicTest.java | 32 +-- .../{DeepSpeechModel.java => STTModel.java} | 30 +-- ...amingState.java => STTStreamingState.java} | 4 +- .../coqui/libstt_doc/CandidateTranscript.java | 2 +- .../java/ai/coqui/libstt_doc/Metadata.java | 2 +- ..._Error_Codes.java => STT_Error_Codes.java} | 18 +- .../ai/coqui/libstt_doc/TokenMetadata.java | 2 +- native_client/javascript/stt.i | 18 +- native_client/modelstate.cc | 2 +- native_client/modelstate.h | 2 +- native_client/python/impl.i | 22 +- native_client/stt.cc | 102 ++++---- native_client/stt_errors.cc | 4 +- .../deepspeech_ios/deepspeech_ios.modulemap | 12 - ...deepspeech-ios.podspec => stt-ios.podspec} | 6 +- .../project.pbxproj | 106 ++++----- .../contents.xcworkspacedata | 2 +- .../xcshareddata/IDEWorkspaceChecks.plist | 0 .../xcshareddata/xcschemes/stt_ios.xcscheme} | 18 +- .../contents.xcworkspacedata | 4 +- .../xcshareddata/IDEWorkspaceChecks.plist | 0 .../xcshareddata/WorkspaceSettings.xcsettings | 0 .../{deepspeech_ios => stt_ios}/Info.plist | 0 .../DeepSpeech.swift => stt_ios/STT.swift} | 222 +++++++++--------- .../deepspeech_ios.h => stt_ios/stt_ios.h} | 6 +- native_client/swift/stt_ios/stt_ios.modulemap | 12 + .../project.pbxproj | 152 ++++++------ .../contents.xcworkspacedata | 2 +- .../xcshareddata/IDEWorkspaceChecks.plist | 0 .../xcschemes/stt_ios_test.xcscheme} | 30 +-- .../AppDelegate.swift | 2 +- .../AppIcon.appiconset/Contents.json | 0 .../Assets.xcassets/Contents.json | 0 
.../AudioContext.swift | 4 +- .../Base.lproj/LaunchScreen.storyboard | 0 .../ContentView.swift | 2 +- .../Info.plist | 0 .../Preview Assets.xcassets/Contents.json | 0 .../SceneDelegate.swift | 2 +- .../SpeechRecognitionImpl.swift | 16 +- .../Info.plist | 0 .../stt_ios_testTests.swift} | 8 +- .../Info.plist | 0 .../stt_ios_testUITests.swift} | 10 +- native_client/tflitemodelstate.cc | 24 +- native_client/tfmodelstate.cc | 24 +- requirements_eval_tflite.txt | 2 +- taskcluster/tc-netframework-ds-tests.sh | 8 +- tensorflow_full_runtime.supp | 30 +-- training/coqui_stt_training/train.py | 2 +- 112 files changed, 891 insertions(+), 891 deletions(-) rename .taskcluster.yml => .taskcluster.yml.disabled (100%) delete mode 100644 native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs rename native_client/dotnet/{DeepSpeech.sln => STT.sln} (79%) create mode 100644 native_client/dotnet/STTClient/Enums/ErrorCodes.cs rename native_client/dotnet/{DeepSpeechClient => STTClient}/Extensions/NativeExtensions.cs (95%) rename native_client/dotnet/{DeepSpeechClient/Interfaces/IDeepSpeech.cs => STTClient/Interfaces/ISTT.cs} (88%) rename native_client/dotnet/{DeepSpeechClient => STTClient}/Models/CandidateTranscript.cs (93%) rename native_client/dotnet/{DeepSpeechClient => STTClient}/Models/Metadata.cs (89%) rename native_client/dotnet/{DeepSpeechClient/Models/DeepSpeechStream.cs => STTClient/Models/Stream.cs} (81%) rename native_client/dotnet/{DeepSpeechClient => STTClient}/Models/TokenMetadata.cs (93%) rename native_client/dotnet/{DeepSpeechClient => STTClient}/NativeImp.cs (59%) rename native_client/dotnet/{DeepSpeechClient/DeepSpeech.cs => STTClient/STT.cs} (75%) rename native_client/dotnet/{DeepSpeechClient/DeepSpeechClient.csproj => STTClient/STTClient.csproj} (100%) rename native_client/dotnet/{DeepSpeechClient => STTClient}/Structs/CandidateTranscript.cs (94%) rename native_client/dotnet/{DeepSpeechClient => STTClient}/Structs/Metadata.cs (92%) rename 
native_client/dotnet/{DeepSpeechClient => STTClient}/Structs/TokenMetadata.cs (93%) rename native_client/dotnet/{DeepSpeechConsole => STTConsole}/App.config (100%) rename native_client/dotnet/{DeepSpeechConsole => STTConsole}/Program.cs (96%) rename native_client/dotnet/{DeepSpeechConsole => STTConsole}/Properties/AssemblyInfo.cs (85%) rename native_client/dotnet/{DeepSpeechConsole/DeepSpeechConsole.csproj => STTConsole/STTConsole.csproj} (93%) rename native_client/dotnet/{DeepSpeechConsole => STTConsole}/arctic_a0024.wav (100%) rename native_client/dotnet/{DeepSpeechConsole => STTConsole}/packages.config (100%) rename native_client/dotnet/{DeepSpeechWPF => STTWPF}/.gitignore (100%) rename native_client/dotnet/{DeepSpeechWPF => STTWPF}/App.config (100%) rename native_client/dotnet/{DeepSpeechWPF => STTWPF}/App.xaml (74%) rename native_client/dotnet/{DeepSpeechWPF => STTWPF}/App.xaml.cs (61%) rename native_client/dotnet/{DeepSpeechWPF => STTWPF}/MainWindow.xaml (98%) rename native_client/dotnet/{DeepSpeechWPF => STTWPF}/MainWindow.xaml.cs (87%) rename native_client/dotnet/{DeepSpeechWPF => STTWPF}/Properties/AssemblyInfo.cs (91%) rename native_client/dotnet/{DeepSpeechWPF => STTWPF}/Properties/Resources.Designer.cs (94%) rename native_client/dotnet/{DeepSpeechWPF => STTWPF}/Properties/Resources.resx (100%) rename native_client/dotnet/{DeepSpeechWPF => STTWPF}/Properties/Settings.Designer.cs (96%) rename native_client/dotnet/{DeepSpeechWPF => STTWPF}/Properties/Settings.settings (100%) rename native_client/dotnet/{DeepSpeechWPF/DeepSpeech.WPF.csproj => STTWPF/STT.WPF.csproj} (95%) rename native_client/dotnet/{DeepSpeechWPF/DeepSpeech.WPF.sln => STTWPF/STT.WPF.sln} (80%) rename native_client/dotnet/{DeepSpeechWPF => STTWPF}/ViewModels/BindableBase.cs (98%) rename native_client/dotnet/{DeepSpeechWPF => STTWPF}/ViewModels/MainWindowViewModel.cs (97%) rename native_client/dotnet/{DeepSpeechWPF => STTWPF}/packages.config (100%) rename 
native_client/dotnet/nupkg/{stt.nuspec.in => STT.spec.in} (100%) rename native_client/dotnet/nupkg/build/{DeepSpeech.targets => STT.targets} (100%) rename native_client/java/app/src/main/java/ai/coqui/sttexampleapp/{DeepSpeechActivity.java => STTActivity.java} (95%) rename native_client/java/app/src/main/res/layout/{activity_deep_speech.xml => activity_stt.xml} (99%) rename native_client/java/libstt/src/main/java/ai/coqui/libstt/{DeepSpeechModel.java => STTModel.java} (88%) rename native_client/java/libstt/src/main/java/ai/coqui/libstt/{DeepSpeechStreamingState.java => STTStreamingState.java} (60%) rename native_client/java/libstt/src/main/java/ai/coqui/libstt_doc/{DeepSpeech_Error_Codes.java => STT_Error_Codes.java} (76%) delete mode 100644 native_client/swift/deepspeech_ios/deepspeech_ios.modulemap rename native_client/swift/{deepspeech-ios.podspec => stt-ios.podspec} (77%) rename native_client/swift/{deepspeech_ios.xcodeproj => stt_ios.xcodeproj}/project.pbxproj (77%) rename native_client/swift/{deepspeech_ios.xcodeproj => stt_ios.xcodeproj}/project.xcworkspace/contents.xcworkspacedata (68%) rename native_client/swift/{deepspeech_ios.xcodeproj => stt_ios.xcodeproj}/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist (100%) rename native_client/swift/{deepspeech_ios.xcodeproj/xcshareddata/xcschemes/deepspeech_ios.xcscheme => stt_ios.xcodeproj/xcshareddata/xcschemes/stt_ios.xcscheme} (81%) rename native_client/swift/{deepspeech_ios.xcworkspace => stt_ios.xcworkspace}/contents.xcworkspacedata (55%) rename native_client/swift/{deepspeech_ios.xcworkspace => stt_ios.xcworkspace}/xcshareddata/IDEWorkspaceChecks.plist (100%) rename native_client/swift/{deepspeech_ios.xcworkspace => stt_ios.xcworkspace}/xcshareddata/WorkspaceSettings.xcsettings (100%) rename native_client/swift/{deepspeech_ios => stt_ios}/Info.plist (100%) rename native_client/swift/{deepspeech_ios/DeepSpeech.swift => stt_ios/STT.swift} (64%) rename 
native_client/swift/{deepspeech_ios/deepspeech_ios.h => stt_ios/stt_ios.h} (64%) create mode 100644 native_client/swift/stt_ios/stt_ios.modulemap rename native_client/swift/{deepspeech_ios_test.xcodeproj => stt_ios_test.xcodeproj}/project.pbxproj (76%) rename native_client/swift/{deepspeech_ios_test.xcodeproj => stt_ios_test.xcodeproj}/project.xcworkspace/contents.xcworkspacedata (66%) rename native_client/swift/{deepspeech_ios_test.xcodeproj => stt_ios_test.xcodeproj}/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist (100%) rename native_client/swift/{deepspeech_ios_test.xcodeproj/xcshareddata/xcschemes/deepspeech_ios_test.xcscheme => stt_ios_test.xcodeproj/xcshareddata/xcschemes/stt_ios_test.xcscheme} (75%) rename native_client/swift/{deepspeech_ios_test => stt_ios_test}/AppDelegate.swift (98%) rename native_client/swift/{deepspeech_ios_test => stt_ios_test}/Assets.xcassets/AppIcon.appiconset/Contents.json (100%) rename native_client/swift/{deepspeech_ios_test => stt_ios_test}/Assets.xcassets/Contents.json (100%) rename native_client/swift/{deepspeech_ios_test => stt_ios_test}/AudioContext.swift (98%) rename native_client/swift/{deepspeech_ios_test => stt_ios_test}/Base.lproj/LaunchScreen.storyboard (100%) rename native_client/swift/{deepspeech_ios_test => stt_ios_test}/ContentView.swift (98%) rename native_client/swift/{deepspeech_ios_test => stt_ios_test}/Info.plist (100%) rename native_client/swift/{deepspeech_ios_test => stt_ios_test}/Preview Content/Preview Assets.xcassets/Contents.json (100%) rename native_client/swift/{deepspeech_ios_test => stt_ios_test}/SceneDelegate.swift (99%) rename native_client/swift/{deepspeech_ios_test => stt_ios_test}/SpeechRecognitionImpl.swift (95%) rename native_client/swift/{deepspeech_ios_testTests => stt_ios_testTests}/Info.plist (100%) rename native_client/swift/{deepspeech_ios_testTests/deepspeech_ios_testTests.swift => stt_ios_testTests/stt_ios_testTests.swift} (85%) rename 
native_client/swift/{deepspeech_ios_testUITests => stt_ios_testUITests}/Info.plist (100%) rename native_client/swift/{deepspeech_ios_testUITests/deepspeech_ios_testUITests.swift => stt_ios_testUITests/stt_ios_testUITests.swift} (88%) diff --git a/.taskcluster.yml b/.taskcluster.yml.disabled similarity index 100% rename from .taskcluster.yml rename to .taskcluster.yml.disabled diff --git a/bin/run-ldc93s1.sh b/bin/run-ldc93s1.sh index 3f635da5..d19722b0 100755 --- a/bin/run-ldc93s1.sh +++ b/bin/run-ldc93s1.sh @@ -13,7 +13,7 @@ fi; if [ -d "${COMPUTE_KEEP_DIR}" ]; then checkpoint_dir=$COMPUTE_KEEP_DIR else - checkpoint_dir=$(python -c 'from xdg import BaseDirectory as xdg; print(xdg.save_data_path("deepspeech/ldc93s1"))') + checkpoint_dir=$(python -c 'from xdg import BaseDirectory as xdg; print(xdg.save_data_path("stt/ldc93s1"))') fi # Force only one visible device because we have a single-sample dataset diff --git a/doc/BUILDING.rst b/doc/BUILDING.rst index fea38f40..3f0457d4 100644 --- a/doc/BUILDING.rst +++ b/doc/BUILDING.rst @@ -60,7 +60,7 @@ Compile Coqui STT ----------------- Compile ``libstt.so`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^ Within your TensorFlow directory, there should be a symbolic link to the 🐸STT ``native_client`` directory. 
If it is not present, create it with the follow command: @@ -238,8 +238,8 @@ Due to the discontinuation of Bintray JCenter we do not have pre-built Android p implementation 'stt.coqui.ai:libstt:VERSION@aar' -Building ``libstt.so`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Building ``libstt.so`` for Android +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ You can build the ``libstt.so`` using (ARMv7): @@ -254,7 +254,7 @@ Or (ARM64): bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=android --config=android_arm64 --define=runtime=tflite --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 //native_client:libstt.so Building ``libstt.aar`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^ In the unlikely event you have to rebuild the JNI bindings, source code is available under the ``libstt`` subdirectory. Building depends on shared @@ -270,7 +270,7 @@ and adapt file naming (when missing, the error message should states what filename it expects and where). Building C++ ``stt`` binary -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^ Building the ``stt`` binary will happen through ``ndk-build`` (ARMv7): @@ -306,7 +306,7 @@ mono 16kHz 16-bits file and it might fail on some WAVE file that are not following exactly the specification. Running ``stt`` via adb -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^ You should use ``adb push`` to send data to device, please refer to Android documentation on how to use that. 
@@ -349,7 +349,7 @@ to leverage GPU / DSP / NPU * Hexagon, the Qualcomm-specific DSP This is highly experimental: -* Requires passing environment variable ``DS_TFLITE_DELEGATE`` with values of +* Requires passing environment variable ``STT_TFLITE_DELEGATE`` with values of ``gpu``, ``nnapi`` or ``hexagon`` (only one at a time) * Might require exported model changes (some Op might not be supported) * We can't guarantee it will work, nor it will be faster than default diff --git a/doc/C-API.rst b/doc/C-API.rst index d9c2da1d..b76c06b8 100644 --- a/doc/C-API.rst +++ b/doc/C-API.rst @@ -10,65 +10,65 @@ C API See also the list of error codes including descriptions for each error in :ref:`error-codes`. -.. doxygenfunction:: DS_CreateModel - :project: deepspeech-c +.. doxygenfunction:: STT_CreateModel + :project: stt-c -.. doxygenfunction:: DS_FreeModel - :project: deepspeech-c +.. doxygenfunction:: STT_FreeModel + :project: stt-c -.. doxygenfunction:: DS_EnableExternalScorer - :project: deepspeech-c +.. doxygenfunction:: STT_EnableExternalScorer + :project: stt-c -.. doxygenfunction:: DS_DisableExternalScorer - :project: deepspeech-c +.. doxygenfunction:: STT_DisableExternalScorer + :project: stt-c -.. doxygenfunction:: DS_AddHotWord - :project: deepspeech-c +.. doxygenfunction:: STT_AddHotWord + :project: stt-c -.. doxygenfunction:: DS_EraseHotWord - :project: deepspeech-c +.. doxygenfunction:: STT_EraseHotWord + :project: stt-c -.. doxygenfunction:: DS_ClearHotWords - :project: deepspeech-c +.. doxygenfunction:: STT_ClearHotWords + :project: stt-c -.. doxygenfunction:: DS_SetScorerAlphaBeta - :project: deepspeech-c +.. doxygenfunction:: STT_SetScorerAlphaBeta + :project: stt-c -.. doxygenfunction:: DS_GetModelSampleRate - :project: deepspeech-c +.. doxygenfunction:: STT_GetModelSampleRate + :project: stt-c -.. doxygenfunction:: DS_SpeechToText - :project: deepspeech-c +.. doxygenfunction:: STT_SpeechToText + :project: stt-c -.. 
doxygenfunction:: DS_SpeechToTextWithMetadata - :project: deepspeech-c +.. doxygenfunction:: STT_SpeechToTextWithMetadata + :project: stt-c -.. doxygenfunction:: DS_CreateStream - :project: deepspeech-c +.. doxygenfunction:: STT_CreateStream + :project: stt-c -.. doxygenfunction:: DS_FeedAudioContent - :project: deepspeech-c +.. doxygenfunction:: STT_FeedAudioContent + :project: stt-c -.. doxygenfunction:: DS_IntermediateDecode - :project: deepspeech-c +.. doxygenfunction:: STT_IntermediateDecode + :project: stt-c -.. doxygenfunction:: DS_IntermediateDecodeWithMetadata - :project: deepspeech-c +.. doxygenfunction:: STT_IntermediateDecodeWithMetadata + :project: stt-c -.. doxygenfunction:: DS_FinishStream - :project: deepspeech-c +.. doxygenfunction:: STT_FinishStream + :project: stt-c -.. doxygenfunction:: DS_FinishStreamWithMetadata - :project: deepspeech-c +.. doxygenfunction:: STT_FinishStreamWithMetadata + :project: stt-c -.. doxygenfunction:: DS_FreeStream - :project: deepspeech-c +.. doxygenfunction:: STT_FreeStream + :project: stt-c -.. doxygenfunction:: DS_FreeMetadata - :project: deepspeech-c +.. doxygenfunction:: STT_FreeMetadata + :project: stt-c -.. doxygenfunction:: DS_FreeString - :project: deepspeech-c +.. doxygenfunction:: STT_FreeString + :project: stt-c -.. doxygenfunction:: DS_Version - :project: deepspeech-c +.. doxygenfunction:: STT_Version + :project: stt-c diff --git a/doc/DotNet-API.rst b/doc/DotNet-API.rst index 92342ded..bba28896 100644 --- a/doc/DotNet-API.rst +++ b/doc/DotNet-API.rst @@ -2,18 +2,18 @@ ============== -DeepSpeech Class +STT Class ---------------- -.. doxygenclass:: DeepSpeechClient::DeepSpeech - :project: deepspeech-dotnet +.. doxygenclass:: STTClient::STT + :project: stt-dotnet :members: -DeepSpeechStream Class +Stream Class ---------------------- -.. doxygenclass:: DeepSpeechClient::Models::DeepSpeechStream - :project: deepspeech-dotnet +.. 
doxygenclass:: STTClient::Models::Stream + :project: stt-dotnet :members: ErrorCodes @@ -21,33 +21,33 @@ ErrorCodes See also the main definition including descriptions for each error in :ref:`error-codes`. -.. doxygenenum:: DeepSpeechClient::Enums::ErrorCodes - :project: deepspeech-dotnet +.. doxygenenum:: STTClient::Enums::ErrorCodes + :project: stt-dotnet Metadata -------- -.. doxygenclass:: DeepSpeechClient::Models::Metadata - :project: deepspeech-dotnet +.. doxygenclass:: STTClient::Models::Metadata + :project: stt-dotnet :members: Transcripts CandidateTranscript ------------------- -.. doxygenclass:: DeepSpeechClient::Models::CandidateTranscript - :project: deepspeech-dotnet +.. doxygenclass:: STTClient::Models::CandidateTranscript + :project: stt-dotnet :members: Tokens, Confidence TokenMetadata ------------- -.. doxygenclass:: DeepSpeechClient::Models::TokenMetadata - :project: deepspeech-dotnet +.. doxygenclass:: STTClient::Models::TokenMetadata + :project: stt-dotnet :members: Text, Timestep, StartTime -DeepSpeech Interface +STT Interface -------------------- -.. doxygeninterface:: DeepSpeechClient::Interfaces::IDeepSpeech - :project: deepspeech-dotnet +.. doxygeninterface:: STTClient::Interfaces::ISTT + :project: stt-dotnet :members: diff --git a/doc/DotNet-Examples.rst b/doc/DotNet-Examples.rst index a00ee833..beec6243 100644 --- a/doc/DotNet-Examples.rst +++ b/doc/DotNet-Examples.rst @@ -1,12 +1,12 @@ .NET API Usage example ====================== -Examples are from `native_client/dotnet/DeepSpeechConsole/Program.cs`. +Examples are from `native_client/dotnet/STTConsole/Program.cs`. Creating a model instance and loading model ------------------------------------------- -.. literalinclude:: ../native_client/dotnet/DeepSpeechConsole/Program.cs +.. literalinclude:: ../native_client/dotnet/STTConsole/Program.cs :language: csharp :linenos: :lineno-match: @@ -16,7 +16,7 @@ Creating a model instance and loading model Performing inference -------------------- -.. 
literalinclude:: ../native_client/dotnet/DeepSpeechConsole/Program.cs +.. literalinclude:: ../native_client/dotnet/STTConsole/Program.cs :language: csharp :linenos: :lineno-match: @@ -26,4 +26,4 @@ Performing inference Full source code ---------------- -See :download:`Full source code<../native_client/dotnet/DeepSpeechConsole/Program.cs>`. +See :download:`Full source code<../native_client/dotnet/STTConsole/Program.cs>`. diff --git a/doc/Java-API.rst b/doc/Java-API.rst index a61bd1b1..69603141 100644 --- a/doc/Java-API.rst +++ b/doc/Java-API.rst @@ -1,29 +1,29 @@ Java ==== -DeepSpeechModel +STTModel --------------- -.. doxygenclass:: org::deepspeech::libdeepspeech::DeepSpeechModel - :project: deepspeech-java +.. doxygenclass:: ai::coqui::libstt::STTModel + :project: stt-java :members: Metadata -------- -.. doxygenclass:: org::deepspeech::libdeepspeech::Metadata - :project: deepspeech-java +.. doxygenclass:: ai::coqui::libstt::Metadata + :project: stt-java :members: getNumTranscripts, getTranscript CandidateTranscript ------------------- -.. doxygenclass:: org::deepspeech::libdeepspeech::CandidateTranscript - :project: deepspeech-java +.. doxygenclass:: ai::coqui::libstt::CandidateTranscript + :project: stt-java :members: getNumTokens, getConfidence, getToken TokenMetadata ------------- -.. doxygenclass:: org::deepspeech::libdeepspeech::TokenMetadata - :project: deepspeech-java +.. doxygenclass:: ai::coqui::libstt::TokenMetadata + :project: stt-java :members: getText, getTimestep, getStartTime diff --git a/doc/Java-Examples.rst b/doc/Java-Examples.rst index 04836ed5..834354df 100644 --- a/doc/Java-Examples.rst +++ b/doc/Java-Examples.rst @@ -1,12 +1,12 @@ Java API Usage example ====================== -Examples are from `native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java`. +Examples are from `native_client/java/app/src/main/java/ai/coqui/sttexampleapp/STTActivity.java`.
Creating a model instance and loading model ------------------------------------------- -.. literalinclude:: ../native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java +.. literalinclude:: ../native_client/java/app/src/main/java/ai/coqui/sttexampleapp/STTActivity.java :language: java :linenos: :lineno-match: @@ -16,7 +16,7 @@ Creating a model instance and loading model Performing inference -------------------- -.. literalinclude:: ../native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java +.. literalinclude:: ../native_client/java/app/src/main/java/ai/coqui/sttexampleapp/STTActivity.java :language: java :linenos: :lineno-match: @@ -26,4 +26,4 @@ Performing inference Full source code ---------------- -See :download:`Full source code<../native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java>`. +See :download:`Full source code<../native_client/java/app/src/main/java/ai/coqui/sttexampleapp/STTActivity.java>`. diff --git a/doc/SUPPORTED_PLATFORMS.rst b/doc/SUPPORTED_PLATFORMS.rst index 1ccfb7e3..800d92f2 100644 --- a/doc/SUPPORTED_PLATFORMS.rst +++ b/doc/SUPPORTED_PLATFORMS.rst @@ -9,61 +9,61 @@ Linux / AMD64 without GPU ^^^^^^^^^^^^^^^^^^^^^^^^^ * x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference) * Ubuntu 14.04+ (glibc >= 2.19, libstdc++6 >= 4.8) -* Full TensorFlow runtime (``deepspeech`` packages) -* TensorFlow Lite runtime (``deepspeech-tflite`` packages) +* Full TensorFlow runtime (``stt`` packages) +* TensorFlow Lite runtime (``stt-tflite`` packages) Linux / AMD64 with GPU ^^^^^^^^^^^^^^^^^^^^^^ * x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference) * Ubuntu 14.04+ (glibc >= 2.19, libstdc++6 >= 4.8) * CUDA 10.0 (and capable GPU) -* Full TensorFlow runtime (``deepspeech`` packages) -* TensorFlow Lite runtime (``deepspeech-tflite`` packages) +* Full TensorFlow runtime (``stt`` packages) +* TensorFlow Lite runtime (``stt-tflite`` packages) Linux / ARMv7 ^^^^^^^^^^^^^ *
Cortex-A53 compatible ARMv7 SoC with Neon support * Raspbian Buster-compatible distribution -* TensorFlow Lite runtime (``deepspeech-tflite`` packages) +* TensorFlow Lite runtime (``stt-tflite`` packages) Linux / Aarch64 ^^^^^^^^^^^^^^^ * Cortex-A72 compatible Aarch64 SoC * ARMbian Buster-compatible distribution -* TensorFlow Lite runtime (``deepspeech-tflite`` packages) +* TensorFlow Lite runtime (``stt-tflite`` packages) Android / ARMv7 ^^^^^^^^^^^^^^^ * ARMv7 SoC with Neon support * Android 7.0-10.0 * NDK API level >= 21 -* TensorFlow Lite runtime (``deepspeech-tflite`` packages) +* TensorFlow Lite runtime (``stt-tflite`` packages) Android / Aarch64 ^^^^^^^^^^^^^^^^^ * Aarch64 SoC * Android 7.0-10.0 * NDK API level >= 21 -* TensorFlow Lite runtime (``deepspeech-tflite`` packages) +* TensorFlow Lite runtime (``stt-tflite`` packages) macOS / AMD64 ^^^^^^^^^^^^^ * x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference) * macOS >= 10.10 -* Full TensorFlow runtime (``deepspeech`` packages) -* TensorFlow Lite runtime (``deepspeech-tflite`` packages) +* Full TensorFlow runtime (``stt`` packages) +* TensorFlow Lite runtime (``stt-tflite`` packages) Windows / AMD64 without GPU ^^^^^^^^^^^^^^^^^^^^^^^^^^^ * x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference) * Windows Server >= 2012 R2 ; Windows >= 8.1 -* Full TensorFlow runtime (``deepspeech`` packages) -* TensorFlow Lite runtime (``deepspeech-tflite`` packages) +* Full TensorFlow runtime (``stt`` packages) +* TensorFlow Lite runtime (``stt-tflite`` packages) Windows / AMD64 with GPU ^^^^^^^^^^^^^^^^^^^^^^^^ * x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference) * Windows Server >= 2012 R2 ; Windows >= 8.1 * CUDA 10.0 (and capable GPU) -* Full TensorFlow runtime (``deepspeech`` packages) -* TensorFlow Lite runtime (``deepspeech-tflite`` packages) +* Full TensorFlow runtime (``stt`` packages) +* TensorFlow 
Lite runtime (``stt-tflite`` packages) diff --git a/doc/Structs.rst b/doc/Structs.rst index 5d532277..14869dd2 100644 --- a/doc/Structs.rst +++ b/doc/Structs.rst @@ -5,19 +5,19 @@ Metadata -------- .. doxygenstruct:: Metadata - :project: deepspeech-c + :project: stt-c :members: CandidateTranscript ------------------- .. doxygenstruct:: CandidateTranscript - :project: deepspeech-c + :project: stt-c :members: TokenMetadata ------------- .. doxygenstruct:: TokenMetadata - :project: deepspeech-c + :project: stt-c :members: diff --git a/doc/conf.py b/doc/conf.py index dc447452..92b315e9 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -81,9 +81,9 @@ extensions = [ breathe_projects = { - "deepspeech-c": "xml-c/", - "deepspeech-java": "xml-java/", - "deepspeech-dotnet": "xml-dotnet/", + "stt-c": "xml-c/", + "stt-java": "xml-java/", + "stt-dotnet": "xml-dotnet/", } js_source_path = "../native_client/javascript/index.ts" diff --git a/ds_lib.supp b/ds_lib.supp index d7748e34..98f40177 100644 --- a/ds_lib.supp +++ b/ds_lib.supp @@ -5,6 +5,6 @@ fun:_Znwm fun:_ZN6tflite20DefaultErrorReporterEv fun:_ZN16TFLiteModelState4initEPKc - fun:DS_CreateModel + fun:STT_CreateModel fun:main } diff --git a/ds_openfst.supp b/ds_openfst.supp index 378659db..8cb96016 100644 --- a/ds_openfst.supp +++ b/ds_openfst.supp @@ -815,7 +815,7 @@ fun:_ZN6Scorer9load_trieERSt14basic_ifstreamIcSt11char_traitsIcEERKNSt7__cxx1112basic_stringIcS2_SaIcEEE fun:_ZN6Scorer7load_lmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE fun:_ZN6Scorer4initERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERK8Alphabet - fun:DS_EnableExternalScorer + fun:STT_EnableExternalScorer fun:main } { @@ -831,7 +831,7 @@ fun:_ZN6Scorer9load_trieERSt14basic_ifstreamIcSt11char_traitsIcEERKNSt7__cxx1112basic_stringIcS2_SaIcEEE fun:_ZN6Scorer7load_lmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE fun:_ZN6Scorer4initERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERK8Alphabet - fun:DS_EnableExternalScorer + 
fun:STT_EnableExternalScorer fun:main } { diff --git a/native_client/args.h b/native_client/args.h index 04c5eb88..30ed3181 100644 --- a/native_client/args.h +++ b/native_client/args.h @@ -64,9 +64,9 @@ void PrintHelp(const char* bin) "\t--hot_words\t\t\tHot-words and their boosts. Word:Boost pairs are comma-separated\n" "\t--help\t\t\t\tShow help\n" "\t--version\t\t\tPrint version and exits\n"; - char* version = DS_Version(); + char* version = STT_Version(); std::cerr << "Coqui STT " << version << "\n"; - DS_FreeString(version); + STT_FreeString(version); exit(1); } @@ -169,9 +169,9 @@ bool ProcessArgs(int argc, char** argv) } if (has_versions) { - char* version = DS_Version(); + char* version = STT_Version(); std::cout << "Coqui " << version << "\n"; - DS_FreeString(version); + STT_FreeString(version); return false; } diff --git a/native_client/client.cc b/native_client/client.cc index 70c199e7..93afa555 100644 --- a/native_client/client.cc +++ b/native_client/client.cc @@ -168,17 +168,17 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize, // sphinx-doc: c_ref_inference_start if (extended_output) { - Metadata *result = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, 1); + Metadata *result = STT_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, 1); res.string = CandidateTranscriptToString(&result->transcripts[0]); - DS_FreeMetadata(result); + STT_FreeMetadata(result); } else if (json_output) { - Metadata *result = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, json_candidate_transcripts); + Metadata *result = STT_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, json_candidate_transcripts); res.string = MetadataToJSON(result); - DS_FreeMetadata(result); + STT_FreeMetadata(result); } else if (stream_size > 0) { StreamingState* ctx; - int status = DS_CreateStream(aCtx, &ctx); - if (status != DS_ERR_OK) { + int status = STT_CreateStream(aCtx, &ctx); + if (status != STT_ERR_OK) { res.string = strdup(""); return res; } @@ 
-187,28 +187,28 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize, const char *prev = nullptr; while (off < aBufferSize) { size_t cur = aBufferSize - off > stream_size ? stream_size : aBufferSize - off; - DS_FeedAudioContent(ctx, aBuffer + off, cur); + STT_FeedAudioContent(ctx, aBuffer + off, cur); off += cur; prev = last; - const char* partial = DS_IntermediateDecode(ctx); + const char* partial = STT_IntermediateDecode(ctx); if (last == nullptr || strcmp(last, partial)) { printf("%s\n", partial); last = partial; } else { - DS_FreeString((char *) partial); + STT_FreeString((char *) partial); } if (prev != nullptr && prev != last) { - DS_FreeString((char *) prev); + STT_FreeString((char *) prev); } } if (last != nullptr) { - DS_FreeString((char *) last); + STT_FreeString((char *) last); } - res.string = DS_FinishStream(ctx); + res.string = STT_FinishStream(ctx); } else if (extended_stream_size > 0) { StreamingState* ctx; - int status = DS_CreateStream(aCtx, &ctx); - if (status != DS_ERR_OK) { + int status = STT_CreateStream(aCtx, &ctx); + if (status != STT_ERR_OK) { res.string = strdup(""); return res; } @@ -217,10 +217,10 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize, const char *prev = nullptr; while (off < aBufferSize) { size_t cur = aBufferSize - off > extended_stream_size ? 
extended_stream_size : aBufferSize - off; - DS_FeedAudioContent(ctx, aBuffer + off, cur); + STT_FeedAudioContent(ctx, aBuffer + off, cur); off += cur; prev = last; - const Metadata* result = DS_IntermediateDecodeWithMetadata(ctx, 1); + const Metadata* result = STT_IntermediateDecodeWithMetadata(ctx, 1); const char* partial = CandidateTranscriptToString(&result->transcripts[0]); if (last == nullptr || strcmp(last, partial)) { printf("%s\n", partial); @@ -231,14 +231,14 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize, if (prev != nullptr && prev != last) { free((char *) prev); } - DS_FreeMetadata((Metadata *)result); + STT_FreeMetadata((Metadata *)result); } - const Metadata* result = DS_FinishStreamWithMetadata(ctx, 1); + const Metadata* result = STT_FinishStreamWithMetadata(ctx, 1); res.string = CandidateTranscriptToString(&result->transcripts[0]); - DS_FreeMetadata((Metadata *)result); + STT_FreeMetadata((Metadata *)result); free((char *) last); } else { - res.string = DS_SpeechToText(aCtx, aBuffer, aBufferSize); + res.string = STT_SpeechToText(aCtx, aBuffer, aBufferSize); } // sphinx-doc: c_ref_inference_stop @@ -404,7 +404,7 @@ GetAudioBuffer(const char* path, int desired_sample_rate) void ProcessFile(ModelState* context, const char* path, bool show_times) { - ds_audio_buffer audio = GetAudioBuffer(path, DS_GetModelSampleRate(context)); + ds_audio_buffer audio = GetAudioBuffer(path, STT_GetModelSampleRate(context)); // Pass audio to STT // We take half of buffer_size because buffer is a char* while @@ -418,7 +418,7 @@ ProcessFile(ModelState* context, const char* path, bool show_times) if (result.string) { printf("%s\n", result.string); - DS_FreeString((char*)result.string); + STT_FreeString((char*)result.string); } if (show_times) { @@ -453,16 +453,16 @@ main(int argc, char **argv) // Initialise STT ModelState* ctx; // sphinx-doc: c_ref_model_start - int status = DS_CreateModel(model, &ctx); + int status = STT_CreateModel(model, &ctx); 
if (status != 0) { - char* error = DS_ErrorCodeToErrorMessage(status); + char* error = STT_ErrorCodeToErrorMessage(status); fprintf(stderr, "Could not create model: %s\n", error); free(error); return 1; } if (set_beamwidth) { - status = DS_SetModelBeamWidth(ctx, beam_width); + status = STT_SetModelBeamWidth(ctx, beam_width); if (status != 0) { fprintf(stderr, "Could not set model beam width.\n"); return 1; @@ -470,13 +470,13 @@ main(int argc, char **argv) } if (scorer) { - status = DS_EnableExternalScorer(ctx, scorer); + status = STT_EnableExternalScorer(ctx, scorer); if (status != 0) { fprintf(stderr, "Could not enable external scorer.\n"); return 1; } if (set_alphabeta) { - status = DS_SetScorerAlphaBeta(ctx, lm_alpha, lm_beta); + status = STT_SetScorerAlphaBeta(ctx, lm_alpha, lm_beta); if (status != 0) { fprintf(stderr, "Error setting scorer alpha and beta.\n"); return 1; @@ -494,7 +494,7 @@ main(int argc, char **argv) // so, check the boost string before we turn it into a float bool boost_is_valid = (pair_[1].find_first_not_of("-.0123456789") == std::string::npos); float boost = strtof((pair_[1]).c_str(),0); - status = DS_AddHotWord(ctx, word, boost); + status = STT_AddHotWord(ctx, word, boost); if (status != 0 || !boost_is_valid) { fprintf(stderr, "Could not enable hot-word.\n"); return 1; @@ -555,7 +555,7 @@ main(int argc, char **argv) sox_quit(); #endif // NO_SOX - DS_FreeModel(ctx); + STT_FreeModel(ctx); return 0; } diff --git a/native_client/coqui-stt.h b/native_client/coqui-stt.h index 24c7ef66..7794bc79 100644 --- a/native_client/coqui-stt.h +++ b/native_client/coqui-stt.h @@ -61,37 +61,37 @@ typedef struct Metadata { // sphinx-doc: error_code_listing_start -#define DS_FOR_EACH_ERROR(APPLY) \ - APPLY(DS_ERR_OK, 0x0000, "No error.") \ - APPLY(DS_ERR_NO_MODEL, 0x1000, "Missing model information.") \ - APPLY(DS_ERR_INVALID_ALPHABET, 0x2000, "Invalid alphabet embedded in model. 
(Data corruption?)") \ - APPLY(DS_ERR_INVALID_SHAPE, 0x2001, "Invalid model shape.") \ - APPLY(DS_ERR_INVALID_SCORER, 0x2002, "Invalid scorer file.") \ - APPLY(DS_ERR_MODEL_INCOMPATIBLE, 0x2003, "Incompatible model.") \ - APPLY(DS_ERR_SCORER_NOT_ENABLED, 0x2004, "External scorer is not enabled.") \ - APPLY(DS_ERR_SCORER_UNREADABLE, 0x2005, "Could not read scorer file.") \ - APPLY(DS_ERR_SCORER_INVALID_LM, 0x2006, "Could not recognize language model header in scorer.") \ - APPLY(DS_ERR_SCORER_NO_TRIE, 0x2007, "Reached end of scorer file before loading vocabulary trie.") \ - APPLY(DS_ERR_SCORER_INVALID_TRIE, 0x2008, "Invalid magic in trie header.") \ - APPLY(DS_ERR_SCORER_VERSION_MISMATCH, 0x2009, "Scorer file version does not match expected version.") \ - APPLY(DS_ERR_FAIL_INIT_MMAP, 0x3000, "Failed to initialize memory mapped model.") \ - APPLY(DS_ERR_FAIL_INIT_SESS, 0x3001, "Failed to initialize the session.") \ - APPLY(DS_ERR_FAIL_INTERPRETER, 0x3002, "Interpreter failed.") \ - APPLY(DS_ERR_FAIL_RUN_SESS, 0x3003, "Failed to run the session.") \ - APPLY(DS_ERR_FAIL_CREATE_STREAM, 0x3004, "Error creating the stream.") \ - APPLY(DS_ERR_FAIL_READ_PROTOBUF, 0x3005, "Error reading the proto buffer model file.") \ - APPLY(DS_ERR_FAIL_CREATE_SESS, 0x3006, "Failed to create session.") \ - APPLY(DS_ERR_FAIL_CREATE_MODEL, 0x3007, "Could not allocate model state.") \ - APPLY(DS_ERR_FAIL_INSERT_HOTWORD, 0x3008, "Could not insert hot-word.") \ - APPLY(DS_ERR_FAIL_CLEAR_HOTWORD, 0x3009, "Could not clear hot-words.") \ - APPLY(DS_ERR_FAIL_ERASE_HOTWORD, 0x3010, "Could not erase hot-word.") +#define STT_FOR_EACH_ERROR(APPLY) \ + APPLY(STT_ERR_OK, 0x0000, "No error.") \ + APPLY(STT_ERR_NO_MODEL, 0x1000, "Missing model information.") \ + APPLY(STT_ERR_INVALID_ALPHABET, 0x2000, "Invalid alphabet embedded in model. 
(Data corruption?)") \ + APPLY(STT_ERR_INVALID_SHAPE, 0x2001, "Invalid model shape.") \ + APPLY(STT_ERR_INVALID_SCORER, 0x2002, "Invalid scorer file.") \ + APPLY(STT_ERR_MODEL_INCOMPATIBLE, 0x2003, "Incompatible model.") \ + APPLY(STT_ERR_SCORER_NOT_ENABLED, 0x2004, "External scorer is not enabled.") \ + APPLY(STT_ERR_SCORER_UNREADABLE, 0x2005, "Could not read scorer file.") \ + APPLY(STT_ERR_SCORER_INVALID_LM, 0x2006, "Could not recognize language model header in scorer.") \ + APPLY(STT_ERR_SCORER_NO_TRIE, 0x2007, "Reached end of scorer file before loading vocabulary trie.") \ + APPLY(STT_ERR_SCORER_INVALID_TRIE, 0x2008, "Invalid magic in trie header.") \ + APPLY(STT_ERR_SCORER_VERSION_MISMATCH, 0x2009, "Scorer file version does not match expected version.") \ + APPLY(STT_ERR_FAIL_INIT_MMAP, 0x3000, "Failed to initialize memory mapped model.") \ + APPLY(STT_ERR_FAIL_INIT_SESS, 0x3001, "Failed to initialize the session.") \ + APPLY(STT_ERR_FAIL_INTERPRETER, 0x3002, "Interpreter failed.") \ + APPLY(STT_ERR_FAIL_RUN_SESS, 0x3003, "Failed to run the session.") \ + APPLY(STT_ERR_FAIL_CREATE_STREAM, 0x3004, "Error creating the stream.") \ + APPLY(STT_ERR_FAIL_READ_PROTOBUF, 0x3005, "Error reading the proto buffer model file.") \ + APPLY(STT_ERR_FAIL_CREATE_SESS, 0x3006, "Failed to create session.") \ + APPLY(STT_ERR_FAIL_CREATE_MODEL, 0x3007, "Could not allocate model state.") \ + APPLY(STT_ERR_FAIL_INSERT_HOTWORD, 0x3008, "Could not insert hot-word.") \ + APPLY(STT_ERR_FAIL_CLEAR_HOTWORD, 0x3009, "Could not clear hot-words.") \ + APPLY(STT_ERR_FAIL_ERASE_HOTWORD, 0x3010, "Could not erase hot-word.") // sphinx-doc: error_code_listing_end -enum DeepSpeech_Error_Codes +enum STT_Error_Codes { #define DEFINE(NAME, VALUE, DESC) NAME = VALUE, -DS_FOR_EACH_ERROR(DEFINE) +STT_FOR_EACH_ERROR(DEFINE) #undef DEFINE }; @@ -104,49 +104,49 @@ DS_FOR_EACH_ERROR(DEFINE) * @return Zero on success, non-zero on failure. 
*/ STT_EXPORT -int DS_CreateModel(const char* aModelPath, +int STT_CreateModel(const char* aModelPath, ModelState** retval); /** - * @brief Get beam width value used by the model. If {@link DS_SetModelBeamWidth} + * @brief Get beam width value used by the model. If {@link STT_SetModelBeamWidth} * was not called before, will return the default value loaded from the * model file. * - * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. + * @param aCtx A ModelState pointer created with {@link STT_CreateModel}. * * @return Beam width value used by the model. */ STT_EXPORT -unsigned int DS_GetModelBeamWidth(const ModelState* aCtx); +unsigned int STT_GetModelBeamWidth(const ModelState* aCtx); /** * @brief Set beam width value used by the model. * - * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. + * @param aCtx A ModelState pointer created with {@link STT_CreateModel}. * @param aBeamWidth The beam width used by the model. A larger beam width value * generates better results at the cost of decoding time. * * @return Zero on success, non-zero on failure. */ STT_EXPORT -int DS_SetModelBeamWidth(ModelState* aCtx, +int STT_SetModelBeamWidth(ModelState* aCtx, unsigned int aBeamWidth); /** * @brief Return the sample rate expected by a model. * - * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. + * @param aCtx A ModelState pointer created with {@link STT_CreateModel}. * * @return Sample rate expected by the model for its input. */ STT_EXPORT -int DS_GetModelSampleRate(const ModelState* aCtx); +int STT_GetModelSampleRate(const ModelState* aCtx); /** * @brief Frees associated resources and destroys model object. */ STT_EXPORT -void DS_FreeModel(ModelState* ctx); +void STT_FreeModel(ModelState* ctx); /** * @brief Enable decoding using an external scorer. @@ -157,7 +157,7 @@ void DS_FreeModel(ModelState* ctx); * @return Zero on success, non-zero on failure (invalid arguments). 
*/ STT_EXPORT -int DS_EnableExternalScorer(ModelState* aCtx, +int STT_EnableExternalScorer(ModelState* aCtx, const char* aScorerPath); /** @@ -172,7 +172,7 @@ int DS_EnableExternalScorer(ModelState* aCtx, * @return Zero on success, non-zero on failure (invalid arguments). */ STT_EXPORT -int DS_AddHotWord(ModelState* aCtx, +int STT_AddHotWord(ModelState* aCtx, const char* word, float boost); @@ -185,7 +185,7 @@ int DS_AddHotWord(ModelState* aCtx, * @return Zero on success, non-zero on failure (invalid arguments). */ STT_EXPORT -int DS_EraseHotWord(ModelState* aCtx, +int STT_EraseHotWord(ModelState* aCtx, const char* word); /** @@ -196,7 +196,7 @@ int DS_EraseHotWord(ModelState* aCtx, * @return Zero on success, non-zero on failure (invalid arguments). */ STT_EXPORT -int DS_ClearHotWords(ModelState* aCtx); +int STT_ClearHotWords(ModelState* aCtx); /** * @brief Disable decoding using an external scorer. @@ -206,7 +206,7 @@ int DS_ClearHotWords(ModelState* aCtx); * @return Zero on success, non-zero on failure. */ STT_EXPORT -int DS_DisableExternalScorer(ModelState* aCtx); +int STT_DisableExternalScorer(ModelState* aCtx); /** * @brief Set hyperparameters alpha and beta of the external scorer. @@ -218,7 +218,7 @@ int DS_DisableExternalScorer(ModelState* aCtx); * @return Zero on success, non-zero on failure. */ STT_EXPORT -int DS_SetScorerAlphaBeta(ModelState* aCtx, +int STT_SetScorerAlphaBeta(ModelState* aCtx, float aAlpha, float aBeta); @@ -231,10 +231,10 @@ int DS_SetScorerAlphaBeta(ModelState* aCtx, * @param aBufferSize The number of samples in the audio signal. * * @return The STT result. The user is responsible for freeing the string using - * {@link DS_FreeString()}. Returns NULL on error. + * {@link STT_FreeString()}. Returns NULL on error. 
*/ STT_EXPORT -char* DS_SpeechToText(ModelState* aCtx, +char* STT_SpeechToText(ModelState* aCtx, const short* aBuffer, unsigned int aBufferSize); @@ -250,19 +250,19 @@ char* DS_SpeechToText(ModelState* aCtx, * * @return Metadata struct containing multiple CandidateTranscript structs. Each * transcript has per-token metadata including timing information. The - * user is responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. + * user is responsible for freeing Metadata by calling {@link STT_FreeMetadata()}. * Returns NULL on error. */ STT_EXPORT -Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx, +Metadata* STT_SpeechToTextWithMetadata(ModelState* aCtx, const short* aBuffer, unsigned int aBufferSize, unsigned int aNumResults); /** * @brief Create a new streaming inference state. The streaming state returned - * by this function can then be passed to {@link DS_FeedAudioContent()} - * and {@link DS_FinishStream()}. + * by this function can then be passed to {@link STT_FeedAudioContent()} + * and {@link STT_FinishStream()}. * * @param aCtx The ModelState pointer for the model to use. * @param[out] retval an opaque pointer that represents the streaming state. Can @@ -271,80 +271,80 @@ Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx, * @return Zero for success, non-zero on failure. */ STT_EXPORT -int DS_CreateStream(ModelState* aCtx, +int STT_CreateStream(ModelState* aCtx, StreamingState** retval); /** * @brief Feed audio samples to an ongoing streaming inference. * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}. * @param aBuffer An array of 16-bit, mono raw audio samples at the * appropriate sample rate (matching what the model was trained on). * @param aBufferSize The number of samples in @p aBuffer. 
*/ STT_EXPORT -void DS_FeedAudioContent(StreamingState* aSctx, +void STT_FeedAudioContent(StreamingState* aSctx, const short* aBuffer, unsigned int aBufferSize); /** * @brief Compute the intermediate decoding of an ongoing streaming inference. * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}. * * @return The STT intermediate result. The user is responsible for freeing the - * string using {@link DS_FreeString()}. + * string using {@link STT_FreeString()}. */ STT_EXPORT -char* DS_IntermediateDecode(const StreamingState* aSctx); +char* STT_IntermediateDecode(const StreamingState* aSctx); /** * @brief Compute the intermediate decoding of an ongoing streaming inference, * return results including metadata. * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}. * @param aNumResults The number of candidate transcripts to return. * * @return Metadata struct containing multiple candidate transcripts. Each transcript * has per-token metadata including timing information. The user is - * responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. + * responsible for freeing Metadata by calling {@link STT_FreeMetadata()}. * Returns NULL on error. */ STT_EXPORT -Metadata* DS_IntermediateDecodeWithMetadata(const StreamingState* aSctx, +Metadata* STT_IntermediateDecodeWithMetadata(const StreamingState* aSctx, unsigned int aNumResults); /** * @brief Compute the final decoding of an ongoing streaming inference and return * the result. Signals the end of an ongoing streaming inference. * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}. * * @return The STT result. 
The user is responsible for freeing the string using - * {@link DS_FreeString()}. + * {@link STT_FreeString()}. * * @note This method will free the state pointer (@p aSctx). */ STT_EXPORT -char* DS_FinishStream(StreamingState* aSctx); +char* STT_FinishStream(StreamingState* aSctx); /** * @brief Compute the final decoding of an ongoing streaming inference and return * results including metadata. Signals the end of an ongoing streaming * inference. * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}. * @param aNumResults The number of candidate transcripts to return. * * @return Metadata struct containing multiple candidate transcripts. Each transcript * has per-token metadata including timing information. The user is - * responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. + * responsible for freeing Metadata by calling {@link STT_FreeMetadata()}. * Returns NULL on error. * * @note This method will free the state pointer (@p aSctx). */ STT_EXPORT -Metadata* DS_FinishStreamWithMetadata(StreamingState* aSctx, +Metadata* STT_FinishStreamWithMetadata(StreamingState* aSctx, unsigned int aNumResults); /** @@ -352,42 +352,42 @@ Metadata* DS_FinishStreamWithMetadata(StreamingState* aSctx, * can be used if you no longer need the result of an ongoing streaming * inference and don't want to perform a costly decode operation. * - * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}. * * @note This method will free the state pointer (@p aSctx). */ STT_EXPORT -void DS_FreeStream(StreamingState* aSctx); +void STT_FreeStream(StreamingState* aSctx); /** * @brief Free memory allocated for metadata information. 
*/ STT_EXPORT -void DS_FreeMetadata(Metadata* m); +void STT_FreeMetadata(Metadata* m); /** * @brief Free a char* string returned by the Coqui STT API. */ STT_EXPORT -void DS_FreeString(char* str); +void STT_FreeString(char* str); /** * @brief Returns the version of this library. The returned version is a semantic - * version (SemVer 2.0.0). The string returned must be freed with {@link DS_FreeString()}. + * version (SemVer 2.0.0). The string returned must be freed with {@link STT_FreeString()}. * * @return The version string. */ STT_EXPORT -char* DS_Version(); +char* STT_Version(); /** * @brief Returns a textual description corresponding to an error code. - * The string returned must be freed with @{link DS_FreeString()}. + * The string returned must be freed with {@link STT_FreeString()}. * * @return The error description. */ STT_EXPORT -char* DS_ErrorCodeToErrorMessage(int aErrorCode); +char* STT_ErrorCodeToErrorMessage(int aErrorCode); #undef STT_EXPORT diff --git a/native_client/ctcdecode/__init__.py b/native_client/ctcdecode/__init__.py index 80edc51d..fc8f3255 100644 --- a/native_client/ctcdecode/__init__.py +++ b/native_client/ctcdecode/__init__.py @@ -9,7 +9,7 @@ __version__ = swigwrapper.__version__.decode('utf-8') # Hack: import error codes by matching on their names, as SWIG unfortunately # does not support binding enums to Python in a scoped manner yet.
for symbol in dir(swigwrapper): - if symbol.startswith('DS_ERR_'): + if symbol.startswith('STT_ERR_'): globals()[symbol] = getattr(swigwrapper, symbol) class Scorer(swigwrapper.Scorer): diff --git a/native_client/ctcdecode/scorer.cpp b/native_client/ctcdecode/scorer.cpp index b77c63f7..e5c6c359 100644 --- a/native_client/ctcdecode/scorer.cpp +++ b/native_client/ctcdecode/scorer.cpp @@ -74,13 +74,13 @@ int Scorer::load_lm(const std::string& lm_path) // Check if file is readable to avoid KenLM throwing an exception const char* filename = lm_path.c_str(); if (access(filename, R_OK) != 0) { - return DS_ERR_SCORER_UNREADABLE; + return STT_ERR_SCORER_UNREADABLE; } // Check if the file format is valid to avoid KenLM throwing an exception lm::ngram::ModelType model_type; if (!lm::ngram::RecognizeBinary(filename, model_type)) { - return DS_ERR_SCORER_INVALID_LM; + return STT_ERR_SCORER_INVALID_LM; } // Load the LM @@ -97,7 +97,7 @@ int Scorer::load_lm(const std::string& lm_path) uint64_t trie_offset = language_model_->GetEndOfSearchOffset(); if (package_size <= trie_offset) { // File ends without a trie structure - return DS_ERR_SCORER_NO_TRIE; + return STT_ERR_SCORER_NO_TRIE; } // Read metadata and trie from file @@ -113,7 +113,7 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path) if (magic != MAGIC) { std::cerr << "Error: Can't parse scorer file, invalid header. Try updating " "your scorer file." 
<< std::endl; - return DS_ERR_SCORER_INVALID_TRIE; + return STT_ERR_SCORER_INVALID_TRIE; } int version; @@ -128,7 +128,7 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path) std::cerr << "Downgrade your scorer file or update your version of Coqui STT."; } std::cerr << std::endl; - return DS_ERR_SCORER_VERSION_MISMATCH; + return STT_ERR_SCORER_VERSION_MISMATCH; } fin.read(reinterpret_cast<char*>(&is_utf8_mode_), sizeof(is_utf8_mode_)); @@ -143,7 +143,7 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path) opt.mode = fst::FstReadOptions::MAP; opt.source = file_path; dictionary.reset(FstType::Read(fin, opt)); - return DS_ERR_OK; + return STT_ERR_OK; } bool Scorer::save_dictionary(const std::string& path, bool append_instead_of_overwrite) diff --git a/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs b/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs deleted file mode 100644 index cbcb8f43..00000000 --- a/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs +++ /dev/null @@ -1,33 +0,0 @@ -namespace DeepSpeechClient.Enums -{ - /// - /// Error codes from the native DeepSpeech binary.
- /// - internal enum ErrorCodes - { - // OK - DS_ERR_OK = 0x0000, - - // Missing invormations - DS_ERR_NO_MODEL = 0x1000, - - // Invalid parameters - DS_ERR_INVALID_ALPHABET = 0x2000, - DS_ERR_INVALID_SHAPE = 0x2001, - DS_ERR_INVALID_SCORER = 0x2002, - DS_ERR_MODEL_INCOMPATIBLE = 0x2003, - DS_ERR_SCORER_NOT_ENABLED = 0x2004, - - // Runtime failures - DS_ERR_FAIL_INIT_MMAP = 0x3000, - DS_ERR_FAIL_INIT_SESS = 0x3001, - DS_ERR_FAIL_INTERPRETER = 0x3002, - DS_ERR_FAIL_RUN_SESS = 0x3003, - DS_ERR_FAIL_CREATE_STREAM = 0x3004, - DS_ERR_FAIL_READ_PROTOBUF = 0x3005, - DS_ERR_FAIL_CREATE_SESS = 0x3006, - DS_ERR_FAIL_INSERT_HOTWORD = 0x3008, - DS_ERR_FAIL_CLEAR_HOTWORD = 0x3009, - DS_ERR_FAIL_ERASE_HOTWORD = 0x3010 - } -} diff --git a/native_client/dotnet/DeepSpeech.sln b/native_client/dotnet/STT.sln similarity index 79% rename from native_client/dotnet/DeepSpeech.sln rename to native_client/dotnet/STT.sln index 78afe7db..58fd6c8e 100644 --- a/native_client/dotnet/DeepSpeech.sln +++ b/native_client/dotnet/STT.sln @@ -2,9 +2,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 16 VisualStudioVersion = 16.0.30204.135 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeepSpeechClient", "DeepSpeechClient\DeepSpeechClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "STTClient", "STTClient\STTClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechConsole", "DeepSpeechConsole\DeepSpeechConsole.csproj", "{312965E5-C4F6-4D95-BA64-79906B8BC7AC}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "STTConsole", "STTConsole\STTConsole.csproj", "{312965E5-C4F6-4D95-BA64-79906B8BC7AC}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution diff --git a/native_client/dotnet/STTClient/Enums/ErrorCodes.cs 
b/native_client/dotnet/STTClient/Enums/ErrorCodes.cs new file mode 100644 index 00000000..b3e76456 --- /dev/null +++ b/native_client/dotnet/STTClient/Enums/ErrorCodes.cs @@ -0,0 +1,33 @@ +namespace STTClient.Enums +{ + /// + /// Error codes from the native Coqui STT binary. + /// + internal enum ErrorCodes + { + // OK + STT_ERR_OK = 0x0000, + + // Missing information + STT_ERR_NO_MODEL = 0x1000, + + // Invalid parameters + STT_ERR_INVALID_ALPHABET = 0x2000, + STT_ERR_INVALID_SHAPE = 0x2001, + STT_ERR_INVALID_SCORER = 0x2002, + STT_ERR_MODEL_INCOMPATIBLE = 0x2003, + STT_ERR_SCORER_NOT_ENABLED = 0x2004, + + // Runtime failures + STT_ERR_FAIL_INIT_MMAP = 0x3000, + STT_ERR_FAIL_INIT_SESS = 0x3001, + STT_ERR_FAIL_INTERPRETER = 0x3002, + STT_ERR_FAIL_RUN_SESS = 0x3003, + STT_ERR_FAIL_CREATE_STREAM = 0x3004, + STT_ERR_FAIL_READ_PROTOBUF = 0x3005, + STT_ERR_FAIL_CREATE_SESS = 0x3006, + STT_ERR_FAIL_INSERT_HOTWORD = 0x3008, + STT_ERR_FAIL_CLEAR_HOTWORD = 0x3009, + STT_ERR_FAIL_ERASE_HOTWORD = 0x3010 + } +} diff --git a/native_client/dotnet/DeepSpeechClient/Extensions/NativeExtensions.cs b/native_client/dotnet/STTClient/Extensions/NativeExtensions.cs similarity index 95% rename from native_client/dotnet/DeepSpeechClient/Extensions/NativeExtensions.cs rename to native_client/dotnet/STTClient/Extensions/NativeExtensions.cs index 9325f4b8..297a311d 100644 --- a/native_client/dotnet/DeepSpeechClient/Extensions/NativeExtensions.cs +++ b/native_client/dotnet/STTClient/Extensions/NativeExtensions.cs @@ -1,9 +1,9 @@ -using DeepSpeechClient.Structs; +using STTClient.Structs; using System; using System.Runtime.InteropServices; using System.Text; -namespace DeepSpeechClient.Extensions +namespace STTClient.Extensions { internal static class NativeExtensions { @@ -20,7 +20,7 @@ namespace DeepSpeechClient.Extensions byte[] buffer = new byte[len]; Marshal.Copy(intPtr, buffer, 0, buffer.Length); if (releasePtr) - NativeImp.DS_FreeString(intPtr); + NativeImp.STT_FreeString(intPtr); string
result = Encoding.UTF8.GetString(buffer); return result; } @@ -86,7 +86,7 @@ namespace DeepSpeechClient.Extensions metadata.transcripts += sizeOfCandidateTranscript; } - NativeImp.DS_FreeMetadata(intPtr); + NativeImp.STT_FreeMetadata(intPtr); return managedMetadata; } } diff --git a/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs b/native_client/dotnet/STTClient/Interfaces/ISTT.cs similarity index 88% rename from native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs rename to native_client/dotnet/STTClient/Interfaces/ISTT.cs index fca21a57..7486796d 100644 --- a/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs +++ b/native_client/dotnet/STTClient/Interfaces/ISTT.cs @@ -1,13 +1,13 @@ -using DeepSpeechClient.Models; +using STTClient.Models; using System; using System.IO; -namespace DeepSpeechClient.Interfaces +namespace STTClient.Interfaces { /// - /// Client interface for DeepSpeech + /// Client interface for Coqui STT /// - public interface IDeepSpeech : IDisposable + public interface ISTT : IDisposable { /// /// Return version of this library. The returned version is a semantic version @@ -80,7 +80,7 @@ namespace DeepSpeechClient.Interfaces unsafe void SetScorerAlphaBeta(float aAlpha, float aBeta); /// - /// Use the DeepSpeech model to perform Speech-To-Text. + /// Use the STT model to perform Speech-To-Text. /// /// A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). /// The number of samples in the audio signal. @@ -89,7 +89,7 @@ namespace DeepSpeechClient.Interfaces uint aBufferSize); /// - /// Use the DeepSpeech model to perform Speech-To-Text, return results including metadata. + /// Use the STT model to perform Speech-To-Text, return results including metadata. /// /// A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). /// The number of samples in the audio signal. 
@@ -104,26 +104,26 @@ namespace DeepSpeechClient.Interfaces /// This can be used if you no longer need the result of an ongoing streaming /// inference and don't want to perform a costly decode operation. /// - unsafe void FreeStream(DeepSpeechStream stream); + unsafe void FreeStream(Stream stream); /// /// Creates a new streaming inference state. /// - unsafe DeepSpeechStream CreateStream(); + unsafe Stream CreateStream(); /// /// Feeds audio samples to an ongoing streaming inference. /// /// Instance of the stream to feed the data. /// An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on). - unsafe void FeedAudioContent(DeepSpeechStream stream, short[] aBuffer, uint aBufferSize); + unsafe void FeedAudioContent(Stream stream, short[] aBuffer, uint aBufferSize); /// /// Computes the intermediate decoding of an ongoing streaming inference. /// /// Instance of the stream to decode. /// The STT intermediate result. - unsafe string IntermediateDecode(DeepSpeechStream stream); + unsafe string IntermediateDecode(Stream stream); /// /// Computes the intermediate decoding of an ongoing streaming inference, including metadata. @@ -131,14 +131,14 @@ namespace DeepSpeechClient.Interfaces /// Instance of the stream to decode. /// Maximum number of candidate transcripts to return. Returned list might be smaller than this. /// The extended metadata result. - unsafe Metadata IntermediateDecodeWithMetadata(DeepSpeechStream stream, uint aNumResults); + unsafe Metadata IntermediateDecodeWithMetadata(Stream stream, uint aNumResults); /// /// Closes the ongoing streaming inference, returns the STT result over the whole audio signal. /// /// Instance of the stream to finish. /// The STT result. - unsafe string FinishStream(DeepSpeechStream stream); + unsafe string FinishStream(Stream stream); /// /// Closes the ongoing streaming inference, returns the STT result over the whole audio signal, including metadata. 
@@ -146,6 +146,6 @@ namespace DeepSpeechClient.Interfaces /// Instance of the stream to finish. /// Maximum number of candidate transcripts to return. Returned list might be smaller than this. /// The extended metadata result. - unsafe Metadata FinishStreamWithMetadata(DeepSpeechStream stream, uint aNumResults); + unsafe Metadata FinishStreamWithMetadata(Stream stream, uint aNumResults); } } diff --git a/native_client/dotnet/DeepSpeechClient/Models/CandidateTranscript.cs b/native_client/dotnet/STTClient/Models/CandidateTranscript.cs similarity index 93% rename from native_client/dotnet/DeepSpeechClient/Models/CandidateTranscript.cs rename to native_client/dotnet/STTClient/Models/CandidateTranscript.cs index cc6b5d28..f158e2c2 100644 --- a/native_client/dotnet/DeepSpeechClient/Models/CandidateTranscript.cs +++ b/native_client/dotnet/STTClient/Models/CandidateTranscript.cs @@ -1,4 +1,4 @@ -namespace DeepSpeechClient.Models +namespace STTClient.Models { /// /// Stores the entire CTC output as an array of character metadata objects. diff --git a/native_client/dotnet/DeepSpeechClient/Models/Metadata.cs b/native_client/dotnet/STTClient/Models/Metadata.cs similarity index 89% rename from native_client/dotnet/DeepSpeechClient/Models/Metadata.cs rename to native_client/dotnet/STTClient/Models/Metadata.cs index fb6c613d..537a22e8 100644 --- a/native_client/dotnet/DeepSpeechClient/Models/Metadata.cs +++ b/native_client/dotnet/STTClient/Models/Metadata.cs @@ -1,4 +1,4 @@ -namespace DeepSpeechClient.Models +namespace STTClient.Models { /// /// Stores the entire CTC output as an array of character metadata objects. 
diff --git a/native_client/dotnet/DeepSpeechClient/Models/DeepSpeechStream.cs b/native_client/dotnet/STTClient/Models/Stream.cs similarity index 81% rename from native_client/dotnet/DeepSpeechClient/Models/DeepSpeechStream.cs rename to native_client/dotnet/STTClient/Models/Stream.cs index e4605f5e..49f92dfa 100644 --- a/native_client/dotnet/DeepSpeechClient/Models/DeepSpeechStream.cs +++ b/native_client/dotnet/STTClient/Models/Stream.cs @@ -1,19 +1,19 @@ using System; -namespace DeepSpeechClient.Models +namespace STTClient.Models { /// /// Wrapper of the pointer used for the decoding stream. /// - public class DeepSpeechStream : IDisposable + public class Stream : IDisposable { private unsafe IntPtr** _streamingStatePp; /// - /// Initializes a new instance of . + /// Initializes a new instance of . /// /// Native pointer of the native stream. - public unsafe DeepSpeechStream(IntPtr** streamingStatePP) + public unsafe Stream(IntPtr** streamingStatePP) { _streamingStatePp = streamingStatePP; } diff --git a/native_client/dotnet/DeepSpeechClient/Models/TokenMetadata.cs b/native_client/dotnet/STTClient/Models/TokenMetadata.cs similarity index 93% rename from native_client/dotnet/DeepSpeechClient/Models/TokenMetadata.cs rename to native_client/dotnet/STTClient/Models/TokenMetadata.cs index 5f2dea56..c5ef94d8 100644 --- a/native_client/dotnet/DeepSpeechClient/Models/TokenMetadata.cs +++ b/native_client/dotnet/STTClient/Models/TokenMetadata.cs @@ -1,4 +1,4 @@ -namespace DeepSpeechClient.Models +namespace STTClient.Models { /// /// Stores each individual character, along with its timing information. 
diff --git a/native_client/dotnet/DeepSpeechClient/NativeImp.cs b/native_client/dotnet/STTClient/NativeImp.cs similarity index 59% rename from native_client/dotnet/DeepSpeechClient/NativeImp.cs rename to native_client/dotnet/STTClient/NativeImp.cs index 49532360..a3491171 100644 --- a/native_client/dotnet/DeepSpeechClient/NativeImp.cs +++ b/native_client/dotnet/STTClient/NativeImp.cs @@ -1,9 +1,9 @@ -using DeepSpeechClient.Enums; +using STTClient.Enums; using System; using System.Runtime.InteropServices; -namespace DeepSpeechClient +namespace STTClient { /// /// Wrapper for the native implementation of "libstt.so" @@ -13,101 +13,101 @@ namespace DeepSpeechClient #region Native Implementation [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Ansi, SetLastError = true)] - internal static extern IntPtr DS_Version(); + internal static extern IntPtr STT_Version(); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)] - internal unsafe static extern ErrorCodes DS_CreateModel(string aModelPath, + internal unsafe static extern ErrorCodes STT_CreateModel(string aModelPath, ref IntPtr** pint); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)] - internal unsafe static extern IntPtr DS_ErrorCodeToErrorMessage(int aErrorCode); + internal unsafe static extern IntPtr STT_ErrorCodeToErrorMessage(int aErrorCode); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)] - internal unsafe static extern uint DS_GetModelBeamWidth(IntPtr** aCtx); + internal unsafe static extern uint STT_GetModelBeamWidth(IntPtr** aCtx); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)] - internal unsafe static extern ErrorCodes DS_SetModelBeamWidth(IntPtr** aCtx, + internal unsafe static extern ErrorCodes STT_SetModelBeamWidth(IntPtr** aCtx, uint aBeamWidth); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)] - internal unsafe static extern ErrorCodes DS_CreateModel(string 
aModelPath, + internal unsafe static extern ErrorCodes STT_CreateModel(string aModelPath, uint aBeamWidth, ref IntPtr** pint); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)] - internal unsafe static extern int DS_GetModelSampleRate(IntPtr** aCtx); + internal unsafe static extern int STT_GetModelSampleRate(IntPtr** aCtx); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern ErrorCodes DS_EnableExternalScorer(IntPtr** aCtx, + internal static unsafe extern ErrorCodes STT_EnableExternalScorer(IntPtr** aCtx, string aScorerPath); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern ErrorCodes DS_AddHotWord(IntPtr** aCtx, + internal static unsafe extern ErrorCodes STT_AddHotWord(IntPtr** aCtx, string aWord, float aBoost); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern ErrorCodes DS_EraseHotWord(IntPtr** aCtx, + internal static unsafe extern ErrorCodes STT_EraseHotWord(IntPtr** aCtx, string aWord); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern ErrorCodes DS_ClearHotWords(IntPtr** aCtx); + internal static unsafe extern ErrorCodes STT_ClearHotWords(IntPtr** aCtx); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern ErrorCodes DS_DisableExternalScorer(IntPtr** aCtx); + internal static unsafe extern ErrorCodes STT_DisableExternalScorer(IntPtr** aCtx); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern ErrorCodes DS_SetScorerAlphaBeta(IntPtr** aCtx, + internal static unsafe extern ErrorCodes STT_SetScorerAlphaBeta(IntPtr** aCtx, float aAlpha, float aBeta); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Ansi, SetLastError = true)] - internal static unsafe extern IntPtr DS_SpeechToText(IntPtr** aCtx, + internal 
static unsafe extern IntPtr STT_SpeechToText(IntPtr** aCtx, short[] aBuffer, uint aBufferSize); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl, SetLastError = true)] - internal static unsafe extern IntPtr DS_SpeechToTextWithMetadata(IntPtr** aCtx, + internal static unsafe extern IntPtr STT_SpeechToTextWithMetadata(IntPtr** aCtx, short[] aBuffer, uint aBufferSize, uint aNumResults); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern void DS_FreeModel(IntPtr** aCtx); + internal static unsafe extern void STT_FreeModel(IntPtr** aCtx); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern ErrorCodes DS_CreateStream(IntPtr** aCtx, + internal static unsafe extern ErrorCodes STT_CreateStream(IntPtr** aCtx, ref IntPtr** retval); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern void DS_FreeStream(IntPtr** aSctx); + internal static unsafe extern void STT_FreeStream(IntPtr** aSctx); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern void DS_FreeMetadata(IntPtr metadata); + internal static unsafe extern void STT_FreeMetadata(IntPtr metadata); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern void DS_FreeString(IntPtr str); + internal static unsafe extern void STT_FreeString(IntPtr str); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Ansi, SetLastError = true)] - internal static unsafe extern void DS_FeedAudioContent(IntPtr** aSctx, + internal static unsafe extern void STT_FeedAudioContent(IntPtr** aSctx, short[] aBuffer, uint aBufferSize); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern IntPtr DS_IntermediateDecode(IntPtr** aSctx); + internal static unsafe extern IntPtr STT_IntermediateDecode(IntPtr** aSctx); 
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern IntPtr DS_IntermediateDecodeWithMetadata(IntPtr** aSctx, + internal static unsafe extern IntPtr STT_IntermediateDecodeWithMetadata(IntPtr** aSctx, uint aNumResults); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Ansi, SetLastError = true)] - internal static unsafe extern IntPtr DS_FinishStream(IntPtr** aSctx); + internal static unsafe extern IntPtr STT_FinishStream(IntPtr** aSctx); [DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)] - internal static unsafe extern IntPtr DS_FinishStreamWithMetadata(IntPtr** aSctx, + internal static unsafe extern IntPtr STT_FinishStreamWithMetadata(IntPtr** aSctx, uint aNumResults); #endregion } diff --git a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs b/native_client/dotnet/STTClient/STT.cs similarity index 75% rename from native_client/dotnet/DeepSpeechClient/DeepSpeech.cs rename to native_client/dotnet/STTClient/STT.cs index 79b276c2..60eeda9f 100644 --- a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs +++ b/native_client/dotnet/STTClient/STT.cs @@ -1,34 +1,34 @@ -using DeepSpeechClient.Interfaces; -using DeepSpeechClient.Extensions; +using STTClient.Interfaces; +using STTClient.Extensions; using System; using System.IO; -using DeepSpeechClient.Enums; -using DeepSpeechClient.Models; +using STTClient.Enums; +using STTClient.Models; -namespace DeepSpeechClient +namespace STTClient { /// - /// Concrete implementation of . + /// Concrete implementation of . /// - public class DeepSpeech : IDeepSpeech + public class STT : ISTT { private unsafe IntPtr** _modelStatePP; - + /// - /// Initializes a new instance of class and creates a new acoustic model. + /// Initializes a new instance of class and creates a new acoustic model. /// /// The path to the frozen model graph. /// Thrown when the native binary failed to create the model. 
- public DeepSpeech(string aModelPath) + public STT(string aModelPath) { CreateModel(aModelPath); } - #region IDeepSpeech + #region ISTT /// - /// Create an object providing an interface to a trained DeepSpeech model. + /// Create an object providing an interface to a trained STT model. /// /// The path to the frozen model graph. /// Thrown when the native binary failed to create the model. @@ -48,7 +48,7 @@ namespace DeepSpeechClient { throw new FileNotFoundException(exceptionMessage); } - var resultCode = NativeImp.DS_CreateModel(aModelPath, + var resultCode = NativeImp.STT_CreateModel(aModelPath, ref _modelStatePP); EvaluateResultCode(resultCode); } @@ -60,7 +60,7 @@ namespace DeepSpeechClient /// Beam width value used by the model. public unsafe uint GetModelBeamWidth() { - return NativeImp.DS_GetModelBeamWidth(_modelStatePP); + return NativeImp.STT_GetModelBeamWidth(_modelStatePP); } /// @@ -70,13 +70,13 @@ namespace DeepSpeechClient /// Thrown on failure. public unsafe void SetModelBeamWidth(uint aBeamWidth) { - var resultCode = NativeImp.DS_SetModelBeamWidth(_modelStatePP, aBeamWidth); + var resultCode = NativeImp.STT_SetModelBeamWidth(_modelStatePP, aBeamWidth); EvaluateResultCode(resultCode); } /// /// Add a hot-word. - /// + /// /// Words that don't occur in the scorer (e.g. proper nouns) or strings that contain spaces won't be taken into account. /// /// Some word @@ -84,7 +84,7 @@ namespace DeepSpeechClient /// Thrown on failure. public unsafe void AddHotWord(string aWord, float aBoost) { - var resultCode = NativeImp.DS_AddHotWord(_modelStatePP, aWord, aBoost); + var resultCode = NativeImp.STT_AddHotWord(_modelStatePP, aWord, aBoost); EvaluateResultCode(resultCode); } @@ -95,7 +95,7 @@ namespace DeepSpeechClient /// Thrown on failure. 
public unsafe void EraseHotWord(string aWord) { - var resultCode = NativeImp.DS_EraseHotWord(_modelStatePP, aWord); + var resultCode = NativeImp.STT_EraseHotWord(_modelStatePP, aWord); EvaluateResultCode(resultCode); } @@ -105,7 +105,7 @@ namespace DeepSpeechClient /// Thrown on failure. public unsafe void ClearHotWords() { - var resultCode = NativeImp.DS_ClearHotWords(_modelStatePP); + var resultCode = NativeImp.STT_ClearHotWords(_modelStatePP); EvaluateResultCode(resultCode); } @@ -115,7 +115,7 @@ namespace DeepSpeechClient /// Sample rate. public unsafe int GetModelSampleRate() { - return NativeImp.DS_GetModelSampleRate(_modelStatePP); + return NativeImp.STT_GetModelSampleRate(_modelStatePP); } /// @@ -124,9 +124,9 @@ namespace DeepSpeechClient /// Native result code. private void EvaluateResultCode(ErrorCodes resultCode) { - if (resultCode != ErrorCodes.DS_ERR_OK) + if (resultCode != ErrorCodes.STT_ERR_OK) { - throw new ArgumentException(NativeImp.DS_ErrorCodeToErrorMessage((int)resultCode).PtrToString()); + throw new ArgumentException(NativeImp.STT_ErrorCodeToErrorMessage((int)resultCode).PtrToString()); } } @@ -135,7 +135,7 @@ namespace DeepSpeechClient /// public unsafe void Dispose() { - NativeImp.DS_FreeModel(_modelStatePP); + NativeImp.STT_FreeModel(_modelStatePP); } /// @@ -155,7 +155,7 @@ namespace DeepSpeechClient throw new FileNotFoundException($"Cannot find the scorer file: {aScorerPath}"); } - var resultCode = NativeImp.DS_EnableExternalScorer(_modelStatePP, aScorerPath); + var resultCode = NativeImp.STT_EnableExternalScorer(_modelStatePP, aScorerPath); EvaluateResultCode(resultCode); } @@ -165,7 +165,7 @@ namespace DeepSpeechClient /// Thrown when an external scorer is not enabled. 
public unsafe void DisableExternalScorer() { - var resultCode = NativeImp.DS_DisableExternalScorer(_modelStatePP); + var resultCode = NativeImp.STT_DisableExternalScorer(_modelStatePP); EvaluateResultCode(resultCode); } @@ -177,7 +177,7 @@ namespace DeepSpeechClient /// Thrown when an external scorer is not enabled. public unsafe void SetScorerAlphaBeta(float aAlpha, float aBeta) { - var resultCode = NativeImp.DS_SetScorerAlphaBeta(_modelStatePP, + var resultCode = NativeImp.STT_SetScorerAlphaBeta(_modelStatePP, aAlpha, aBeta); EvaluateResultCode(resultCode); @@ -188,9 +188,9 @@ namespace DeepSpeechClient /// /// Instance of the stream to feed the data. /// An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on). - public unsafe void FeedAudioContent(DeepSpeechStream stream, short[] aBuffer, uint aBufferSize) + public unsafe void FeedAudioContent(Stream stream, short[] aBuffer, uint aBufferSize) { - NativeImp.DS_FeedAudioContent(stream.GetNativePointer(), aBuffer, aBufferSize); + NativeImp.STT_FeedAudioContent(stream.GetNativePointer(), aBuffer, aBufferSize); } /// @@ -198,9 +198,9 @@ namespace DeepSpeechClient /// /// Instance of the stream to finish. /// The STT result. - public unsafe string FinishStream(DeepSpeechStream stream) + public unsafe string FinishStream(Stream stream) { - return NativeImp.DS_FinishStream(stream.GetNativePointer()).PtrToString(); + return NativeImp.STT_FinishStream(stream.GetNativePointer()).PtrToString(); } /// @@ -209,9 +209,9 @@ namespace DeepSpeechClient /// Instance of the stream to finish. /// Maximum number of candidate transcripts to return. Returned list might be smaller than this. /// The extended metadata result. 
- public unsafe Metadata FinishStreamWithMetadata(DeepSpeechStream stream, uint aNumResults) + public unsafe Metadata FinishStreamWithMetadata(Stream stream, uint aNumResults) { - return NativeImp.DS_FinishStreamWithMetadata(stream.GetNativePointer(), aNumResults).PtrToMetadata(); + return NativeImp.STT_FinishStreamWithMetadata(stream.GetNativePointer(), aNumResults).PtrToMetadata(); } /// @@ -219,9 +219,9 @@ namespace DeepSpeechClient /// /// Instance of the stream to decode. /// The STT intermediate result. - public unsafe string IntermediateDecode(DeepSpeechStream stream) + public unsafe string IntermediateDecode(Stream stream) { - return NativeImp.DS_IntermediateDecode(stream.GetNativePointer()).PtrToString(); + return NativeImp.STT_IntermediateDecode(stream.GetNativePointer()).PtrToString(); } /// @@ -230,9 +230,9 @@ namespace DeepSpeechClient /// Instance of the stream to decode. /// Maximum number of candidate transcripts to return. Returned list might be smaller than this. /// The STT intermediate result. - public unsafe Metadata IntermediateDecodeWithMetadata(DeepSpeechStream stream, uint aNumResults) + public unsafe Metadata IntermediateDecodeWithMetadata(Stream stream, uint aNumResults) { - return NativeImp.DS_IntermediateDecodeWithMetadata(stream.GetNativePointer(), aNumResults).PtrToMetadata(); + return NativeImp.STT_IntermediateDecodeWithMetadata(stream.GetNativePointer(), aNumResults).PtrToMetadata(); } /// @@ -241,18 +241,18 @@ namespace DeepSpeechClient /// public unsafe string Version() { - return NativeImp.DS_Version().PtrToString(); + return NativeImp.STT_Version().PtrToString(); } /// /// Creates a new streaming inference state. 
/// - public unsafe DeepSpeechStream CreateStream() + public unsafe Stream CreateStream() { IntPtr** streamingStatePointer = null; - var resultCode = NativeImp.DS_CreateStream(_modelStatePP, ref streamingStatePointer); + var resultCode = NativeImp.STT_CreateStream(_modelStatePP, ref streamingStatePointer); EvaluateResultCode(resultCode); - return new DeepSpeechStream(streamingStatePointer); + return new Stream(streamingStatePointer); } /// @@ -260,25 +260,25 @@ namespace DeepSpeechClient /// This can be used if you no longer need the result of an ongoing streaming /// inference and don't want to perform a costly decode operation. /// - public unsafe void FreeStream(DeepSpeechStream stream) + public unsafe void FreeStream(Stream stream) { - NativeImp.DS_FreeStream(stream.GetNativePointer()); + NativeImp.STT_FreeStream(stream.GetNativePointer()); stream.Dispose(); } /// - /// Use the DeepSpeech model to perform Speech-To-Text. + /// Use the STT model to perform Speech-To-Text. /// /// A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). /// The number of samples in the audio signal. /// The STT result. Returns NULL on error. public unsafe string SpeechToText(short[] aBuffer, uint aBufferSize) { - return NativeImp.DS_SpeechToText(_modelStatePP, aBuffer, aBufferSize).PtrToString(); + return NativeImp.STT_SpeechToText(_modelStatePP, aBuffer, aBufferSize).PtrToString(); } /// - /// Use the DeepSpeech model to perform Speech-To-Text, return results including metadata. + /// Use the STT model to perform Speech-To-Text, return results including metadata. /// /// A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). /// The number of samples in the audio signal. @@ -286,7 +286,7 @@ namespace DeepSpeechClient /// The extended metadata. Returns NULL on error. 
public unsafe Metadata SpeechToTextWithMetadata(short[] aBuffer, uint aBufferSize, uint aNumResults) { - return NativeImp.DS_SpeechToTextWithMetadata(_modelStatePP, aBuffer, aBufferSize, aNumResults).PtrToMetadata(); + return NativeImp.STT_SpeechToTextWithMetadata(_modelStatePP, aBuffer, aBufferSize, aNumResults).PtrToMetadata(); } #endregion diff --git a/native_client/dotnet/DeepSpeechClient/DeepSpeechClient.csproj b/native_client/dotnet/STTClient/STTClient.csproj similarity index 100% rename from native_client/dotnet/DeepSpeechClient/DeepSpeechClient.csproj rename to native_client/dotnet/STTClient/STTClient.csproj diff --git a/native_client/dotnet/DeepSpeechClient/Structs/CandidateTranscript.cs b/native_client/dotnet/STTClient/Structs/CandidateTranscript.cs similarity index 94% rename from native_client/dotnet/DeepSpeechClient/Structs/CandidateTranscript.cs rename to native_client/dotnet/STTClient/Structs/CandidateTranscript.cs index 54581f6f..4743810b 100644 --- a/native_client/dotnet/DeepSpeechClient/Structs/CandidateTranscript.cs +++ b/native_client/dotnet/STTClient/Structs/CandidateTranscript.cs @@ -1,7 +1,7 @@ using System; using System.Runtime.InteropServices; -namespace DeepSpeechClient.Structs +namespace STTClient.Structs { [StructLayout(LayoutKind.Sequential)] internal unsafe struct CandidateTranscript diff --git a/native_client/dotnet/DeepSpeechClient/Structs/Metadata.cs b/native_client/dotnet/STTClient/Structs/Metadata.cs similarity index 92% rename from native_client/dotnet/DeepSpeechClient/Structs/Metadata.cs rename to native_client/dotnet/STTClient/Structs/Metadata.cs index 0a9beddc..f2db6bcd 100644 --- a/native_client/dotnet/DeepSpeechClient/Structs/Metadata.cs +++ b/native_client/dotnet/STTClient/Structs/Metadata.cs @@ -1,7 +1,7 @@ using System; using System.Runtime.InteropServices; -namespace DeepSpeechClient.Structs +namespace STTClient.Structs { [StructLayout(LayoutKind.Sequential)] internal unsafe struct Metadata diff --git 
a/native_client/dotnet/DeepSpeechClient/Structs/TokenMetadata.cs b/native_client/dotnet/STTClient/Structs/TokenMetadata.cs similarity index 93% rename from native_client/dotnet/DeepSpeechClient/Structs/TokenMetadata.cs rename to native_client/dotnet/STTClient/Structs/TokenMetadata.cs index 1c660c71..a21c1d26 100644 --- a/native_client/dotnet/DeepSpeechClient/Structs/TokenMetadata.cs +++ b/native_client/dotnet/STTClient/Structs/TokenMetadata.cs @@ -1,7 +1,7 @@ using System; using System.Runtime.InteropServices; -namespace DeepSpeechClient.Structs +namespace STTClient.Structs { [StructLayout(LayoutKind.Sequential)] internal unsafe struct TokenMetadata diff --git a/native_client/dotnet/DeepSpeechConsole/App.config b/native_client/dotnet/STTConsole/App.config similarity index 100% rename from native_client/dotnet/DeepSpeechConsole/App.config rename to native_client/dotnet/STTConsole/App.config diff --git a/native_client/dotnet/DeepSpeechConsole/Program.cs b/native_client/dotnet/STTConsole/Program.cs similarity index 96% rename from native_client/dotnet/DeepSpeechConsole/Program.cs rename to native_client/dotnet/STTConsole/Program.cs index 55bd8fd5..e09d0c1f 100644 --- a/native_client/dotnet/DeepSpeechConsole/Program.cs +++ b/native_client/dotnet/STTConsole/Program.cs @@ -1,6 +1,6 @@ -using DeepSpeechClient; -using DeepSpeechClient.Interfaces; -using DeepSpeechClient.Models; +using STTClient; +using STTClient.Interfaces; +using STTClient.Models; using NAudio.Wave; using System; using System.Collections.Generic; @@ -54,7 +54,7 @@ namespace CSharpExamples Console.WriteLine("Loading model..."); stopwatch.Start(); // sphinx-doc: csharp_ref_model_start - using (IDeepSpeech sttClient = new DeepSpeech(model ?? "output_graph.pbmm")) + using (ISTT sttClient = new STT(model ?? 
"output_graph.pbmm")) { // sphinx-doc: csharp_ref_model_stop stopwatch.Stop(); diff --git a/native_client/dotnet/DeepSpeechConsole/Properties/AssemblyInfo.cs b/native_client/dotnet/STTConsole/Properties/AssemblyInfo.cs similarity index 85% rename from native_client/dotnet/DeepSpeechConsole/Properties/AssemblyInfo.cs rename to native_client/dotnet/STTConsole/Properties/AssemblyInfo.cs index 845851a1..f7600c7c 100644 --- a/native_client/dotnet/DeepSpeechConsole/Properties/AssemblyInfo.cs +++ b/native_client/dotnet/STTConsole/Properties/AssemblyInfo.cs @@ -5,12 +5,12 @@ using System.Runtime.InteropServices; // General Information about an assembly is controlled through the following // set of attributes. Change these attribute values to modify the information // associated with an assembly. -[assembly: AssemblyTitle("DeepSpeechConsole")] +[assembly: AssemblyTitle("STTConsole")] [assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("CSharpExamples")] -[assembly: AssemblyCopyright("Copyright © 2018")] +[assembly: AssemblyCompany("Coqui GmbH")] +[assembly: AssemblyProduct("STTConsole")] +[assembly: AssemblyCopyright("Copyright © 2018-2020 Mozilla, © 2021 Coqui GmbH")] [assembly: AssemblyTrademark("")] [assembly: AssemblyCulture("")] diff --git a/native_client/dotnet/DeepSpeechConsole/DeepSpeechConsole.csproj b/native_client/dotnet/STTConsole/STTConsole.csproj similarity index 93% rename from native_client/dotnet/DeepSpeechConsole/DeepSpeechConsole.csproj rename to native_client/dotnet/STTConsole/STTConsole.csproj index a05fca61..54e11eb0 100644 --- a/native_client/dotnet/DeepSpeechConsole/DeepSpeechConsole.csproj +++ b/native_client/dotnet/STTConsole/STTConsole.csproj @@ -6,8 +6,8 @@ AnyCPU {312965E5-C4F6-4D95-BA64-79906B8BC7AC} Exe - DeepSpeechConsole - DeepSpeechConsole + STTConsole + STTConsole v4.6.2 512 true @@ -56,9 +56,9 @@ - + {56DE4091-BBBE-47E4-852D-7268B33B971F} - DeepSpeechClient 
+ STTClient diff --git a/native_client/dotnet/DeepSpeechConsole/arctic_a0024.wav b/native_client/dotnet/STTConsole/arctic_a0024.wav similarity index 100% rename from native_client/dotnet/DeepSpeechConsole/arctic_a0024.wav rename to native_client/dotnet/STTConsole/arctic_a0024.wav diff --git a/native_client/dotnet/DeepSpeechConsole/packages.config b/native_client/dotnet/STTConsole/packages.config similarity index 100% rename from native_client/dotnet/DeepSpeechConsole/packages.config rename to native_client/dotnet/STTConsole/packages.config diff --git a/native_client/dotnet/DeepSpeechWPF/.gitignore b/native_client/dotnet/STTWPF/.gitignore similarity index 100% rename from native_client/dotnet/DeepSpeechWPF/.gitignore rename to native_client/dotnet/STTWPF/.gitignore diff --git a/native_client/dotnet/DeepSpeechWPF/App.config b/native_client/dotnet/STTWPF/App.config similarity index 100% rename from native_client/dotnet/DeepSpeechWPF/App.config rename to native_client/dotnet/STTWPF/App.config diff --git a/native_client/dotnet/DeepSpeechWPF/App.xaml b/native_client/dotnet/STTWPF/App.xaml similarity index 74% rename from native_client/dotnet/DeepSpeechWPF/App.xaml rename to native_client/dotnet/STTWPF/App.xaml index 16ebb0d4..97292db8 100644 --- a/native_client/dotnet/DeepSpeechWPF/App.xaml +++ b/native_client/dotnet/STTWPF/App.xaml @@ -1,8 +1,8 @@  diff --git a/native_client/dotnet/DeepSpeechWPF/App.xaml.cs b/native_client/dotnet/STTWPF/App.xaml.cs similarity index 61% rename from native_client/dotnet/DeepSpeechWPF/App.xaml.cs rename to native_client/dotnet/STTWPF/App.xaml.cs index d4b87d6e..80dd818a 100644 --- a/native_client/dotnet/DeepSpeechWPF/App.xaml.cs +++ b/native_client/dotnet/STTWPF/App.xaml.cs @@ -1,10 +1,10 @@ using CommonServiceLocator; -using DeepSpeech.WPF.ViewModels; -using DeepSpeechClient.Interfaces; +using STT.WPF.ViewModels; +using STTClient.Interfaces; using GalaSoft.MvvmLight.Ioc; using System.Windows; -namespace DeepSpeechWPF +namespace STTWPF { 
/// /// Interaction logic for App.xaml @@ -18,11 +18,11 @@ namespace DeepSpeechWPF try { - //Register instance of DeepSpeech - DeepSpeechClient.DeepSpeech deepSpeechClient = - new DeepSpeechClient.DeepSpeech("deepspeech-0.8.0-models.pbmm"); + //Register instance of STT + STTClient.STT client = + new STTClient.STT("coqui-stt-0.8.0-models.pbmm"); - SimpleIoc.Default.Register(() => deepSpeechClient); + SimpleIoc.Default.Register(() => client); SimpleIoc.Default.Register(); } catch (System.Exception ex) @@ -35,8 +35,8 @@ namespace DeepSpeechWPF protected override void OnExit(ExitEventArgs e) { base.OnExit(e); - //Dispose instance of DeepSpeech - ServiceLocator.Current.GetInstance()?.Dispose(); + //Dispose instance of STT + ServiceLocator.Current.GetInstance()?.Dispose(); } } } diff --git a/native_client/dotnet/DeepSpeechWPF/MainWindow.xaml b/native_client/dotnet/STTWPF/MainWindow.xaml similarity index 98% rename from native_client/dotnet/DeepSpeechWPF/MainWindow.xaml rename to native_client/dotnet/STTWPF/MainWindow.xaml index 4fbe5e72..569f6ad2 100644 --- a/native_client/dotnet/DeepSpeechWPF/MainWindow.xaml +++ b/native_client/dotnet/STTWPF/MainWindow.xaml @@ -1,10 +1,10 @@  /// Interaction logic for MainWindow.xaml diff --git a/native_client/dotnet/DeepSpeechWPF/Properties/AssemblyInfo.cs b/native_client/dotnet/STTWPF/Properties/AssemblyInfo.cs similarity index 91% rename from native_client/dotnet/DeepSpeechWPF/Properties/AssemblyInfo.cs rename to native_client/dotnet/STTWPF/Properties/AssemblyInfo.cs index f9ae7d76..f2e32102 100644 --- a/native_client/dotnet/DeepSpeechWPF/Properties/AssemblyInfo.cs +++ b/native_client/dotnet/STTWPF/Properties/AssemblyInfo.cs @@ -7,12 +7,12 @@ using System.Windows; // General Information about an assembly is controlled through the following // set of attributes. Change these attribute values to modify the information // associated with an assembly. 
-[assembly: AssemblyTitle("DeepSpeech.WPF")] +[assembly: AssemblyTitle("STT.WPF")] [assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("DeepSpeech.WPF.SingleFiles")] -[assembly: AssemblyCopyright("Copyright © 2018")] +[assembly: AssemblyCompany("Coqui GmbH")] +[assembly: AssemblyProduct("STT.WPF.SingleFiles")] +[assembly: AssemblyCopyright("Copyright © 2018-2020 Mozilla, © 2021 Coqui GmbH")] [assembly: AssemblyTrademark("")] [assembly: AssemblyCulture("")] diff --git a/native_client/dotnet/DeepSpeechWPF/Properties/Resources.Designer.cs b/native_client/dotnet/STTWPF/Properties/Resources.Designer.cs similarity index 94% rename from native_client/dotnet/DeepSpeechWPF/Properties/Resources.Designer.cs rename to native_client/dotnet/STTWPF/Properties/Resources.Designer.cs index 2da2b4b2..2478decd 100644 --- a/native_client/dotnet/DeepSpeechWPF/Properties/Resources.Designer.cs +++ b/native_client/dotnet/STTWPF/Properties/Resources.Designer.cs @@ -8,7 +8,7 @@ // //------------------------------------------------------------------------------ -namespace DeepSpeech.WPF.Properties { +namespace STT.WPF.Properties { using System; @@ -39,7 +39,7 @@ namespace DeepSpeech.WPF.Properties { internal static global::System.Resources.ResourceManager ResourceManager { get { if (object.ReferenceEquals(resourceMan, null)) { - global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("DeepSpeech.WPF.Properties.Resources", typeof(Resources).Assembly); + global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("STT.WPF.Properties.Resources", typeof(Resources).Assembly); resourceMan = temp; } return resourceMan; diff --git a/native_client/dotnet/DeepSpeechWPF/Properties/Resources.resx b/native_client/dotnet/STTWPF/Properties/Resources.resx similarity index 100% rename from native_client/dotnet/DeepSpeechWPF/Properties/Resources.resx rename to 
native_client/dotnet/STTWPF/Properties/Resources.resx diff --git a/native_client/dotnet/DeepSpeechWPF/Properties/Settings.Designer.cs b/native_client/dotnet/STTWPF/Properties/Settings.Designer.cs similarity index 96% rename from native_client/dotnet/DeepSpeechWPF/Properties/Settings.Designer.cs rename to native_client/dotnet/STTWPF/Properties/Settings.Designer.cs index 0f464bc4..de63d157 100644 --- a/native_client/dotnet/DeepSpeechWPF/Properties/Settings.Designer.cs +++ b/native_client/dotnet/STTWPF/Properties/Settings.Designer.cs @@ -8,7 +8,7 @@ // //------------------------------------------------------------------------------ -namespace DeepSpeech.WPF.Properties { +namespace STT.WPF.Properties { [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] diff --git a/native_client/dotnet/DeepSpeechWPF/Properties/Settings.settings b/native_client/dotnet/STTWPF/Properties/Settings.settings similarity index 100% rename from native_client/dotnet/DeepSpeechWPF/Properties/Settings.settings rename to native_client/dotnet/STTWPF/Properties/Settings.settings diff --git a/native_client/dotnet/DeepSpeechWPF/DeepSpeech.WPF.csproj b/native_client/dotnet/STTWPF/STT.WPF.csproj similarity index 95% rename from native_client/dotnet/DeepSpeechWPF/DeepSpeech.WPF.csproj rename to native_client/dotnet/STTWPF/STT.WPF.csproj index 7f46a31e..160adafe 100644 --- a/native_client/dotnet/DeepSpeechWPF/DeepSpeech.WPF.csproj +++ b/native_client/dotnet/STTWPF/STT.WPF.csproj @@ -6,8 +6,8 @@ AnyCPU {54BFD766-4305-4F4C-BA59-AF45505DF3C1} WinExe - DeepSpeech.WPF - DeepSpeech.WPF + STT.WPF + STT.WPF v4.6.2 512 {60dc8134-eba5-43b8-bcc9-bb4bc16c2548};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} @@ -131,9 +131,9 @@ - + {56de4091-bbbe-47e4-852d-7268b33b971f} - DeepSpeechClient + STTClient diff --git a/native_client/dotnet/DeepSpeechWPF/DeepSpeech.WPF.sln b/native_client/dotnet/STTWPF/STT.WPF.sln similarity index 80% rename from native_client/dotnet/DeepSpeechWPF/DeepSpeech.WPF.sln rename to 
native_client/dotnet/STTWPF/STT.WPF.sln index cd29025e..96c87ee5 100644 --- a/native_client/dotnet/DeepSpeechWPF/DeepSpeech.WPF.sln +++ b/native_client/dotnet/STTWPF/STT.WPF.sln @@ -3,9 +3,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 15 VisualStudioVersion = 15.0.28307.421 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeech.WPF", "DeepSpeech.WPF.csproj", "{54BFD766-4305-4F4C-BA59-AF45505DF3C1}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "STT.WPF", "STT.WPF.csproj", "{54BFD766-4305-4F4C-BA59-AF45505DF3C1}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechClient", "..\DeepSpeechClient\DeepSpeechClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "STTClient", "..\STTClient\STTClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution diff --git a/native_client/dotnet/DeepSpeechWPF/ViewModels/BindableBase.cs b/native_client/dotnet/STTWPF/ViewModels/BindableBase.cs similarity index 98% rename from native_client/dotnet/DeepSpeechWPF/ViewModels/BindableBase.cs rename to native_client/dotnet/STTWPF/ViewModels/BindableBase.cs index 909327ee..e5187cd6 100644 --- a/native_client/dotnet/DeepSpeechWPF/ViewModels/BindableBase.cs +++ b/native_client/dotnet/STTWPF/ViewModels/BindableBase.cs @@ -3,7 +3,7 @@ using System.Collections.Generic; using System.ComponentModel; using System.Runtime.CompilerServices; -namespace DeepSpeech.WPF.ViewModels +namespace STT.WPF.ViewModels { /// /// Implementation of to simplify models. 
diff --git a/native_client/dotnet/DeepSpeechWPF/ViewModels/MainWindowViewModel.cs b/native_client/dotnet/STTWPF/ViewModels/MainWindowViewModel.cs similarity index 97% rename from native_client/dotnet/DeepSpeechWPF/ViewModels/MainWindowViewModel.cs rename to native_client/dotnet/STTWPF/ViewModels/MainWindowViewModel.cs index 230fd42a..0ed4822b 100644 --- a/native_client/dotnet/DeepSpeechWPF/ViewModels/MainWindowViewModel.cs +++ b/native_client/dotnet/STTWPF/ViewModels/MainWindowViewModel.cs @@ -3,8 +3,8 @@ using CSCore; using CSCore.CoreAudioAPI; using CSCore.SoundIn; using CSCore.Streams; -using DeepSpeechClient.Interfaces; -using DeepSpeechClient.Models; +using STTClient.Interfaces; +using STTClient.Models; using GalaSoft.MvvmLight.CommandWpf; using Microsoft.Win32; using System; @@ -15,7 +15,7 @@ using System.IO; using System.Threading; using System.Threading.Tasks; -namespace DeepSpeech.WPF.ViewModels +namespace STT.WPF.ViewModels { /// /// View model of the MainWindow View. @@ -27,7 +27,7 @@ namespace DeepSpeech.WPF.ViewModels private const string ScorerPath = "kenlm.scorer"; #endregion - private readonly IDeepSpeech _sttClient; + private readonly ISTT _sttClient; #region Commands /// @@ -62,7 +62,7 @@ namespace DeepSpeech.WPF.ViewModels /// /// Stream used to feed data into the acoustic model. /// - private DeepSpeechStream _sttStream; + private Stream _sttStream; /// /// Records the audio of the selected device. 
@@ -75,7 +75,7 @@ namespace DeepSpeech.WPF.ViewModels private SoundInSource _soundInSource; /// - /// Target wave source.(16KHz Mono 16bit for DeepSpeech) + /// Target wave source.(16KHz Mono 16bit for STT) /// private IWaveSource _convertedSource; @@ -200,7 +200,7 @@ namespace DeepSpeech.WPF.ViewModels #endregion #region Ctors - public MainWindowViewModel(IDeepSpeech sttClient) + public MainWindowViewModel(ISTT sttClient) { _sttClient = sttClient; @@ -290,7 +290,7 @@ namespace DeepSpeech.WPF.ViewModels //read data from the converedSource //important: don't use the e.Data here //the e.Data contains the raw data provided by the - //soundInSource which won't have the deepspeech required audio format + //soundInSource which won't have the STT required audio format byte[] buffer = new byte[_convertedSource.WaveFormat.BytesPerSecond / 2]; int read; diff --git a/native_client/dotnet/DeepSpeechWPF/packages.config b/native_client/dotnet/STTWPF/packages.config similarity index 100% rename from native_client/dotnet/DeepSpeechWPF/packages.config rename to native_client/dotnet/STTWPF/packages.config diff --git a/native_client/dotnet/nupkg/stt.nuspec.in b/native_client/dotnet/nupkg/STT.spec.in similarity index 100% rename from native_client/dotnet/nupkg/stt.nuspec.in rename to native_client/dotnet/nupkg/STT.spec.in diff --git a/native_client/dotnet/nupkg/build/DeepSpeech.targets b/native_client/dotnet/nupkg/build/STT.targets similarity index 100% rename from native_client/dotnet/nupkg/build/DeepSpeech.targets rename to native_client/dotnet/nupkg/build/STT.targets diff --git a/native_client/generate_scorer_package.cpp b/native_client/generate_scorer_package.cpp index 0cadb429..dbc4bcd9 100644 --- a/native_client/generate_scorer_package.cpp +++ b/native_client/generate_scorer_package.cpp @@ -66,9 +66,9 @@ create_package(absl::optional alphabet_path, scorer.set_utf8_mode(force_bytes_output_mode.value()); scorer.reset_params(default_alpha, default_beta); int err = 
scorer.load_lm(lm_path); - if (err != DS_ERR_SCORER_NO_TRIE) { + if (err != STT_ERR_SCORER_NO_TRIE) { cerr << "Error loading language model file: " - << (err == DS_ERR_SCORER_UNREADABLE ? "Can't open binary LM file." : DS_ErrorCodeToErrorMessage(err)) + << (err == STT_ERR_SCORER_UNREADABLE ? "Can't open binary LM file." : STT_ErrorCodeToErrorMessage(err)) << "\n"; return 1; } diff --git a/native_client/java/app/src/main/AndroidManifest.xml b/native_client/java/app/src/main/AndroidManifest.xml index e9a371d1..dcf69307 100644 --- a/native_client/java/app/src/main/AndroidManifest.xml +++ b/native_client/java/app/src/main/AndroidManifest.xml @@ -9,7 +9,7 @@ android:roundIcon="@mipmap/ic_launcher_round" android:supportsRtl="true" android:theme="@style/AppTheme"> - + diff --git a/native_client/java/app/src/main/java/ai/coqui/sttexampleapp/DeepSpeechActivity.java b/native_client/java/app/src/main/java/ai/coqui/sttexampleapp/STTActivity.java similarity index 95% rename from native_client/java/app/src/main/java/ai/coqui/sttexampleapp/DeepSpeechActivity.java rename to native_client/java/app/src/main/java/ai/coqui/sttexampleapp/STTActivity.java index 7f0836d9..32395fdf 100644 --- a/native_client/java/app/src/main/java/ai/coqui/sttexampleapp/DeepSpeechActivity.java +++ b/native_client/java/app/src/main/java/ai/coqui/sttexampleapp/STTActivity.java @@ -16,11 +16,11 @@ import java.io.IOException; import java.nio.ByteOrder; import java.nio.ByteBuffer; -import ai.coqui.libstt.DeepSpeechModel; +import ai.coqui.libstt.STTModel; -public class DeepSpeechActivity extends AppCompatActivity { +public class STTActivity extends AppCompatActivity { - DeepSpeechModel _m = null; + STTModel _m = null; EditText _tfliteModel; EditText _audioFile; @@ -50,7 +50,7 @@ public class DeepSpeechActivity extends AppCompatActivity { this._tfliteStatus.setText("Creating model"); if (this._m == null) { // sphinx-doc: java_ref_model_start - this._m = new DeepSpeechModel(tfliteModel); + this._m = new 
STTModel(tfliteModel); this._m.setBeamWidth(BEAM_WIDTH); // sphinx-doc: java_ref_model_stop } @@ -124,7 +124,7 @@ public class DeepSpeechActivity extends AppCompatActivity { @Override protected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); - setContentView(R.layout.activity_deep_speech); + setContentView(R.layout.activity_stt); this._decodedString = (TextView) findViewById(R.id.decodedString); this._tfliteStatus = (TextView) findViewById(R.id.tfliteStatus); diff --git a/native_client/java/app/src/main/res/layout/activity_deep_speech.xml b/native_client/java/app/src/main/res/layout/activity_stt.xml similarity index 99% rename from native_client/java/app/src/main/res/layout/activity_deep_speech.xml rename to native_client/java/app/src/main/res/layout/activity_stt.xml index 02c383d4..849b9e8d 100644 --- a/native_client/java/app/src/main/res/layout/activity_deep_speech.xml +++ b/native_client/java/app/src/main/res/layout/activity_stt.xml @@ -4,7 +4,7 @@ xmlns:tools="http://schemas.android.com/tools" android:layout_width="match_parent" android:layout_height="match_parent" - tools:context=".DeepSpeechActivity"> + tools:context=".STTActivity">