More rebranding, API names, iOS, .NET
This commit is contained in:
parent
136ca35ca2
commit
6d4d1a7153
|
@ -13,7 +13,7 @@ fi;
|
|||
if [ -d "${COMPUTE_KEEP_DIR}" ]; then
|
||||
checkpoint_dir=$COMPUTE_KEEP_DIR
|
||||
else
|
||||
checkpoint_dir=$(python -c 'from xdg import BaseDirectory as xdg; print(xdg.save_data_path("deepspeech/ldc93s1"))')
|
||||
checkpoint_dir=$(python -c 'from xdg import BaseDirectory as xdg; print(xdg.save_data_path("stt/ldc93s1"))')
|
||||
fi
|
||||
|
||||
# Force only one visible device because we have a single-sample dataset
|
||||
|
|
|
@ -60,7 +60,7 @@ Compile Coqui STT
|
|||
-----------------
|
||||
|
||||
Compile ``libstt.so``
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Within your TensorFlow directory, there should be a symbolic link to the 🐸STT ``native_client`` directory. If it is not present, create it with the following command:
|
||||
|
||||
|
@ -238,8 +238,8 @@ Due to the discontinuation of Bintray JCenter we do not have pre-built Android p
|
|||
|
||||
implementation 'stt.coqui.ai:libstt:VERSION@aar'
|
||||
|
||||
Building ``libstt.so``
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
Building ``libstt.so`` for Android
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
You can build the ``libstt.so`` using (ARMv7):
|
||||
|
||||
|
@ -254,7 +254,7 @@ Or (ARM64):
|
|||
bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=android --config=android_arm64 --define=runtime=tflite --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++14 --copt=-D_GLIBCXX_USE_C99 //native_client:libstt.so
|
||||
|
||||
Building ``libstt.aar``
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
In the unlikely event you have to rebuild the JNI bindings, source code is
|
||||
available under the ``libstt`` subdirectory. Building depends on shared
|
||||
|
@ -270,7 +270,7 @@ and adapt file naming (when missing, the error message should states what
|
|||
filename it expects and where).
|
||||
|
||||
Building C++ ``stt`` binary
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Building the ``stt`` binary will happen through ``ndk-build`` (ARMv7):
|
||||
|
||||
|
@ -306,7 +306,7 @@ mono 16kHz 16-bits file and it might fail on some WAVE file that are not
|
|||
following exactly the specification.
|
||||
|
||||
Running ``stt`` via adb
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
You should use ``adb push`` to send data to the device; please refer to the Android
|
||||
documentation on how to use that.
|
||||
|
@ -349,7 +349,7 @@ to leverage GPU / DSP / NPU * Hexagon, the Qualcomm-specific DSP
|
|||
|
||||
This is highly experimental:
|
||||
|
||||
* Requires passing environment variable ``DS_TFLITE_DELEGATE`` with values of
|
||||
* Requires passing environment variable ``STT_TFLITE_DELEGATE`` with values of
|
||||
``gpu``, ``nnapi`` or ``hexagon`` (only one at a time)
|
||||
* Might require exported model changes (some Op might not be supported)
|
||||
* We can't guarantee that it will work, nor that it will be faster than the default
|
||||
|
|
|
@ -10,65 +10,65 @@ C API
|
|||
|
||||
See also the list of error codes including descriptions for each error in :ref:`error-codes`.
|
||||
|
||||
.. doxygenfunction:: DS_CreateModel
|
||||
:project: deepspeech-c
|
||||
.. doxygenfunction:: STT_CreateModel
|
||||
:project: stt-c
|
||||
|
||||
.. doxygenfunction:: DS_FreeModel
|
||||
:project: deepspeech-c
|
||||
.. doxygenfunction:: STT_FreeModel
|
||||
:project: stt-c
|
||||
|
||||
.. doxygenfunction:: DS_EnableExternalScorer
|
||||
:project: deepspeech-c
|
||||
.. doxygenfunction:: STT_EnableExternalScorer
|
||||
:project: stt-c
|
||||
|
||||
.. doxygenfunction:: DS_DisableExternalScorer
|
||||
:project: deepspeech-c
|
||||
.. doxygenfunction:: STT_DisableExternalScorer
|
||||
:project: stt-c
|
||||
|
||||
.. doxygenfunction:: DS_AddHotWord
|
||||
:project: deepspeech-c
|
||||
.. doxygenfunction:: STT_AddHotWord
|
||||
:project: stt-c
|
||||
|
||||
.. doxygenfunction:: DS_EraseHotWord
|
||||
:project: deepspeech-c
|
||||
.. doxygenfunction:: STT_EraseHotWord
|
||||
:project: stt-c
|
||||
|
||||
.. doxygenfunction:: DS_ClearHotWords
|
||||
:project: deepspeech-c
|
||||
.. doxygenfunction:: STT_ClearHotWords
|
||||
:project: stt-c
|
||||
|
||||
.. doxygenfunction:: DS_SetScorerAlphaBeta
|
||||
:project: deepspeech-c
|
||||
.. doxygenfunction:: STT_SetScorerAlphaBeta
|
||||
:project: stt-c
|
||||
|
||||
.. doxygenfunction:: DS_GetModelSampleRate
|
||||
:project: deepspeech-c
|
||||
.. doxygenfunction:: STT_GetModelSampleRate
|
||||
:project: stt-c
|
||||
|
||||
.. doxygenfunction:: DS_SpeechToText
|
||||
:project: deepspeech-c
|
||||
.. doxygenfunction:: STT_SpeechToText
|
||||
:project: stt-c
|
||||
|
||||
.. doxygenfunction:: DS_SpeechToTextWithMetadata
|
||||
:project: deepspeech-c
|
||||
.. doxygenfunction:: STT_SpeechToTextWithMetadata
|
||||
:project: stt-c
|
||||
|
||||
.. doxygenfunction:: DS_CreateStream
|
||||
:project: deepspeech-c
|
||||
.. doxygenfunction:: STT_CreateStream
|
||||
:project: stt-c
|
||||
|
||||
.. doxygenfunction:: DS_FeedAudioContent
|
||||
:project: deepspeech-c
|
||||
.. doxygenfunction:: STT_FeedAudioContent
|
||||
:project: stt-c
|
||||
|
||||
.. doxygenfunction:: DS_IntermediateDecode
|
||||
:project: deepspeech-c
|
||||
.. doxygenfunction:: STT_IntermediateDecode
|
||||
:project: stt-c
|
||||
|
||||
.. doxygenfunction:: DS_IntermediateDecodeWithMetadata
|
||||
:project: deepspeech-c
|
||||
.. doxygenfunction:: STT_IntermediateDecodeWithMetadata
|
||||
:project: stt-c
|
||||
|
||||
.. doxygenfunction:: DS_FinishStream
|
||||
:project: deepspeech-c
|
||||
.. doxygenfunction:: STT_FinishStream
|
||||
:project: stt-c
|
||||
|
||||
.. doxygenfunction:: DS_FinishStreamWithMetadata
|
||||
:project: deepspeech-c
|
||||
.. doxygenfunction:: STT_FinishStreamWithMetadata
|
||||
:project: stt-c
|
||||
|
||||
.. doxygenfunction:: DS_FreeStream
|
||||
:project: deepspeech-c
|
||||
.. doxygenfunction:: STT_FreeStream
|
||||
:project: stt-c
|
||||
|
||||
.. doxygenfunction:: DS_FreeMetadata
|
||||
:project: deepspeech-c
|
||||
.. doxygenfunction:: STT_FreeMetadata
|
||||
:project: stt-c
|
||||
|
||||
.. doxygenfunction:: DS_FreeString
|
||||
:project: deepspeech-c
|
||||
.. doxygenfunction:: STT_FreeString
|
||||
:project: stt-c
|
||||
|
||||
.. doxygenfunction:: DS_Version
|
||||
:project: deepspeech-c
|
||||
.. doxygenfunction:: STT_Version
|
||||
:project: stt-c
|
||||
|
|
|
@ -2,18 +2,18 @@
|
|||
==============
|
||||
|
||||
|
||||
DeepSpeech Class
|
||||
STT Class
|
||||
----------------
|
||||
|
||||
.. doxygenclass:: DeepSpeechClient::DeepSpeech
|
||||
:project: deepspeech-dotnet
|
||||
.. doxygenclass:: STTClient::STT
|
||||
:project: stt-dotnet
|
||||
:members:
|
||||
|
||||
DeepSpeechStream Class
|
||||
Stream Class
|
||||
----------------------
|
||||
|
||||
.. doxygenclass:: DeepSpeechClient::Models::DeepSpeechStream
|
||||
:project: deepspeech-dotnet
|
||||
.. doxygenclass:: STTClient::Models::Stream
|
||||
:project: stt-dotnet
|
||||
:members:
|
||||
|
||||
ErrorCodes
|
||||
|
@ -21,33 +21,33 @@ ErrorCodes
|
|||
|
||||
See also the main definition including descriptions for each error in :ref:`error-codes`.
|
||||
|
||||
.. doxygenenum:: DeepSpeechClient::Enums::ErrorCodes
|
||||
:project: deepspeech-dotnet
|
||||
.. doxygenenum:: STTClient::Enums::ErrorCodes
|
||||
:project: stt-dotnet
|
||||
|
||||
Metadata
|
||||
--------
|
||||
|
||||
.. doxygenclass:: DeepSpeechClient::Models::Metadata
|
||||
:project: deepspeech-dotnet
|
||||
.. doxygenclass:: STTClient::Models::Metadata
|
||||
:project: stt-dotnet
|
||||
:members: Transcripts
|
||||
|
||||
CandidateTranscript
|
||||
-------------------
|
||||
|
||||
.. doxygenclass:: DeepSpeechClient::Models::CandidateTranscript
|
||||
:project: deepspeech-dotnet
|
||||
.. doxygenclass:: STTClient::Models::CandidateTranscript
|
||||
:project: stt-dotnet
|
||||
:members: Tokens, Confidence
|
||||
|
||||
TokenMetadata
|
||||
-------------
|
||||
|
||||
.. doxygenclass:: DeepSpeechClient::Models::TokenMetadata
|
||||
:project: deepspeech-dotnet
|
||||
.. doxygenclass:: STTClient::Models::TokenMetadata
|
||||
:project: stt-dotnet
|
||||
:members: Text, Timestep, StartTime
|
||||
|
||||
DeepSpeech Interface
|
||||
STT Interface
|
||||
--------------------
|
||||
|
||||
.. doxygeninterface:: DeepSpeechClient::Interfaces::IDeepSpeech
|
||||
:project: deepspeech-dotnet
|
||||
.. doxygeninterface:: STTClient::Interfaces::ISTT
|
||||
:project: stt-dotnet
|
||||
:members:
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
.NET API Usage example
|
||||
======================
|
||||
|
||||
Examples are from `native_client/dotnet/DeepSpeechConsole/Program.cs`.
|
||||
Examples are from `native_client/dotnet/STTConsole/Program.cs`.
|
||||
|
||||
Creating a model instance and loading model
|
||||
-------------------------------------------
|
||||
|
||||
.. literalinclude:: ../native_client/dotnet/DeepSpeechConsole/Program.cs
|
||||
.. literalinclude:: ../native_client/dotnet/STTConsole/Program.cs
|
||||
:language: csharp
|
||||
:linenos:
|
||||
:lineno-match:
|
||||
|
@ -16,7 +16,7 @@ Creating a model instance and loading model
|
|||
Performing inference
|
||||
--------------------
|
||||
|
||||
.. literalinclude:: ../native_client/dotnet/DeepSpeechConsole/Program.cs
|
||||
.. literalinclude:: ../native_client/dotnet/STTConsole/Program.cs
|
||||
:language: csharp
|
||||
:linenos:
|
||||
:lineno-match:
|
||||
|
@ -26,4 +26,4 @@ Performing inference
|
|||
Full source code
|
||||
----------------
|
||||
|
||||
See :download:`Full source code<../native_client/dotnet/DeepSpeechConsole/Program.cs>`.
|
||||
See :download:`Full source code<../native_client/dotnet/STTConsole/Program.cs>`.
|
||||
|
|
|
@ -1,29 +1,29 @@
|
|||
Java
|
||||
====
|
||||
|
||||
DeepSpeechModel
|
||||
STTModel
|
||||
---------------
|
||||
|
||||
.. doxygenclass:: org::deepspeech::libdeepspeech::DeepSpeechModel
|
||||
:project: deepspeech-java
|
||||
.. doxygenclass:: ai::coqui::libstt::STTModel
|
||||
:project: stt-java
|
||||
:members:
|
||||
|
||||
Metadata
|
||||
--------
|
||||
|
||||
.. doxygenclass:: org::deepspeech::libdeepspeech::Metadata
|
||||
:project: deepspeech-java
|
||||
.. doxygenclass:: ai::coqui::libstt::Metadata
|
||||
:project: stt-java
|
||||
:members: getNumTranscripts, getTranscript
|
||||
|
||||
CandidateTranscript
|
||||
-------------------
|
||||
|
||||
.. doxygenclass:: org::deepspeech::libdeepspeech::CandidateTranscript
|
||||
:project: deepspeech-java
|
||||
.. doxygenclass:: ai::coqui::libstt::CandidateTranscript
|
||||
:project: stt-java
|
||||
:members: getNumTokens, getConfidence, getToken
|
||||
|
||||
TokenMetadata
|
||||
-------------
|
||||
.. doxygenclass:: org::deepspeech::libdeepspeech::TokenMetadata
|
||||
:project: deepspeech-java
|
||||
.. doxygenclass:: ai::coqui::libstt::TokenMetadata
|
||||
:project: stt-java
|
||||
:members: getText, getTimestep, getStartTime
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
Java API Usage example
|
||||
======================
|
||||
|
||||
Examples are from `native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java`.
|
||||
Examples are from `native_client/java/app/src/main/java/ai/coqui/STTActivity.java`.
|
||||
|
||||
Creating a model instance and loading model
|
||||
-------------------------------------------
|
||||
|
||||
.. literalinclude:: ../native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java
|
||||
.. literalinclude:: ../native_client/java/app/src/main/java/ai/coqui/STTActivity.java
|
||||
:language: java
|
||||
:linenos:
|
||||
:lineno-match:
|
||||
|
@ -16,7 +16,7 @@ Creating a model instance and loading model
|
|||
Performing inference
|
||||
--------------------
|
||||
|
||||
.. literalinclude:: ../native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java
|
||||
.. literalinclude:: ../native_client/java/app/src/main/java/ai/coqui/STTActivity.java
|
||||
:language: java
|
||||
:linenos:
|
||||
:lineno-match:
|
||||
|
@ -26,4 +26,4 @@ Performing inference
|
|||
Full source code
|
||||
----------------
|
||||
|
||||
See :download:`Full source code<../native_client/java/app/src/main/java/org/deepspeech/DeepSpeechActivity.java>`.
|
||||
See :download:`Full source code<../native_client/java/app/src/main/java/ai/coqui/STTActivity.java>`.
|
||||
|
|
|
@ -9,61 +9,61 @@ Linux / AMD64 without GPU
|
|||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
* x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference)
|
||||
* Ubuntu 14.04+ (glibc >= 2.19, libstdc++6 >= 4.8)
|
||||
* Full TensorFlow runtime (``deepspeech`` packages)
|
||||
* TensorFlow Lite runtime (``deepspeech-tflite`` packages)
|
||||
* Full TensorFlow runtime (``stt`` packages)
|
||||
* TensorFlow Lite runtime (``stt-tflite`` packages)
|
||||
|
||||
Linux / AMD64 with GPU
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
* x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference)
|
||||
* Ubuntu 14.04+ (glibc >= 2.19, libstdc++6 >= 4.8)
|
||||
* CUDA 10.0 (and capable GPU)
|
||||
* Full TensorFlow runtime (``deepspeech`` packages)
|
||||
* TensorFlow Lite runtime (``deepspeech-tflite`` packages)
|
||||
* Full TensorFlow runtime (``stt`` packages)
|
||||
* TensorFlow Lite runtime (``stt-tflite`` packages)
|
||||
|
||||
Linux / ARMv7
|
||||
^^^^^^^^^^^^^
|
||||
* Cortex-A53 compatible ARMv7 SoC with Neon support
|
||||
* Raspbian Buster-compatible distribution
|
||||
* TensorFlow Lite runtime (``deepspeech-tflite`` packages)
|
||||
* TensorFlow Lite runtime (``stt-tflite`` packages)
|
||||
|
||||
Linux / Aarch64
|
||||
^^^^^^^^^^^^^^^
|
||||
* Cortex-A72 compatible Aarch64 SoC
|
||||
* ARMbian Buster-compatible distribution
|
||||
* TensorFlow Lite runtime (``deepspeech-tflite`` packages)
|
||||
* TensorFlow Lite runtime (``stt-tflite`` packages)
|
||||
|
||||
Android / ARMv7
|
||||
^^^^^^^^^^^^^^^
|
||||
* ARMv7 SoC with Neon support
|
||||
* Android 7.0-10.0
|
||||
* NDK API level >= 21
|
||||
* TensorFlow Lite runtime (``deepspeech-tflite`` packages)
|
||||
* TensorFlow Lite runtime (``stt-tflite`` packages)
|
||||
|
||||
Android / Aarch64
|
||||
^^^^^^^^^^^^^^^^^
|
||||
* Aarch64 SoC
|
||||
* Android 7.0-10.0
|
||||
* NDK API level >= 21
|
||||
* TensorFlow Lite runtime (``deepspeech-tflite`` packages)
|
||||
* TensorFlow Lite runtime (``stt-tflite`` packages)
|
||||
|
||||
macOS / AMD64
|
||||
^^^^^^^^^^^^^
|
||||
* x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference)
|
||||
* macOS >= 10.10
|
||||
* Full TensorFlow runtime (``deepspeech`` packages)
|
||||
* TensorFlow Lite runtime (``deepspeech-tflite`` packages)
|
||||
* Full TensorFlow runtime (``stt`` packages)
|
||||
* TensorFlow Lite runtime (``stt-tflite`` packages)
|
||||
|
||||
Windows / AMD64 without GPU
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
* x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference)
|
||||
* Windows Server >= 2012 R2 ; Windows >= 8.1
|
||||
* Full TensorFlow runtime (``deepspeech`` packages)
|
||||
* TensorFlow Lite runtime (``deepspeech-tflite`` packages)
|
||||
* Full TensorFlow runtime (``stt`` packages)
|
||||
* TensorFlow Lite runtime (``stt-tflite`` packages)
|
||||
|
||||
Windows / AMD64 with GPU
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
* x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference)
|
||||
* Windows Server >= 2012 R2 ; Windows >= 8.1
|
||||
* CUDA 10.0 (and capable GPU)
|
||||
* Full TensorFlow runtime (``deepspeech`` packages)
|
||||
* TensorFlow Lite runtime (``deepspeech-tflite`` packages)
|
||||
* Full TensorFlow runtime (``stt`` packages)
|
||||
* TensorFlow Lite runtime (``stt-tflite`` packages)
|
||||
|
|
|
@ -5,19 +5,19 @@ Metadata
|
|||
--------
|
||||
|
||||
.. doxygenstruct:: Metadata
|
||||
:project: deepspeech-c
|
||||
:project: stt-c
|
||||
:members:
|
||||
|
||||
CandidateTranscript
|
||||
-------------------
|
||||
|
||||
.. doxygenstruct:: CandidateTranscript
|
||||
:project: deepspeech-c
|
||||
:project: stt-c
|
||||
:members:
|
||||
|
||||
TokenMetadata
|
||||
-------------
|
||||
|
||||
.. doxygenstruct:: TokenMetadata
|
||||
:project: deepspeech-c
|
||||
:project: stt-c
|
||||
:members:
|
||||
|
|
|
@ -81,9 +81,9 @@ extensions = [
|
|||
|
||||
|
||||
breathe_projects = {
|
||||
"deepspeech-c": "xml-c/",
|
||||
"deepspeech-java": "xml-java/",
|
||||
"deepspeech-dotnet": "xml-dotnet/",
|
||||
"stt-c": "xml-c/",
|
||||
"stt-java": "xml-java/",
|
||||
"stt-dotnet": "xml-dotnet/",
|
||||
}
|
||||
|
||||
js_source_path = "../native_client/javascript/index.ts"
|
||||
|
|
|
@ -5,6 +5,6 @@
|
|||
fun:_Znwm
|
||||
fun:_ZN6tflite20DefaultErrorReporterEv
|
||||
fun:_ZN16TFLiteModelState4initEPKc
|
||||
fun:DS_CreateModel
|
||||
fun:STT_CreateModel
|
||||
fun:main
|
||||
}
|
||||
|
|
|
@ -815,7 +815,7 @@
|
|||
fun:_ZN6Scorer9load_trieERSt14basic_ifstreamIcSt11char_traitsIcEERKNSt7__cxx1112basic_stringIcS2_SaIcEEE
|
||||
fun:_ZN6Scorer7load_lmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
|
||||
fun:_ZN6Scorer4initERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERK8Alphabet
|
||||
fun:DS_EnableExternalScorer
|
||||
fun:STT_EnableExternalScorer
|
||||
fun:main
|
||||
}
|
||||
{
|
||||
|
@ -831,7 +831,7 @@
|
|||
fun:_ZN6Scorer9load_trieERSt14basic_ifstreamIcSt11char_traitsIcEERKNSt7__cxx1112basic_stringIcS2_SaIcEEE
|
||||
fun:_ZN6Scorer7load_lmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
|
||||
fun:_ZN6Scorer4initERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERK8Alphabet
|
||||
fun:DS_EnableExternalScorer
|
||||
fun:STT_EnableExternalScorer
|
||||
fun:main
|
||||
}
|
||||
{
|
||||
|
|
|
@ -64,9 +64,9 @@ void PrintHelp(const char* bin)
|
|||
"\t--hot_words\t\t\tHot-words and their boosts. Word:Boost pairs are comma-separated\n"
|
||||
"\t--help\t\t\t\tShow help\n"
|
||||
"\t--version\t\t\tPrint version and exits\n";
|
||||
char* version = DS_Version();
|
||||
char* version = STT_Version();
|
||||
std::cerr << "Coqui STT " << version << "\n";
|
||||
DS_FreeString(version);
|
||||
STT_FreeString(version);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
@ -169,9 +169,9 @@ bool ProcessArgs(int argc, char** argv)
|
|||
}
|
||||
|
||||
if (has_versions) {
|
||||
char* version = DS_Version();
|
||||
char* version = STT_Version();
|
||||
std::cout << "Coqui " << version << "\n";
|
||||
DS_FreeString(version);
|
||||
STT_FreeString(version);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -168,17 +168,17 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize,
|
|||
|
||||
// sphinx-doc: c_ref_inference_start
|
||||
if (extended_output) {
|
||||
Metadata *result = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, 1);
|
||||
Metadata *result = STT_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, 1);
|
||||
res.string = CandidateTranscriptToString(&result->transcripts[0]);
|
||||
DS_FreeMetadata(result);
|
||||
STT_FreeMetadata(result);
|
||||
} else if (json_output) {
|
||||
Metadata *result = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, json_candidate_transcripts);
|
||||
Metadata *result = STT_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, json_candidate_transcripts);
|
||||
res.string = MetadataToJSON(result);
|
||||
DS_FreeMetadata(result);
|
||||
STT_FreeMetadata(result);
|
||||
} else if (stream_size > 0) {
|
||||
StreamingState* ctx;
|
||||
int status = DS_CreateStream(aCtx, &ctx);
|
||||
if (status != DS_ERR_OK) {
|
||||
int status = STT_CreateStream(aCtx, &ctx);
|
||||
if (status != STT_ERR_OK) {
|
||||
res.string = strdup("");
|
||||
return res;
|
||||
}
|
||||
|
@ -187,28 +187,28 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize,
|
|||
const char *prev = nullptr;
|
||||
while (off < aBufferSize) {
|
||||
size_t cur = aBufferSize - off > stream_size ? stream_size : aBufferSize - off;
|
||||
DS_FeedAudioContent(ctx, aBuffer + off, cur);
|
||||
STT_FeedAudioContent(ctx, aBuffer + off, cur);
|
||||
off += cur;
|
||||
prev = last;
|
||||
const char* partial = DS_IntermediateDecode(ctx);
|
||||
const char* partial = STT_IntermediateDecode(ctx);
|
||||
if (last == nullptr || strcmp(last, partial)) {
|
||||
printf("%s\n", partial);
|
||||
last = partial;
|
||||
} else {
|
||||
DS_FreeString((char *) partial);
|
||||
STT_FreeString((char *) partial);
|
||||
}
|
||||
if (prev != nullptr && prev != last) {
|
||||
DS_FreeString((char *) prev);
|
||||
STT_FreeString((char *) prev);
|
||||
}
|
||||
}
|
||||
if (last != nullptr) {
|
||||
DS_FreeString((char *) last);
|
||||
STT_FreeString((char *) last);
|
||||
}
|
||||
res.string = DS_FinishStream(ctx);
|
||||
res.string = STT_FinishStream(ctx);
|
||||
} else if (extended_stream_size > 0) {
|
||||
StreamingState* ctx;
|
||||
int status = DS_CreateStream(aCtx, &ctx);
|
||||
if (status != DS_ERR_OK) {
|
||||
int status = STT_CreateStream(aCtx, &ctx);
|
||||
if (status != STT_ERR_OK) {
|
||||
res.string = strdup("");
|
||||
return res;
|
||||
}
|
||||
|
@ -217,10 +217,10 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize,
|
|||
const char *prev = nullptr;
|
||||
while (off < aBufferSize) {
|
||||
size_t cur = aBufferSize - off > extended_stream_size ? extended_stream_size : aBufferSize - off;
|
||||
DS_FeedAudioContent(ctx, aBuffer + off, cur);
|
||||
STT_FeedAudioContent(ctx, aBuffer + off, cur);
|
||||
off += cur;
|
||||
prev = last;
|
||||
const Metadata* result = DS_IntermediateDecodeWithMetadata(ctx, 1);
|
||||
const Metadata* result = STT_IntermediateDecodeWithMetadata(ctx, 1);
|
||||
const char* partial = CandidateTranscriptToString(&result->transcripts[0]);
|
||||
if (last == nullptr || strcmp(last, partial)) {
|
||||
printf("%s\n", partial);
|
||||
|
@ -231,14 +231,14 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize,
|
|||
if (prev != nullptr && prev != last) {
|
||||
free((char *) prev);
|
||||
}
|
||||
DS_FreeMetadata((Metadata *)result);
|
||||
STT_FreeMetadata((Metadata *)result);
|
||||
}
|
||||
const Metadata* result = DS_FinishStreamWithMetadata(ctx, 1);
|
||||
const Metadata* result = STT_FinishStreamWithMetadata(ctx, 1);
|
||||
res.string = CandidateTranscriptToString(&result->transcripts[0]);
|
||||
DS_FreeMetadata((Metadata *)result);
|
||||
STT_FreeMetadata((Metadata *)result);
|
||||
free((char *) last);
|
||||
} else {
|
||||
res.string = DS_SpeechToText(aCtx, aBuffer, aBufferSize);
|
||||
res.string = STT_SpeechToText(aCtx, aBuffer, aBufferSize);
|
||||
}
|
||||
// sphinx-doc: c_ref_inference_stop
|
||||
|
||||
|
@ -404,7 +404,7 @@ GetAudioBuffer(const char* path, int desired_sample_rate)
|
|||
void
|
||||
ProcessFile(ModelState* context, const char* path, bool show_times)
|
||||
{
|
||||
ds_audio_buffer audio = GetAudioBuffer(path, DS_GetModelSampleRate(context));
|
||||
ds_audio_buffer audio = GetAudioBuffer(path, STT_GetModelSampleRate(context));
|
||||
|
||||
// Pass audio to STT
|
||||
// We take half of buffer_size because buffer is a char* while
|
||||
|
@ -418,7 +418,7 @@ ProcessFile(ModelState* context, const char* path, bool show_times)
|
|||
|
||||
if (result.string) {
|
||||
printf("%s\n", result.string);
|
||||
DS_FreeString((char*)result.string);
|
||||
STT_FreeString((char*)result.string);
|
||||
}
|
||||
|
||||
if (show_times) {
|
||||
|
@ -453,16 +453,16 @@ main(int argc, char **argv)
|
|||
// Initialise STT
|
||||
ModelState* ctx;
|
||||
// sphinx-doc: c_ref_model_start
|
||||
int status = DS_CreateModel(model, &ctx);
|
||||
int status = STT_CreateModel(model, &ctx);
|
||||
if (status != 0) {
|
||||
char* error = DS_ErrorCodeToErrorMessage(status);
|
||||
char* error = STT_ErrorCodeToErrorMessage(status);
|
||||
fprintf(stderr, "Could not create model: %s\n", error);
|
||||
free(error);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (set_beamwidth) {
|
||||
status = DS_SetModelBeamWidth(ctx, beam_width);
|
||||
status = STT_SetModelBeamWidth(ctx, beam_width);
|
||||
if (status != 0) {
|
||||
fprintf(stderr, "Could not set model beam width.\n");
|
||||
return 1;
|
||||
|
@ -470,13 +470,13 @@ main(int argc, char **argv)
|
|||
}
|
||||
|
||||
if (scorer) {
|
||||
status = DS_EnableExternalScorer(ctx, scorer);
|
||||
status = STT_EnableExternalScorer(ctx, scorer);
|
||||
if (status != 0) {
|
||||
fprintf(stderr, "Could not enable external scorer.\n");
|
||||
return 1;
|
||||
}
|
||||
if (set_alphabeta) {
|
||||
status = DS_SetScorerAlphaBeta(ctx, lm_alpha, lm_beta);
|
||||
status = STT_SetScorerAlphaBeta(ctx, lm_alpha, lm_beta);
|
||||
if (status != 0) {
|
||||
fprintf(stderr, "Error setting scorer alpha and beta.\n");
|
||||
return 1;
|
||||
|
@ -494,7 +494,7 @@ main(int argc, char **argv)
|
|||
// so, check the boost string before we turn it into a float
|
||||
bool boost_is_valid = (pair_[1].find_first_not_of("-.0123456789") == std::string::npos);
|
||||
float boost = strtof((pair_[1]).c_str(),0);
|
||||
status = DS_AddHotWord(ctx, word, boost);
|
||||
status = STT_AddHotWord(ctx, word, boost);
|
||||
if (status != 0 || !boost_is_valid) {
|
||||
fprintf(stderr, "Could not enable hot-word.\n");
|
||||
return 1;
|
||||
|
@ -555,7 +555,7 @@ main(int argc, char **argv)
|
|||
sox_quit();
|
||||
#endif // NO_SOX
|
||||
|
||||
DS_FreeModel(ctx);
|
||||
STT_FreeModel(ctx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -61,37 +61,37 @@ typedef struct Metadata {
|
|||
|
||||
// sphinx-doc: error_code_listing_start
|
||||
|
||||
#define DS_FOR_EACH_ERROR(APPLY) \
|
||||
APPLY(DS_ERR_OK, 0x0000, "No error.") \
|
||||
APPLY(DS_ERR_NO_MODEL, 0x1000, "Missing model information.") \
|
||||
APPLY(DS_ERR_INVALID_ALPHABET, 0x2000, "Invalid alphabet embedded in model. (Data corruption?)") \
|
||||
APPLY(DS_ERR_INVALID_SHAPE, 0x2001, "Invalid model shape.") \
|
||||
APPLY(DS_ERR_INVALID_SCORER, 0x2002, "Invalid scorer file.") \
|
||||
APPLY(DS_ERR_MODEL_INCOMPATIBLE, 0x2003, "Incompatible model.") \
|
||||
APPLY(DS_ERR_SCORER_NOT_ENABLED, 0x2004, "External scorer is not enabled.") \
|
||||
APPLY(DS_ERR_SCORER_UNREADABLE, 0x2005, "Could not read scorer file.") \
|
||||
APPLY(DS_ERR_SCORER_INVALID_LM, 0x2006, "Could not recognize language model header in scorer.") \
|
||||
APPLY(DS_ERR_SCORER_NO_TRIE, 0x2007, "Reached end of scorer file before loading vocabulary trie.") \
|
||||
APPLY(DS_ERR_SCORER_INVALID_TRIE, 0x2008, "Invalid magic in trie header.") \
|
||||
APPLY(DS_ERR_SCORER_VERSION_MISMATCH, 0x2009, "Scorer file version does not match expected version.") \
|
||||
APPLY(DS_ERR_FAIL_INIT_MMAP, 0x3000, "Failed to initialize memory mapped model.") \
|
||||
APPLY(DS_ERR_FAIL_INIT_SESS, 0x3001, "Failed to initialize the session.") \
|
||||
APPLY(DS_ERR_FAIL_INTERPRETER, 0x3002, "Interpreter failed.") \
|
||||
APPLY(DS_ERR_FAIL_RUN_SESS, 0x3003, "Failed to run the session.") \
|
||||
APPLY(DS_ERR_FAIL_CREATE_STREAM, 0x3004, "Error creating the stream.") \
|
||||
APPLY(DS_ERR_FAIL_READ_PROTOBUF, 0x3005, "Error reading the proto buffer model file.") \
|
||||
APPLY(DS_ERR_FAIL_CREATE_SESS, 0x3006, "Failed to create session.") \
|
||||
APPLY(DS_ERR_FAIL_CREATE_MODEL, 0x3007, "Could not allocate model state.") \
|
||||
APPLY(DS_ERR_FAIL_INSERT_HOTWORD, 0x3008, "Could not insert hot-word.") \
|
||||
APPLY(DS_ERR_FAIL_CLEAR_HOTWORD, 0x3009, "Could not clear hot-words.") \
|
||||
APPLY(DS_ERR_FAIL_ERASE_HOTWORD, 0x3010, "Could not erase hot-word.")
|
||||
#define STT_FOR_EACH_ERROR(APPLY) \
|
||||
APPLY(STT_ERR_OK, 0x0000, "No error.") \
|
||||
APPLY(STT_ERR_NO_MODEL, 0x1000, "Missing model information.") \
|
||||
APPLY(STT_ERR_INVALID_ALPHABET, 0x2000, "Invalid alphabet embedded in model. (Data corruption?)") \
|
||||
APPLY(STT_ERR_INVALID_SHAPE, 0x2001, "Invalid model shape.") \
|
||||
APPLY(STT_ERR_INVALID_SCORER, 0x2002, "Invalid scorer file.") \
|
||||
APPLY(STT_ERR_MODEL_INCOMPATIBLE, 0x2003, "Incompatible model.") \
|
||||
APPLY(STT_ERR_SCORER_NOT_ENABLED, 0x2004, "External scorer is not enabled.") \
|
||||
APPLY(STT_ERR_SCORER_UNREADABLE, 0x2005, "Could not read scorer file.") \
|
||||
APPLY(STT_ERR_SCORER_INVALID_LM, 0x2006, "Could not recognize language model header in scorer.") \
|
||||
APPLY(STT_ERR_SCORER_NO_TRIE, 0x2007, "Reached end of scorer file before loading vocabulary trie.") \
|
||||
APPLY(STT_ERR_SCORER_INVALID_TRIE, 0x2008, "Invalid magic in trie header.") \
|
||||
APPLY(STT_ERR_SCORER_VERSION_MISMATCH, 0x2009, "Scorer file version does not match expected version.") \
|
||||
APPLY(STT_ERR_FAIL_INIT_MMAP, 0x3000, "Failed to initialize memory mapped model.") \
|
||||
APPLY(STT_ERR_FAIL_INIT_SESS, 0x3001, "Failed to initialize the session.") \
|
||||
APPLY(STT_ERR_FAIL_INTERPRETER, 0x3002, "Interpreter failed.") \
|
||||
APPLY(STT_ERR_FAIL_RUN_SESS, 0x3003, "Failed to run the session.") \
|
||||
APPLY(STT_ERR_FAIL_CREATE_STREAM, 0x3004, "Error creating the stream.") \
|
||||
APPLY(STT_ERR_FAIL_READ_PROTOBUF, 0x3005, "Error reading the proto buffer model file.") \
|
||||
APPLY(STT_ERR_FAIL_CREATE_SESS, 0x3006, "Failed to create session.") \
|
||||
APPLY(STT_ERR_FAIL_CREATE_MODEL, 0x3007, "Could not allocate model state.") \
|
||||
APPLY(STT_ERR_FAIL_INSERT_HOTWORD, 0x3008, "Could not insert hot-word.") \
|
||||
APPLY(STT_ERR_FAIL_CLEAR_HOTWORD, 0x3009, "Could not clear hot-words.") \
|
||||
APPLY(STT_ERR_FAIL_ERASE_HOTWORD, 0x3010, "Could not erase hot-word.")
|
||||
|
||||
// sphinx-doc: error_code_listing_end
|
||||
|
||||
enum DeepSpeech_Error_Codes
|
||||
enum STT_Error_Codes
|
||||
{
|
||||
#define DEFINE(NAME, VALUE, DESC) NAME = VALUE,
|
||||
DS_FOR_EACH_ERROR(DEFINE)
|
||||
STT_FOR_EACH_ERROR(DEFINE)
|
||||
#undef DEFINE
|
||||
};
|
||||
|
||||
|
@ -104,49 +104,49 @@ DS_FOR_EACH_ERROR(DEFINE)
|
|||
* @return Zero on success, non-zero on failure.
|
||||
*/
|
||||
STT_EXPORT
|
||||
int DS_CreateModel(const char* aModelPath,
|
||||
int STT_CreateModel(const char* aModelPath,
|
||||
ModelState** retval);
|
||||
|
||||
/**
|
||||
* @brief Get beam width value used by the model. If {@link DS_SetModelBeamWidth}
|
||||
* @brief Get beam width value used by the model. If {@link STT_SetModelBeamWidth}
|
||||
* was not called before, will return the default value loaded from the
|
||||
* model file.
|
||||
*
|
||||
* @param aCtx A ModelState pointer created with {@link DS_CreateModel}.
|
||||
* @param aCtx A ModelState pointer created with {@link STT_CreateModel}.
|
||||
*
|
||||
* @return Beam width value used by the model.
|
||||
*/
|
||||
STT_EXPORT
|
||||
unsigned int DS_GetModelBeamWidth(const ModelState* aCtx);
|
||||
unsigned int STT_GetModelBeamWidth(const ModelState* aCtx);
|
||||
|
||||
/**
|
||||
* @brief Set beam width value used by the model.
|
||||
*
|
||||
* @param aCtx A ModelState pointer created with {@link DS_CreateModel}.
|
||||
* @param aCtx A ModelState pointer created with {@link STT_CreateModel}.
|
||||
* @param aBeamWidth The beam width used by the model. A larger beam width value
|
||||
* generates better results at the cost of decoding time.
|
||||
*
|
||||
* @return Zero on success, non-zero on failure.
|
||||
*/
|
||||
STT_EXPORT
|
||||
int DS_SetModelBeamWidth(ModelState* aCtx,
|
||||
int STT_SetModelBeamWidth(ModelState* aCtx,
|
||||
unsigned int aBeamWidth);
|
||||
|
||||
/**
|
||||
* @brief Return the sample rate expected by a model.
|
||||
*
|
||||
* @param aCtx A ModelState pointer created with {@link DS_CreateModel}.
|
||||
* @param aCtx A ModelState pointer created with {@link STT_CreateModel}.
|
||||
*
|
||||
* @return Sample rate expected by the model for its input.
|
||||
*/
|
||||
STT_EXPORT
|
||||
int DS_GetModelSampleRate(const ModelState* aCtx);
|
||||
int STT_GetModelSampleRate(const ModelState* aCtx);
|
||||
|
||||
/**
|
||||
* @brief Frees associated resources and destroys model object.
|
||||
*/
|
||||
STT_EXPORT
|
||||
void DS_FreeModel(ModelState* ctx);
|
||||
void STT_FreeModel(ModelState* ctx);
|
||||
|
||||
/**
|
||||
* @brief Enable decoding using an external scorer.
|
||||
|
@ -157,7 +157,7 @@ void DS_FreeModel(ModelState* ctx);
|
|||
* @return Zero on success, non-zero on failure (invalid arguments).
|
||||
*/
|
||||
STT_EXPORT
|
||||
int DS_EnableExternalScorer(ModelState* aCtx,
|
||||
int STT_EnableExternalScorer(ModelState* aCtx,
|
||||
const char* aScorerPath);
|
||||
|
||||
/**
|
||||
|
@ -172,7 +172,7 @@ int DS_EnableExternalScorer(ModelState* aCtx,
|
|||
* @return Zero on success, non-zero on failure (invalid arguments).
|
||||
*/
|
||||
STT_EXPORT
|
||||
int DS_AddHotWord(ModelState* aCtx,
|
||||
int STT_AddHotWord(ModelState* aCtx,
|
||||
const char* word,
|
||||
float boost);
|
||||
|
||||
|
@ -185,7 +185,7 @@ int DS_AddHotWord(ModelState* aCtx,
|
|||
* @return Zero on success, non-zero on failure (invalid arguments).
|
||||
*/
|
||||
STT_EXPORT
|
||||
int DS_EraseHotWord(ModelState* aCtx,
|
||||
int STT_EraseHotWord(ModelState* aCtx,
|
||||
const char* word);
|
||||
|
||||
/**
|
||||
|
@ -196,7 +196,7 @@ int DS_EraseHotWord(ModelState* aCtx,
|
|||
* @return Zero on success, non-zero on failure (invalid arguments).
|
||||
*/
|
||||
STT_EXPORT
|
||||
int DS_ClearHotWords(ModelState* aCtx);
|
||||
int STT_ClearHotWords(ModelState* aCtx);
|
||||
|
||||
/**
|
||||
* @brief Disable decoding using an external scorer.
|
||||
|
@ -206,7 +206,7 @@ int DS_ClearHotWords(ModelState* aCtx);
|
|||
* @return Zero on success, non-zero on failure.
|
||||
*/
|
||||
STT_EXPORT
|
||||
int DS_DisableExternalScorer(ModelState* aCtx);
|
||||
int STT_DisableExternalScorer(ModelState* aCtx);
|
||||
|
||||
/**
|
||||
* @brief Set hyperparameters alpha and beta of the external scorer.
|
||||
|
@ -218,7 +218,7 @@ int DS_DisableExternalScorer(ModelState* aCtx);
|
|||
* @return Zero on success, non-zero on failure.
|
||||
*/
|
||||
STT_EXPORT
|
||||
int DS_SetScorerAlphaBeta(ModelState* aCtx,
|
||||
int STT_SetScorerAlphaBeta(ModelState* aCtx,
|
||||
float aAlpha,
|
||||
float aBeta);
|
||||
|
||||
|
@ -231,10 +231,10 @@ int DS_SetScorerAlphaBeta(ModelState* aCtx,
|
|||
* @param aBufferSize The number of samples in the audio signal.
|
||||
*
|
||||
* @return The STT result. The user is responsible for freeing the string using
|
||||
* {@link DS_FreeString()}. Returns NULL on error.
|
||||
* {@link STT_FreeString()}. Returns NULL on error.
|
||||
*/
|
||||
STT_EXPORT
|
||||
char* DS_SpeechToText(ModelState* aCtx,
|
||||
char* STT_SpeechToText(ModelState* aCtx,
|
||||
const short* aBuffer,
|
||||
unsigned int aBufferSize);
|
||||
|
||||
|
@ -250,19 +250,19 @@ char* DS_SpeechToText(ModelState* aCtx,
|
|||
*
|
||||
* @return Metadata struct containing multiple CandidateTranscript structs. Each
|
||||
* transcript has per-token metadata including timing information. The
|
||||
* user is responsible for freeing Metadata by calling {@link DS_FreeMetadata()}.
|
||||
* user is responsible for freeing Metadata by calling {@link STT_FreeMetadata()}.
|
||||
* Returns NULL on error.
|
||||
*/
|
||||
STT_EXPORT
|
||||
Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx,
|
||||
Metadata* STT_SpeechToTextWithMetadata(ModelState* aCtx,
|
||||
const short* aBuffer,
|
||||
unsigned int aBufferSize,
|
||||
unsigned int aNumResults);
|
||||
|
||||
/**
|
||||
* @brief Create a new streaming inference state. The streaming state returned
|
||||
* by this function can then be passed to {@link DS_FeedAudioContent()}
|
||||
* and {@link DS_FinishStream()}.
|
||||
* by this function can then be passed to {@link STT_FeedAudioContent()}
|
||||
* and {@link STT_FinishStream()}.
|
||||
*
|
||||
* @param aCtx The ModelState pointer for the model to use.
|
||||
* @param[out] retval an opaque pointer that represents the streaming state. Can
|
||||
|
@ -271,80 +271,80 @@ Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx,
|
|||
* @return Zero for success, non-zero on failure.
|
||||
*/
|
||||
STT_EXPORT
|
||||
int DS_CreateStream(ModelState* aCtx,
|
||||
int STT_CreateStream(ModelState* aCtx,
|
||||
StreamingState** retval);
|
||||
|
||||
/**
|
||||
* @brief Feed audio samples to an ongoing streaming inference.
|
||||
*
|
||||
* @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}.
|
||||
* @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}.
|
||||
* @param aBuffer An array of 16-bit, mono raw audio samples at the
|
||||
* appropriate sample rate (matching what the model was trained on).
|
||||
* @param aBufferSize The number of samples in @p aBuffer.
|
||||
*/
|
||||
STT_EXPORT
|
||||
void DS_FeedAudioContent(StreamingState* aSctx,
|
||||
void STT_FeedAudioContent(StreamingState* aSctx,
|
||||
const short* aBuffer,
|
||||
unsigned int aBufferSize);
|
||||
|
||||
/**
|
||||
* @brief Compute the intermediate decoding of an ongoing streaming inference.
|
||||
*
|
||||
* @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}.
|
||||
* @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}.
|
||||
*
|
||||
* @return The STT intermediate result. The user is responsible for freeing the
|
||||
* string using {@link DS_FreeString()}.
|
||||
* string using {@link STT_FreeString()}.
|
||||
*/
|
||||
STT_EXPORT
|
||||
char* DS_IntermediateDecode(const StreamingState* aSctx);
|
||||
char* STT_IntermediateDecode(const StreamingState* aSctx);
|
||||
|
||||
/**
|
||||
* @brief Compute the intermediate decoding of an ongoing streaming inference,
|
||||
* return results including metadata.
|
||||
*
|
||||
* @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}.
|
||||
* @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}.
|
||||
* @param aNumResults The number of candidate transcripts to return.
|
||||
*
|
||||
* @return Metadata struct containing multiple candidate transcripts. Each transcript
|
||||
* has per-token metadata including timing information. The user is
|
||||
* responsible for freeing Metadata by calling {@link DS_FreeMetadata()}.
|
||||
* responsible for freeing Metadata by calling {@link STT_FreeMetadata()}.
|
||||
* Returns NULL on error.
|
||||
*/
|
||||
STT_EXPORT
|
||||
Metadata* DS_IntermediateDecodeWithMetadata(const StreamingState* aSctx,
|
||||
Metadata* STT_IntermediateDecodeWithMetadata(const StreamingState* aSctx,
|
||||
unsigned int aNumResults);
|
||||
|
||||
/**
|
||||
* @brief Compute the final decoding of an ongoing streaming inference and return
|
||||
* the result. Signals the end of an ongoing streaming inference.
|
||||
*
|
||||
* @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}.
|
||||
* @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}.
|
||||
*
|
||||
* @return The STT result. The user is responsible for freeing the string using
|
||||
* {@link DS_FreeString()}.
|
||||
* {@link STT_FreeString()}.
|
||||
*
|
||||
* @note This method will free the state pointer (@p aSctx).
|
||||
*/
|
||||
STT_EXPORT
|
||||
char* DS_FinishStream(StreamingState* aSctx);
|
||||
char* STT_FinishStream(StreamingState* aSctx);
|
||||
|
||||
/**
|
||||
* @brief Compute the final decoding of an ongoing streaming inference and return
|
||||
* results including metadata. Signals the end of an ongoing streaming
|
||||
* inference.
|
||||
*
|
||||
* @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}.
|
||||
* @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}.
|
||||
* @param aNumResults The number of candidate transcripts to return.
|
||||
*
|
||||
* @return Metadata struct containing multiple candidate transcripts. Each transcript
|
||||
* has per-token metadata including timing information. The user is
|
||||
* responsible for freeing Metadata by calling {@link DS_FreeMetadata()}.
|
||||
* responsible for freeing Metadata by calling {@link STT_FreeMetadata()}.
|
||||
* Returns NULL on error.
|
||||
*
|
||||
* @note This method will free the state pointer (@p aSctx).
|
||||
*/
|
||||
STT_EXPORT
|
||||
Metadata* DS_FinishStreamWithMetadata(StreamingState* aSctx,
|
||||
Metadata* STT_FinishStreamWithMetadata(StreamingState* aSctx,
|
||||
unsigned int aNumResults);
|
||||
|
||||
/**
|
||||
|
@ -352,42 +352,42 @@ Metadata* DS_FinishStreamWithMetadata(StreamingState* aSctx,
|
|||
* can be used if you no longer need the result of an ongoing streaming
|
||||
* inference and don't want to perform a costly decode operation.
|
||||
*
|
||||
* @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}.
|
||||
* @param aSctx A streaming state pointer returned by {@link STT_CreateStream()}.
|
||||
*
|
||||
* @note This method will free the state pointer (@p aSctx).
|
||||
*/
|
||||
STT_EXPORT
|
||||
void DS_FreeStream(StreamingState* aSctx);
|
||||
void STT_FreeStream(StreamingState* aSctx);
|
||||
|
||||
/**
|
||||
* @brief Free memory allocated for metadata information.
|
||||
*/
|
||||
STT_EXPORT
|
||||
void DS_FreeMetadata(Metadata* m);
|
||||
void STT_FreeMetadata(Metadata* m);
|
||||
|
||||
/**
|
||||
* @brief Free a char* string returned by the Coqui STT API.
|
||||
*/
|
||||
STT_EXPORT
|
||||
void DS_FreeString(char* str);
|
||||
void STT_FreeString(char* str);
|
||||
|
||||
/**
|
||||
* @brief Returns the version of this library. The returned version is a semantic
|
||||
* version (SemVer 2.0.0). The string returned must be freed with {@link DS_FreeString()}.
|
||||
* version (SemVer 2.0.0). The string returned must be freed with {@link STT_FreeString()}.
|
||||
*
|
||||
* @return The version string.
|
||||
*/
|
||||
STT_EXPORT
|
||||
char* DS_Version();
|
||||
char* STT_Version();
|
||||
|
||||
/**
|
||||
* @brief Returns a textual description corresponding to an error code.
|
||||
* The string returned must be freed with @{link DS_FreeString()}.
|
||||
* The string returned must be freed with {@link STT_FreeString()}.
|
||||
*
|
||||
* @return The error description.
|
||||
*/
|
||||
STT_EXPORT
|
||||
char* DS_ErrorCodeToErrorMessage(int aErrorCode);
|
||||
char* STT_ErrorCodeToErrorMessage(int aErrorCode);
|
||||
|
||||
#undef STT_EXPORT
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@ __version__ = swigwrapper.__version__.decode('utf-8')
|
|||
# Hack: import error codes by matching on their names, as SWIG unfortunately
|
||||
# does not support binding enums to Python in a scoped manner yet.
|
||||
for symbol in dir(swigwrapper):
|
||||
if symbol.startswith('DS_ERR_'):
|
||||
if symbol.startswith('STT_ERR_'):
|
||||
globals()[symbol] = getattr(swigwrapper, symbol)
|
||||
|
||||
class Scorer(swigwrapper.Scorer):
|
||||
|
|
|
@ -74,13 +74,13 @@ int Scorer::load_lm(const std::string& lm_path)
|
|||
// Check if file is readable to avoid KenLM throwing an exception
|
||||
const char* filename = lm_path.c_str();
|
||||
if (access(filename, R_OK) != 0) {
|
||||
return DS_ERR_SCORER_UNREADABLE;
|
||||
return STT_ERR_SCORER_UNREADABLE;
|
||||
}
|
||||
|
||||
// Check if the file format is valid to avoid KenLM throwing an exception
|
||||
lm::ngram::ModelType model_type;
|
||||
if (!lm::ngram::RecognizeBinary(filename, model_type)) {
|
||||
return DS_ERR_SCORER_INVALID_LM;
|
||||
return STT_ERR_SCORER_INVALID_LM;
|
||||
}
|
||||
|
||||
// Load the LM
|
||||
|
@ -97,7 +97,7 @@ int Scorer::load_lm(const std::string& lm_path)
|
|||
uint64_t trie_offset = language_model_->GetEndOfSearchOffset();
|
||||
if (package_size <= trie_offset) {
|
||||
// File ends without a trie structure
|
||||
return DS_ERR_SCORER_NO_TRIE;
|
||||
return STT_ERR_SCORER_NO_TRIE;
|
||||
}
|
||||
|
||||
// Read metadata and trie from file
|
||||
|
@ -113,7 +113,7 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path)
|
|||
if (magic != MAGIC) {
|
||||
std::cerr << "Error: Can't parse scorer file, invalid header. Try updating "
|
||||
"your scorer file." << std::endl;
|
||||
return DS_ERR_SCORER_INVALID_TRIE;
|
||||
return STT_ERR_SCORER_INVALID_TRIE;
|
||||
}
|
||||
|
||||
int version;
|
||||
|
@ -128,7 +128,7 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path)
|
|||
std::cerr << "Downgrade your scorer file or update your version of Coqui STT.";
|
||||
}
|
||||
std::cerr << std::endl;
|
||||
return DS_ERR_SCORER_VERSION_MISMATCH;
|
||||
return STT_ERR_SCORER_VERSION_MISMATCH;
|
||||
}
|
||||
|
||||
fin.read(reinterpret_cast<char*>(&is_utf8_mode_), sizeof(is_utf8_mode_));
|
||||
|
@ -143,7 +143,7 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path)
|
|||
opt.mode = fst::FstReadOptions::MAP;
|
||||
opt.source = file_path;
|
||||
dictionary.reset(FstType::Read(fin, opt));
|
||||
return DS_ERR_OK;
|
||||
return STT_ERR_OK;
|
||||
}
|
||||
|
||||
bool Scorer::save_dictionary(const std::string& path, bool append_instead_of_overwrite)
|
||||
|
|
|
@ -1,33 +0,0 @@
|
|||
namespace DeepSpeechClient.Enums
|
||||
{
|
||||
/// <summary>
|
||||
/// Error codes from the native DeepSpeech binary.
|
||||
/// </summary>
|
||||
internal enum ErrorCodes
|
||||
{
|
||||
// OK
|
||||
DS_ERR_OK = 0x0000,
|
||||
|
||||
// Missing invormations
|
||||
DS_ERR_NO_MODEL = 0x1000,
|
||||
|
||||
// Invalid parameters
|
||||
DS_ERR_INVALID_ALPHABET = 0x2000,
|
||||
DS_ERR_INVALID_SHAPE = 0x2001,
|
||||
DS_ERR_INVALID_SCORER = 0x2002,
|
||||
DS_ERR_MODEL_INCOMPATIBLE = 0x2003,
|
||||
DS_ERR_SCORER_NOT_ENABLED = 0x2004,
|
||||
|
||||
// Runtime failures
|
||||
DS_ERR_FAIL_INIT_MMAP = 0x3000,
|
||||
DS_ERR_FAIL_INIT_SESS = 0x3001,
|
||||
DS_ERR_FAIL_INTERPRETER = 0x3002,
|
||||
DS_ERR_FAIL_RUN_SESS = 0x3003,
|
||||
DS_ERR_FAIL_CREATE_STREAM = 0x3004,
|
||||
DS_ERR_FAIL_READ_PROTOBUF = 0x3005,
|
||||
DS_ERR_FAIL_CREATE_SESS = 0x3006,
|
||||
DS_ERR_FAIL_INSERT_HOTWORD = 0x3008,
|
||||
DS_ERR_FAIL_CLEAR_HOTWORD = 0x3009,
|
||||
DS_ERR_FAIL_ERASE_HOTWORD = 0x3010
|
||||
}
|
||||
}
|
|
@ -2,9 +2,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00
|
|||
# Visual Studio Version 16
|
||||
VisualStudioVersion = 16.0.30204.135
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeepSpeechClient", "DeepSpeechClient\DeepSpeechClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}"
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "STTClient", "STTClient\STTClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechConsole", "DeepSpeechConsole\DeepSpeechConsole.csproj", "{312965E5-C4F6-4D95-BA64-79906B8BC7AC}"
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "STTConsole", "STTConsole\STTConsole.csproj", "{312965E5-C4F6-4D95-BA64-79906B8BC7AC}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
@ -0,0 +1,33 @@
|
|||
namespace STTClient.Enums
|
||||
{
|
||||
/// <summary>
|
||||
/// Error codes from the native Coqui STT binary.
|
||||
/// </summary>
|
||||
internal enum ErrorCodes
|
||||
{
|
||||
// OK
|
||||
STT_ERR_OK = 0x0000,
|
||||
|
||||
// Missing information
|
||||
STT_ERR_NO_MODEL = 0x1000,
|
||||
|
||||
// Invalid parameters
|
||||
STT_ERR_INVALID_ALPHABET = 0x2000,
|
||||
STT_ERR_INVALID_SHAPE = 0x2001,
|
||||
STT_ERR_INVALID_SCORER = 0x2002,
|
||||
STT_ERR_MODEL_INCOMPATIBLE = 0x2003,
|
||||
STT_ERR_SCORER_NOT_ENABLED = 0x2004,
|
||||
|
||||
// Runtime failures
|
||||
STT_ERR_FAIL_INIT_MMAP = 0x3000,
|
||||
STT_ERR_FAIL_INIT_SESS = 0x3001,
|
||||
STT_ERR_FAIL_INTERPRETER = 0x3002,
|
||||
STT_ERR_FAIL_RUN_SESS = 0x3003,
|
||||
STT_ERR_FAIL_CREATE_STREAM = 0x3004,
|
||||
STT_ERR_FAIL_READ_PROTOBUF = 0x3005,
|
||||
STT_ERR_FAIL_CREATE_SESS = 0x3006,
|
||||
STT_ERR_FAIL_INSERT_HOTWORD = 0x3008,
|
||||
STT_ERR_FAIL_CLEAR_HOTWORD = 0x3009,
|
||||
STT_ERR_FAIL_ERASE_HOTWORD = 0x3010
|
||||
}
|
||||
}
|
|
@ -1,9 +1,9 @@
|
|||
using DeepSpeechClient.Structs;
|
||||
using STTClient.Structs;
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Text;
|
||||
|
||||
namespace DeepSpeechClient.Extensions
|
||||
namespace STTClient.Extensions
|
||||
{
|
||||
internal static class NativeExtensions
|
||||
{
|
||||
|
@ -20,7 +20,7 @@ namespace DeepSpeechClient.Extensions
|
|||
byte[] buffer = new byte[len];
|
||||
Marshal.Copy(intPtr, buffer, 0, buffer.Length);
|
||||
if (releasePtr)
|
||||
NativeImp.DS_FreeString(intPtr);
|
||||
NativeImp.STT_FreeString(intPtr);
|
||||
string result = Encoding.UTF8.GetString(buffer);
|
||||
return result;
|
||||
}
|
||||
|
@ -86,7 +86,7 @@ namespace DeepSpeechClient.Extensions
|
|||
metadata.transcripts += sizeOfCandidateTranscript;
|
||||
}
|
||||
|
||||
NativeImp.DS_FreeMetadata(intPtr);
|
||||
NativeImp.STT_FreeMetadata(intPtr);
|
||||
return managedMetadata;
|
||||
}
|
||||
}
|
|
@ -1,13 +1,13 @@
|
|||
using DeepSpeechClient.Models;
|
||||
using STTClient.Models;
|
||||
using System;
|
||||
using System.IO;
|
||||
|
||||
namespace DeepSpeechClient.Interfaces
|
||||
namespace STTClient.Interfaces
|
||||
{
|
||||
/// <summary>
|
||||
/// Client interface for DeepSpeech
|
||||
/// Client interface for Coqui STT
|
||||
/// </summary>
|
||||
public interface IDeepSpeech : IDisposable
|
||||
public interface ISTT : IDisposable
|
||||
{
|
||||
/// <summary>
|
||||
/// Return version of this library. The returned version is a semantic version
|
||||
|
@ -80,7 +80,7 @@ namespace DeepSpeechClient.Interfaces
|
|||
unsafe void SetScorerAlphaBeta(float aAlpha, float aBeta);
|
||||
|
||||
/// <summary>
|
||||
/// Use the DeepSpeech model to perform Speech-To-Text.
|
||||
/// Use the STT model to perform Speech-To-Text.
|
||||
/// </summary>
|
||||
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
|
||||
/// <param name="aBufferSize">The number of samples in the audio signal.</param>
|
||||
|
@ -89,7 +89,7 @@ namespace DeepSpeechClient.Interfaces
|
|||
uint aBufferSize);
|
||||
|
||||
/// <summary>
|
||||
/// Use the DeepSpeech model to perform Speech-To-Text, return results including metadata.
|
||||
/// Use the STT model to perform Speech-To-Text, return results including metadata.
|
||||
/// </summary>
|
||||
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
|
||||
/// <param name="aBufferSize">The number of samples in the audio signal.</param>
|
||||
|
@ -104,26 +104,26 @@ namespace DeepSpeechClient.Interfaces
|
|||
/// This can be used if you no longer need the result of an ongoing streaming
|
||||
/// inference and don't want to perform a costly decode operation.
|
||||
/// </summary>
|
||||
unsafe void FreeStream(DeepSpeechStream stream);
|
||||
unsafe void FreeStream(Stream stream);
|
||||
|
||||
/// <summary>
|
||||
/// Creates a new streaming inference state.
|
||||
/// </summary>
|
||||
unsafe DeepSpeechStream CreateStream();
|
||||
unsafe Stream CreateStream();
|
||||
|
||||
/// <summary>
|
||||
/// Feeds audio samples to an ongoing streaming inference.
|
||||
/// </summary>
|
||||
/// <param name="stream">Instance of the stream to feed the data.</param>
|
||||
/// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on).</param>
|
||||
unsafe void FeedAudioContent(DeepSpeechStream stream, short[] aBuffer, uint aBufferSize);
|
||||
unsafe void FeedAudioContent(Stream stream, short[] aBuffer, uint aBufferSize);
|
||||
|
||||
/// <summary>
|
||||
/// Computes the intermediate decoding of an ongoing streaming inference.
|
||||
/// </summary>
|
||||
/// <param name="stream">Instance of the stream to decode.</param>
|
||||
/// <returns>The STT intermediate result.</returns>
|
||||
unsafe string IntermediateDecode(DeepSpeechStream stream);
|
||||
unsafe string IntermediateDecode(Stream stream);
|
||||
|
||||
/// <summary>
|
||||
/// Computes the intermediate decoding of an ongoing streaming inference, including metadata.
|
||||
|
@ -131,14 +131,14 @@ namespace DeepSpeechClient.Interfaces
|
|||
/// <param name="stream">Instance of the stream to decode.</param>
|
||||
/// <param name="aNumResults">Maximum number of candidate transcripts to return. Returned list might be smaller than this.</param>
|
||||
/// <returns>The extended metadata result.</returns>
|
||||
unsafe Metadata IntermediateDecodeWithMetadata(DeepSpeechStream stream, uint aNumResults);
|
||||
unsafe Metadata IntermediateDecodeWithMetadata(Stream stream, uint aNumResults);
|
||||
|
||||
/// <summary>
|
||||
/// Closes the ongoing streaming inference, returns the STT result over the whole audio signal.
|
||||
/// </summary>
|
||||
/// <param name="stream">Instance of the stream to finish.</param>
|
||||
/// <returns>The STT result.</returns>
|
||||
unsafe string FinishStream(DeepSpeechStream stream);
|
||||
unsafe string FinishStream(Stream stream);
|
||||
|
||||
/// <summary>
|
||||
/// Closes the ongoing streaming inference, returns the STT result over the whole audio signal, including metadata.
|
||||
|
@ -146,6 +146,6 @@ namespace DeepSpeechClient.Interfaces
|
|||
/// <param name="stream">Instance of the stream to finish.</param>
|
||||
/// <param name="aNumResults">Maximum number of candidate transcripts to return. Returned list might be smaller than this.</param>
|
||||
/// <returns>The extended metadata result.</returns>
|
||||
unsafe Metadata FinishStreamWithMetadata(DeepSpeechStream stream, uint aNumResults);
|
||||
unsafe Metadata FinishStreamWithMetadata(Stream stream, uint aNumResults);
|
||||
}
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
namespace DeepSpeechClient.Models
|
||||
namespace STTClient.Models
|
||||
{
|
||||
/// <summary>
|
||||
/// Stores the entire CTC output as an array of character metadata objects.
|
|
@ -1,4 +1,4 @@
|
|||
namespace DeepSpeechClient.Models
|
||||
namespace STTClient.Models
|
||||
{
|
||||
/// <summary>
|
||||
/// Stores the entire CTC output as an array of character metadata objects.
|
|
@ -1,19 +1,19 @@
|
|||
using System;
|
||||
|
||||
namespace DeepSpeechClient.Models
|
||||
namespace STTClient.Models
|
||||
{
|
||||
/// <summary>
|
||||
/// Wrapper of the pointer used for the decoding stream.
|
||||
/// </summary>
|
||||
public class DeepSpeechStream : IDisposable
|
||||
public class Stream : IDisposable
|
||||
{
|
||||
private unsafe IntPtr** _streamingStatePp;
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of <see cref="DeepSpeechStream"/>.
|
||||
/// Initializes a new instance of <see cref="Stream"/>.
|
||||
/// </summary>
|
||||
/// <param name="streamingStatePP">Native pointer of the native stream.</param>
|
||||
public unsafe DeepSpeechStream(IntPtr** streamingStatePP)
|
||||
public unsafe Stream(IntPtr** streamingStatePP)
|
||||
{
|
||||
_streamingStatePp = streamingStatePP;
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
namespace DeepSpeechClient.Models
|
||||
namespace STTClient.Models
|
||||
{
|
||||
/// <summary>
|
||||
/// Stores each individual character, along with its timing information.
|
|
@ -1,9 +1,9 @@
|
|||
using DeepSpeechClient.Enums;
|
||||
using STTClient.Enums;
|
||||
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace DeepSpeechClient
|
||||
namespace STTClient
|
||||
{
|
||||
/// <summary>
|
||||
/// Wrapper for the native implementation of "libstt.so"
|
||||
|
@ -13,101 +13,101 @@ namespace DeepSpeechClient
|
|||
#region Native Implementation
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl,
|
||||
CharSet = CharSet.Ansi, SetLastError = true)]
|
||||
internal static extern IntPtr DS_Version();
|
||||
internal static extern IntPtr STT_Version();
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
internal unsafe static extern ErrorCodes DS_CreateModel(string aModelPath,
|
||||
internal unsafe static extern ErrorCodes STT_CreateModel(string aModelPath,
|
||||
ref IntPtr** pint);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
internal unsafe static extern IntPtr DS_ErrorCodeToErrorMessage(int aErrorCode);
|
||||
internal unsafe static extern IntPtr STT_ErrorCodeToErrorMessage(int aErrorCode);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
internal unsafe static extern uint DS_GetModelBeamWidth(IntPtr** aCtx);
|
||||
internal unsafe static extern uint STT_GetModelBeamWidth(IntPtr** aCtx);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
internal unsafe static extern ErrorCodes DS_SetModelBeamWidth(IntPtr** aCtx,
|
||||
internal unsafe static extern ErrorCodes STT_SetModelBeamWidth(IntPtr** aCtx,
|
||||
uint aBeamWidth);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
internal unsafe static extern ErrorCodes DS_CreateModel(string aModelPath,
|
||||
internal unsafe static extern ErrorCodes STT_CreateModel(string aModelPath,
|
||||
uint aBeamWidth,
|
||||
ref IntPtr** pint);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
internal unsafe static extern int DS_GetModelSampleRate(IntPtr** aCtx);
|
||||
internal unsafe static extern int STT_GetModelSampleRate(IntPtr** aCtx);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
internal static unsafe extern ErrorCodes DS_EnableExternalScorer(IntPtr** aCtx,
|
||||
internal static unsafe extern ErrorCodes STT_EnableExternalScorer(IntPtr** aCtx,
|
||||
string aScorerPath);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
internal static unsafe extern ErrorCodes DS_AddHotWord(IntPtr** aCtx,
|
||||
internal static unsafe extern ErrorCodes STT_AddHotWord(IntPtr** aCtx,
|
||||
string aWord,
|
||||
float aBoost);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
internal static unsafe extern ErrorCodes DS_EraseHotWord(IntPtr** aCtx,
|
||||
internal static unsafe extern ErrorCodes STT_EraseHotWord(IntPtr** aCtx,
|
||||
string aWord);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
internal static unsafe extern ErrorCodes DS_ClearHotWords(IntPtr** aCtx);
|
||||
internal static unsafe extern ErrorCodes STT_ClearHotWords(IntPtr** aCtx);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
internal static unsafe extern ErrorCodes DS_DisableExternalScorer(IntPtr** aCtx);
|
||||
internal static unsafe extern ErrorCodes STT_DisableExternalScorer(IntPtr** aCtx);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
internal static unsafe extern ErrorCodes DS_SetScorerAlphaBeta(IntPtr** aCtx,
|
||||
internal static unsafe extern ErrorCodes STT_SetScorerAlphaBeta(IntPtr** aCtx,
|
||||
float aAlpha,
|
||||
float aBeta);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl,
|
||||
CharSet = CharSet.Ansi, SetLastError = true)]
|
||||
internal static unsafe extern IntPtr DS_SpeechToText(IntPtr** aCtx,
|
||||
internal static unsafe extern IntPtr STT_SpeechToText(IntPtr** aCtx,
|
||||
short[] aBuffer,
|
||||
uint aBufferSize);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl, SetLastError = true)]
|
||||
internal static unsafe extern IntPtr DS_SpeechToTextWithMetadata(IntPtr** aCtx,
|
||||
internal static unsafe extern IntPtr STT_SpeechToTextWithMetadata(IntPtr** aCtx,
|
||||
short[] aBuffer,
|
||||
uint aBufferSize,
|
||||
uint aNumResults);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
internal static unsafe extern void DS_FreeModel(IntPtr** aCtx);
|
||||
internal static unsafe extern void STT_FreeModel(IntPtr** aCtx);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
internal static unsafe extern ErrorCodes DS_CreateStream(IntPtr** aCtx,
|
||||
internal static unsafe extern ErrorCodes STT_CreateStream(IntPtr** aCtx,
|
||||
ref IntPtr** retval);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
internal static unsafe extern void DS_FreeStream(IntPtr** aSctx);
|
||||
internal static unsafe extern void STT_FreeStream(IntPtr** aSctx);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
internal static unsafe extern void DS_FreeMetadata(IntPtr metadata);
|
||||
internal static unsafe extern void STT_FreeMetadata(IntPtr metadata);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
internal static unsafe extern void DS_FreeString(IntPtr str);
|
||||
internal static unsafe extern void STT_FreeString(IntPtr str);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl,
|
||||
CharSet = CharSet.Ansi, SetLastError = true)]
|
||||
internal static unsafe extern void DS_FeedAudioContent(IntPtr** aSctx,
|
||||
internal static unsafe extern void STT_FeedAudioContent(IntPtr** aSctx,
|
||||
short[] aBuffer,
|
||||
uint aBufferSize);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
internal static unsafe extern IntPtr DS_IntermediateDecode(IntPtr** aSctx);
|
||||
internal static unsafe extern IntPtr STT_IntermediateDecode(IntPtr** aSctx);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
internal static unsafe extern IntPtr DS_IntermediateDecodeWithMetadata(IntPtr** aSctx,
|
||||
internal static unsafe extern IntPtr STT_IntermediateDecodeWithMetadata(IntPtr** aSctx,
|
||||
uint aNumResults);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl,
|
||||
CharSet = CharSet.Ansi, SetLastError = true)]
|
||||
internal static unsafe extern IntPtr DS_FinishStream(IntPtr** aSctx);
|
||||
internal static unsafe extern IntPtr STT_FinishStream(IntPtr** aSctx);
|
||||
|
||||
[DllImport("libstt.so", CallingConvention = CallingConvention.Cdecl)]
|
||||
internal static unsafe extern IntPtr DS_FinishStreamWithMetadata(IntPtr** aSctx,
|
||||
internal static unsafe extern IntPtr STT_FinishStreamWithMetadata(IntPtr** aSctx,
|
||||
uint aNumResults);
|
||||
#endregion
|
||||
}
|
|
@ -1,34 +1,34 @@
|
|||
using DeepSpeechClient.Interfaces;
|
||||
using DeepSpeechClient.Extensions;
|
||||
using STTClient.Interfaces;
|
||||
using STTClient.Extensions;
|
||||
|
||||
using System;
|
||||
using System.IO;
|
||||
using DeepSpeechClient.Enums;
|
||||
using DeepSpeechClient.Models;
|
||||
using STTClient.Enums;
|
||||
using STTClient.Models;
|
||||
|
||||
namespace DeepSpeechClient
|
||||
namespace STTClient
|
||||
{
|
||||
/// <summary>
|
||||
/// Concrete implementation of <see cref="DeepSpeechClient.Interfaces.IDeepSpeech"/>.
|
||||
/// Concrete implementation of <see cref="STTClient.Interfaces.ISTT"/>.
|
||||
/// </summary>
|
||||
public class DeepSpeech : IDeepSpeech
|
||||
public class STT : ISTT
|
||||
{
|
||||
private unsafe IntPtr** _modelStatePP;
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of <see cref="DeepSpeech"/> class and creates a new acoustic model.
|
||||
/// Initializes a new instance of <see cref="STT"/> class and creates a new acoustic model.
|
||||
/// </summary>
|
||||
/// <param name="aModelPath">The path to the frozen model graph.</param>
|
||||
/// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
|
||||
public DeepSpeech(string aModelPath)
|
||||
public STT(string aModelPath)
|
||||
{
|
||||
CreateModel(aModelPath);
|
||||
}
|
||||
|
||||
#region IDeepSpeech
|
||||
#region ISTT
|
||||
|
||||
/// <summary>
|
||||
/// Create an object providing an interface to a trained DeepSpeech model.
|
||||
/// Create an object providing an interface to a trained STT model.
|
||||
/// </summary>
|
||||
/// <param name="aModelPath">The path to the frozen model graph.</param>
|
||||
/// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
|
||||
|
@ -48,7 +48,7 @@ namespace DeepSpeechClient
|
|||
{
|
||||
throw new FileNotFoundException(exceptionMessage);
|
||||
}
|
||||
var resultCode = NativeImp.DS_CreateModel(aModelPath,
|
||||
var resultCode = NativeImp.STT_CreateModel(aModelPath,
|
||||
ref _modelStatePP);
|
||||
EvaluateResultCode(resultCode);
|
||||
}
|
||||
|
@ -60,7 +60,7 @@ namespace DeepSpeechClient
|
|||
/// <returns>Beam width value used by the model.</returns>
|
||||
public unsafe uint GetModelBeamWidth()
|
||||
{
|
||||
return NativeImp.DS_GetModelBeamWidth(_modelStatePP);
|
||||
return NativeImp.STT_GetModelBeamWidth(_modelStatePP);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -70,13 +70,13 @@ namespace DeepSpeechClient
|
|||
/// <exception cref="ArgumentException">Thrown on failure.</exception>
|
||||
public unsafe void SetModelBeamWidth(uint aBeamWidth)
|
||||
{
|
||||
var resultCode = NativeImp.DS_SetModelBeamWidth(_modelStatePP, aBeamWidth);
|
||||
var resultCode = NativeImp.STT_SetModelBeamWidth(_modelStatePP, aBeamWidth);
|
||||
EvaluateResultCode(resultCode);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Add a hot-word.
|
||||
///
|
||||
///
|
||||
/// Words that don't occur in the scorer (e.g. proper nouns) or strings that contain spaces won't be taken into account.
|
||||
/// </summary>
|
||||
/// <param name="aWord">Some word</param>
|
||||
|
@ -84,7 +84,7 @@ namespace DeepSpeechClient
|
|||
/// <exception cref="ArgumentException">Thrown on failure.</exception>
|
||||
public unsafe void AddHotWord(string aWord, float aBoost)
|
||||
{
|
||||
var resultCode = NativeImp.DS_AddHotWord(_modelStatePP, aWord, aBoost);
|
||||
var resultCode = NativeImp.STT_AddHotWord(_modelStatePP, aWord, aBoost);
|
||||
EvaluateResultCode(resultCode);
|
||||
}
|
||||
|
||||
|
@ -95,7 +95,7 @@ namespace DeepSpeechClient
|
|||
/// <exception cref="ArgumentException">Thrown on failure.</exception>
|
||||
public unsafe void EraseHotWord(string aWord)
|
||||
{
|
||||
var resultCode = NativeImp.DS_EraseHotWord(_modelStatePP, aWord);
|
||||
var resultCode = NativeImp.STT_EraseHotWord(_modelStatePP, aWord);
|
||||
EvaluateResultCode(resultCode);
|
||||
}
|
||||
|
||||
|
@ -105,7 +105,7 @@ namespace DeepSpeechClient
|
|||
/// <exception cref="ArgumentException">Thrown on failure.</exception>
|
||||
public unsafe void ClearHotWords()
|
||||
{
|
||||
var resultCode = NativeImp.DS_ClearHotWords(_modelStatePP);
|
||||
var resultCode = NativeImp.STT_ClearHotWords(_modelStatePP);
|
||||
EvaluateResultCode(resultCode);
|
||||
}
|
||||
|
||||
|
@ -115,7 +115,7 @@ namespace DeepSpeechClient
|
|||
/// <returns>Sample rate.</returns>
|
||||
public unsafe int GetModelSampleRate()
|
||||
{
|
||||
return NativeImp.DS_GetModelSampleRate(_modelStatePP);
|
||||
return NativeImp.STT_GetModelSampleRate(_modelStatePP);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -124,9 +124,9 @@ namespace DeepSpeechClient
|
|||
/// <param name="resultCode">Native result code.</param>
|
||||
private void EvaluateResultCode(ErrorCodes resultCode)
|
||||
{
|
||||
if (resultCode != ErrorCodes.DS_ERR_OK)
|
||||
if (resultCode != ErrorCodes.STT_ERR_OK)
|
||||
{
|
||||
throw new ArgumentException(NativeImp.DS_ErrorCodeToErrorMessage((int)resultCode).PtrToString());
|
||||
throw new ArgumentException(NativeImp.STT_ErrorCodeToErrorMessage((int)resultCode).PtrToString());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -135,7 +135,7 @@ namespace DeepSpeechClient
|
|||
/// </summary>
|
||||
public unsafe void Dispose()
|
||||
{
|
||||
NativeImp.DS_FreeModel(_modelStatePP);
|
||||
NativeImp.STT_FreeModel(_modelStatePP);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -155,7 +155,7 @@ namespace DeepSpeechClient
|
|||
throw new FileNotFoundException($"Cannot find the scorer file: {aScorerPath}");
|
||||
}
|
||||
|
||||
var resultCode = NativeImp.DS_EnableExternalScorer(_modelStatePP, aScorerPath);
|
||||
var resultCode = NativeImp.STT_EnableExternalScorer(_modelStatePP, aScorerPath);
|
||||
EvaluateResultCode(resultCode);
|
||||
}
|
||||
|
||||
|
@ -165,7 +165,7 @@ namespace DeepSpeechClient
|
|||
/// <exception cref="ArgumentException">Thrown when an external scorer is not enabled.</exception>
|
||||
public unsafe void DisableExternalScorer()
|
||||
{
|
||||
var resultCode = NativeImp.DS_DisableExternalScorer(_modelStatePP);
|
||||
var resultCode = NativeImp.STT_DisableExternalScorer(_modelStatePP);
|
||||
EvaluateResultCode(resultCode);
|
||||
}
|
||||
|
||||
|
@ -177,7 +177,7 @@ namespace DeepSpeechClient
|
|||
/// <exception cref="ArgumentException">Thrown when an external scorer is not enabled.</exception>
|
||||
public unsafe void SetScorerAlphaBeta(float aAlpha, float aBeta)
|
||||
{
|
||||
var resultCode = NativeImp.DS_SetScorerAlphaBeta(_modelStatePP,
|
||||
var resultCode = NativeImp.STT_SetScorerAlphaBeta(_modelStatePP,
|
||||
aAlpha,
|
||||
aBeta);
|
||||
EvaluateResultCode(resultCode);
|
||||
|
@ -188,9 +188,9 @@ namespace DeepSpeechClient
|
|||
/// </summary>
|
||||
/// <param name="stream">Instance of the stream to feed the data.</param>
|
||||
/// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on).</param>
|
||||
public unsafe void FeedAudioContent(DeepSpeechStream stream, short[] aBuffer, uint aBufferSize)
|
||||
public unsafe void FeedAudioContent(Stream stream, short[] aBuffer, uint aBufferSize)
|
||||
{
|
||||
NativeImp.DS_FeedAudioContent(stream.GetNativePointer(), aBuffer, aBufferSize);
|
||||
NativeImp.STT_FeedAudioContent(stream.GetNativePointer(), aBuffer, aBufferSize);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -198,9 +198,9 @@ namespace DeepSpeechClient
|
|||
/// </summary>
|
||||
/// <param name="stream">Instance of the stream to finish.</param>
|
||||
/// <returns>The STT result.</returns>
|
||||
public unsafe string FinishStream(DeepSpeechStream stream)
|
||||
public unsafe string FinishStream(Stream stream)
|
||||
{
|
||||
return NativeImp.DS_FinishStream(stream.GetNativePointer()).PtrToString();
|
||||
return NativeImp.STT_FinishStream(stream.GetNativePointer()).PtrToString();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -209,9 +209,9 @@ namespace DeepSpeechClient
|
|||
/// <param name="stream">Instance of the stream to finish.</param>
|
||||
/// <param name="aNumResults">Maximum number of candidate transcripts to return. Returned list might be smaller than this.</param>
|
||||
/// <returns>The extended metadata result.</returns>
|
||||
public unsafe Metadata FinishStreamWithMetadata(DeepSpeechStream stream, uint aNumResults)
|
||||
public unsafe Metadata FinishStreamWithMetadata(Stream stream, uint aNumResults)
|
||||
{
|
||||
return NativeImp.DS_FinishStreamWithMetadata(stream.GetNativePointer(), aNumResults).PtrToMetadata();
|
||||
return NativeImp.STT_FinishStreamWithMetadata(stream.GetNativePointer(), aNumResults).PtrToMetadata();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -219,9 +219,9 @@ namespace DeepSpeechClient
|
|||
/// </summary>
|
||||
/// <param name="stream">Instance of the stream to decode.</param>
|
||||
/// <returns>The STT intermediate result.</returns>
|
||||
public unsafe string IntermediateDecode(DeepSpeechStream stream)
|
||||
public unsafe string IntermediateDecode(Stream stream)
|
||||
{
|
||||
return NativeImp.DS_IntermediateDecode(stream.GetNativePointer()).PtrToString();
|
||||
return NativeImp.STT_IntermediateDecode(stream.GetNativePointer()).PtrToString();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -230,9 +230,9 @@ namespace DeepSpeechClient
|
|||
/// <param name="stream">Instance of the stream to decode.</param>
|
||||
/// <param name="aNumResults">Maximum number of candidate transcripts to return. Returned list might be smaller than this.</param>
|
||||
/// <returns>The STT intermediate result.</returns>
|
||||
public unsafe Metadata IntermediateDecodeWithMetadata(DeepSpeechStream stream, uint aNumResults)
|
||||
public unsafe Metadata IntermediateDecodeWithMetadata(Stream stream, uint aNumResults)
|
||||
{
|
||||
return NativeImp.DS_IntermediateDecodeWithMetadata(stream.GetNativePointer(), aNumResults).PtrToMetadata();
|
||||
return NativeImp.STT_IntermediateDecodeWithMetadata(stream.GetNativePointer(), aNumResults).PtrToMetadata();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -241,18 +241,18 @@ namespace DeepSpeechClient
|
|||
/// </summary>
|
||||
public unsafe string Version()
|
||||
{
|
||||
return NativeImp.DS_Version().PtrToString();
|
||||
return NativeImp.STT_Version().PtrToString();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Creates a new streaming inference state.
|
||||
/// </summary>
|
||||
public unsafe DeepSpeechStream CreateStream()
|
||||
public unsafe Stream CreateStream()
|
||||
{
|
||||
IntPtr** streamingStatePointer = null;
|
||||
var resultCode = NativeImp.DS_CreateStream(_modelStatePP, ref streamingStatePointer);
|
||||
var resultCode = NativeImp.STT_CreateStream(_modelStatePP, ref streamingStatePointer);
|
||||
EvaluateResultCode(resultCode);
|
||||
return new DeepSpeechStream(streamingStatePointer);
|
||||
return new Stream(streamingStatePointer);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -260,25 +260,25 @@ namespace DeepSpeechClient
|
|||
/// This can be used if you no longer need the result of an ongoing streaming
|
||||
/// inference and don't want to perform a costly decode operation.
|
||||
/// </summary>
|
||||
public unsafe void FreeStream(DeepSpeechStream stream)
|
||||
public unsafe void FreeStream(Stream stream)
|
||||
{
|
||||
NativeImp.DS_FreeStream(stream.GetNativePointer());
|
||||
NativeImp.STT_FreeStream(stream.GetNativePointer());
|
||||
stream.Dispose();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Use the DeepSpeech model to perform Speech-To-Text.
|
||||
/// Use the STT model to perform Speech-To-Text.
|
||||
/// </summary>
|
||||
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
|
||||
/// <param name="aBufferSize">The number of samples in the audio signal.</param>
|
||||
/// <returns>The STT result. Returns NULL on error.</returns>
|
||||
public unsafe string SpeechToText(short[] aBuffer, uint aBufferSize)
|
||||
{
|
||||
return NativeImp.DS_SpeechToText(_modelStatePP, aBuffer, aBufferSize).PtrToString();
|
||||
return NativeImp.STT_SpeechToText(_modelStatePP, aBuffer, aBufferSize).PtrToString();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Use the DeepSpeech model to perform Speech-To-Text, return results including metadata.
|
||||
/// Use the STT model to perform Speech-To-Text, return results including metadata.
|
||||
/// </summary>
|
||||
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
|
||||
/// <param name="aBufferSize">The number of samples in the audio signal.</param>
|
||||
|
@ -286,7 +286,7 @@ namespace DeepSpeechClient
|
|||
/// <returns>The extended metadata. Returns NULL on error.</returns>
|
||||
public unsafe Metadata SpeechToTextWithMetadata(short[] aBuffer, uint aBufferSize, uint aNumResults)
|
||||
{
|
||||
return NativeImp.DS_SpeechToTextWithMetadata(_modelStatePP, aBuffer, aBufferSize, aNumResults).PtrToMetadata();
|
||||
return NativeImp.STT_SpeechToTextWithMetadata(_modelStatePP, aBuffer, aBufferSize, aNumResults).PtrToMetadata();
|
||||
}
|
||||
|
||||
#endregion
|
|
@ -1,7 +1,7 @@
|
|||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace DeepSpeechClient.Structs
|
||||
namespace STTClient.Structs
|
||||
{
|
||||
[StructLayout(LayoutKind.Sequential)]
|
||||
internal unsafe struct CandidateTranscript
|
|
@ -1,7 +1,7 @@
|
|||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace DeepSpeechClient.Structs
|
||||
namespace STTClient.Structs
|
||||
{
|
||||
[StructLayout(LayoutKind.Sequential)]
|
||||
internal unsafe struct Metadata
|
|
@ -1,7 +1,7 @@
|
|||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace DeepSpeechClient.Structs
|
||||
namespace STTClient.Structs
|
||||
{
|
||||
[StructLayout(LayoutKind.Sequential)]
|
||||
internal unsafe struct TokenMetadata
|
|
@ -1,6 +1,6 @@
|
|||
using DeepSpeechClient;
|
||||
using DeepSpeechClient.Interfaces;
|
||||
using DeepSpeechClient.Models;
|
||||
using STTClient;
|
||||
using STTClient.Interfaces;
|
||||
using STTClient.Models;
|
||||
using NAudio.Wave;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
@ -54,7 +54,7 @@ namespace CSharpExamples
|
|||
Console.WriteLine("Loading model...");
|
||||
stopwatch.Start();
|
||||
// sphinx-doc: csharp_ref_model_start
|
||||
using (IDeepSpeech sttClient = new DeepSpeech(model ?? "output_graph.pbmm"))
|
||||
using (ISTT sttClient = new STT(model ?? "output_graph.pbmm"))
|
||||
{
|
||||
// sphinx-doc: csharp_ref_model_stop
|
||||
stopwatch.Stop();
|
|
@ -5,12 +5,12 @@ using System.Runtime.InteropServices;
|
|||
// General Information about an assembly is controlled through the following
|
||||
// set of attributes. Change these attribute values to modify the information
|
||||
// associated with an assembly.
|
||||
[assembly: AssemblyTitle("DeepSpeechConsole")]
|
||||
[assembly: AssemblyTitle("STTConsole")]
|
||||
[assembly: AssemblyDescription("")]
|
||||
[assembly: AssemblyConfiguration("")]
|
||||
[assembly: AssemblyCompany("")]
|
||||
[assembly: AssemblyProduct("CSharpExamples")]
|
||||
[assembly: AssemblyCopyright("Copyright © 2018")]
|
||||
[assembly: AssemblyCompany("Coqui GmbH")]
|
||||
[assembly: AssemblyProduct("STTConsole")]
|
||||
[assembly: AssemblyCopyright("Copyright © 2018-2020 Mozilla, © 2021 Coqui GmbH")]
|
||||
[assembly: AssemblyTrademark("")]
|
||||
[assembly: AssemblyCulture("")]
|
||||
|
|
@ -6,8 +6,8 @@
|
|||
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
|
||||
<ProjectGuid>{312965E5-C4F6-4D95-BA64-79906B8BC7AC}</ProjectGuid>
|
||||
<OutputType>Exe</OutputType>
|
||||
<RootNamespace>DeepSpeechConsole</RootNamespace>
|
||||
<AssemblyName>DeepSpeechConsole</AssemblyName>
|
||||
<RootNamespace>STTConsole</RootNamespace>
|
||||
<AssemblyName>STTConsole</AssemblyName>
|
||||
<TargetFrameworkVersion>v4.6.2</TargetFrameworkVersion>
|
||||
<FileAlignment>512</FileAlignment>
|
||||
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
|
||||
|
@ -56,9 +56,9 @@
|
|||
<None Include="packages.config" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\DeepSpeechClient\DeepSpeechClient.csproj">
|
||||
<ProjectReference Include="..\STTClient\STTClient.csproj">
|
||||
<Project>{56DE4091-BBBE-47E4-852D-7268B33B971F}</Project>
|
||||
<Name>DeepSpeechClient</Name>
|
||||
<Name>STTClient</Name>
|
||||
</ProjectReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
|
@ -1,8 +1,8 @@
|
|||
<Application
|
||||
x:Class="DeepSpeechWPF.App"
|
||||
x:Class="STTWPF.App"
|
||||
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
|
||||
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
|
||||
xmlns:local="clr-namespace:DeepSpeechWPF"
|
||||
xmlns:local="clr-namespace:STTWPF"
|
||||
StartupUri="MainWindow.xaml">
|
||||
<Application.Resources />
|
||||
</Application>
|
|
@ -1,10 +1,10 @@
|
|||
using CommonServiceLocator;
|
||||
using DeepSpeech.WPF.ViewModels;
|
||||
using DeepSpeechClient.Interfaces;
|
||||
using STT.WPF.ViewModels;
|
||||
using STTClient.Interfaces;
|
||||
using GalaSoft.MvvmLight.Ioc;
|
||||
using System.Windows;
|
||||
|
||||
namespace DeepSpeechWPF
|
||||
namespace STTWPF
|
||||
{
|
||||
/// <summary>
|
||||
/// Interaction logic for App.xaml
|
||||
|
@ -18,11 +18,11 @@ namespace DeepSpeechWPF
|
|||
|
||||
try
|
||||
{
|
||||
//Register instance of DeepSpeech
|
||||
DeepSpeechClient.DeepSpeech deepSpeechClient =
|
||||
new DeepSpeechClient.DeepSpeech("deepspeech-0.8.0-models.pbmm");
|
||||
//Register instance of STT
|
||||
STTClient.STT client =
|
||||
new STTClient.STT("coqui-stt-0.8.0-models.pbmm");
|
||||
|
||||
SimpleIoc.Default.Register<IDeepSpeech>(() => deepSpeechClient);
|
||||
SimpleIoc.Default.Register<ISTT>(() => client);
|
||||
SimpleIoc.Default.Register<MainWindowViewModel>();
|
||||
}
|
||||
catch (System.Exception ex)
|
||||
|
@ -35,8 +35,8 @@ namespace DeepSpeechWPF
|
|||
protected override void OnExit(ExitEventArgs e)
|
||||
{
|
||||
base.OnExit(e);
|
||||
//Dispose instance of DeepSpeech
|
||||
ServiceLocator.Current.GetInstance<IDeepSpeech>()?.Dispose();
|
||||
//Dispose instance of STT
|
||||
ServiceLocator.Current.GetInstance<ISTT>()?.Dispose();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,10 +1,10 @@
|
|||
<Window
|
||||
x:Class="DeepSpeechWPF.MainWindow"
|
||||
x:Class="STTWPF.MainWindow"
|
||||
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
|
||||
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
|
||||
xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
|
||||
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
|
||||
Title="Deepspeech client"
|
||||
Title="STT client"
|
||||
Width="800"
|
||||
Height="600"
|
||||
Loaded="Window_Loaded"
|
|
@ -1,8 +1,8 @@
|
|||
using CommonServiceLocator;
|
||||
using DeepSpeech.WPF.ViewModels;
|
||||
using STT.WPF.ViewModels;
|
||||
using System.Windows;
|
||||
|
||||
namespace DeepSpeechWPF
|
||||
namespace STTWPF
|
||||
{
|
||||
/// <summary>
|
||||
/// Interaction logic for MainWindow.xaml
|
|
@ -7,12 +7,12 @@ using System.Windows;
|
|||
// General Information about an assembly is controlled through the following
|
||||
// set of attributes. Change these attribute values to modify the information
|
||||
// associated with an assembly.
|
||||
[assembly: AssemblyTitle("DeepSpeech.WPF")]
|
||||
[assembly: AssemblyTitle("STT.WPF")]
|
||||
[assembly: AssemblyDescription("")]
|
||||
[assembly: AssemblyConfiguration("")]
|
||||
[assembly: AssemblyCompany("")]
|
||||
[assembly: AssemblyProduct("DeepSpeech.WPF.SingleFiles")]
|
||||
[assembly: AssemblyCopyright("Copyright © 2018")]
|
||||
[assembly: AssemblyCompany("Coqui GmbH")]
|
||||
[assembly: AssemblyProduct("STT.WPF.SingleFiles")]
|
||||
[assembly: AssemblyCopyright("Copyright © 2018-2020 Mozilla, © 2021 Coqui GmbH")]
|
||||
[assembly: AssemblyTrademark("")]
|
||||
[assembly: AssemblyCulture("")]
|
||||
|
|
@ -8,7 +8,7 @@
|
|||
// </auto-generated>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
namespace DeepSpeech.WPF.Properties {
|
||||
namespace STT.WPF.Properties {
|
||||
using System;
|
||||
|
||||
|
||||
|
@ -39,7 +39,7 @@ namespace DeepSpeech.WPF.Properties {
|
|||
internal static global::System.Resources.ResourceManager ResourceManager {
|
||||
get {
|
||||
if (object.ReferenceEquals(resourceMan, null)) {
|
||||
global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("DeepSpeech.WPF.Properties.Resources", typeof(Resources).Assembly);
|
||||
global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("STT.WPF.Properties.Resources", typeof(Resources).Assembly);
|
||||
resourceMan = temp;
|
||||
}
|
||||
return resourceMan;
|
|
@ -8,7 +8,7 @@
|
|||
// </auto-generated>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
namespace DeepSpeech.WPF.Properties {
|
||||
namespace STT.WPF.Properties {
|
||||
|
||||
|
||||
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
|
|
@ -6,8 +6,8 @@
|
|||
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
|
||||
<ProjectGuid>{54BFD766-4305-4F4C-BA59-AF45505DF3C1}</ProjectGuid>
|
||||
<OutputType>WinExe</OutputType>
|
||||
<RootNamespace>DeepSpeech.WPF</RootNamespace>
|
||||
<AssemblyName>DeepSpeech.WPF</AssemblyName>
|
||||
<RootNamespace>STT.WPF</RootNamespace>
|
||||
<AssemblyName>STT.WPF</AssemblyName>
|
||||
<TargetFrameworkVersion>v4.6.2</TargetFrameworkVersion>
|
||||
<FileAlignment>512</FileAlignment>
|
||||
<ProjectTypeGuids>{60dc8134-eba5-43b8-bcc9-bb4bc16c2548};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
|
||||
|
@ -131,9 +131,9 @@
|
|||
<None Include="App.config" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\DeepSpeechClient\DeepSpeechClient.csproj">
|
||||
<ProjectReference Include="..\STTClient\STTClient.csproj">
|
||||
<Project>{56de4091-bbbe-47e4-852d-7268b33b971f}</Project>
|
||||
<Name>DeepSpeechClient</Name>
|
||||
<Name>STTClient</Name>
|
||||
</ProjectReference>
|
||||
</ItemGroup>
|
||||
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
|
|
@ -3,9 +3,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00
|
|||
# Visual Studio 15
|
||||
VisualStudioVersion = 15.0.28307.421
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeech.WPF", "DeepSpeech.WPF.csproj", "{54BFD766-4305-4F4C-BA59-AF45505DF3C1}"
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "STT.WPF", "STT.WPF.csproj", "{54BFD766-4305-4F4C-BA59-AF45505DF3C1}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechClient", "..\DeepSpeechClient\DeepSpeechClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}"
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "STTClient", "..\STTClient\STTClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
@ -3,7 +3,7 @@ using System.Collections.Generic;
|
|||
using System.ComponentModel;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace DeepSpeech.WPF.ViewModels
|
||||
namespace STT.WPF.ViewModels
|
||||
{
|
||||
/// <summary>
|
||||
/// Implementation of <see cref="INotifyPropertyChanged"/> to simplify models.
|
|
@ -3,8 +3,8 @@ using CSCore;
|
|||
using CSCore.CoreAudioAPI;
|
||||
using CSCore.SoundIn;
|
||||
using CSCore.Streams;
|
||||
using DeepSpeechClient.Interfaces;
|
||||
using DeepSpeechClient.Models;
|
||||
using STTClient.Interfaces;
|
||||
using STTClient.Models;
|
||||
using GalaSoft.MvvmLight.CommandWpf;
|
||||
using Microsoft.Win32;
|
||||
using System;
|
||||
|
@ -15,7 +15,7 @@ using System.IO;
|
|||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace DeepSpeech.WPF.ViewModels
|
||||
namespace STT.WPF.ViewModels
|
||||
{
|
||||
/// <summary>
|
||||
/// View model of the MainWindow View.
|
||||
|
@ -27,7 +27,7 @@ namespace DeepSpeech.WPF.ViewModels
|
|||
private const string ScorerPath = "kenlm.scorer";
|
||||
#endregion
|
||||
|
||||
private readonly IDeepSpeech _sttClient;
|
||||
private readonly ISTT _sttClient;
|
||||
|
||||
#region Commands
|
||||
/// <summary>
|
||||
|
@ -62,7 +62,7 @@ namespace DeepSpeech.WPF.ViewModels
|
|||
/// <summary>
|
||||
/// Stream used to feed data into the acoustic model.
|
||||
/// </summary>
|
||||
private DeepSpeechStream _sttStream;
|
||||
private Stream _sttStream;
|
||||
|
||||
/// <summary>
|
||||
/// Records the audio of the selected device.
|
||||
|
@ -75,7 +75,7 @@ namespace DeepSpeech.WPF.ViewModels
|
|||
private SoundInSource _soundInSource;
|
||||
|
||||
/// <summary>
|
||||
/// Target wave source.(16KHz Mono 16bit for DeepSpeech)
|
||||
/// Target wave source.(16KHz Mono 16bit for STT)
|
||||
/// </summary>
|
||||
private IWaveSource _convertedSource;
|
||||
|
||||
|
@ -200,7 +200,7 @@ namespace DeepSpeech.WPF.ViewModels
|
|||
#endregion
|
||||
|
||||
#region Ctors
|
||||
public MainWindowViewModel(IDeepSpeech sttClient)
|
||||
public MainWindowViewModel(ISTT sttClient)
|
||||
{
|
||||
_sttClient = sttClient;
|
||||
|
||||
|
@ -290,7 +290,7 @@ namespace DeepSpeech.WPF.ViewModels
|
|||
//read data from the converedSource
|
||||
//important: don't use the e.Data here
|
||||
//the e.Data contains the raw data provided by the
|
||||
//soundInSource which won't have the deepspeech required audio format
|
||||
//soundInSource which won't have the STT required audio format
|
||||
byte[] buffer = new byte[_convertedSource.WaveFormat.BytesPerSecond / 2];
|
||||
|
||||
int read;
|
|
@ -66,9 +66,9 @@ create_package(absl::optional<string> alphabet_path,
|
|||
scorer.set_utf8_mode(force_bytes_output_mode.value());
|
||||
scorer.reset_params(default_alpha, default_beta);
|
||||
int err = scorer.load_lm(lm_path);
|
||||
if (err != DS_ERR_SCORER_NO_TRIE) {
|
||||
if (err != STT_ERR_SCORER_NO_TRIE) {
|
||||
cerr << "Error loading language model file: "
|
||||
<< (err == DS_ERR_SCORER_UNREADABLE ? "Can't open binary LM file." : DS_ErrorCodeToErrorMessage(err))
|
||||
<< (err == STT_ERR_SCORER_UNREADABLE ? "Can't open binary LM file." : STT_ErrorCodeToErrorMessage(err))
|
||||
<< "\n";
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
android:roundIcon="@mipmap/ic_launcher_round"
|
||||
android:supportsRtl="true"
|
||||
android:theme="@style/AppTheme">
|
||||
<activity android:name=".DeepSpeechActivity">
|
||||
<activity android:name=".STTActivity">
|
||||
<intent-filter>
|
||||
<action android:name="android.intent.action.MAIN" />
|
||||
|
||||
|
|
|
@ -16,11 +16,11 @@ import java.io.IOException;
|
|||
import java.nio.ByteOrder;
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
import ai.coqui.libstt.DeepSpeechModel;
|
||||
import ai.coqui.libstt.STTModel;
|
||||
|
||||
public class DeepSpeechActivity extends AppCompatActivity {
|
||||
public class STTActivity extends AppCompatActivity {
|
||||
|
||||
DeepSpeechModel _m = null;
|
||||
STTModel _m = null;
|
||||
|
||||
EditText _tfliteModel;
|
||||
EditText _audioFile;
|
||||
|
@ -50,7 +50,7 @@ public class DeepSpeechActivity extends AppCompatActivity {
|
|||
this._tfliteStatus.setText("Creating model");
|
||||
if (this._m == null) {
|
||||
// sphinx-doc: java_ref_model_start
|
||||
this._m = new DeepSpeechModel(tfliteModel);
|
||||
this._m = new STTModel(tfliteModel);
|
||||
this._m.setBeamWidth(BEAM_WIDTH);
|
||||
// sphinx-doc: java_ref_model_stop
|
||||
}
|
||||
|
@ -124,7 +124,7 @@ public class DeepSpeechActivity extends AppCompatActivity {
|
|||
@Override
|
||||
protected void onCreate(Bundle savedInstanceState) {
|
||||
super.onCreate(savedInstanceState);
|
||||
setContentView(R.layout.activity_deep_speech);
|
||||
setContentView(R.layout.activity_stt);
|
||||
|
||||
this._decodedString = (TextView) findViewById(R.id.decodedString);
|
||||
this._tfliteStatus = (TextView) findViewById(R.id.tfliteStatus);
|
|
@ -4,7 +4,7 @@
|
|||
xmlns:tools="http://schemas.android.com/tools"
|
||||
android:layout_width="match_parent"
|
||||
android:layout_height="match_parent"
|
||||
tools:context=".DeepSpeechActivity">
|
||||
tools:context=".STTActivity">
|
||||
|
||||
<!--
|
||||
<TextView
|
|
@ -10,7 +10,7 @@
|
|||
%javaconst(1);
|
||||
|
||||
%include "arrays_java.i"
|
||||
// apply to DS_FeedAudioContent and DS_SpeechToText
|
||||
// apply to STT_FeedAudioContent and STT_SpeechToText
|
||||
%apply short[] { short* };
|
||||
|
||||
%include "cpointer.i"
|
||||
|
@ -43,7 +43,7 @@
|
|||
}
|
||||
|
||||
~Metadata() {
|
||||
DS_FreeMetadata(self);
|
||||
STT_FreeMetadata(self);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -54,13 +54,13 @@
|
|||
%nodefaultctor TokenMetadata;
|
||||
%nodefaultdtor TokenMetadata;
|
||||
|
||||
%typemap(newfree) char* "DS_FreeString($1);";
|
||||
%newobject DS_SpeechToText;
|
||||
%newobject DS_IntermediateDecode;
|
||||
%newobject DS_FinishStream;
|
||||
%newobject DS_ErrorCodeToErrorMessage;
|
||||
%typemap(newfree) char* "STT_FreeString($1);";
|
||||
%newobject STT_SpeechToText;
|
||||
%newobject STT_IntermediateDecode;
|
||||
%newobject STT_FinishStream;
|
||||
%newobject STT_ErrorCodeToErrorMessage;
|
||||
|
||||
%rename ("%(strip:[DS_])s") "";
|
||||
%rename ("%(strip:[STT_])s") "";
|
||||
|
||||
// make struct members camel case to suit Java conventions
|
||||
%rename ("%(camelcase)s", %$ismember) "";
|
||||
|
|
|
@ -11,7 +11,7 @@ import org.junit.runners.MethodSorters;
|
|||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import ai.coqui.libstt.DeepSpeechModel;
|
||||
import ai.coqui.libstt.STTModel;
|
||||
import ai.coqui.libstt.CandidateTranscript;
|
||||
|
||||
import java.io.RandomAccessFile;
|
||||
|
@ -58,8 +58,8 @@ public class BasicTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void loadDeepSpeech_basic() {
|
||||
DeepSpeechModel m = new DeepSpeechModel(modelFile);
|
||||
public void loadSTT_basic() {
|
||||
STTModel m = new STTModel(modelFile);
|
||||
m.freeModel();
|
||||
}
|
||||
|
||||
|
@ -71,7 +71,7 @@ public class BasicTest {
|
|||
return retval;
|
||||
}
|
||||
|
||||
private String doSTT(DeepSpeechModel m, boolean extendedMetadata) {
|
||||
private String doSTT(STTModel m, boolean extendedMetadata) {
|
||||
try {
|
||||
RandomAccessFile wave = new RandomAccessFile(wavFile, "r");
|
||||
|
||||
|
@ -115,8 +115,8 @@ public class BasicTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void loadDeepSpeech_stt_noLM() {
|
||||
DeepSpeechModel m = new DeepSpeechModel(modelFile);
|
||||
public void loadSTT_stt_noLM() {
|
||||
STTModel m = new STTModel(modelFile);
|
||||
|
||||
String decoded = doSTT(m, false);
|
||||
assertEquals("she had your dark suit in greasy wash water all year", decoded);
|
||||
|
@ -124,8 +124,8 @@ public class BasicTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void loadDeepSpeech_stt_withLM() {
|
||||
DeepSpeechModel m = new DeepSpeechModel(modelFile);
|
||||
public void loadSTT_stt_withLM() {
|
||||
STTModel m = new STTModel(modelFile);
|
||||
m.enableExternalScorer(scorerFile);
|
||||
|
||||
String decoded = doSTT(m, false);
|
||||
|
@ -134,8 +134,8 @@ public class BasicTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void loadDeepSpeech_sttWithMetadata_noLM() {
|
||||
DeepSpeechModel m = new DeepSpeechModel(modelFile);
|
||||
public void loadSTT_sttWithMetadata_noLM() {
|
||||
STTModel m = new STTModel(modelFile);
|
||||
|
||||
String decoded = doSTT(m, true);
|
||||
assertEquals("she had your dark suit in greasy wash water all year", decoded);
|
||||
|
@ -143,8 +143,8 @@ public class BasicTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void loadDeepSpeech_sttWithMetadata_withLM() {
|
||||
DeepSpeechModel m = new DeepSpeechModel(modelFile);
|
||||
public void loadSTT_sttWithMetadata_withLM() {
|
||||
STTModel m = new STTModel(modelFile);
|
||||
m.enableExternalScorer(scorerFile);
|
||||
|
||||
String decoded = doSTT(m, true);
|
||||
|
@ -153,8 +153,8 @@ public class BasicTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void loadDeepSpeech_HotWord_withLM() {
|
||||
DeepSpeechModel m = new DeepSpeechModel(modelFile);
|
||||
public void loadSTT_HotWord_withLM() {
|
||||
STTModel m = new STTModel(modelFile);
|
||||
m.enableExternalScorer(scorerFile);
|
||||
|
||||
for(int i = 0; i < word.length; i++) {
|
||||
|
@ -168,8 +168,8 @@ public class BasicTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void loadDeepSpeech_HotWord_noLM() {
|
||||
DeepSpeechModel m = new DeepSpeechModel(modelFile);
|
||||
public void loadSTT_HotWord_noLM() {
|
||||
STTModel m = new STTModel(modelFile);
|
||||
try {
|
||||
m.addHotWord(word[0], boost[0]);
|
||||
assert(false);
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
package ai.coqui.libstt;
|
||||
|
||||
/**
|
||||
* @brief Exposes a DeepSpeech model in Java
|
||||
* @brief Exposes a STT model in Java
|
||||
**/
|
||||
public class DeepSpeechModel {
|
||||
public class STTModel {
|
||||
|
||||
static {
|
||||
System.loadLibrary("stt-jni");
|
||||
|
@ -15,14 +15,14 @@ public class DeepSpeechModel {
|
|||
private SWIGTYPE_p_ModelState _msp;
|
||||
|
||||
private void evaluateErrorCode(int errorCode) {
|
||||
DeepSpeech_Error_Codes code = DeepSpeech_Error_Codes.swigToEnum(errorCode);
|
||||
if (code != DeepSpeech_Error_Codes.ERR_OK) {
|
||||
STT_Error_Codes code = STT_Error_Codes.swigToEnum(errorCode);
|
||||
if (code != STT_Error_Codes.ERR_OK) {
|
||||
throw new RuntimeException("Error: " + impl.ErrorCodeToErrorMessage(errorCode) + " (0x" + Integer.toHexString(errorCode) + ").");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief An object providing an interface to a trained DeepSpeech model.
|
||||
* @brief An object providing an interface to a trained STT model.
|
||||
*
|
||||
* @constructor
|
||||
*
|
||||
|
@ -30,7 +30,7 @@ public class DeepSpeechModel {
|
|||
*
|
||||
* @throws RuntimeException on failure.
|
||||
*/
|
||||
public DeepSpeechModel(String modelPath) {
|
||||
public STTModel(String modelPath) {
|
||||
this._mspp = impl.new_modelstatep();
|
||||
evaluateErrorCode(impl.CreateModel(modelPath, this._mspp));
|
||||
this._msp = impl.modelstatep_value(this._mspp);
|
||||
|
@ -107,7 +107,7 @@ public class DeepSpeechModel {
|
|||
}
|
||||
|
||||
/*
|
||||
* @brief Use the DeepSpeech model to perform Speech-To-Text.
|
||||
* @brief Use the STT model to perform Speech-To-Text.
|
||||
*
|
||||
* @param buffer A 16-bit, mono raw audio signal at the appropriate
|
||||
* sample rate (matching what the model was trained on).
|
||||
|
@ -120,7 +120,7 @@ public class DeepSpeechModel {
|
|||
}
|
||||
|
||||
/**
|
||||
* @brief Use the DeepSpeech model to perform Speech-To-Text and output metadata
|
||||
* @brief Use the STT model to perform Speech-To-Text and output metadata
|
||||
* about the results.
|
||||
*
|
||||
* @param buffer A 16-bit, mono raw audio signal at the appropriate
|
||||
|
@ -144,10 +144,10 @@ public class DeepSpeechModel {
|
|||
*
|
||||
* @throws RuntimeException on failure.
|
||||
*/
|
||||
public DeepSpeechStreamingState createStream() {
|
||||
public STTStreamingState createStream() {
|
||||
SWIGTYPE_p_p_StreamingState ssp = impl.new_streamingstatep();
|
||||
evaluateErrorCode(impl.CreateStream(this._msp, ssp));
|
||||
return new DeepSpeechStreamingState(impl.streamingstatep_value(ssp));
|
||||
return new STTStreamingState(impl.streamingstatep_value(ssp));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -158,7 +158,7 @@ public class DeepSpeechModel {
|
|||
* appropriate sample rate (matching what the model was trained on).
|
||||
* @param buffer_size The number of samples in @p buffer.
|
||||
*/
|
||||
public void feedAudioContent(DeepSpeechStreamingState ctx, short[] buffer, int buffer_size) {
|
||||
public void feedAudioContent(STTStreamingState ctx, short[] buffer, int buffer_size) {
|
||||
impl.FeedAudioContent(ctx.get(), buffer, buffer_size);
|
||||
}
|
||||
|
||||
|
@ -169,7 +169,7 @@ public class DeepSpeechModel {
|
|||
*
|
||||
* @return The STT intermediate result.
|
||||
*/
|
||||
public String intermediateDecode(DeepSpeechStreamingState ctx) {
|
||||
public String intermediateDecode(STTStreamingState ctx) {
|
||||
return impl.IntermediateDecode(ctx.get());
|
||||
}
|
||||
|
||||
|
@ -181,7 +181,7 @@ public class DeepSpeechModel {
|
|||
*
|
||||
* @return The STT intermediate result.
|
||||
*/
|
||||
public Metadata intermediateDecodeWithMetadata(DeepSpeechStreamingState ctx, int num_results) {
|
||||
public Metadata intermediateDecodeWithMetadata(STTStreamingState ctx, int num_results) {
|
||||
return impl.IntermediateDecodeWithMetadata(ctx.get(), num_results);
|
||||
}
|
||||
|
||||
|
@ -195,7 +195,7 @@ public class DeepSpeechModel {
|
|||
*
|
||||
* @note This method will free the state pointer (@p ctx).
|
||||
*/
|
||||
public String finishStream(DeepSpeechStreamingState ctx) {
|
||||
public String finishStream(STTStreamingState ctx) {
|
||||
return impl.FinishStream(ctx.get());
|
||||
}
|
||||
|
||||
|
@ -212,7 +212,7 @@ public class DeepSpeechModel {
|
|||
*
|
||||
* @note This method will free the state pointer (@p ctx).
|
||||
*/
|
||||
public Metadata finishStreamWithMetadata(DeepSpeechStreamingState ctx, int num_results) {
|
||||
public Metadata finishStreamWithMetadata(STTStreamingState ctx, int num_results) {
|
||||
return impl.FinishStreamWithMetadata(ctx.get(), num_results);
|
||||
}
|
||||
/**
|
|
@ -1,9 +1,9 @@
|
|||
package ai.coqui.libstt;
|
||||
|
||||
public final class DeepSpeechStreamingState {
|
||||
public final class STTStreamingState {
|
||||
private SWIGTYPE_p_StreamingState _sp;
|
||||
|
||||
public DeepSpeechStreamingState(SWIGTYPE_p_StreamingState sp) {
|
||||
public STTStreamingState(SWIGTYPE_p_StreamingState sp) {
|
||||
this._sp = sp;
|
||||
}
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
/* ----------------------------------------------------------------------------
|
||||
* This file was automatically generated by SWIG (http://www.swig.org).
|
||||
* Version 4.0.1
|
||||
* Version 4.0.2
|
||||
*
|
||||
* Do not make changes to this file unless you know what you are doing--modify
|
||||
* the SWIG interface file instead.
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* ----------------------------------------------------------------------------
|
||||
* This file was automatically generated by SWIG (http://www.swig.org).
|
||||
* Version 4.0.1
|
||||
* Version 4.0.2
|
||||
*
|
||||
* Do not make changes to this file unless you know what you are doing--modify
|
||||
* the SWIG interface file instead.
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* ----------------------------------------------------------------------------
|
||||
* This file was automatically generated by SWIG (http://www.swig.org).
|
||||
* Version 4.0.1
|
||||
* Version 4.0.2
|
||||
*
|
||||
* Do not make changes to this file unless you know what you are doing--modify
|
||||
* the SWIG interface file instead.
|
||||
|
@ -8,7 +8,7 @@
|
|||
|
||||
package ai.coqui.libstt;
|
||||
|
||||
public enum DeepSpeech_Error_Codes {
|
||||
public enum STT_Error_Codes {
|
||||
ERR_OK(0x0000),
|
||||
ERR_NO_MODEL(0x1000),
|
||||
ERR_INVALID_ALPHABET(0x2000),
|
||||
|
@ -37,29 +37,29 @@ public enum DeepSpeech_Error_Codes {
|
|||
return swigValue;
|
||||
}
|
||||
|
||||
public static DeepSpeech_Error_Codes swigToEnum(int swigValue) {
|
||||
DeepSpeech_Error_Codes[] swigValues = DeepSpeech_Error_Codes.class.getEnumConstants();
|
||||
public static STT_Error_Codes swigToEnum(int swigValue) {
|
||||
STT_Error_Codes[] swigValues = STT_Error_Codes.class.getEnumConstants();
|
||||
if (swigValue < swigValues.length && swigValue >= 0 && swigValues[swigValue].swigValue == swigValue)
|
||||
return swigValues[swigValue];
|
||||
for (DeepSpeech_Error_Codes swigEnum : swigValues)
|
||||
for (STT_Error_Codes swigEnum : swigValues)
|
||||
if (swigEnum.swigValue == swigValue)
|
||||
return swigEnum;
|
||||
throw new IllegalArgumentException("No enum " + DeepSpeech_Error_Codes.class + " with value " + swigValue);
|
||||
throw new IllegalArgumentException("No enum " + STT_Error_Codes.class + " with value " + swigValue);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
private DeepSpeech_Error_Codes() {
|
||||
private STT_Error_Codes() {
|
||||
this.swigValue = SwigNext.next++;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
private DeepSpeech_Error_Codes(int swigValue) {
|
||||
private STT_Error_Codes(int swigValue) {
|
||||
this.swigValue = swigValue;
|
||||
SwigNext.next = swigValue+1;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
private DeepSpeech_Error_Codes(DeepSpeech_Error_Codes swigEnum) {
|
||||
private STT_Error_Codes(STT_Error_Codes swigEnum) {
|
||||
this.swigValue = swigEnum.swigValue;
|
||||
SwigNext.next = this.swigValue+1;
|
||||
}
|
|
@ -1,6 +1,6 @@
|
|||
/* ----------------------------------------------------------------------------
|
||||
* This file was automatically generated by SWIG (http://www.swig.org).
|
||||
* Version 4.0.1
|
||||
* Version 4.0.2
|
||||
*
|
||||
* Do not make changes to this file unless you know what you are doing--modify
|
||||
* the SWIG interface file instead.
|
||||
|
|
|
@ -26,18 +26,18 @@ using namespace node;
|
|||
$2 = ($2_ltype)(bufferLength / 2);
|
||||
}
|
||||
|
||||
// apply to DS_FeedAudioContent and DS_SpeechToText
|
||||
// apply to STT_FeedAudioContent and STT_SpeechToText
|
||||
%apply (short* IN_ARRAY1, int DIM1) {(const short* aBuffer, unsigned int aBufferSize)};
|
||||
|
||||
|
||||
// make sure the string returned by SpeechToText is freed
|
||||
%typemap(newfree) char* "DS_FreeString($1);";
|
||||
%typemap(newfree) char* "STT_FreeString($1);";
|
||||
|
||||
%newobject DS_SpeechToText;
|
||||
%newobject DS_IntermediateDecode;
|
||||
%newobject DS_FinishStream;
|
||||
%newobject DS_Version;
|
||||
%newobject DS_ErrorCodeToErrorMessage;
|
||||
%newobject STT_SpeechToText;
|
||||
%newobject STT_IntermediateDecode;
|
||||
%newobject STT_FinishStream;
|
||||
%newobject STT_Version;
|
||||
%newobject STT_ErrorCodeToErrorMessage;
|
||||
|
||||
// convert double pointer retval in CreateModel to an output
|
||||
%typemap(in, numinputs=0) ModelState **retval (ModelState *ret) {
|
||||
|
@ -62,7 +62,7 @@ using namespace node;
|
|||
%typemap(argout) StreamingState **retval {
|
||||
$result = SWIGV8_ARRAY_NEW();
|
||||
SWIGV8_AppendOutput($result, SWIG_From_int(result));
|
||||
// not owned, DS_FinishStream deallocates StreamingState
|
||||
// not owned, STT_FinishStream deallocates StreamingState
|
||||
%append_output(SWIG_NewPointerObj(%as_voidptr(*$1), $*1_descriptor, 0));
|
||||
}
|
||||
|
||||
|
@ -93,6 +93,6 @@ using namespace node;
|
|||
%nodefaultctor TokenMetadata;
|
||||
%nodefaultdtor TokenMetadata;
|
||||
|
||||
%rename ("%(strip:[DS_])s") "";
|
||||
%rename ("%(strip:[STT_])s") "";
|
||||
|
||||
%include "../coqui-stt.h"
|
||||
|
|
|
@ -26,7 +26,7 @@ ModelState::~ModelState()
|
|||
int
|
||||
ModelState::init(const char* model_path)
|
||||
{
|
||||
return DS_ERR_OK;
|
||||
return STT_ERR_OK;
|
||||
}
|
||||
|
||||
char*
|
||||
|
|
|
@ -71,7 +71,7 @@ struct ModelState {
|
|||
*
|
||||
* @return A Metadata struct containing CandidateTranscript structs.
|
||||
* Each represents a candidate transcript, with the first ranked most probable.
|
||||
* The user is responsible for freeing Result by calling DS_FreeMetadata().
|
||||
* The user is responsible for freeing Result by calling STT_FreeMetadata().
|
||||
*/
|
||||
virtual Metadata* decode_metadata(const DecoderState& state,
|
||||
size_t num_results);
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
import_array();
|
||||
%}
|
||||
|
||||
// apply NumPy conversion typemap to DS_FeedAudioContent and DS_SpeechToText
|
||||
// apply NumPy conversion typemap to STT_FeedAudioContent and STT_SpeechToText
|
||||
%apply (short* IN_ARRAY1, int DIM1) {(const short* aBuffer, unsigned int aBufferSize)};
|
||||
|
||||
%typemap(in, numinputs=0) ModelState **retval (ModelState *ret) {
|
||||
|
@ -19,7 +19,7 @@ import_array();
|
|||
}
|
||||
|
||||
%typemap(argout) ModelState **retval {
|
||||
// not owned, Python wrapper in __init__.py calls DS_FreeModel
|
||||
// not owned, Python wrapper in __init__.py calls STT_FreeModel
|
||||
%append_output(SWIG_NewPointerObj(%as_voidptr(*$1), $*1_descriptor, 0));
|
||||
}
|
||||
|
||||
|
@ -29,7 +29,7 @@ import_array();
|
|||
}
|
||||
|
||||
%typemap(argout) StreamingState **retval {
|
||||
// not owned, DS_FinishStream deallocates StreamingState
|
||||
// not owned, STT_FinishStream deallocates StreamingState
|
||||
%append_output(SWIG_NewPointerObj(%as_voidptr(*$1), $*1_descriptor, 0));
|
||||
}
|
||||
|
||||
|
@ -104,7 +104,7 @@ static PyObject *parent_reference() {
|
|||
|
||||
%extend struct Metadata {
|
||||
~Metadata() {
|
||||
DS_FreeMetadata($self);
|
||||
STT_FreeMetadata($self);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -115,14 +115,14 @@ static PyObject *parent_reference() {
|
|||
%nodefaultctor TokenMetadata;
|
||||
%nodefaultdtor TokenMetadata;
|
||||
|
||||
%typemap(newfree) char* "DS_FreeString($1);";
|
||||
%typemap(newfree) char* "STT_FreeString($1);";
|
||||
|
||||
%newobject DS_SpeechToText;
|
||||
%newobject DS_IntermediateDecode;
|
||||
%newobject DS_FinishStream;
|
||||
%newobject DS_Version;
|
||||
%newobject DS_ErrorCodeToErrorMessage;
|
||||
%newobject STT_SpeechToText;
|
||||
%newobject STT_IntermediateDecode;
|
||||
%newobject STT_FinishStream;
|
||||
%newobject STT_Version;
|
||||
%newobject STT_ErrorCodeToErrorMessage;
|
||||
|
||||
%rename ("%(strip:[DS_])s") "";
|
||||
%rename ("%(strip:[STT_])s") "";
|
||||
|
||||
%include "../coqui-stt.h"
|
||||
|
|
|
@ -263,7 +263,7 @@ StreamingState::processBatch(const vector<float>& buf, unsigned int n_steps)
|
|||
}
|
||||
|
||||
int
|
||||
DS_CreateModel(const char* aModelPath,
|
||||
STT_CreateModel(const char* aModelPath,
|
||||
ModelState** retval)
|
||||
{
|
||||
*retval = nullptr;
|
||||
|
@ -279,7 +279,7 @@ DS_CreateModel(const char* aModelPath,
|
|||
|
||||
if (!aModelPath || strlen(aModelPath) < 1) {
|
||||
std::cerr << "No model specified, cannot continue." << std::endl;
|
||||
return DS_ERR_NO_MODEL;
|
||||
return STT_ERR_NO_MODEL;
|
||||
}
|
||||
|
||||
std::unique_ptr<ModelState> model(
|
||||
|
@ -292,58 +292,58 @@ DS_CreateModel(const char* aModelPath,
|
|||
|
||||
if (!model) {
|
||||
std::cerr << "Could not allocate model state." << std::endl;
|
||||
return DS_ERR_FAIL_CREATE_MODEL;
|
||||
return STT_ERR_FAIL_CREATE_MODEL;
|
||||
}
|
||||
|
||||
int err = model->init(aModelPath);
|
||||
if (err != DS_ERR_OK) {
|
||||
if (err != STT_ERR_OK) {
|
||||
return err;
|
||||
}
|
||||
|
||||
*retval = model.release();
|
||||
return DS_ERR_OK;
|
||||
return STT_ERR_OK;
|
||||
}
|
||||
|
||||
unsigned int
|
||||
DS_GetModelBeamWidth(const ModelState* aCtx)
|
||||
STT_GetModelBeamWidth(const ModelState* aCtx)
|
||||
{
|
||||
return aCtx->beam_width_;
|
||||
}
|
||||
|
||||
int
|
||||
DS_SetModelBeamWidth(ModelState* aCtx, unsigned int aBeamWidth)
|
||||
STT_SetModelBeamWidth(ModelState* aCtx, unsigned int aBeamWidth)
|
||||
{
|
||||
aCtx->beam_width_ = aBeamWidth;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
DS_GetModelSampleRate(const ModelState* aCtx)
|
||||
STT_GetModelSampleRate(const ModelState* aCtx)
|
||||
{
|
||||
return aCtx->sample_rate_;
|
||||
}
|
||||
|
||||
void
|
||||
DS_FreeModel(ModelState* ctx)
|
||||
STT_FreeModel(ModelState* ctx)
|
||||
{
|
||||
delete ctx;
|
||||
}
|
||||
|
||||
int
|
||||
DS_EnableExternalScorer(ModelState* aCtx,
|
||||
STT_EnableExternalScorer(ModelState* aCtx,
|
||||
const char* aScorerPath)
|
||||
{
|
||||
std::unique_ptr<Scorer> scorer(new Scorer());
|
||||
int err = scorer->init(aScorerPath, aCtx->alphabet_);
|
||||
if (err != 0) {
|
||||
return DS_ERR_INVALID_SCORER;
|
||||
return STT_ERR_INVALID_SCORER;
|
||||
}
|
||||
aCtx->scorer_ = std::move(scorer);
|
||||
return DS_ERR_OK;
|
||||
return STT_ERR_OK;
|
||||
}
|
||||
|
||||
int
|
||||
DS_AddHotWord(ModelState* aCtx,
|
||||
STT_AddHotWord(ModelState* aCtx,
|
||||
const char* word,
|
||||
float boost)
|
||||
{
|
||||
|
@ -352,15 +352,15 @@ DS_AddHotWord(ModelState* aCtx,
|
|||
aCtx->hot_words_.insert( std::pair<std::string,float> (word, boost) );
|
||||
const int size_after = aCtx->hot_words_.size();
|
||||
if (size_before == size_after) {
|
||||
return DS_ERR_FAIL_INSERT_HOTWORD;
|
||||
return STT_ERR_FAIL_INSERT_HOTWORD;
|
||||
}
|
||||
return DS_ERR_OK;
|
||||
return STT_ERR_OK;
|
||||
}
|
||||
return DS_ERR_SCORER_NOT_ENABLED;
|
||||
return STT_ERR_SCORER_NOT_ENABLED;
|
||||
}
|
||||
|
||||
int
|
||||
DS_EraseHotWord(ModelState* aCtx,
|
||||
STT_EraseHotWord(ModelState* aCtx,
|
||||
const char* word)
|
||||
{
|
||||
if (aCtx->scorer_) {
|
||||
|
@ -368,50 +368,50 @@ DS_EraseHotWord(ModelState* aCtx,
|
|||
int err = aCtx->hot_words_.erase(word);
|
||||
const int size_after = aCtx->hot_words_.size();
|
||||
if (size_before == size_after) {
|
||||
return DS_ERR_FAIL_ERASE_HOTWORD;
|
||||
return STT_ERR_FAIL_ERASE_HOTWORD;
|
||||
}
|
||||
return DS_ERR_OK;
|
||||
return STT_ERR_OK;
|
||||
}
|
||||
return DS_ERR_SCORER_NOT_ENABLED;
|
||||
return STT_ERR_SCORER_NOT_ENABLED;
|
||||
}
|
||||
|
||||
int
|
||||
DS_ClearHotWords(ModelState* aCtx)
|
||||
STT_ClearHotWords(ModelState* aCtx)
|
||||
{
|
||||
if (aCtx->scorer_) {
|
||||
aCtx->hot_words_.clear();
|
||||
const int size_after = aCtx->hot_words_.size();
|
||||
if (size_after != 0) {
|
||||
return DS_ERR_FAIL_CLEAR_HOTWORD;
|
||||
return STT_ERR_FAIL_CLEAR_HOTWORD;
|
||||
}
|
||||
return DS_ERR_OK;
|
||||
return STT_ERR_OK;
|
||||
}
|
||||
return DS_ERR_SCORER_NOT_ENABLED;
|
||||
return STT_ERR_SCORER_NOT_ENABLED;
|
||||
}
|
||||
|
||||
int
|
||||
DS_DisableExternalScorer(ModelState* aCtx)
|
||||
STT_DisableExternalScorer(ModelState* aCtx)
|
||||
{
|
||||
if (aCtx->scorer_) {
|
||||
aCtx->scorer_.reset();
|
||||
return DS_ERR_OK;
|
||||
return STT_ERR_OK;
|
||||
}
|
||||
return DS_ERR_SCORER_NOT_ENABLED;
|
||||
return STT_ERR_SCORER_NOT_ENABLED;
|
||||
}
|
||||
|
||||
int DS_SetScorerAlphaBeta(ModelState* aCtx,
|
||||
int STT_SetScorerAlphaBeta(ModelState* aCtx,
|
||||
float aAlpha,
|
||||
float aBeta)
|
||||
{
|
||||
if (aCtx->scorer_) {
|
||||
aCtx->scorer_->reset_params(aAlpha, aBeta);
|
||||
return DS_ERR_OK;
|
||||
return STT_ERR_OK;
|
||||
}
|
||||
return DS_ERR_SCORER_NOT_ENABLED;
|
||||
return STT_ERR_SCORER_NOT_ENABLED;
|
||||
}
|
||||
|
||||
int
|
||||
DS_CreateStream(ModelState* aCtx,
|
||||
STT_CreateStream(ModelState* aCtx,
|
||||
StreamingState** retval)
|
||||
{
|
||||
*retval = nullptr;
|
||||
|
@ -419,7 +419,7 @@ DS_CreateStream(ModelState* aCtx,
|
|||
std::unique_ptr<StreamingState> ctx(new StreamingState());
|
||||
if (!ctx) {
|
||||
std::cerr << "Could not allocate streaming state." << std::endl;
|
||||
return DS_ERR_FAIL_CREATE_STREAM;
|
||||
return STT_ERR_FAIL_CREATE_STREAM;
|
||||
}
|
||||
|
||||
ctx->audio_buffer_.reserve(aCtx->audio_win_len_);
|
||||
|
@ -441,11 +441,11 @@ DS_CreateStream(ModelState* aCtx,
|
|||
aCtx->hot_words_);
|
||||
|
||||
*retval = ctx.release();
|
||||
return DS_ERR_OK;
|
||||
return STT_ERR_OK;
|
||||
}
|
||||
|
||||
void
|
||||
DS_FeedAudioContent(StreamingState* aSctx,
|
||||
STT_FeedAudioContent(StreamingState* aSctx,
|
||||
const short* aBuffer,
|
||||
unsigned int aBufferSize)
|
||||
{
|
||||
|
@ -453,32 +453,32 @@ DS_FeedAudioContent(StreamingState* aSctx,
|
|||
}
|
||||
|
||||
char*
|
||||
DS_IntermediateDecode(const StreamingState* aSctx)
|
||||
STT_IntermediateDecode(const StreamingState* aSctx)
|
||||
{
|
||||
return aSctx->intermediateDecode();
|
||||
}
|
||||
|
||||
Metadata*
|
||||
DS_IntermediateDecodeWithMetadata(const StreamingState* aSctx,
|
||||
STT_IntermediateDecodeWithMetadata(const StreamingState* aSctx,
|
||||
unsigned int aNumResults)
|
||||
{
|
||||
return aSctx->intermediateDecodeWithMetadata(aNumResults);
|
||||
}
|
||||
|
||||
char*
|
||||
DS_FinishStream(StreamingState* aSctx)
|
||||
STT_FinishStream(StreamingState* aSctx)
|
||||
{
|
||||
char* str = aSctx->finishStream();
|
||||
DS_FreeStream(aSctx);
|
||||
STT_FreeStream(aSctx);
|
||||
return str;
|
||||
}
|
||||
|
||||
Metadata*
|
||||
DS_FinishStreamWithMetadata(StreamingState* aSctx,
|
||||
STT_FinishStreamWithMetadata(StreamingState* aSctx,
|
||||
unsigned int aNumResults)
|
||||
{
|
||||
Metadata* result = aSctx->finishStreamWithMetadata(aNumResults);
|
||||
DS_FreeStream(aSctx);
|
||||
STT_FreeStream(aSctx);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -488,41 +488,41 @@ CreateStreamAndFeedAudioContent(ModelState* aCtx,
|
|||
unsigned int aBufferSize)
|
||||
{
|
||||
StreamingState* ctx;
|
||||
int status = DS_CreateStream(aCtx, &ctx);
|
||||
if (status != DS_ERR_OK) {
|
||||
int status = STT_CreateStream(aCtx, &ctx);
|
||||
if (status != STT_ERR_OK) {
|
||||
return nullptr;
|
||||
}
|
||||
DS_FeedAudioContent(ctx, aBuffer, aBufferSize);
|
||||
STT_FeedAudioContent(ctx, aBuffer, aBufferSize);
|
||||
return ctx;
|
||||
}
|
||||
|
||||
char*
|
||||
DS_SpeechToText(ModelState* aCtx,
|
||||
STT_SpeechToText(ModelState* aCtx,
|
||||
const short* aBuffer,
|
||||
unsigned int aBufferSize)
|
||||
{
|
||||
StreamingState* ctx = CreateStreamAndFeedAudioContent(aCtx, aBuffer, aBufferSize);
|
||||
return DS_FinishStream(ctx);
|
||||
return STT_FinishStream(ctx);
|
||||
}
|
||||
|
||||
Metadata*
|
||||
DS_SpeechToTextWithMetadata(ModelState* aCtx,
|
||||
STT_SpeechToTextWithMetadata(ModelState* aCtx,
|
||||
const short* aBuffer,
|
||||
unsigned int aBufferSize,
|
||||
unsigned int aNumResults)
|
||||
{
|
||||
StreamingState* ctx = CreateStreamAndFeedAudioContent(aCtx, aBuffer, aBufferSize);
|
||||
return DS_FinishStreamWithMetadata(ctx, aNumResults);
|
||||
return STT_FinishStreamWithMetadata(ctx, aNumResults);
|
||||
}
|
||||
|
||||
void
|
||||
DS_FreeStream(StreamingState* aSctx)
|
||||
STT_FreeStream(StreamingState* aSctx)
|
||||
{
|
||||
delete aSctx;
|
||||
}
|
||||
|
||||
void
|
||||
DS_FreeMetadata(Metadata* m)
|
||||
STT_FreeMetadata(Metadata* m)
|
||||
{
|
||||
if (m) {
|
||||
for (int i = 0; i < m->num_transcripts; ++i) {
|
||||
|
@ -539,13 +539,13 @@ DS_FreeMetadata(Metadata* m)
|
|||
}
|
||||
|
||||
void
|
||||
DS_FreeString(char* str)
|
||||
STT_FreeString(char* str)
|
||||
{
|
||||
free(str);
|
||||
}
|
||||
|
||||
char*
|
||||
DS_Version()
|
||||
STT_Version()
|
||||
{
|
||||
return strdup(ds_version());
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
#include <string.h>
|
||||
|
||||
char*
|
||||
DS_ErrorCodeToErrorMessage(int aErrorCode)
|
||||
STT_ErrorCodeToErrorMessage(int aErrorCode)
|
||||
{
|
||||
#define RETURN_MESSAGE(NAME, VALUE, DESC) \
|
||||
case NAME: \
|
||||
|
@ -10,7 +10,7 @@ DS_ErrorCodeToErrorMessage(int aErrorCode)
|
|||
|
||||
switch(aErrorCode)
|
||||
{
|
||||
DS_FOR_EACH_ERROR(RETURN_MESSAGE)
|
||||
STT_FOR_EACH_ERROR(RETURN_MESSAGE)
|
||||
default:
|
||||
return strdup("Unknown error, please make sure you are using the correct native binary.");
|
||||
}
|
||||
|
|
|
@ -1,12 +0,0 @@
|
|||
framework module deepspeech_ios {
|
||||
umbrella header "deepspeech_ios.h"
|
||||
|
||||
export *
|
||||
module * { export * }
|
||||
|
||||
explicit module libdeepspeech_Private {
|
||||
header "coqui-stt.h"
|
||||
export *
|
||||
link "deepspeech"
|
||||
}
|
||||
}
|
|
@ -13,7 +13,7 @@ Pod::Spec.new do |s|
|
|||
s.source = { :git => "https://github.com/coqui-ai/STT.git", :tag => "v#{s.version}" }
|
||||
|
||||
# Assuming taskcluster build location. Depending on your Xcode setup, this might be in
|
||||
# build/Release-iphoneos/deepspeech_ios.framework instead.
|
||||
s.vendored_frameworks = "native_client/swift/DerivedData/Build/Products/Release-iphoneos/deepspeech_ios.framework"
|
||||
s.source_files = "native_client/swift/deepspeech_ios/**/*.{h,m,mm,swift}"
|
||||
# build/Release-iphoneos/stt_ios.framework instead.
|
||||
s.vendored_frameworks = "native_client/swift/DerivedData/Build/Products/Release-iphoneos/stt_ios.framework"
|
||||
s.source_files = "native_client/swift/stt_ios/**/*.{h,m,mm,swift}"
|
||||
end
|
|
@ -7,11 +7,11 @@
|
|||
objects = {
|
||||
|
||||
/* Begin PBXBuildFile section */
|
||||
505B136B24960D550007DADA /* deepspeech_ios.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 505B136124960D550007DADA /* deepspeech_ios.framework */; };
|
||||
505B137224960D550007DADA /* deepspeech_ios.h in Headers */ = {isa = PBXBuildFile; fileRef = 505B136424960D550007DADA /* deepspeech_ios.h */; settings = {ATTRIBUTES = (Public, ); }; };
|
||||
505B136B24960D550007DADA /* stt_ios.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 505B136124960D550007DADA /* stt_ios.framework */; };
|
||||
505B137224960D550007DADA /* stt_ios.h in Headers */ = {isa = PBXBuildFile; fileRef = 505B136424960D550007DADA /* stt_ios.h */; settings = {ATTRIBUTES = (Public, ); }; };
|
||||
505B137D24961AF20007DADA /* coqui-stt.h in Headers */ = {isa = PBXBuildFile; fileRef = 505B137C24961AF20007DADA /* coqui-stt.h */; settings = {ATTRIBUTES = (Private, ); }; };
|
||||
505B137F24961BA70007DADA /* DeepSpeech.swift in Sources */ = {isa = PBXBuildFile; fileRef = 505B137E24961BA70007DADA /* DeepSpeech.swift */; };
|
||||
AD2FD0F925678F8800314F2E /* deepspeech_ios.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AD2FD0F825678F8800314F2E /* deepspeech_ios.framework */; };
|
||||
505B137F24961BA70007DADA /* STT.swift in Sources */ = {isa = PBXBuildFile; fileRef = 505B137E24961BA70007DADA /* STT.swift */; };
|
||||
AD2FD0F925678F8800314F2E /* stt_ios.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AD2FD0F825678F8800314F2E /* stt_ios.framework */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXContainerItemProxy section */
|
||||
|
@ -20,7 +20,7 @@
|
|||
containerPortal = 505B135824960D550007DADA /* Project object */;
|
||||
proxyType = 1;
|
||||
remoteGlobalIDString = 505B136024960D550007DADA;
|
||||
remoteInfo = deepspeech_ios;
|
||||
remoteInfo = stt_ios;
|
||||
};
|
||||
/* End PBXContainerItemProxy section */
|
||||
|
||||
|
@ -38,14 +38,14 @@
|
|||
/* End PBXCopyFilesBuildPhase section */
|
||||
|
||||
/* Begin PBXFileReference section */
|
||||
505B136124960D550007DADA /* deepspeech_ios.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = deepspeech_ios.framework; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
505B136424960D550007DADA /* deepspeech_ios.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = deepspeech_ios.h; sourceTree = "<group>"; };
|
||||
505B136124960D550007DADA /* stt_ios.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = stt_ios.framework; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
505B136424960D550007DADA /* stt_ios.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = stt_ios.h; sourceTree = "<group>"; };
|
||||
505B136524960D550007DADA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
|
||||
505B136A24960D550007DADA /* deepspeech_iosTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = deepspeech_iosTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
505B137B249619C90007DADA /* deepspeech_ios.modulemap */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.module-map"; path = deepspeech_ios.modulemap; sourceTree = "<group>"; };
|
||||
505B136A24960D550007DADA /* stt_iosTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = stt_iosTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
505B137B249619C90007DADA /* stt_ios.modulemap */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.module-map"; path = stt_ios.modulemap; sourceTree = "<group>"; };
|
||||
505B137C24961AF20007DADA /* coqui-stt.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = coqui-stt.h; path = ../../coqui-stt.h; sourceTree = "<group>"; };
|
||||
505B137E24961BA70007DADA /* DeepSpeech.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DeepSpeech.swift; sourceTree = "<group>"; };
|
||||
AD2FD0F825678F8800314F2E /* deepspeech_ios.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; path = deepspeech_ios.framework; sourceTree = "<group>"; };
|
||||
505B137E24961BA70007DADA /* STT.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = STT.swift; sourceTree = "<group>"; };
|
||||
AD2FD0F825678F8800314F2E /* stt_ios.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; path = stt_ios.framework; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
/* Begin PBXFrameworksBuildPhase section */
|
||||
|
@ -53,7 +53,7 @@
|
|||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
AD2FD0F925678F8800314F2E /* deepspeech_ios.framework in Frameworks */,
|
||||
AD2FD0F925678F8800314F2E /* stt_ios.framework in Frameworks */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
|
@ -61,7 +61,7 @@
|
|||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
505B136B24960D550007DADA /* deepspeech_ios.framework in Frameworks */,
|
||||
505B136B24960D550007DADA /* stt_ios.framework in Frameworks */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
|
@ -71,7 +71,7 @@
|
|||
505B135724960D550007DADA = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
505B136324960D550007DADA /* deepspeech_ios */,
|
||||
505B136324960D550007DADA /* stt_ios */,
|
||||
505B136224960D550007DADA /* Products */,
|
||||
505B1380249620C60007DADA /* Frameworks */,
|
||||
);
|
||||
|
@ -80,28 +80,28 @@
|
|||
505B136224960D550007DADA /* Products */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
505B136124960D550007DADA /* deepspeech_ios.framework */,
|
||||
505B136A24960D550007DADA /* deepspeech_iosTests.xctest */,
|
||||
505B136124960D550007DADA /* stt_ios.framework */,
|
||||
505B136A24960D550007DADA /* stt_iosTests.xctest */,
|
||||
);
|
||||
name = Products;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
505B136324960D550007DADA /* deepspeech_ios */ = {
|
||||
505B136324960D550007DADA /* stt_ios */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
505B137C24961AF20007DADA /* coqui-stt.h */,
|
||||
505B136424960D550007DADA /* deepspeech_ios.h */,
|
||||
505B137E24961BA70007DADA /* DeepSpeech.swift */,
|
||||
505B137B249619C90007DADA /* deepspeech_ios.modulemap */,
|
||||
505B136424960D550007DADA /* stt_ios.h */,
|
||||
505B137E24961BA70007DADA /* STT.swift */,
|
||||
505B137B249619C90007DADA /* stt_ios.modulemap */,
|
||||
505B136524960D550007DADA /* Info.plist */,
|
||||
);
|
||||
path = deepspeech_ios;
|
||||
path = stt_ios;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
505B1380249620C60007DADA /* Frameworks */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
AD2FD0F825678F8800314F2E /* deepspeech_ios.framework */,
|
||||
AD2FD0F825678F8800314F2E /* stt_ios.framework */,
|
||||
);
|
||||
name = Frameworks;
|
||||
sourceTree = "<group>";
|
||||
|
@ -113,7 +113,7 @@
|
|||
isa = PBXHeadersBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
505B137224960D550007DADA /* deepspeech_ios.h in Headers */,
|
||||
505B137224960D550007DADA /* stt_ios.h in Headers */,
|
||||
505B137D24961AF20007DADA /* coqui-stt.h in Headers */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
|
@ -121,9 +121,9 @@
|
|||
/* End PBXHeadersBuildPhase section */
|
||||
|
||||
/* Begin PBXNativeTarget section */
|
||||
505B136024960D550007DADA /* deepspeech_ios */ = {
|
||||
505B136024960D550007DADA /* stt_ios */ = {
|
||||
isa = PBXNativeTarget;
|
||||
buildConfigurationList = 505B137524960D550007DADA /* Build configuration list for PBXNativeTarget "deepspeech_ios" */;
|
||||
buildConfigurationList = 505B137524960D550007DADA /* Build configuration list for PBXNativeTarget "stt_ios" */;
|
||||
buildPhases = (
|
||||
505B135C24960D550007DADA /* Headers */,
|
||||
505B135D24960D550007DADA /* Sources */,
|
||||
|
@ -135,14 +135,14 @@
|
|||
);
|
||||
dependencies = (
|
||||
);
|
||||
name = deepspeech_ios;
|
||||
productName = deepspeech_ios;
|
||||
productReference = 505B136124960D550007DADA /* deepspeech_ios.framework */;
|
||||
name = stt_ios;
|
||||
productName = stt_ios;
|
||||
productReference = 505B136124960D550007DADA /* stt_ios.framework */;
|
||||
productType = "com.apple.product-type.framework";
|
||||
};
|
||||
505B136924960D550007DADA /* deepspeech_iosTests */ = {
|
||||
505B136924960D550007DADA /* stt_iosTests */ = {
|
||||
isa = PBXNativeTarget;
|
||||
buildConfigurationList = 505B137824960D550007DADA /* Build configuration list for PBXNativeTarget "deepspeech_iosTests" */;
|
||||
buildConfigurationList = 505B137824960D550007DADA /* Build configuration list for PBXNativeTarget "stt_iosTests" */;
|
||||
buildPhases = (
|
||||
505B136624960D550007DADA /* Sources */,
|
||||
505B136724960D550007DADA /* Frameworks */,
|
||||
|
@ -153,9 +153,9 @@
|
|||
dependencies = (
|
||||
505B136D24960D550007DADA /* PBXTargetDependency */,
|
||||
);
|
||||
name = deepspeech_iosTests;
|
||||
productName = deepspeech_iosTests;
|
||||
productReference = 505B136A24960D550007DADA /* deepspeech_iosTests.xctest */;
|
||||
name = stt_iosTests;
|
||||
productName = stt_iosTests;
|
||||
productReference = 505B136A24960D550007DADA /* stt_iosTests.xctest */;
|
||||
productType = "com.apple.product-type.bundle.unit-test";
|
||||
};
|
||||
/* End PBXNativeTarget section */
|
||||
|
@ -177,7 +177,7 @@
|
|||
};
|
||||
};
|
||||
};
|
||||
buildConfigurationList = 505B135B24960D550007DADA /* Build configuration list for PBXProject "deepspeech_ios" */;
|
||||
buildConfigurationList = 505B135B24960D550007DADA /* Build configuration list for PBXProject "stt_ios" */;
|
||||
compatibilityVersion = "Xcode 9.3";
|
||||
developmentRegion = en;
|
||||
hasScannedForEncodings = 0;
|
||||
|
@ -190,8 +190,8 @@
|
|||
projectDirPath = "";
|
||||
projectRoot = "";
|
||||
targets = (
|
||||
505B136024960D550007DADA /* deepspeech_ios */,
|
||||
505B136924960D550007DADA /* deepspeech_iosTests */,
|
||||
505B136024960D550007DADA /* stt_ios */,
|
||||
505B136924960D550007DADA /* stt_iosTests */,
|
||||
);
|
||||
};
|
||||
/* End PBXProject section */
|
||||
|
@ -218,7 +218,7 @@
|
|||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
505B137F24961BA70007DADA /* DeepSpeech.swift in Sources */,
|
||||
505B137F24961BA70007DADA /* STT.swift in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
|
@ -234,7 +234,7 @@
|
|||
/* Begin PBXTargetDependency section */
|
||||
505B136D24960D550007DADA /* PBXTargetDependency */ = {
|
||||
isa = PBXTargetDependency;
|
||||
target = 505B136024960D550007DADA /* deepspeech_ios */;
|
||||
target = 505B136024960D550007DADA /* stt_ios */;
|
||||
targetProxy = 505B136C24960D550007DADA /* PBXContainerItemProxy */;
|
||||
};
|
||||
/* End PBXTargetDependency section */
|
||||
|
@ -383,7 +383,7 @@
|
|||
"$(inherited)",
|
||||
"$(PROJECT_DIR)",
|
||||
);
|
||||
INFOPLIST_FILE = deepspeech_ios/Info.plist;
|
||||
INFOPLIST_FILE = stt_ios/Info.plist;
|
||||
INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
|
||||
LD_RUNPATH_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
|
@ -392,12 +392,12 @@
|
|||
);
|
||||
LIBRARY_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
"$(PROJECT_DIR)/deepspeech_ios",
|
||||
"$(PROJECT_DIR)/stt_ios",
|
||||
"$(PROJECT_DIR)",
|
||||
);
|
||||
MODULEMAP_FILE = deepspeech_ios/deepspeech_ios.modulemap;
|
||||
MODULEMAP_FILE = stt_ios/stt_ios.modulemap;
|
||||
OTHER_LDFLAGS = "-lstdc++";
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "org.deepspeech.deepspeech-ios";
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "ai.coqui.stt-ios";
|
||||
PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)";
|
||||
SKIP_INSTALL = YES;
|
||||
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
|
||||
|
@ -423,7 +423,7 @@
|
|||
"$(inherited)",
|
||||
"$(PROJECT_DIR)",
|
||||
);
|
||||
INFOPLIST_FILE = deepspeech_ios/Info.plist;
|
||||
INFOPLIST_FILE = stt_ios/Info.plist;
|
||||
INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
|
||||
LD_RUNPATH_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
|
@ -432,12 +432,12 @@
|
|||
);
|
||||
LIBRARY_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
"$(PROJECT_DIR)/deepspeech_ios",
|
||||
"$(PROJECT_DIR)/stt_ios",
|
||||
"$(PROJECT_DIR)",
|
||||
);
|
||||
MODULEMAP_FILE = deepspeech_ios/deepspeech_ios.modulemap;
|
||||
MODULEMAP_FILE = stt_ios/stt_ios.modulemap;
|
||||
OTHER_LDFLAGS = "-lstdc++";
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "org.deepspeech.deepspeech-ios";
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "ai.coqui.stt-ios";
|
||||
PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)";
|
||||
SKIP_INSTALL = YES;
|
||||
SWIFT_VERSION = 5.0;
|
||||
|
@ -450,13 +450,13 @@
|
|||
buildSettings = {
|
||||
ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES;
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
INFOPLIST_FILE = deepspeech_iosTests/Info.plist;
|
||||
INFOPLIST_FILE = stt_iosTests/Info.plist;
|
||||
LD_RUNPATH_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
"@executable_path/Frameworks",
|
||||
"@loader_path/Frameworks",
|
||||
);
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "org.deepspeech.deepspeech-iosTests";
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "ai.coqui.stt-iosTests";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_VERSION = 5.0;
|
||||
TARGETED_DEVICE_FAMILY = "1,2";
|
||||
|
@ -468,13 +468,13 @@
|
|||
buildSettings = {
|
||||
ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES;
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
INFOPLIST_FILE = deepspeech_iosTests/Info.plist;
|
||||
INFOPLIST_FILE = stt_iosTests/Info.plist;
|
||||
LD_RUNPATH_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
"@executable_path/Frameworks",
|
||||
"@loader_path/Frameworks",
|
||||
);
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "org.deepspeech.deepspeech-iosTests";
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "ai.coqui.stt-iosTests";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_VERSION = 5.0;
|
||||
TARGETED_DEVICE_FAMILY = "1,2";
|
||||
|
@ -484,7 +484,7 @@
|
|||
/* End XCBuildConfiguration section */
|
||||
|
||||
/* Begin XCConfigurationList section */
|
||||
505B135B24960D550007DADA /* Build configuration list for PBXProject "deepspeech_ios" */ = {
|
||||
505B135B24960D550007DADA /* Build configuration list for PBXProject "stt_ios" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
505B137324960D550007DADA /* Debug */,
|
||||
|
@ -493,7 +493,7 @@
|
|||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
505B137524960D550007DADA /* Build configuration list for PBXNativeTarget "deepspeech_ios" */ = {
|
||||
505B137524960D550007DADA /* Build configuration list for PBXNativeTarget "stt_ios" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
505B137624960D550007DADA /* Debug */,
|
||||
|
@ -502,7 +502,7 @@
|
|||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
505B137824960D550007DADA /* Build configuration list for PBXNativeTarget "deepspeech_iosTests" */ = {
|
||||
505B137824960D550007DADA /* Build configuration list for PBXNativeTarget "stt_iosTests" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
505B137924960D550007DADA /* Debug */,
|
|
@ -2,6 +2,6 @@
|
|||
<Workspace
|
||||
version = "1.0">
|
||||
<FileRef
|
||||
location = "self:deepspeech_ios.xcodeproj">
|
||||
location = "self:stt_ios.xcodeproj">
|
||||
</FileRef>
|
||||
</Workspace>
|
|
@ -15,9 +15,9 @@
|
|||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "505B136024960D550007DADA"
|
||||
BuildableName = "deepspeech_ios.framework"
|
||||
BlueprintName = "deepspeech_ios"
|
||||
ReferencedContainer = "container:deepspeech_ios.xcodeproj">
|
||||
BuildableName = "stt_ios.framework"
|
||||
BlueprintName = "stt_ios"
|
||||
ReferencedContainer = "container:stt_ios.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildActionEntry>
|
||||
</BuildActionEntries>
|
||||
|
@ -33,9 +33,9 @@
|
|||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "505B136924960D550007DADA"
|
||||
BuildableName = "deepspeech_iosTests.xctest"
|
||||
BlueprintName = "deepspeech_iosTests"
|
||||
ReferencedContainer = "container:deepspeech_ios.xcodeproj">
|
||||
BuildableName = "stt_iosTests.xctest"
|
||||
BlueprintName = "stt_iosTests"
|
||||
ReferencedContainer = "container:stt_ios.xcodeproj">
|
||||
</BuildableReference>
|
||||
</TestableReference>
|
||||
</Testables>
|
||||
|
@ -62,9 +62,9 @@
|
|||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "505B136024960D550007DADA"
|
||||
BuildableName = "deepspeech_ios.framework"
|
||||
BlueprintName = "deepspeech_ios"
|
||||
ReferencedContainer = "container:deepspeech_ios.xcodeproj">
|
||||
BuildableName = "stt_ios.framework"
|
||||
BlueprintName = "stt_ios"
|
||||
ReferencedContainer = "container:stt_ios.xcodeproj">
|
||||
</BuildableReference>
|
||||
</MacroExpansion>
|
||||
</ProfileAction>
|
|
@ -2,9 +2,9 @@
|
|||
<Workspace
|
||||
version = "1.0">
|
||||
<FileRef
|
||||
location = "group:deepspeech_ios.xcodeproj">
|
||||
location = "group:stt_ios.xcodeproj">
|
||||
</FileRef>
|
||||
<FileRef
|
||||
location = "group:deepspeech_ios_test.xcodeproj">
|
||||
location = "group:stt_ios_test.xcodeproj">
|
||||
</FileRef>
|
||||
</Workspace>
|
|
@ -1,14 +1,14 @@
|
|||
//
|
||||
// DeepSpeech.swift
|
||||
// deepspeech_ios
|
||||
// STT.swift
|
||||
// stt_ios
|
||||
//
|
||||
// Created by Reuben Morais on 14.06.20.
|
||||
// Copyright © 2020 Mozilla
|
||||
// Copyright © 2021 Coqui GmbH
|
||||
|
||||
import deepspeech_ios.libdeepspeech_Private
|
||||
import stt_ios.libstt_Private
|
||||
|
||||
public enum DeepSpeechError: Error {
|
||||
public enum STTError: Error {
|
||||
// Should be kept in sync with coqui-stt.h
|
||||
case noModel(errorCode: Int32)
|
||||
case invalidAlphabet(errorCode: Int32)
|
||||
|
@ -35,7 +35,7 @@ public enum DeepSpeechError: Error {
|
|||
case invalidErrorCode(errorCode: Int32)
|
||||
}
|
||||
|
||||
extension DeepSpeechError : LocalizedError {
|
||||
extension STTError : LocalizedError {
|
||||
public var errorDescription: String? {
|
||||
switch self {
|
||||
case .noModel(let errorCode),
|
||||
|
@ -58,66 +58,66 @@ extension DeepSpeechError : LocalizedError {
|
|||
.failCreateSess(let errorCode),
|
||||
.failCreateModel(let errorCode),
|
||||
.invalidErrorCode(let errorCode):
|
||||
let result = DS_ErrorCodeToErrorMessage(errorCode)
|
||||
defer { DS_FreeString(result) }
|
||||
let result = STT_ErrorCodeToErrorMessage(errorCode)
|
||||
defer { STT_FreeString(result) }
|
||||
return String(cString: result!)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private func errorCodeToEnum(errorCode: Int32) -> DeepSpeechError {
|
||||
private func errorCodeToEnum(errorCode: Int32) -> STTError {
|
||||
switch Int(errorCode) {
|
||||
case Int(DS_ERR_NO_MODEL.rawValue):
|
||||
return DeepSpeechError.noModel(errorCode: errorCode)
|
||||
case Int(DS_ERR_INVALID_ALPHABET.rawValue):
|
||||
return DeepSpeechError.invalidAlphabet(errorCode: errorCode)
|
||||
case Int(DS_ERR_INVALID_SHAPE.rawValue):
|
||||
return DeepSpeechError.invalidShape(errorCode: errorCode)
|
||||
case Int(DS_ERR_INVALID_SCORER.rawValue):
|
||||
return DeepSpeechError.invalidScorer(errorCode: errorCode)
|
||||
case Int(DS_ERR_MODEL_INCOMPATIBLE.rawValue):
|
||||
return DeepSpeechError.modelIncompatible(errorCode: errorCode)
|
||||
case Int(DS_ERR_SCORER_NOT_ENABLED.rawValue):
|
||||
return DeepSpeechError.scorerNotEnabled(errorCode: errorCode)
|
||||
case Int(DS_ERR_SCORER_UNREADABLE.rawValue):
|
||||
return DeepSpeechError.scorerUnreadable(errorCode: errorCode)
|
||||
case Int(DS_ERR_SCORER_INVALID_LM.rawValue):
|
||||
return DeepSpeechError.scorerInvalidLm(errorCode: errorCode)
|
||||
case Int(DS_ERR_SCORER_NO_TRIE.rawValue):
|
||||
return DeepSpeechError.scorerNoTrie(errorCode: errorCode)
|
||||
case Int(DS_ERR_SCORER_INVALID_TRIE.rawValue):
|
||||
return DeepSpeechError.scorerInvalidTrie(errorCode: errorCode)
|
||||
case Int(DS_ERR_SCORER_VERSION_MISMATCH.rawValue):
|
||||
return DeepSpeechError.scorerVersionMismatch(errorCode: errorCode)
|
||||
case Int(DS_ERR_FAIL_INIT_MMAP.rawValue):
|
||||
return DeepSpeechError.failInitMmap(errorCode: errorCode)
|
||||
case Int(DS_ERR_FAIL_INIT_SESS.rawValue):
|
||||
return DeepSpeechError.failInitSess(errorCode: errorCode)
|
||||
case Int(DS_ERR_FAIL_INTERPRETER.rawValue):
|
||||
return DeepSpeechError.failInterpreter(errorCode: errorCode)
|
||||
case Int(DS_ERR_FAIL_RUN_SESS.rawValue):
|
||||
return DeepSpeechError.failRunSess(errorCode: errorCode)
|
||||
case Int(DS_ERR_FAIL_CREATE_STREAM.rawValue):
|
||||
return DeepSpeechError.failCreateStream(errorCode: errorCode)
|
||||
case Int(DS_ERR_FAIL_READ_PROTOBUF.rawValue):
|
||||
return DeepSpeechError.failReadProtobuf(errorCode: errorCode)
|
||||
case Int(DS_ERR_FAIL_CREATE_SESS.rawValue):
|
||||
return DeepSpeechError.failCreateSess(errorCode: errorCode)
|
||||
case Int(DS_ERR_FAIL_CREATE_MODEL.rawValue):
|
||||
return DeepSpeechError.failCreateModel(errorCode: errorCode)
|
||||
case Int(STT_ERR_NO_MODEL.rawValue):
|
||||
return STTError.noModel(errorCode: errorCode)
|
||||
case Int(STT_ERR_INVALID_ALPHABET.rawValue):
|
||||
return STTError.invalidAlphabet(errorCode: errorCode)
|
||||
case Int(STT_ERR_INVALID_SHAPE.rawValue):
|
||||
return STTError.invalidShape(errorCode: errorCode)
|
||||
case Int(STT_ERR_INVALID_SCORER.rawValue):
|
||||
return STTError.invalidScorer(errorCode: errorCode)
|
||||
case Int(STT_ERR_MODEL_INCOMPATIBLE.rawValue):
|
||||
return STTError.modelIncompatible(errorCode: errorCode)
|
||||
case Int(STT_ERR_SCORER_NOT_ENABLED.rawValue):
|
||||
return STTError.scorerNotEnabled(errorCode: errorCode)
|
||||
case Int(STT_ERR_SCORER_UNREADABLE.rawValue):
|
||||
return STTError.scorerUnreadable(errorCode: errorCode)
|
||||
case Int(STT_ERR_SCORER_INVALID_LM.rawValue):
|
||||
return STTError.scorerInvalidLm(errorCode: errorCode)
|
||||
case Int(STT_ERR_SCORER_NO_TRIE.rawValue):
|
||||
return STTError.scorerNoTrie(errorCode: errorCode)
|
||||
case Int(STT_ERR_SCORER_INVALID_TRIE.rawValue):
|
||||
return STTError.scorerInvalidTrie(errorCode: errorCode)
|
||||
case Int(STT_ERR_SCORER_VERSION_MISMATCH.rawValue):
|
||||
return STTError.scorerVersionMismatch(errorCode: errorCode)
|
||||
case Int(STT_ERR_FAIL_INIT_MMAP.rawValue):
|
||||
return STTError.failInitMmap(errorCode: errorCode)
|
||||
case Int(STT_ERR_FAIL_INIT_SESS.rawValue):
|
||||
return STTError.failInitSess(errorCode: errorCode)
|
||||
case Int(STT_ERR_FAIL_INTERPRETER.rawValue):
|
||||
return STTError.failInterpreter(errorCode: errorCode)
|
||||
case Int(STT_ERR_FAIL_RUN_SESS.rawValue):
|
||||
return STTError.failRunSess(errorCode: errorCode)
|
||||
case Int(STT_ERR_FAIL_CREATE_STREAM.rawValue):
|
||||
return STTError.failCreateStream(errorCode: errorCode)
|
||||
case Int(STT_ERR_FAIL_READ_PROTOBUF.rawValue):
|
||||
return STTError.failReadProtobuf(errorCode: errorCode)
|
||||
case Int(STT_ERR_FAIL_CREATE_SESS.rawValue):
|
||||
return STTError.failCreateSess(errorCode: errorCode)
|
||||
case Int(STT_ERR_FAIL_CREATE_MODEL.rawValue):
|
||||
return STTError.failCreateModel(errorCode: errorCode)
|
||||
default:
|
||||
return DeepSpeechError.invalidErrorCode(errorCode: errorCode)
|
||||
return STTError.invalidErrorCode(errorCode: errorCode)
|
||||
}
|
||||
}
|
||||
|
||||
private func evaluateErrorCode(errorCode: Int32) throws {
|
||||
if errorCode != Int32(DS_ERR_OK.rawValue) {
|
||||
if errorCode != Int32(STT_ERR_OK.rawValue) {
|
||||
throw errorCodeToEnum(errorCode: errorCode)
|
||||
}
|
||||
}
|
||||
|
||||
/// Stores text of an individual token, along with its timing information
|
||||
public struct DeepSpeechTokenMetadata {
|
||||
public struct STTTokenMetadata {
|
||||
/// The text corresponding to this token
|
||||
public let text: String
|
||||
|
||||
|
@ -137,9 +137,9 @@ public struct DeepSpeechTokenMetadata {
|
|||
/** A single transcript computed by the model, including a confidence value and
|
||||
the metadata for its constituent tokens
|
||||
*/
|
||||
public struct DeepSpeechCandidateTranscript {
|
||||
/// Array of DeepSpeechTokenMetadata objects
|
||||
public private(set) var tokens: [DeepSpeechTokenMetadata] = []
|
||||
public struct STTCandidateTranscript {
|
||||
/// Array of STTTokenMetadata objects
|
||||
public private(set) var tokens: [STTTokenMetadata] = []
|
||||
|
||||
/** Approximated confidence value for this transcript. This corresponds to
|
||||
both acoustic model and language model scores that contributed to the
|
||||
|
@ -150,16 +150,16 @@ public struct DeepSpeechCandidateTranscript {
|
|||
internal init(fromInternal: CandidateTranscript) {
|
||||
let tokensBuffer = UnsafeBufferPointer<TokenMetadata>(start: fromInternal.tokens, count: Int(fromInternal.num_tokens))
|
||||
for tok in tokensBuffer {
|
||||
tokens.append(DeepSpeechTokenMetadata(fromInternal: tok))
|
||||
tokens.append(STTTokenMetadata(fromInternal: tok))
|
||||
}
|
||||
confidence = fromInternal.confidence
|
||||
}
|
||||
}
|
||||
|
||||
/// An array of DeepSpeechCandidateTranscript objects computed by the model
|
||||
public struct DeepSpeechMetadata {
|
||||
/// Array of DeepSpeechCandidateTranscript objects
|
||||
public private(set) var transcripts: [DeepSpeechCandidateTranscript] = []
|
||||
/// An array of STTCandidateTranscript objects computed by the model
|
||||
public struct STTMetadata {
|
||||
/// Array of STTCandidateTranscript objects
|
||||
public private(set) var transcripts: [STTCandidateTranscript] = []
|
||||
|
||||
internal init(fromInternal: UnsafeMutablePointer<Metadata>) {
|
||||
let md = fromInternal.pointee
|
||||
|
@ -168,12 +168,12 @@ public struct DeepSpeechMetadata {
|
|||
count: Int(md.num_transcripts))
|
||||
|
||||
for tr in transcriptsBuffer {
|
||||
transcripts.append(DeepSpeechCandidateTranscript(fromInternal: tr))
|
||||
transcripts.append(STTCandidateTranscript(fromInternal: tr))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public class DeepSpeechStream {
|
||||
public class STTStream {
|
||||
private var streamCtx: OpaquePointer!
|
||||
|
||||
internal init(streamContext: OpaquePointer) {
|
||||
|
@ -182,7 +182,7 @@ public class DeepSpeechStream {
|
|||
|
||||
deinit {
|
||||
if streamCtx != nil {
|
||||
DS_FreeStream(streamCtx)
|
||||
STT_FreeStream(streamCtx)
|
||||
streamCtx = nil
|
||||
}
|
||||
}
|
||||
|
@ -212,7 +212,7 @@ public class DeepSpeechStream {
|
|||
public func feedAudioContent(buffer: UnsafeBufferPointer<Int16>) {
|
||||
precondition(streamCtx != nil, "calling method on invalidated Stream")
|
||||
|
||||
DS_FeedAudioContent(streamCtx, buffer.baseAddress, UInt32(buffer.count))
|
||||
STT_FeedAudioContent(streamCtx, buffer.baseAddress, UInt32(buffer.count))
|
||||
}
|
||||
|
||||
/** Compute the intermediate decoding of an ongoing streaming inference.
|
||||
|
@ -224,8 +224,8 @@ public class DeepSpeechStream {
|
|||
public func intermediateDecode() -> String {
|
||||
precondition(streamCtx != nil, "calling method on invalidated Stream")
|
||||
|
||||
let result = DS_IntermediateDecode(streamCtx)
|
||||
defer { DS_FreeString(result) }
|
||||
let result = STT_IntermediateDecode(streamCtx)
|
||||
defer { STT_FreeString(result) }
|
||||
return String(cString: result!)
|
||||
}
|
||||
|
||||
|
@ -239,11 +239,11 @@ public class DeepSpeechStream {
|
|||
- Returns: Metadata struct containing multiple CandidateTranscript structs.
|
||||
Each transcript has per-token metadata including timing information.
|
||||
*/
|
||||
public func intermediateDecodeWithMetadata(numResults: Int) -> DeepSpeechMetadata {
|
||||
public func intermediateDecodeWithMetadata(numResults: Int) -> STTMetadata {
|
||||
precondition(streamCtx != nil, "calling method on invalidated Stream")
|
||||
let result = DS_IntermediateDecodeWithMetadata(streamCtx, UInt32(numResults))!
|
||||
defer { DS_FreeMetadata(result) }
|
||||
return DeepSpeechMetadata(fromInternal: result)
|
||||
let result = STT_IntermediateDecodeWithMetadata(streamCtx, UInt32(numResults))!
|
||||
defer { STT_FreeMetadata(result) }
|
||||
return STTMetadata(fromInternal: result)
|
||||
}
|
||||
|
||||
/** Compute the final decoding of an ongoing streaming inference and return
|
||||
|
@ -258,9 +258,9 @@ public class DeepSpeechStream {
|
|||
public func finishStream() -> String {
|
||||
precondition(streamCtx != nil, "calling method on invalidated Stream")
|
||||
|
||||
let result = DS_FinishStream(streamCtx)
|
||||
let result = STT_FinishStream(streamCtx)
|
||||
defer {
|
||||
DS_FreeString(result)
|
||||
STT_FreeString(result)
|
||||
streamCtx = nil
|
||||
}
|
||||
return String(cString: result!)
|
||||
|
@ -279,42 +279,42 @@ public class DeepSpeechStream {
|
|||
|
||||
- Postcondition: This method will invalidate this streaming context.
|
||||
*/
|
||||
public func finishStreamWithMetadata(numResults: Int) -> DeepSpeechMetadata {
|
||||
public func finishStreamWithMetadata(numResults: Int) -> STTMetadata {
|
||||
precondition(streamCtx != nil, "calling method on invalidated Stream")
|
||||
|
||||
let result = DS_FinishStreamWithMetadata(streamCtx, UInt32(numResults))!
|
||||
defer { DS_FreeMetadata(result) }
|
||||
return DeepSpeechMetadata(fromInternal: result)
|
||||
let result = STT_FinishStreamWithMetadata(streamCtx, UInt32(numResults))!
|
||||
defer { STT_FreeMetadata(result) }
|
||||
return STTMetadata(fromInternal: result)
|
||||
}
|
||||
}
|
||||
|
||||
/// An object providing an interface to a trained DeepSpeech model.
|
||||
public class DeepSpeechModel {
|
||||
/// An object providing an interface to a trained STT model.
|
||||
public class STTModel {
|
||||
private var modelCtx: OpaquePointer!
|
||||
|
||||
/**
|
||||
- Parameter modelPath: The path to the model file.
|
||||
|
||||
- Throws: `DeepSpeechError` on failure.
|
||||
- Throws: `STTError` on failure.
|
||||
*/
|
||||
public init(modelPath: String) throws {
|
||||
let err = DS_CreateModel(modelPath, &modelCtx)
|
||||
let err = STT_CreateModel(modelPath, &modelCtx)
|
||||
try evaluateErrorCode(errorCode: err)
|
||||
}
|
||||
|
||||
deinit {
|
||||
DS_FreeModel(modelCtx)
|
||||
STT_FreeModel(modelCtx)
|
||||
modelCtx = nil
|
||||
}
|
||||
|
||||
/** Get beam width value used by the model. If {@link DS_SetModelBeamWidth}
|
||||
/** Get beam width value used by the model. If {@link STT_SetModelBeamWidth}
|
||||
was not called before, will return the default value loaded from the
|
||||
model file.
|
||||
|
||||
- Returns: Beam width value used by the model.
|
||||
*/
|
||||
public func getBeamWidth() -> Int {
|
||||
return Int(DS_GetModelBeamWidth(modelCtx))
|
||||
return Int(STT_GetModelBeamWidth(modelCtx))
|
||||
}
|
||||
|
||||
/** Set beam width value used by the model.
|
||||
|
@ -323,17 +323,17 @@ public class DeepSpeechModel {
|
|||
width value generates better results at the cost
|
||||
of decoding time.
|
||||
|
||||
- Throws: `DeepSpeechError` on failure.
|
||||
- Throws: `STTError` on failure.
|
||||
*/
|
||||
public func setBeamWidth(beamWidth: Int) throws {
|
||||
let err = DS_SetModelBeamWidth(modelCtx, UInt32(beamWidth))
|
||||
let err = STT_SetModelBeamWidth(modelCtx, UInt32(beamWidth))
|
||||
try evaluateErrorCode(errorCode: err)
|
||||
}
|
||||
|
||||
// The sample rate expected by the model.
|
||||
public var sampleRate: Int {
|
||||
get {
|
||||
return Int(DS_GetModelSampleRate(modelCtx))
|
||||
return Int(STT_GetModelSampleRate(modelCtx))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -341,19 +341,19 @@ public class DeepSpeechModel {
|
|||
|
||||
- Parameter scorerPath: The path to the external scorer file.
|
||||
|
||||
- Throws: `DeepSpeechError` on failure.
|
||||
- Throws: `STTError` on failure.
|
||||
*/
|
||||
public func enableExternalScorer(scorerPath: String) throws {
|
||||
let err = DS_EnableExternalScorer(modelCtx, scorerPath)
|
||||
let err = STT_EnableExternalScorer(modelCtx, scorerPath)
|
||||
try evaluateErrorCode(errorCode: err)
|
||||
}
|
||||
|
||||
/** Disable decoding using an external scorer.
|
||||
|
||||
- Throws: `DeepSpeechError` on failure.
|
||||
- Throws: `STTError` on failure.
|
||||
*/
|
||||
public func disableExternalScorer() throws {
|
||||
let err = DS_DisableExternalScorer(modelCtx)
|
||||
let err = STT_DisableExternalScorer(modelCtx)
|
||||
try evaluateErrorCode(errorCode: err)
|
||||
}
|
||||
|
||||
|
@ -362,14 +362,14 @@ public class DeepSpeechModel {
|
|||
- Parameter alpha: The alpha hyperparameter of the decoder. Language model weight.
|
||||
- Parameter beta: The beta hyperparameter of the decoder. Word insertion weight.
|
||||
|
||||
- Throws: `DeepSpeechError` on failure.
|
||||
- Throws: `STTError` on failure.
|
||||
*/
|
||||
public func setScorerAlphaBeta(alpha: Float, beta: Float) throws {
|
||||
let err = DS_SetScorerAlphaBeta(modelCtx, alpha, beta)
|
||||
let err = STT_SetScorerAlphaBeta(modelCtx, alpha, beta)
|
||||
try evaluateErrorCode(errorCode: err)
|
||||
}
|
||||
|
||||
/** Use the DeepSpeech model to convert speech to text.
|
||||
/** Use the STT model to convert speech to text.
|
||||
|
||||
- Parameter buffer: A 16-bit, mono raw audio signal at the appropriate
|
||||
sample rate (matching what the model was trained on).
|
||||
|
@ -382,7 +382,7 @@ public class DeepSpeechModel {
|
|||
}
|
||||
}
|
||||
|
||||
/** Use the DeepSpeech model to convert speech to text.
|
||||
/** Use the STT model to convert speech to text.
|
||||
|
||||
- Parameter buffer: A 16-bit, mono raw audio signal at the appropriate
|
||||
sample rate (matching what the model was trained on).
|
||||
|
@ -390,65 +390,65 @@ public class DeepSpeechModel {
|
|||
- Returns: The STT result.
|
||||
*/
|
||||
public func speechToText(buffer: UnsafeBufferPointer<Int16>) -> String {
|
||||
let result = DS_SpeechToText(modelCtx, buffer.baseAddress, UInt32(buffer.count))
|
||||
defer { DS_FreeString(result) }
|
||||
let result = STT_SpeechToText(modelCtx, buffer.baseAddress, UInt32(buffer.count))
|
||||
defer { STT_FreeString(result) }
|
||||
return String(cString: result!)
|
||||
}
|
||||
|
||||
/** Use the DeepSpeech model to convert speech to text and output results
|
||||
/** Use the STT model to convert speech to text and output results
|
||||
including metadata.
|
||||
|
||||
- Parameter buffer: A 16-bit, mono raw audio signal at the appropriate
|
||||
sample rate (matching what the model was trained on).
|
||||
- Parameter numResults: The maximum number of DeepSpeechCandidateTranscript
|
||||
- Parameter numResults: The maximum number of STTCandidateTranscript
|
||||
structs to return. Returned value might be smaller than this.
|
||||
|
||||
- Returns: Metadata struct containing multiple CandidateTranscript structs.
|
||||
Each transcript has per-token metadata including timing information.
|
||||
*/
|
||||
public func speechToTextWithMetadata(buffer: Array<Int16>, numResults: Int) -> DeepSpeechMetadata {
|
||||
return buffer.withUnsafeBufferPointer { unsafeBufferPointer -> DeepSpeechMetadata in
|
||||
public func speechToTextWithMetadata(buffer: Array<Int16>, numResults: Int) -> STTMetadata {
|
||||
return buffer.withUnsafeBufferPointer { unsafeBufferPointer -> STTMetadata in
|
||||
return speechToTextWithMetadata(buffer: unsafeBufferPointer, numResults: numResults)
|
||||
}
|
||||
}
|
||||
|
||||
/** Use the DeepSpeech model to convert speech to text and output results
|
||||
/** Use the STT model to convert speech to text and output results
|
||||
including metadata.
|
||||
|
||||
- Parameter buffer: A 16-bit, mono raw audio signal at the appropriate
|
||||
sample rate (matching what the model was trained on).
|
||||
- Parameter numResults: The maximum number of DeepSpeechCandidateTranscript
|
||||
- Parameter numResults: The maximum number of STTCandidateTranscript
|
||||
structs to return. Returned value might be smaller than this.
|
||||
|
||||
- Returns: Metadata struct containing multiple CandidateTranscript structs.
|
||||
Each transcript has per-token metadata including timing information.
|
||||
*/
|
||||
public func speechToTextWithMetadata(buffer: UnsafeBufferPointer<Int16>, numResults: Int) -> DeepSpeechMetadata {
|
||||
let result = DS_SpeechToTextWithMetadata(
|
||||
public func speechToTextWithMetadata(buffer: UnsafeBufferPointer<Int16>, numResults: Int) -> STTMetadata {
|
||||
let result = STT_SpeechToTextWithMetadata(
|
||||
modelCtx,
|
||||
buffer.baseAddress,
|
||||
UInt32(buffer.count),
|
||||
UInt32(numResults))!
|
||||
defer { DS_FreeMetadata(result) }
|
||||
return DeepSpeechMetadata(fromInternal: result)
|
||||
defer { STT_FreeMetadata(result) }
|
||||
return STTMetadata(fromInternal: result)
|
||||
}
|
||||
|
||||
/** Create a new streaming inference state.
|
||||
|
||||
- Returns: DeepSpeechStream object representing the streaming state.
|
||||
- Returns: STTStream object representing the streaming state.
|
||||
|
||||
- Throws: `DeepSpeechError` on failure.
|
||||
- Throws: `STTError` on failure.
|
||||
*/
|
||||
public func createStream() throws -> DeepSpeechStream {
|
||||
public func createStream() throws -> STTStream {
|
||||
var streamContext: OpaquePointer!
|
||||
let err = DS_CreateStream(modelCtx, &streamContext)
|
||||
let err = STT_CreateStream(modelCtx, &streamContext)
|
||||
try evaluateErrorCode(errorCode: err)
|
||||
return DeepSpeechStream(streamContext: streamContext)
|
||||
return STTStream(streamContext: streamContext)
|
||||
}
|
||||
}
|
||||
|
||||
public func DeepSpeechVersion() -> String {
|
||||
let result = DS_Version()
|
||||
defer { DS_FreeString(result) }
|
||||
public func STTVersion() -> String {
|
||||
let result = STT_Version()
|
||||
defer { STT_FreeString(result) }
|
||||
return String(cString: result!)
|
||||
}
|
|
@ -1,6 +1,6 @@
|
|||
//
|
||||
// deepspeech_ios.h
|
||||
// deepspeech_ios
|
||||
// stt_ios.h
|
||||
// stt_ios
|
||||
//
|
||||
// Created by Reuben Morais on 14.06.20.
|
||||
// Copyright © 2020 Mozilla
|
||||
|
@ -8,6 +8,6 @@
|
|||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
// In this header, you should import all the public headers of your framework using statements like #import <deepspeech_ios/PublicHeader.h>
|
||||
// In this header, you should import all the public headers of your framework using statements like #import <stt_ios/PublicHeader.h>
|
||||
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
framework module stt_ios {
|
||||
umbrella header "stt_ios.h"
|
||||
|
||||
export *
|
||||
module * { export * }
|
||||
|
||||
explicit module libstt_Private {
|
||||
header "coqui-stt.h"
|
||||
export *
|
||||
link "stt"
|
||||
}
|
||||
}
|
|
@ -9,16 +9,16 @@
|
|||
/* Begin PBXBuildFile section */
|
||||
504EC34324CF4EFD0073C22E /* SpeechRecognitionImpl.swift in Sources */ = {isa = PBXBuildFile; fileRef = 504EC34124CF4EFD0073C22E /* SpeechRecognitionImpl.swift */; };
|
||||
504EC34424CF4EFD0073C22E /* AudioContext.swift in Sources */ = {isa = PBXBuildFile; fileRef = 504EC34224CF4EFD0073C22E /* AudioContext.swift */; };
|
||||
504EC34524CF4F4F0073C22E /* deepspeech_ios.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
|
||||
507CD3A124B61FE400409BBB /* deepspeech_ios.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */; };
|
||||
504EC34524CF4F4F0073C22E /* stt_ios.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* stt_ios.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
|
||||
507CD3A124B61FE400409BBB /* stt_ios.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* stt_ios.framework */; };
|
||||
50F787F32497683900D52237 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50F787F22497683900D52237 /* AppDelegate.swift */; };
|
||||
50F787F52497683900D52237 /* SceneDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50F787F42497683900D52237 /* SceneDelegate.swift */; };
|
||||
50F787F72497683900D52237 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50F787F62497683900D52237 /* ContentView.swift */; };
|
||||
50F787F92497683A00D52237 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 50F787F82497683A00D52237 /* Assets.xcassets */; };
|
||||
50F787FC2497683A00D52237 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 50F787FB2497683A00D52237 /* Preview Assets.xcassets */; };
|
||||
50F787FF2497683A00D52237 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 50F787FD2497683A00D52237 /* LaunchScreen.storyboard */; };
|
||||
50F7880A2497683A00D52237 /* deepspeech_ios_testTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50F788092497683A00D52237 /* deepspeech_ios_testTests.swift */; };
|
||||
50F788152497683A00D52237 /* deepspeech_ios_testUITests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50F788142497683A00D52237 /* deepspeech_ios_testUITests.swift */; };
|
||||
50F7880A2497683A00D52237 /* stt_ios_testTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50F788092497683A00D52237 /* stt_ios_testTests.swift */; };
|
||||
50F788152497683A00D52237 /* stt_ios_testUITests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 50F788142497683A00D52237 /* stt_ios_testUITests.swift */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXContainerItemProxy section */
|
||||
|
@ -27,14 +27,14 @@
|
|||
containerPortal = 50F787E72497683900D52237 /* Project object */;
|
||||
proxyType = 1;
|
||||
remoteGlobalIDString = 50F787EE2497683900D52237;
|
||||
remoteInfo = deepspeech_ios_test;
|
||||
remoteInfo = stt_ios_test;
|
||||
};
|
||||
50F788112497683A00D52237 /* PBXContainerItemProxy */ = {
|
||||
isa = PBXContainerItemProxy;
|
||||
containerPortal = 50F787E72497683900D52237 /* Project object */;
|
||||
proxyType = 1;
|
||||
remoteGlobalIDString = 50F787EE2497683900D52237;
|
||||
remoteInfo = deepspeech_ios_test;
|
||||
remoteInfo = stt_ios_test;
|
||||
};
|
||||
/* End PBXContainerItemProxy section */
|
||||
|
||||
|
@ -45,7 +45,7 @@
|
|||
dstPath = "";
|
||||
dstSubfolderSpec = 10;
|
||||
files = (
|
||||
504EC34524CF4F4F0073C22E /* deepspeech_ios.framework in Embed Frameworks */,
|
||||
504EC34524CF4F4F0073C22E /* stt_ios.framework in Embed Frameworks */,
|
||||
);
|
||||
name = "Embed Frameworks";
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
|
@ -55,9 +55,9 @@
|
|||
/* Begin PBXFileReference section */
|
||||
504EC34124CF4EFD0073C22E /* SpeechRecognitionImpl.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SpeechRecognitionImpl.swift; sourceTree = "<group>"; };
|
||||
504EC34224CF4EFD0073C22E /* AudioContext.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AudioContext.swift; sourceTree = "<group>"; };
|
||||
507CD3A024B61FE400409BBB /* deepspeech_ios.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = deepspeech_ios.framework; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
507CD3A224B61FEA00409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; path = libdeepspeech.so; sourceTree = "<group>"; };
|
||||
50F787EF2497683900D52237 /* deepspeech_ios_test.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = deepspeech_ios_test.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
507CD3A024B61FE400409BBB /* stt_ios.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = stt_ios.framework; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
507CD3A224B61FEA00409BBB /* libstt.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; path = libstt.so; sourceTree = "<group>"; };
|
||||
50F787EF2497683900D52237 /* stt_ios_test.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = stt_ios_test.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
50F787F22497683900D52237 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
|
||||
50F787F42497683900D52237 /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = "<group>"; };
|
||||
50F787F62497683900D52237 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
|
||||
|
@ -65,11 +65,11 @@
|
|||
50F787FB2497683A00D52237 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
|
||||
50F787FE2497683A00D52237 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
|
||||
50F788002497683A00D52237 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
|
||||
50F788052497683A00D52237 /* deepspeech_ios_testTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = deepspeech_ios_testTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
50F788092497683A00D52237 /* deepspeech_ios_testTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = deepspeech_ios_testTests.swift; sourceTree = "<group>"; };
|
||||
50F788052497683A00D52237 /* stt_ios_testTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = stt_ios_testTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
50F788092497683A00D52237 /* stt_ios_testTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = stt_ios_testTests.swift; sourceTree = "<group>"; };
|
||||
50F7880B2497683A00D52237 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
|
||||
50F788102497683A00D52237 /* deepspeech_ios_testUITests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = deepspeech_ios_testUITests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
50F788142497683A00D52237 /* deepspeech_ios_testUITests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = deepspeech_ios_testUITests.swift; sourceTree = "<group>"; };
|
||||
50F788102497683A00D52237 /* stt_ios_testUITests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = stt_ios_testUITests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
50F788142497683A00D52237 /* stt_ios_testUITests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = stt_ios_testUITests.swift; sourceTree = "<group>"; };
|
||||
50F788162497683A00D52237 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
|
@ -78,7 +78,7 @@
|
|||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
507CD3A124B61FE400409BBB /* deepspeech_ios.framework in Frameworks */,
|
||||
507CD3A124B61FE400409BBB /* stt_ios.framework in Frameworks */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
|
@ -102,8 +102,8 @@
|
|||
50F2B0FC2498D6C7007CD876 /* Frameworks */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
507CD3A224B61FEA00409BBB /* libdeepspeech.so */,
|
||||
507CD3A024B61FE400409BBB /* deepspeech_ios.framework */,
|
||||
507CD3A224B61FEA00409BBB /* libstt.so */,
|
||||
507CD3A024B61FE400409BBB /* stt_ios.framework */,
|
||||
);
|
||||
name = Frameworks;
|
||||
sourceTree = "<group>";
|
||||
|
@ -111,9 +111,9 @@
|
|||
50F787E62497683900D52237 = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
50F787F12497683900D52237 /* deepspeech_ios_test */,
|
||||
50F788082497683A00D52237 /* deepspeech_ios_testTests */,
|
||||
50F788132497683A00D52237 /* deepspeech_ios_testUITests */,
|
||||
50F787F12497683900D52237 /* stt_ios_test */,
|
||||
50F788082497683A00D52237 /* stt_ios_testTests */,
|
||||
50F788132497683A00D52237 /* stt_ios_testUITests */,
|
||||
50F787F02497683900D52237 /* Products */,
|
||||
50F2B0FC2498D6C7007CD876 /* Frameworks */,
|
||||
);
|
||||
|
@ -122,14 +122,14 @@
|
|||
50F787F02497683900D52237 /* Products */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
50F787EF2497683900D52237 /* deepspeech_ios_test.app */,
|
||||
50F788052497683A00D52237 /* deepspeech_ios_testTests.xctest */,
|
||||
50F788102497683A00D52237 /* deepspeech_ios_testUITests.xctest */,
|
||||
50F787EF2497683900D52237 /* stt_ios_test.app */,
|
||||
50F788052497683A00D52237 /* stt_ios_testTests.xctest */,
|
||||
50F788102497683A00D52237 /* stt_ios_testUITests.xctest */,
|
||||
);
|
||||
name = Products;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
50F787F12497683900D52237 /* deepspeech_ios_test */ = {
|
||||
50F787F12497683900D52237 /* stt_ios_test */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
504EC34224CF4EFD0073C22E /* AudioContext.swift */,
|
||||
|
@ -142,7 +142,7 @@
|
|||
50F788002497683A00D52237 /* Info.plist */,
|
||||
50F787FA2497683A00D52237 /* Preview Content */,
|
||||
);
|
||||
path = deepspeech_ios_test;
|
||||
path = stt_ios_test;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
50F787FA2497683A00D52237 /* Preview Content */ = {
|
||||
|
@ -153,30 +153,30 @@
|
|||
path = "Preview Content";
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
50F788082497683A00D52237 /* deepspeech_ios_testTests */ = {
|
||||
50F788082497683A00D52237 /* stt_ios_testTests */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
50F788092497683A00D52237 /* deepspeech_ios_testTests.swift */,
|
||||
50F788092497683A00D52237 /* stt_ios_testTests.swift */,
|
||||
50F7880B2497683A00D52237 /* Info.plist */,
|
||||
);
|
||||
path = deepspeech_ios_testTests;
|
||||
path = stt_ios_testTests;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
50F788132497683A00D52237 /* deepspeech_ios_testUITests */ = {
|
||||
50F788132497683A00D52237 /* stt_ios_testUITests */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
50F788142497683A00D52237 /* deepspeech_ios_testUITests.swift */,
|
||||
50F788142497683A00D52237 /* stt_ios_testUITests.swift */,
|
||||
50F788162497683A00D52237 /* Info.plist */,
|
||||
);
|
||||
path = deepspeech_ios_testUITests;
|
||||
path = stt_ios_testUITests;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
/* End PBXGroup section */
|
||||
|
||||
/* Begin PBXNativeTarget section */
|
||||
50F787EE2497683900D52237 /* deepspeech_ios_test */ = {
|
||||
50F787EE2497683900D52237 /* stt_ios_test */ = {
|
||||
isa = PBXNativeTarget;
|
||||
buildConfigurationList = 50F788192497683A00D52237 /* Build configuration list for PBXNativeTarget "deepspeech_ios_test" */;
|
||||
buildConfigurationList = 50F788192497683A00D52237 /* Build configuration list for PBXNativeTarget "stt_ios_test" */;
|
||||
buildPhases = (
|
||||
50F787EB2497683900D52237 /* Sources */,
|
||||
50F787EC2497683900D52237 /* Frameworks */,
|
||||
|
@ -187,14 +187,14 @@
|
|||
);
|
||||
dependencies = (
|
||||
);
|
||||
name = deepspeech_ios_test;
|
||||
productName = deepspeech_ios_test;
|
||||
productReference = 50F787EF2497683900D52237 /* deepspeech_ios_test.app */;
|
||||
name = stt_ios_test;
|
||||
productName = stt_ios_test;
|
||||
productReference = 50F787EF2497683900D52237 /* stt_ios_test.app */;
|
||||
productType = "com.apple.product-type.application";
|
||||
};
|
||||
50F788042497683A00D52237 /* deepspeech_ios_testTests */ = {
|
||||
50F788042497683A00D52237 /* stt_ios_testTests */ = {
|
||||
isa = PBXNativeTarget;
|
||||
buildConfigurationList = 50F7881C2497683A00D52237 /* Build configuration list for PBXNativeTarget "deepspeech_ios_testTests" */;
|
||||
buildConfigurationList = 50F7881C2497683A00D52237 /* Build configuration list for PBXNativeTarget "stt_ios_testTests" */;
|
||||
buildPhases = (
|
||||
50F788012497683A00D52237 /* Sources */,
|
||||
50F788022497683A00D52237 /* Frameworks */,
|
||||
|
@ -205,14 +205,14 @@
|
|||
dependencies = (
|
||||
50F788072497683A00D52237 /* PBXTargetDependency */,
|
||||
);
|
||||
name = deepspeech_ios_testTests;
|
||||
productName = deepspeech_ios_testTests;
|
||||
productReference = 50F788052497683A00D52237 /* deepspeech_ios_testTests.xctest */;
|
||||
name = stt_ios_testTests;
|
||||
productName = stt_ios_testTests;
|
||||
productReference = 50F788052497683A00D52237 /* stt_ios_testTests.xctest */;
|
||||
productType = "com.apple.product-type.bundle.unit-test";
|
||||
};
|
||||
50F7880F2497683A00D52237 /* deepspeech_ios_testUITests */ = {
|
||||
50F7880F2497683A00D52237 /* stt_ios_testUITests */ = {
|
||||
isa = PBXNativeTarget;
|
||||
buildConfigurationList = 50F7881F2497683A00D52237 /* Build configuration list for PBXNativeTarget "deepspeech_ios_testUITests" */;
|
||||
buildConfigurationList = 50F7881F2497683A00D52237 /* Build configuration list for PBXNativeTarget "stt_ios_testUITests" */;
|
||||
buildPhases = (
|
||||
50F7880C2497683A00D52237 /* Sources */,
|
||||
50F7880D2497683A00D52237 /* Frameworks */,
|
||||
|
@ -223,9 +223,9 @@
|
|||
dependencies = (
|
||||
50F788122497683A00D52237 /* PBXTargetDependency */,
|
||||
);
|
||||
name = deepspeech_ios_testUITests;
|
||||
productName = deepspeech_ios_testUITests;
|
||||
productReference = 50F788102497683A00D52237 /* deepspeech_ios_testUITests.xctest */;
|
||||
name = stt_ios_testUITests;
|
||||
productName = stt_ios_testUITests;
|
||||
productReference = 50F788102497683A00D52237 /* stt_ios_testUITests.xctest */;
|
||||
productType = "com.apple.product-type.bundle.ui-testing";
|
||||
};
|
||||
/* End PBXNativeTarget section */
|
||||
|
@ -251,7 +251,7 @@
|
|||
};
|
||||
};
|
||||
};
|
||||
buildConfigurationList = 50F787EA2497683900D52237 /* Build configuration list for PBXProject "deepspeech_ios_test" */;
|
||||
buildConfigurationList = 50F787EA2497683900D52237 /* Build configuration list for PBXProject "stt_ios_test" */;
|
||||
compatibilityVersion = "Xcode 9.3";
|
||||
developmentRegion = en;
|
||||
hasScannedForEncodings = 0;
|
||||
|
@ -264,9 +264,9 @@
|
|||
projectDirPath = "";
|
||||
projectRoot = "";
|
||||
targets = (
|
||||
50F787EE2497683900D52237 /* deepspeech_ios_test */,
|
||||
50F788042497683A00D52237 /* deepspeech_ios_testTests */,
|
||||
50F7880F2497683A00D52237 /* deepspeech_ios_testUITests */,
|
||||
50F787EE2497683900D52237 /* stt_ios_test */,
|
||||
50F788042497683A00D52237 /* stt_ios_testTests */,
|
||||
50F7880F2497683A00D52237 /* stt_ios_testUITests */,
|
||||
);
|
||||
};
|
||||
/* End PBXProject section */
|
||||
|
@ -315,7 +315,7 @@
|
|||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
50F7880A2497683A00D52237 /* deepspeech_ios_testTests.swift in Sources */,
|
||||
50F7880A2497683A00D52237 /* stt_ios_testTests.swift in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
|
@ -323,7 +323,7 @@
|
|||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
50F788152497683A00D52237 /* deepspeech_ios_testUITests.swift in Sources */,
|
||||
50F788152497683A00D52237 /* stt_ios_testUITests.swift in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
|
@ -332,12 +332,12 @@
|
|||
/* Begin PBXTargetDependency section */
|
||||
50F788072497683A00D52237 /* PBXTargetDependency */ = {
|
||||
isa = PBXTargetDependency;
|
||||
target = 50F787EE2497683900D52237 /* deepspeech_ios_test */;
|
||||
target = 50F787EE2497683900D52237 /* stt_ios_test */;
|
||||
targetProxy = 50F788062497683A00D52237 /* PBXContainerItemProxy */;
|
||||
};
|
||||
50F788122497683A00D52237 /* PBXTargetDependency */ = {
|
||||
isa = PBXTargetDependency;
|
||||
target = 50F787EE2497683900D52237 /* deepspeech_ios_test */;
|
||||
target = 50F787EE2497683900D52237 /* stt_ios_test */;
|
||||
targetProxy = 50F788112497683A00D52237 /* PBXContainerItemProxy */;
|
||||
};
|
||||
/* End PBXTargetDependency section */
|
||||
|
@ -473,11 +473,11 @@
|
|||
buildSettings = {
|
||||
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
DEVELOPMENT_ASSET_PATHS = "\"deepspeech_ios_test/Preview Content\"";
|
||||
DEVELOPMENT_ASSET_PATHS = "\"stt_ios_test/Preview Content\"";
|
||||
DEVELOPMENT_TEAM = AWCG9S27P7;
|
||||
ENABLE_PREVIEWS = YES;
|
||||
FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)";
|
||||
INFOPLIST_FILE = deepspeech_ios_test/Info.plist;
|
||||
INFOPLIST_FILE = stt_ios_test/Info.plist;
|
||||
LD_RUNPATH_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
"@executable_path/Frameworks",
|
||||
|
@ -486,7 +486,7 @@
|
|||
"$(inherited)",
|
||||
"$(PROJECT_DIR)",
|
||||
);
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "org.deepspeech.deepspeech-ios-test";
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "ai.coqui.stt-ios-test";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_VERSION = 5.0;
|
||||
TARGETED_DEVICE_FAMILY = "1,2";
|
||||
|
@ -498,11 +498,11 @@
|
|||
buildSettings = {
|
||||
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
DEVELOPMENT_ASSET_PATHS = "\"deepspeech_ios_test/Preview Content\"";
|
||||
DEVELOPMENT_ASSET_PATHS = "\"stt_ios_test/Preview Content\"";
|
||||
DEVELOPMENT_TEAM = AWCG9S27P7;
|
||||
ENABLE_PREVIEWS = YES;
|
||||
FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)";
|
||||
INFOPLIST_FILE = deepspeech_ios_test/Info.plist;
|
||||
INFOPLIST_FILE = stt_ios_test/Info.plist;
|
||||
LD_RUNPATH_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
"@executable_path/Frameworks",
|
||||
|
@ -511,7 +511,7 @@
|
|||
"$(inherited)",
|
||||
"$(PROJECT_DIR)",
|
||||
);
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "org.deepspeech.deepspeech-ios-test";
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "ai.coqui.stt-ios-test";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_VERSION = 5.0;
|
||||
TARGETED_DEVICE_FAMILY = "1,2";
|
||||
|
@ -524,18 +524,18 @@
|
|||
ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES;
|
||||
BUNDLE_LOADER = "$(TEST_HOST)";
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
INFOPLIST_FILE = deepspeech_ios_testTests/Info.plist;
|
||||
INFOPLIST_FILE = stt_ios_testTests/Info.plist;
|
||||
IPHONEOS_DEPLOYMENT_TARGET = 13.5;
|
||||
LD_RUNPATH_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
"@executable_path/Frameworks",
|
||||
"@loader_path/Frameworks",
|
||||
);
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "org.deepspeech.deepspeech-ios-testTests";
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "ai.coqui.stt-ios-testTests";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_VERSION = 5.0;
|
||||
TARGETED_DEVICE_FAMILY = "1,2";
|
||||
TEST_HOST = "$(BUILT_PRODUCTS_DIR)/deepspeech_ios_test.app/deepspeech_ios_test";
|
||||
TEST_HOST = "$(BUILT_PRODUCTS_DIR)/stt_ios_test.app/stt_ios_test";
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
|
@ -545,18 +545,18 @@
|
|||
ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES;
|
||||
BUNDLE_LOADER = "$(TEST_HOST)";
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
INFOPLIST_FILE = deepspeech_ios_testTests/Info.plist;
|
||||
INFOPLIST_FILE = stt_ios_testTests/Info.plist;
|
||||
IPHONEOS_DEPLOYMENT_TARGET = 13.5;
|
||||
LD_RUNPATH_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
"@executable_path/Frameworks",
|
||||
"@loader_path/Frameworks",
|
||||
);
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "org.deepspeech.deepspeech-ios-testTests";
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "ai.coqui.stt-ios-testTests";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_VERSION = 5.0;
|
||||
TARGETED_DEVICE_FAMILY = "1,2";
|
||||
TEST_HOST = "$(BUILT_PRODUCTS_DIR)/deepspeech_ios_test.app/deepspeech_ios_test";
|
||||
TEST_HOST = "$(BUILT_PRODUCTS_DIR)/stt_ios_test.app/stt_ios_test";
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
|
@ -565,17 +565,17 @@
|
|||
buildSettings = {
|
||||
ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES;
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
INFOPLIST_FILE = deepspeech_ios_testUITests/Info.plist;
|
||||
INFOPLIST_FILE = stt_ios_testUITests/Info.plist;
|
||||
LD_RUNPATH_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
"@executable_path/Frameworks",
|
||||
"@loader_path/Frameworks",
|
||||
);
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "org.deepspeech.deepspeech-ios-testUITests";
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "ai.coqui.stt-ios-testUITests";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_VERSION = 5.0;
|
||||
TARGETED_DEVICE_FAMILY = "1,2";
|
||||
TEST_TARGET_NAME = deepspeech_ios_test;
|
||||
TEST_TARGET_NAME = stt_ios_test;
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
|
@ -584,24 +584,24 @@
|
|||
buildSettings = {
|
||||
ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES;
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
INFOPLIST_FILE = deepspeech_ios_testUITests/Info.plist;
|
||||
INFOPLIST_FILE = stt_ios_testUITests/Info.plist;
|
||||
LD_RUNPATH_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
"@executable_path/Frameworks",
|
||||
"@loader_path/Frameworks",
|
||||
);
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "org.deepspeech.deepspeech-ios-testUITests";
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "ai.coqui.stt-ios-testUITests";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_VERSION = 5.0;
|
||||
TARGETED_DEVICE_FAMILY = "1,2";
|
||||
TEST_TARGET_NAME = deepspeech_ios_test;
|
||||
TEST_TARGET_NAME = stt_ios_test;
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
/* End XCBuildConfiguration section */
|
||||
|
||||
/* Begin XCConfigurationList section */
|
||||
50F787EA2497683900D52237 /* Build configuration list for PBXProject "deepspeech_ios_test" */ = {
|
||||
50F787EA2497683900D52237 /* Build configuration list for PBXProject "stt_ios_test" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
50F788172497683A00D52237 /* Debug */,
|
||||
|
@ -610,7 +610,7 @@
|
|||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
50F788192497683A00D52237 /* Build configuration list for PBXNativeTarget "deepspeech_ios_test" */ = {
|
||||
50F788192497683A00D52237 /* Build configuration list for PBXNativeTarget "stt_ios_test" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
50F7881A2497683A00D52237 /* Debug */,
|
||||
|
@ -619,7 +619,7 @@
|
|||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
50F7881C2497683A00D52237 /* Build configuration list for PBXNativeTarget "deepspeech_ios_testTests" */ = {
|
||||
50F7881C2497683A00D52237 /* Build configuration list for PBXNativeTarget "stt_ios_testTests" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
50F7881D2497683A00D52237 /* Debug */,
|
||||
|
@ -628,7 +628,7 @@
|
|||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
50F7881F2497683A00D52237 /* Build configuration list for PBXNativeTarget "deepspeech_ios_testUITests" */ = {
|
||||
50F7881F2497683A00D52237 /* Build configuration list for PBXNativeTarget "stt_ios_testUITests" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
50F788202497683A00D52237 /* Debug */,
|
|
@ -2,6 +2,6 @@
|
|||
<Workspace
|
||||
version = "1.0">
|
||||
<FileRef
|
||||
location = "self:deepspeech_ios_test.xcodeproj">
|
||||
location = "self:stt_ios_test.xcodeproj">
|
||||
</FileRef>
|
||||
</Workspace>
|
|
@ -15,9 +15,9 @@
|
|||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "50F787EE2497683900D52237"
|
||||
BuildableName = "deepspeech_ios_test.app"
|
||||
BlueprintName = "deepspeech_ios_test"
|
||||
ReferencedContainer = "container:deepspeech_ios_test.xcodeproj">
|
||||
BuildableName = "stt_ios_test.app"
|
||||
BlueprintName = "stt_ios_test"
|
||||
ReferencedContainer = "container:stt_ios_test.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildActionEntry>
|
||||
</BuildActionEntries>
|
||||
|
@ -33,9 +33,9 @@
|
|||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "50F788042497683A00D52237"
|
||||
BuildableName = "deepspeech_ios_testTests.xctest"
|
||||
BlueprintName = "deepspeech_ios_testTests"
|
||||
ReferencedContainer = "container:deepspeech_ios_test.xcodeproj">
|
||||
BuildableName = "stt_ios_testTests.xctest"
|
||||
BlueprintName = "stt_ios_testTests"
|
||||
ReferencedContainer = "container:stt_ios_test.xcodeproj">
|
||||
</BuildableReference>
|
||||
</TestableReference>
|
||||
<TestableReference
|
||||
|
@ -43,9 +43,9 @@
|
|||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "50F7880F2497683A00D52237"
|
||||
BuildableName = "deepspeech_ios_testUITests.xctest"
|
||||
BlueprintName = "deepspeech_ios_testUITests"
|
||||
ReferencedContainer = "container:deepspeech_ios_test.xcodeproj">
|
||||
BuildableName = "stt_ios_testUITests.xctest"
|
||||
BlueprintName = "stt_ios_testUITests"
|
||||
ReferencedContainer = "container:stt_ios_test.xcodeproj">
|
||||
</BuildableReference>
|
||||
</TestableReference>
|
||||
</Testables>
|
||||
|
@ -66,9 +66,9 @@
|
|||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "50F787EE2497683900D52237"
|
||||
BuildableName = "deepspeech_ios_test.app"
|
||||
BlueprintName = "deepspeech_ios_test"
|
||||
ReferencedContainer = "container:deepspeech_ios_test.xcodeproj">
|
||||
BuildableName = "stt_ios_test.app"
|
||||
BlueprintName = "stt_ios_test"
|
||||
ReferencedContainer = "container:stt_ios_test.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildableProductRunnable>
|
||||
</LaunchAction>
|
||||
|
@ -83,9 +83,9 @@
|
|||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "50F787EE2497683900D52237"
|
||||
BuildableName = "deepspeech_ios_test.app"
|
||||
BlueprintName = "deepspeech_ios_test"
|
||||
ReferencedContainer = "container:deepspeech_ios_test.xcodeproj">
|
||||
BuildableName = "stt_ios_test.app"
|
||||
BlueprintName = "stt_ios_test"
|
||||
ReferencedContainer = "container:stt_ios_test.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildableProductRunnable>
|
||||
</ProfileAction>
|
|
@ -1,6 +1,6 @@
|
|||
//
|
||||
// AppDelegate.swift
|
||||
// deepspeech_ios_test
|
||||
// stt_ios_test
|
||||
//
|
||||
// Created by Reuben Morais on 15.06.20.
|
||||
// Copyright © 2020 Mozilla
|
|
@ -1,6 +1,6 @@
|
|||
//
|
||||
// AudioContext.swift
|
||||
// deepspeech_ios_test
|
||||
// stt_ios_test
|
||||
//
|
||||
// Created by Erik Ziegler on 27.07.20.
|
||||
// Copyright © 2020 Mozilla
|
||||
|
@ -12,7 +12,7 @@ import AVFoundation
|
|||
import AudioToolbox
|
||||
import Accelerate
|
||||
|
||||
import deepspeech_ios
|
||||
import stt_ios
|
||||
|
||||
/// Holds audio information used for building waveforms
|
||||
final class AudioContext {
|
|
@ -1,6 +1,6 @@
|
|||
//
|
||||
// ContentView.swift
|
||||
// deepspeech_ios_test
|
||||
// stt_ios_test
|
||||
//
|
||||
// Created by Reuben Morais on 15.06.20.
|
||||
// Copyright © 2020 Mozilla
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue