diff --git a/native_client/dotnet/DeepSpeech.sln b/native_client/dotnet/MozillaVoiceStt.sln similarity index 77% rename from native_client/dotnet/DeepSpeech.sln rename to native_client/dotnet/MozillaVoiceStt.sln index 78afe7db..0bf2b52e 100644 --- a/native_client/dotnet/DeepSpeech.sln +++ b/native_client/dotnet/MozillaVoiceStt.sln @@ -2,9 +2,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 16 VisualStudioVersion = 16.0.30204.135 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeepSpeechClient", "DeepSpeechClient\DeepSpeechClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MozillaVoiceSttClient", "MozillaVoiceSttClient\MozillaVoiceSttClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechConsole", "DeepSpeechConsole\DeepSpeechConsole.csproj", "{312965E5-C4F6-4D95-BA64-79906B8BC7AC}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MozillaVoiceSttConsole", "MozillaVoiceSttConsole\MozillaVoiceSttConsole.csproj", "{312965E5-C4F6-4D95-BA64-79906B8BC7AC}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution diff --git a/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs b/native_client/dotnet/MozillaVoiceSttClient/Enums/ErrorCodes.cs similarity index 91% rename from native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs rename to native_client/dotnet/MozillaVoiceSttClient/Enums/ErrorCodes.cs index 600c91d3..aa816f8d 100644 --- a/native_client/dotnet/DeepSpeechClient/Enums/ErrorCodes.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/Enums/ErrorCodes.cs @@ -1,7 +1,7 @@ -namespace DeepSpeechClient.Enums +namespace MozillaVoiceSttClient.Enums { /// - /// Error codes from the native DeepSpeech binary. + /// Error codes from the native Mozilla Voice STT binary. 
/// internal enum ErrorCodes { diff --git a/native_client/dotnet/DeepSpeechClient/Extensions/NativeExtensions.cs b/native_client/dotnet/MozillaVoiceSttClient/Extensions/NativeExtensions.cs similarity index 97% rename from native_client/dotnet/DeepSpeechClient/Extensions/NativeExtensions.cs rename to native_client/dotnet/MozillaVoiceSttClient/Extensions/NativeExtensions.cs index 3e18f7cb..0d2229f9 100644 --- a/native_client/dotnet/DeepSpeechClient/Extensions/NativeExtensions.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/Extensions/NativeExtensions.cs @@ -1,9 +1,9 @@ -using DeepSpeechClient.Structs; +using MozillaVoiceSttClient.Structs; using System; using System.Runtime.InteropServices; using System.Text; -namespace DeepSpeechClient.Extensions +namespace MozillaVoiceSttClient.Extensions { internal static class NativeExtensions { diff --git a/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs b/native_client/dotnet/MozillaVoiceSttClient/Interfaces/IModel.cs similarity index 86% rename from native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs rename to native_client/dotnet/MozillaVoiceSttClient/Interfaces/IModel.cs index e1ed9cad..bd8a62e1 100644 --- a/native_client/dotnet/DeepSpeechClient/Interfaces/IDeepSpeech.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/Interfaces/IModel.cs @@ -1,13 +1,13 @@ -using DeepSpeechClient.Models; +using MozillaVoiceSttClient.Models; using System; using System.IO; -namespace DeepSpeechClient.Interfaces +namespace MozillaVoiceSttClient.Interfaces { /// - /// Client interface of Mozilla's DeepSpeech implementation. + /// Client interface of Mozilla Voice STT. /// - public interface IDeepSpeech : IDisposable + public interface IModel : IDisposable { /// /// Return version of this library. The returned version is a semantic version @@ -59,7 +59,7 @@ namespace DeepSpeechClient.Interfaces unsafe void SetScorerAlphaBeta(float aAlpha, float aBeta); /// - /// Use the DeepSpeech model to perform Speech-To-Text. 
+ /// Use the Mozilla Voice STT model to perform Speech-To-Text. /// /// A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). /// The number of samples in the audio signal. @@ -68,7 +68,7 @@ namespace DeepSpeechClient.Interfaces uint aBufferSize); /// - /// Use the DeepSpeech model to perform Speech-To-Text, return results including metadata. + /// Use the Mozilla Voice STT model to perform Speech-To-Text, return results including metadata. /// /// A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). /// The number of samples in the audio signal. @@ -83,26 +83,26 @@ namespace DeepSpeechClient.Interfaces /// This can be used if you no longer need the result of an ongoing streaming /// inference and don't want to perform a costly decode operation. /// - unsafe void FreeStream(DeepSpeechStream stream); + unsafe void FreeStream(MozillaVoiceSttStream stream); /// /// Creates a new streaming inference state. /// - unsafe DeepSpeechStream CreateStream(); + unsafe MozillaVoiceSttStream CreateStream(); /// /// Feeds audio samples to an ongoing streaming inference. /// /// Instance of the stream to feed the data. /// An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on). - unsafe void FeedAudioContent(DeepSpeechStream stream, short[] aBuffer, uint aBufferSize); + unsafe void FeedAudioContent(MozillaVoiceSttStream stream, short[] aBuffer, uint aBufferSize); /// /// Computes the intermediate decoding of an ongoing streaming inference. /// /// Instance of the stream to decode. /// The STT intermediate result. - unsafe string IntermediateDecode(DeepSpeechStream stream); + unsafe string IntermediateDecode(MozillaVoiceSttStream stream); /// /// Computes the intermediate decoding of an ongoing streaming inference, including metadata. 
@@ -110,14 +110,14 @@ namespace DeepSpeechClient.Interfaces /// Instance of the stream to decode. /// Maximum number of candidate transcripts to return. Returned list might be smaller than this. /// The extended metadata result. - unsafe Metadata IntermediateDecodeWithMetadata(DeepSpeechStream stream, uint aNumResults); + unsafe Metadata IntermediateDecodeWithMetadata(MozillaVoiceSttStream stream, uint aNumResults); /// /// Closes the ongoing streaming inference, returns the STT result over the whole audio signal. /// /// Instance of the stream to finish. /// The STT result. - unsafe string FinishStream(DeepSpeechStream stream); + unsafe string FinishStream(MozillaVoiceSttStream stream); /// /// Closes the ongoing streaming inference, returns the STT result over the whole audio signal, including metadata. @@ -125,6 +125,6 @@ namespace DeepSpeechClient.Interfaces /// Instance of the stream to finish. /// Maximum number of candidate transcripts to return. Returned list might be smaller than this. /// The extended metadata result. - unsafe Metadata FinishStreamWithMetadata(DeepSpeechStream stream, uint aNumResults); + unsafe Metadata FinishStreamWithMetadata(MozillaVoiceSttStream stream, uint aNumResults); } } diff --git a/native_client/dotnet/MozillaVoiceSttClient/Interfaces/IMozillaVoiceSttModel.cs b/native_client/dotnet/MozillaVoiceSttClient/Interfaces/IMozillaVoiceSttModel.cs new file mode 100644 index 00000000..ede8b5f4 --- /dev/null +++ b/native_client/dotnet/MozillaVoiceSttClient/Interfaces/IMozillaVoiceSttModel.cs @@ -0,0 +1,130 @@ +using MozillaVoiceSttClient.Models; +using System; +using System.IO; + +namespace MozillaVoiceSttClient.Interfaces +{ + /// + /// Client interface of Mozilla Voice STT. + /// + public interface IMozillaVoiceSttModel : IDisposable + { + /// + /// Return version of this library. The returned version is a semantic version + /// (SemVer 2.0.0). + /// + unsafe string Version(); + + /// + /// Return the sample rate expected by the model. 
+ /// + /// Sample rate. + unsafe int GetModelSampleRate(); + + /// + /// Get beam width value used by the model. If SetModelBeamWidth was not + /// called before, will return the default value loaded from the model + /// file. + /// + /// Beam width value used by the model. + unsafe uint GetModelBeamWidth(); + + /// + /// Set beam width value used by the model. + /// + /// The beam width used by the decoder. A larger beam width value generates better results at the cost of decoding time. + /// Thrown on failure. + unsafe void SetModelBeamWidth(uint aBeamWidth); + + /// + /// Enable decoding using an external scorer. + /// + /// The path to the external scorer file. + /// Thrown when the native binary failed to enable decoding with an external scorer. + /// Thrown when cannot find the scorer file. + unsafe void EnableExternalScorer(string aScorerPath); + + /// + /// Disable decoding using an external scorer. + /// + /// Thrown when an external scorer is not enabled. + unsafe void DisableExternalScorer(); + + /// + /// Set hyperparameters alpha and beta of the external scorer. + /// + /// The alpha hyperparameter of the decoder. Language model weight. + /// The beta hyperparameter of the decoder. Word insertion weight. + /// Thrown when an external scorer is not enabled. + unsafe void SetScorerAlphaBeta(float aAlpha, float aBeta); + + /// + /// Use the Mozilla Voice STT model to perform Speech-To-Text. + /// + /// A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). + /// The number of samples in the audio signal. + /// The STT result. Returns NULL on error. + unsafe string SpeechToText(short[] aBuffer, + uint aBufferSize); + + /// + /// Use the Mozilla Voice STT model to perform Speech-To-Text, return results including metadata. + /// + /// A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). + /// The number of samples in the audio signal. 
+ /// Maximum number of candidate transcripts to return. Returned list might be smaller than this. + /// The extended metadata. Returns NULL on error. + unsafe Metadata SpeechToTextWithMetadata(short[] aBuffer, + uint aBufferSize, + uint aNumResults); + + /// + /// Destroy a streaming state without decoding the computed logits. + /// This can be used if you no longer need the result of an ongoing streaming + /// inference and don't want to perform a costly decode operation. + /// + unsafe void FreeStream(MozillaVoiceSttStream stream); + + /// + /// Creates a new streaming inference state. + /// + unsafe MozillaVoiceSttStream CreateStream(); + + /// + /// Feeds audio samples to an ongoing streaming inference. + /// + /// Instance of the stream to feed the data. + /// An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on). + unsafe void FeedAudioContent(MozillaVoiceSttStream stream, short[] aBuffer, uint aBufferSize); + + /// + /// Computes the intermediate decoding of an ongoing streaming inference. + /// + /// Instance of the stream to decode. + /// The STT intermediate result. + unsafe string IntermediateDecode(MozillaVoiceSttStream stream); + + /// + /// Computes the intermediate decoding of an ongoing streaming inference, including metadata. + /// + /// Instance of the stream to decode. + /// Maximum number of candidate transcripts to return. Returned list might be smaller than this. + /// The extended metadata result. + unsafe Metadata IntermediateDecodeWithMetadata(MozillaVoiceSttStream stream, uint aNumResults); + + /// + /// Closes the ongoing streaming inference, returns the STT result over the whole audio signal. + /// + /// Instance of the stream to finish. + /// The STT result. + unsafe string FinishStream(MozillaVoiceSttStream stream); + + /// + /// Closes the ongoing streaming inference, returns the STT result over the whole audio signal, including metadata. 
+ /// + /// Instance of the stream to finish. + /// Maximum number of candidate transcripts to return. Returned list might be smaller than this. + /// The extended metadata result. + unsafe Metadata FinishStreamWithMetadata(MozillaVoiceSttStream stream, uint aNumResults); + } +} diff --git a/native_client/dotnet/DeepSpeechClient/Models/CandidateTranscript.cs b/native_client/dotnet/MozillaVoiceSttClient/Models/CandidateTranscript.cs similarity index 92% rename from native_client/dotnet/DeepSpeechClient/Models/CandidateTranscript.cs rename to native_client/dotnet/MozillaVoiceSttClient/Models/CandidateTranscript.cs index cc6b5d28..abe1aa30 100644 --- a/native_client/dotnet/DeepSpeechClient/Models/CandidateTranscript.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/Models/CandidateTranscript.cs @@ -1,4 +1,4 @@ -namespace DeepSpeechClient.Models +namespace MozillaVoiceSttClient.Models { /// /// Stores the entire CTC output as an array of character metadata objects. diff --git a/native_client/dotnet/DeepSpeechClient/Models/DeepSpeechStream.cs b/native_client/dotnet/MozillaVoiceSttClient/Models/DeepSpeechStream.cs similarity index 80% rename from native_client/dotnet/DeepSpeechClient/Models/DeepSpeechStream.cs rename to native_client/dotnet/MozillaVoiceSttClient/Models/DeepSpeechStream.cs index e4605f5e..0223a6bd 100644 --- a/native_client/dotnet/DeepSpeechClient/Models/DeepSpeechStream.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/Models/DeepSpeechStream.cs @@ -1,19 +1,19 @@ using System; -namespace DeepSpeechClient.Models +namespace MozillaVoiceSttClient.Models { /// /// Wrapper of the pointer used for the decoding stream. /// - public class DeepSpeechStream : IDisposable + public class MozillaVoiceSttStream : IDisposable { private unsafe IntPtr** _streamingStatePp; /// - /// Initializes a new instance of . + /// Initializes a new instance of . /// /// Native pointer of the native stream. 
- public unsafe DeepSpeechStream(IntPtr** streamingStatePP) + public unsafe MozillaVoiceSttStream(IntPtr** streamingStatePP) { _streamingStatePp = streamingStatePP; } diff --git a/native_client/dotnet/DeepSpeechClient/Models/Metadata.cs b/native_client/dotnet/MozillaVoiceSttClient/Models/Metadata.cs similarity index 88% rename from native_client/dotnet/DeepSpeechClient/Models/Metadata.cs rename to native_client/dotnet/MozillaVoiceSttClient/Models/Metadata.cs index fb6c613d..ea0666bf 100644 --- a/native_client/dotnet/DeepSpeechClient/Models/Metadata.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/Models/Metadata.cs @@ -1,4 +1,4 @@ -namespace DeepSpeechClient.Models +namespace MozillaVoiceSttClient.Models { /// /// Stores the entire CTC output as an array of character metadata objects. diff --git a/native_client/dotnet/DeepSpeechClient/Models/TokenMetadata.cs b/native_client/dotnet/MozillaVoiceSttClient/Models/TokenMetadata.cs similarity index 92% rename from native_client/dotnet/DeepSpeechClient/Models/TokenMetadata.cs rename to native_client/dotnet/MozillaVoiceSttClient/Models/TokenMetadata.cs index 5f2dea56..86e8bdda 100644 --- a/native_client/dotnet/DeepSpeechClient/Models/TokenMetadata.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/Models/TokenMetadata.cs @@ -1,4 +1,4 @@ -namespace DeepSpeechClient.Models +namespace MozillaVoiceSttClient.Models { /// /// Stores each individual character, along with its timing information. 
diff --git a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs b/native_client/dotnet/MozillaVoiceSttClient/MozillaVoiceStt.cs similarity index 87% rename from native_client/dotnet/DeepSpeechClient/DeepSpeech.cs rename to native_client/dotnet/MozillaVoiceSttClient/MozillaVoiceStt.cs index fda061d7..f2b67fb7 100644 --- a/native_client/dotnet/DeepSpeechClient/DeepSpeech.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/MozillaVoiceStt.cs @@ -1,34 +1,34 @@ -using DeepSpeechClient.Interfaces; -using DeepSpeechClient.Extensions; +using MozillaVoiceSttClient.Interfaces; +using MozillaVoiceSttClient.Extensions; using System; using System.IO; -using DeepSpeechClient.Enums; -using DeepSpeechClient.Models; +using MozillaVoiceSttClient.Enums; +using MozillaVoiceSttClient.Models; -namespace DeepSpeechClient +namespace MozillaVoiceSttClient { /// - /// Concrete implementation of . + /// Concrete implementation of . /// - public class DeepSpeech : IDeepSpeech + public class MozillaVoiceSttModel : IMozillaVoiceSttModel { private unsafe IntPtr** _modelStatePP; /// - /// Initializes a new instance of class and creates a new acoustic model. + /// Initializes a new instance of class and creates a new acoustic model. /// /// The path to the frozen model graph. /// Thrown when the native binary failed to create the model. - public DeepSpeech(string aModelPath) + public MozillaVoiceSttModel(string aModelPath) { CreateModel(aModelPath); } - #region IDeepSpeech + #region IMozillaVoiceSttModel /// - /// Create an object providing an interface to a trained DeepSpeech model. + /// Create an object providing an interface to a trained Mozilla Voice STT model. /// /// The path to the frozen model graph. /// Thrown when the native binary failed to create the model. @@ -153,7 +153,7 @@ namespace DeepSpeechClient /// /// Instance of the stream to feed the data. /// An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on).
- public unsafe void FeedAudioContent(DeepSpeechStream stream, short[] aBuffer, uint aBufferSize) + public unsafe void FeedAudioContent(MozillaVoiceSttStream stream, short[] aBuffer, uint aBufferSize) { NativeImp.STT_FeedAudioContent(stream.GetNativePointer(), aBuffer, aBufferSize); } @@ -163,7 +163,7 @@ namespace DeepSpeechClient /// /// Instance of the stream to finish. /// The STT result. - public unsafe string FinishStream(DeepSpeechStream stream) + public unsafe string FinishStream(MozillaVoiceSttStream stream) { return NativeImp.STT_FinishStream(stream.GetNativePointer()).PtrToString(); } @@ -174,7 +174,7 @@ namespace DeepSpeechClient /// Instance of the stream to finish. /// Maximum number of candidate transcripts to return. Returned list might be smaller than this. /// The extended metadata result. - public unsafe Metadata FinishStreamWithMetadata(DeepSpeechStream stream, uint aNumResults) + public unsafe Metadata FinishStreamWithMetadata(MozillaVoiceSttStream stream, uint aNumResults) { return NativeImp.STT_FinishStreamWithMetadata(stream.GetNativePointer(), aNumResults).PtrToMetadata(); } @@ -184,7 +184,7 @@ namespace DeepSpeechClient /// /// Instance of the stream to decode. /// The STT intermediate result. - public unsafe string IntermediateDecode(DeepSpeechStream stream) + public unsafe string IntermediateDecode(MozillaVoiceSttStream stream) { return NativeImp.STT_IntermediateDecode(stream.GetNativePointer()).PtrToString(); } @@ -195,7 +195,7 @@ namespace DeepSpeechClient /// Instance of the stream to decode. /// Maximum number of candidate transcripts to return. Returned list might be smaller than this. /// The STT intermediate result. 
- public unsafe Metadata IntermediateDecodeWithMetadata(DeepSpeechStream stream, uint aNumResults) + public unsafe Metadata IntermediateDecodeWithMetadata(MozillaVoiceSttStream stream, uint aNumResults) { return NativeImp.STT_IntermediateDecodeWithMetadata(stream.GetNativePointer(), aNumResults).PtrToMetadata(); } @@ -212,12 +212,12 @@ namespace DeepSpeechClient /// /// Creates a new streaming inference state. /// - public unsafe DeepSpeechStream CreateStream() + public unsafe MozillaVoiceSttStream CreateStream() { IntPtr** streamingStatePointer = null; var resultCode = NativeImp.STT_CreateStream(_modelStatePP, ref streamingStatePointer); EvaluateResultCode(resultCode); - return new DeepSpeechStream(streamingStatePointer); + return new MozillaVoiceSttStream(streamingStatePointer); } /// @@ -225,14 +225,14 @@ namespace DeepSpeechClient /// This can be used if you no longer need the result of an ongoing streaming /// inference and don't want to perform a costly decode operation. /// - public unsafe void FreeStream(DeepSpeechStream stream) + public unsafe void FreeStream(MozillaVoiceSttStream stream) { NativeImp.STT_FreeStream(stream.GetNativePointer()); stream.Dispose(); } /// - /// Use the DeepSpeech model to perform Speech-To-Text. + /// Use the Mozilla Voice STT model to perform Speech-To-Text. /// /// A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). /// The number of samples in the audio signal. @@ -243,7 +243,7 @@ namespace DeepSpeechClient } /// - /// Use the DeepSpeech model to perform Speech-To-Text, return results including metadata. + /// Use the Mozilla Voice STT model to perform Speech-To-Text, return results including metadata. /// /// A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on). /// The number of samples in the audio signal. 
diff --git a/native_client/dotnet/DeepSpeechClient/DeepSpeechClient.csproj b/native_client/dotnet/MozillaVoiceSttClient/MozillaVoiceSttClient.csproj similarity index 100% rename from native_client/dotnet/DeepSpeechClient/DeepSpeechClient.csproj rename to native_client/dotnet/MozillaVoiceSttClient/MozillaVoiceSttClient.csproj diff --git a/native_client/dotnet/DeepSpeechClient/NativeImp.cs b/native_client/dotnet/MozillaVoiceSttClient/NativeImp.cs similarity index 98% rename from native_client/dotnet/DeepSpeechClient/NativeImp.cs rename to native_client/dotnet/MozillaVoiceSttClient/NativeImp.cs index 56695296..daad79ac 100644 --- a/native_client/dotnet/DeepSpeechClient/NativeImp.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/NativeImp.cs @@ -1,9 +1,9 @@ -using DeepSpeechClient.Enums; +using MozillaVoiceSttClient.Enums; using System; using System.Runtime.InteropServices; -namespace DeepSpeechClient +namespace MozillaVoiceSttClient { /// /// Wrapper for the native implementation of "libmozilla_voice_stt.so" diff --git a/native_client/dotnet/DeepSpeechClient/Structs/CandidateTranscript.cs b/native_client/dotnet/MozillaVoiceSttClient/Structs/CandidateTranscript.cs similarity index 93% rename from native_client/dotnet/DeepSpeechClient/Structs/CandidateTranscript.cs rename to native_client/dotnet/MozillaVoiceSttClient/Structs/CandidateTranscript.cs index 54581f6f..9029d0f5 100644 --- a/native_client/dotnet/DeepSpeechClient/Structs/CandidateTranscript.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/Structs/CandidateTranscript.cs @@ -1,7 +1,7 @@ using System; using System.Runtime.InteropServices; -namespace DeepSpeechClient.Structs +namespace MozillaVoiceSttClient.Structs { [StructLayout(LayoutKind.Sequential)] internal unsafe struct CandidateTranscript diff --git a/native_client/dotnet/DeepSpeechClient/Structs/Metadata.cs b/native_client/dotnet/MozillaVoiceSttClient/Structs/Metadata.cs similarity index 91% rename from 
native_client/dotnet/DeepSpeechClient/Structs/Metadata.cs rename to native_client/dotnet/MozillaVoiceSttClient/Structs/Metadata.cs index 0a9beddc..a354759a 100644 --- a/native_client/dotnet/DeepSpeechClient/Structs/Metadata.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/Structs/Metadata.cs @@ -1,7 +1,7 @@ using System; using System.Runtime.InteropServices; -namespace DeepSpeechClient.Structs +namespace MozillaVoiceSttClient.Structs { [StructLayout(LayoutKind.Sequential)] internal unsafe struct Metadata diff --git a/native_client/dotnet/DeepSpeechClient/Structs/TokenMetadata.cs b/native_client/dotnet/MozillaVoiceSttClient/Structs/TokenMetadata.cs similarity index 93% rename from native_client/dotnet/DeepSpeechClient/Structs/TokenMetadata.cs rename to native_client/dotnet/MozillaVoiceSttClient/Structs/TokenMetadata.cs index 1c660c71..1f54e5d4 100644 --- a/native_client/dotnet/DeepSpeechClient/Structs/TokenMetadata.cs +++ b/native_client/dotnet/MozillaVoiceSttClient/Structs/TokenMetadata.cs @@ -1,7 +1,7 @@ using System; using System.Runtime.InteropServices; -namespace DeepSpeechClient.Structs +namespace MozillaVoiceSttClient.Structs { [StructLayout(LayoutKind.Sequential)] internal unsafe struct TokenMetadata diff --git a/native_client/dotnet/DeepSpeechConsole/App.config b/native_client/dotnet/MozillaVoiceSttConsole/App.config similarity index 100% rename from native_client/dotnet/DeepSpeechConsole/App.config rename to native_client/dotnet/MozillaVoiceSttConsole/App.config diff --git a/native_client/dotnet/DeepSpeechConsole/DeepSpeechConsole.csproj b/native_client/dotnet/MozillaVoiceSttConsole/MozillaVoiceSttConsole.csproj similarity index 92% rename from native_client/dotnet/DeepSpeechConsole/DeepSpeechConsole.csproj rename to native_client/dotnet/MozillaVoiceSttConsole/MozillaVoiceSttConsole.csproj index a05fca61..13a8b355 100644 --- a/native_client/dotnet/DeepSpeechConsole/DeepSpeechConsole.csproj +++ 
b/native_client/dotnet/MozillaVoiceSttConsole/MozillaVoiceSttConsole.csproj @@ -6,8 +6,8 @@ AnyCPU {312965E5-C4F6-4D95-BA64-79906B8BC7AC} Exe - DeepSpeechConsole - DeepSpeechConsole + MozillaVoiceSttConsole + MozillaVoiceSttConsole v4.6.2 512 true @@ -56,9 +56,9 @@ - + {56DE4091-BBBE-47E4-852D-7268B33B971F} - DeepSpeechClient + MozillaVoiceSttClient diff --git a/native_client/dotnet/DeepSpeechConsole/Program.cs b/native_client/dotnet/MozillaVoiceSttConsole/Program.cs similarity index 94% rename from native_client/dotnet/DeepSpeechConsole/Program.cs rename to native_client/dotnet/MozillaVoiceSttConsole/Program.cs index 68f3fc54..f94f5de1 100644 --- a/native_client/dotnet/DeepSpeechConsole/Program.cs +++ b/native_client/dotnet/MozillaVoiceSttConsole/Program.cs @@ -1,6 +1,6 @@ -using DeepSpeechClient; -using DeepSpeechClient.Interfaces; -using DeepSpeechClient.Models; +using MozillaVoiceSttClient; +using MozillaVoiceSttClient.Interfaces; +using MozillaVoiceSttClient.Models; using NAudio.Wave; using System; using System.Collections.Generic; @@ -52,7 +52,7 @@ namespace CSharpExamples Console.WriteLine("Loading model..."); stopwatch.Start(); // sphinx-doc: csharp_ref_model_start - using (IDeepSpeech sttClient = new DeepSpeech(model ?? "output_graph.pbmm")) + using (IMozillaVoiceSttModel sttClient = new MozillaVoiceSttModel(model ?? 
"output_graph.pbmm")) { // sphinx-doc: csharp_ref_model_stop stopwatch.Stop(); diff --git a/native_client/dotnet/DeepSpeechConsole/Properties/AssemblyInfo.cs b/native_client/dotnet/MozillaVoiceSttConsole/Properties/AssemblyInfo.cs similarity index 96% rename from native_client/dotnet/DeepSpeechConsole/Properties/AssemblyInfo.cs rename to native_client/dotnet/MozillaVoiceSttConsole/Properties/AssemblyInfo.cs index 845851a1..f3257c64 100644 --- a/native_client/dotnet/DeepSpeechConsole/Properties/AssemblyInfo.cs +++ b/native_client/dotnet/MozillaVoiceSttConsole/Properties/AssemblyInfo.cs @@ -5,7 +5,7 @@ using System.Runtime.InteropServices; // General Information about an assembly is controlled through the following // set of attributes. Change these attribute values to modify the information // associated with an assembly. -[assembly: AssemblyTitle("DeepSpeechConsole")] +[assembly: AssemblyTitle("MozillaVoiceSttConsole")] [assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] [assembly: AssemblyCompany("")] diff --git a/native_client/dotnet/DeepSpeechConsole/arctic_a0024.wav b/native_client/dotnet/MozillaVoiceSttConsole/arctic_a0024.wav similarity index 100% rename from native_client/dotnet/DeepSpeechConsole/arctic_a0024.wav rename to native_client/dotnet/MozillaVoiceSttConsole/arctic_a0024.wav diff --git a/native_client/dotnet/DeepSpeechConsole/packages.config b/native_client/dotnet/MozillaVoiceSttConsole/packages.config similarity index 100% rename from native_client/dotnet/DeepSpeechConsole/packages.config rename to native_client/dotnet/MozillaVoiceSttConsole/packages.config diff --git a/native_client/dotnet/DeepSpeechWPF/.gitignore b/native_client/dotnet/MozillaVoiceSttWPF/.gitignore similarity index 100% rename from native_client/dotnet/DeepSpeechWPF/.gitignore rename to native_client/dotnet/MozillaVoiceSttWPF/.gitignore diff --git a/native_client/dotnet/DeepSpeechWPF/App.config b/native_client/dotnet/MozillaVoiceSttWPF/App.config similarity 
index 100% rename from native_client/dotnet/DeepSpeechWPF/App.config rename to native_client/dotnet/MozillaVoiceSttWPF/App.config diff --git a/native_client/dotnet/DeepSpeechWPF/App.xaml b/native_client/dotnet/MozillaVoiceSttWPF/App.xaml similarity index 71% rename from native_client/dotnet/DeepSpeechWPF/App.xaml rename to native_client/dotnet/MozillaVoiceSttWPF/App.xaml index 16ebb0d4..ca6a0f13 100644 --- a/native_client/dotnet/DeepSpeechWPF/App.xaml +++ b/native_client/dotnet/MozillaVoiceSttWPF/App.xaml @@ -1,8 +1,8 @@  diff --git a/native_client/dotnet/DeepSpeechWPF/App.xaml.cs b/native_client/dotnet/MozillaVoiceSttWPF/App.xaml.cs similarity index 61% rename from native_client/dotnet/DeepSpeechWPF/App.xaml.cs rename to native_client/dotnet/MozillaVoiceSttWPF/App.xaml.cs index d4b87d6e..973046b8 100644 --- a/native_client/dotnet/DeepSpeechWPF/App.xaml.cs +++ b/native_client/dotnet/MozillaVoiceSttWPF/App.xaml.cs @@ -1,10 +1,10 @@ using CommonServiceLocator; -using DeepSpeech.WPF.ViewModels; -using DeepSpeechClient.Interfaces; +using MozillaVoiceStt.WPF.ViewModels; +using MozillaVoiceSttClient.Interfaces; using GalaSoft.MvvmLight.Ioc; using System.Windows; -namespace DeepSpeechWPF +namespace MozillaVoiceSttWPF { /// /// Interaction logic for App.xaml @@ -18,11 +18,11 @@ namespace DeepSpeechWPF try { - //Register instance of DeepSpeech - DeepSpeechClient.DeepSpeech deepSpeechClient = - new DeepSpeechClient.DeepSpeech("deepspeech-0.8.0-models.pbmm"); + //Register instance of Mozilla Voice STT + MozillaVoiceSttClient.MozillaVoiceSttModel client = + new MozillaVoiceSttClient.MozillaVoiceSttModel("deepspeech-0.8.0-models.pbmm"); - SimpleIoc.Default.Register(() => deepSpeechClient); + SimpleIoc.Default.Register(() => client); SimpleIoc.Default.Register(); } catch (System.Exception ex) @@ -35,8 +35,8 @@ namespace DeepSpeechWPF protected override void OnExit(ExitEventArgs e) { base.OnExit(e); - //Dispose instance of DeepSpeech - ServiceLocator.Current.GetInstance()?.Dispose(); + //Dispose instance of Mozilla
Voice STT + ServiceLocator.Current.GetInstance()?.Dispose(); } } } diff --git a/native_client/dotnet/DeepSpeechWPF/MainWindow.xaml b/native_client/dotnet/MozillaVoiceSttWPF/MainWindow.xaml similarity index 97% rename from native_client/dotnet/DeepSpeechWPF/MainWindow.xaml rename to native_client/dotnet/MozillaVoiceSttWPF/MainWindow.xaml index 4fbe5e72..5894fae3 100644 --- a/native_client/dotnet/DeepSpeechWPF/MainWindow.xaml +++ b/native_client/dotnet/MozillaVoiceSttWPF/MainWindow.xaml @@ -1,10 +1,10 @@  /// Interaction logic for MainWindow.xaml diff --git a/native_client/dotnet/DeepSpeechWPF/DeepSpeech.WPF.csproj b/native_client/dotnet/MozillaVoiceSttWPF/MozillaVoiceStt.WPF.csproj similarity index 94% rename from native_client/dotnet/DeepSpeechWPF/DeepSpeech.WPF.csproj rename to native_client/dotnet/MozillaVoiceSttWPF/MozillaVoiceStt.WPF.csproj index 7f46a31e..d14a02b7 100644 --- a/native_client/dotnet/DeepSpeechWPF/DeepSpeech.WPF.csproj +++ b/native_client/dotnet/MozillaVoiceSttWPF/MozillaVoiceStt.WPF.csproj @@ -6,8 +6,8 @@ AnyCPU {54BFD766-4305-4F4C-BA59-AF45505DF3C1} WinExe - DeepSpeech.WPF - DeepSpeech.WPF + MozillaVoiceStt.WPF + MozillaVoiceStt.WPF v4.6.2 512 {60dc8134-eba5-43b8-bcc9-bb4bc16c2548};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} @@ -131,9 +131,9 @@ - + {56de4091-bbbe-47e4-852d-7268b33b971f} - DeepSpeechClient + MozillaVoiceSttClient diff --git a/native_client/dotnet/DeepSpeechWPF/DeepSpeech.WPF.sln b/native_client/dotnet/MozillaVoiceSttWPF/MozillaVoiceStt.WPF.sln similarity index 79% rename from native_client/dotnet/DeepSpeechWPF/DeepSpeech.WPF.sln rename to native_client/dotnet/MozillaVoiceSttWPF/MozillaVoiceStt.WPF.sln index cd29025e..003c6d8e 100644 --- a/native_client/dotnet/DeepSpeechWPF/DeepSpeech.WPF.sln +++ b/native_client/dotnet/MozillaVoiceSttWPF/MozillaVoiceStt.WPF.sln @@ -3,9 +3,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 15 VisualStudioVersion = 15.0.28307.421 MinimumVisualStudioVersion = 10.0.40219.1 
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeech.WPF", "DeepSpeech.WPF.csproj", "{54BFD766-4305-4F4C-BA59-AF45505DF3C1}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MozillaVoiceStt.WPF", "MozillaVoiceStt.WPF.csproj", "{54BFD766-4305-4F4C-BA59-AF45505DF3C1}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechClient", "..\DeepSpeechClient\DeepSpeechClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MozillaVoiceSttClient", "..\MozillaVoiceSttClient\MozillaVoiceSttClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution diff --git a/native_client/dotnet/DeepSpeechWPF/Properties/AssemblyInfo.cs b/native_client/dotnet/MozillaVoiceSttWPF/Properties/AssemblyInfo.cs similarity index 95% rename from native_client/dotnet/DeepSpeechWPF/Properties/AssemblyInfo.cs rename to native_client/dotnet/MozillaVoiceSttWPF/Properties/AssemblyInfo.cs index f9ae7d76..034ac3d6 100644 --- a/native_client/dotnet/DeepSpeechWPF/Properties/AssemblyInfo.cs +++ b/native_client/dotnet/MozillaVoiceSttWPF/Properties/AssemblyInfo.cs @@ -7,11 +7,11 @@ using System.Windows; // General Information about an assembly is controlled through the following // set of attributes. Change these attribute values to modify the information // associated with an assembly. 
-[assembly: AssemblyTitle("DeepSpeech.WPF")] +[assembly: AssemblyTitle("MozillaVoiceStt.WPF")] [assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] [assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("DeepSpeech.WPF.SingleFiles")] +[assembly: AssemblyProduct("MozillaVoiceStt.WPF.SingleFiles")] [assembly: AssemblyCopyright("Copyright © 2018")] [assembly: AssemblyTrademark("")] [assembly: AssemblyCulture("")] diff --git a/native_client/dotnet/DeepSpeechWPF/Properties/Resources.Designer.cs b/native_client/dotnet/MozillaVoiceSttWPF/Properties/Resources.Designer.cs similarity index 94% rename from native_client/dotnet/DeepSpeechWPF/Properties/Resources.Designer.cs rename to native_client/dotnet/MozillaVoiceSttWPF/Properties/Resources.Designer.cs index 2da2b4b2..b470f9ae 100644 --- a/native_client/dotnet/DeepSpeechWPF/Properties/Resources.Designer.cs +++ b/native_client/dotnet/MozillaVoiceSttWPF/Properties/Resources.Designer.cs @@ -8,7 +8,7 @@ // //------------------------------------------------------------------------------ -namespace DeepSpeech.WPF.Properties { +namespace MozillaVoiceStt.WPF.Properties { using System; @@ -39,7 +39,7 @@ namespace DeepSpeech.WPF.Properties { internal static global::System.Resources.ResourceManager ResourceManager { get { if (object.ReferenceEquals(resourceMan, null)) { - global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("DeepSpeech.WPF.Properties.Resources", typeof(Resources).Assembly); + global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("MozillaVoiceStt.WPF.Properties.Resources", typeof(Resources).Assembly); resourceMan = temp; } return resourceMan; diff --git a/native_client/dotnet/DeepSpeechWPF/Properties/Resources.resx b/native_client/dotnet/MozillaVoiceSttWPF/Properties/Resources.resx similarity index 100% rename from native_client/dotnet/DeepSpeechWPF/Properties/Resources.resx rename to 
native_client/dotnet/MozillaVoiceSttWPF/Properties/Resources.resx diff --git a/native_client/dotnet/DeepSpeechWPF/Properties/Settings.Designer.cs b/native_client/dotnet/MozillaVoiceSttWPF/Properties/Settings.Designer.cs similarity index 96% rename from native_client/dotnet/DeepSpeechWPF/Properties/Settings.Designer.cs rename to native_client/dotnet/MozillaVoiceSttWPF/Properties/Settings.Designer.cs index 0f464bc4..a7218694 100644 --- a/native_client/dotnet/DeepSpeechWPF/Properties/Settings.Designer.cs +++ b/native_client/dotnet/MozillaVoiceSttWPF/Properties/Settings.Designer.cs @@ -8,7 +8,7 @@ // //------------------------------------------------------------------------------ -namespace DeepSpeech.WPF.Properties { +namespace MozillaVoiceStt.WPF.Properties { [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] diff --git a/native_client/dotnet/DeepSpeechWPF/Properties/Settings.settings b/native_client/dotnet/MozillaVoiceSttWPF/Properties/Settings.settings similarity index 100% rename from native_client/dotnet/DeepSpeechWPF/Properties/Settings.settings rename to native_client/dotnet/MozillaVoiceSttWPF/Properties/Settings.settings diff --git a/native_client/dotnet/DeepSpeechWPF/ViewModels/BindableBase.cs b/native_client/dotnet/MozillaVoiceSttWPF/ViewModels/BindableBase.cs similarity index 98% rename from native_client/dotnet/DeepSpeechWPF/ViewModels/BindableBase.cs rename to native_client/dotnet/MozillaVoiceSttWPF/ViewModels/BindableBase.cs index 909327ee..92fd2f57 100644 --- a/native_client/dotnet/DeepSpeechWPF/ViewModels/BindableBase.cs +++ b/native_client/dotnet/MozillaVoiceSttWPF/ViewModels/BindableBase.cs @@ -3,7 +3,7 @@ using System.Collections.Generic; using System.ComponentModel; using System.Runtime.CompilerServices; -namespace DeepSpeech.WPF.ViewModels +namespace MozillaVoiceStt.WPF.ViewModels { /// /// Implementation of to simplify models. 
diff --git a/native_client/dotnet/DeepSpeechWPF/ViewModels/MainWindowViewModel.cs b/native_client/dotnet/MozillaVoiceSttWPF/ViewModels/MainWindowViewModel.cs similarity index 96% rename from native_client/dotnet/DeepSpeechWPF/ViewModels/MainWindowViewModel.cs rename to native_client/dotnet/MozillaVoiceSttWPF/ViewModels/MainWindowViewModel.cs index 230fd42a..0d81c2f0 100644 --- a/native_client/dotnet/DeepSpeechWPF/ViewModels/MainWindowViewModel.cs +++ b/native_client/dotnet/MozillaVoiceSttWPF/ViewModels/MainWindowViewModel.cs @@ -3,8 +3,8 @@ using CSCore; using CSCore.CoreAudioAPI; using CSCore.SoundIn; using CSCore.Streams; -using DeepSpeechClient.Interfaces; -using DeepSpeechClient.Models; +using MozillaVoiceSttClient.Interfaces; +using MozillaVoiceSttClient.Models; using GalaSoft.MvvmLight.CommandWpf; using Microsoft.Win32; using System; @@ -15,7 +15,7 @@ using System.IO; using System.Threading; using System.Threading.Tasks; -namespace DeepSpeech.WPF.ViewModels +namespace MozillaVoiceStt.WPF.ViewModels { /// /// View model of the MainWindow View. @@ -27,7 +27,7 @@ namespace DeepSpeech.WPF.ViewModels private const string ScorerPath = "kenlm.scorer"; #endregion - private readonly IDeepSpeech _sttClient; + private readonly IMozillaVoiceSttModel _sttClient; #region Commands /// @@ -62,7 +62,7 @@ namespace DeepSpeech.WPF.ViewModels /// /// Stream used to feed data into the acoustic model. /// - private DeepSpeechStream _sttStream; + private MozillaVoiceSttStream _sttStream; /// /// Records the audio of the selected device. 
@@ -75,7 +75,7 @@ namespace DeepSpeech.WPF.ViewModels private SoundInSource _soundInSource; /// - /// Target wave source.(16KHz Mono 16bit for DeepSpeech) + /// Target wave source.(16KHz Mono 16bit for Mozilla Voice STT) /// private IWaveSource _convertedSource; @@ -200,7 +200,7 @@ namespace DeepSpeech.WPF.ViewModels #endregion #region Ctors - public MainWindowViewModel(IDeepSpeech sttClient) + public MainWindowViewModel(IMozillaVoiceSttModel sttClient) { _sttClient = sttClient; @@ -290,7 +290,8 @@ namespace DeepSpeech.WPF.ViewModels //read data from the converedSource //important: don't use the e.Data here //the e.Data contains the raw data provided by the - //soundInSource which won't have the deepspeech required audio format + //soundInSource which won't have the Mozilla Voice STT required + // audio format byte[] buffer = new byte[_convertedSource.WaveFormat.BytesPerSecond / 2]; int read; diff --git a/native_client/dotnet/DeepSpeechWPF/packages.config b/native_client/dotnet/MozillaVoiceSttWPF/packages.config similarity index 100% rename from native_client/dotnet/DeepSpeechWPF/packages.config rename to native_client/dotnet/MozillaVoiceSttWPF/packages.config diff --git a/native_client/dotnet/README.rst b/native_client/dotnet/README.rst index f998bfa3..26db5b96 100644 --- a/native_client/dotnet/README.rst +++ b/native_client/dotnet/README.rst @@ -1,8 +1,8 @@ -Building DeepSpeech native client for Windows +Building Mozilla Voice STT native client for Windows ============================================= -Now we can build the native client of DeepSpeech and run inference on Windows using the C# client, to do that we need to compile the ``native_client``. +Now we can build the native client of Mozilla Voice STT and run inference on Windows using the C# client, to do that we need to compile the ``native_client``. **Table of Contents** @@ -59,8 +59,8 @@ There should already be a symbolic link, for this example let's suppose that we . 
├── D:\ - │ ├── cloned # Contains DeepSpeech and tensorflow side by side - │ │ └── DeepSpeech # Root of the cloned DeepSpeech + │ ├── cloned # Contains Mozilla Voice STT and tensorflow side by side + │ │ └── DeepSpeech # Root of the cloned Mozilla Voice STT │ │ ├── tensorflow # Root of the cloned Mozilla's tensorflow └── ... @@ -142,4 +142,4 @@ Be patient, if you enabled AVX/AVX2 and CUDA it will take a long time. Finally y Using the generated library --------------------------- -As for now we can only use the generated ``libmozilla_voice_stt.so`` with the C# clients, go to `native_client/dotnet/ `_ in your DeepSpeech directory and open the Visual Studio solution, then we need to build in debug or release mode, finally we just need to copy ``libmozilla_voice_stt.so`` to the generated ``x64/Debug`` or ``x64/Release`` directory. +As for now we can only use the generated ``libmozilla_voice_stt.so`` with the C# clients, go to `native_client/dotnet/ `_ in your Mozilla Voice STT directory and open the Visual Studio solution, then we need to build in debug or release mode, finally we just need to copy ``libmozilla_voice_stt.so`` to the generated ``x64/Debug`` or ``x64/Release`` directory. diff --git a/native_client/dotnet/nupkg/deepspeech.nuspec.in b/native_client/dotnet/nupkg/deepspeech.nuspec.in index fd1a169f..93a6f6ea 100644 --- a/native_client/dotnet/nupkg/deepspeech.nuspec.in +++ b/native_client/dotnet/nupkg/deepspeech.nuspec.in @@ -3,13 +3,13 @@ $NUPKG_ID $NUPKG_VERSION - Mozilla_Voice_STT + Mozilla.Voice.STT Mozilla Mozilla MPL-2.0 http://github.com/mozilla/DeepSpeech false - A library for running inference with a DeepSpeech model + A library for running inference with a Mozilla Voice STT model Copyright (c) 2019 Mozilla Corporation native speech speech_recognition