.NET rename
This commit is contained in:
parent
fa21911048
commit
ee7bf86460
@ -2,9 +2,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 16
|
||||
VisualStudioVersion = 16.0.30204.135
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeepSpeechClient", "DeepSpeechClient\DeepSpeechClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}"
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MozillaVoiceSttClient", "MozillaVoiceSttClient\MozillaVoiceSttClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechConsole", "DeepSpeechConsole\DeepSpeechConsole.csproj", "{312965E5-C4F6-4D95-BA64-79906B8BC7AC}"
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MozillaVoiceSttConsole", "MozillaVoiceSttConsole\MozillaVoiceSttConsole.csproj", "{312965E5-C4F6-4D95-BA64-79906B8BC7AC}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
@ -1,7 +1,7 @@
|
||||
namespace DeepSpeechClient.Enums
|
||||
namespace MozillaVoiceSttClient.Enums
|
||||
{
|
||||
/// <summary>
|
||||
/// Error codes from the native DeepSpeech binary.
|
||||
/// Error codes from the native Mozilla Voice STT binary.
|
||||
/// </summary>
|
||||
internal enum ErrorCodes
|
||||
{
|
@ -1,9 +1,9 @@
|
||||
using DeepSpeechClient.Structs;
|
||||
using MozillaVoiceSttClient.Structs;
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Text;
|
||||
|
||||
namespace DeepSpeechClient.Extensions
|
||||
namespace MozillaVoiceSttClient.Extensions
|
||||
{
|
||||
internal static class NativeExtensions
|
||||
{
|
@ -1,13 +1,13 @@
|
||||
using DeepSpeechClient.Models;
|
||||
using MozillaVoiceSttClient.Models;
|
||||
using System;
|
||||
using System.IO;
|
||||
|
||||
namespace DeepSpeechClient.Interfaces
|
||||
namespace MozillaVoiceSttClient.Interfaces
|
||||
{
|
||||
/// <summary>
|
||||
/// Client interface of Mozilla's DeepSpeech implementation.
|
||||
/// Client interface of Mozilla Voice STT.
|
||||
/// </summary>
|
||||
public interface IDeepSpeech : IDisposable
|
||||
public interface IModel : IDisposable
|
||||
{
|
||||
/// <summary>
|
||||
/// Return version of this library. The returned version is a semantic version
|
||||
@ -59,7 +59,7 @@ namespace DeepSpeechClient.Interfaces
|
||||
unsafe void SetScorerAlphaBeta(float aAlpha, float aBeta);
|
||||
|
||||
/// <summary>
|
||||
/// Use the DeepSpeech model to perform Speech-To-Text.
|
||||
/// Use the Mozilla Voice STT model to perform Speech-To-Text.
|
||||
/// </summary>
|
||||
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
|
||||
/// <param name="aBufferSize">The number of samples in the audio signal.</param>
|
||||
@ -68,7 +68,7 @@ namespace DeepSpeechClient.Interfaces
|
||||
uint aBufferSize);
|
||||
|
||||
/// <summary>
|
||||
/// Use the DeepSpeech model to perform Speech-To-Text, return results including metadata.
|
||||
/// Use the Mozilla Voice STT model to perform Speech-To-Text, return results including metadata.
|
||||
/// </summary>
|
||||
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
|
||||
/// <param name="aBufferSize">The number of samples in the audio signal.</param>
|
||||
@ -83,26 +83,26 @@ namespace DeepSpeechClient.Interfaces
|
||||
/// This can be used if you no longer need the result of an ongoing streaming
|
||||
/// inference and don't want to perform a costly decode operation.
|
||||
/// </summary>
|
||||
unsafe void FreeStream(DeepSpeechStream stream);
|
||||
unsafe void FreeStream(MozillaVoiceSttStream stream);
|
||||
|
||||
/// <summary>
|
||||
/// Creates a new streaming inference state.
|
||||
/// </summary>
|
||||
unsafe DeepSpeechStream CreateStream();
|
||||
unsafe MozillaVoiceSttStream CreateStream();
|
||||
|
||||
/// <summary>
|
||||
/// Feeds audio samples to an ongoing streaming inference.
|
||||
/// </summary>
|
||||
/// <param name="stream">Instance of the stream to feed the data.</param>
|
||||
/// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on).</param>
|
||||
unsafe void FeedAudioContent(DeepSpeechStream stream, short[] aBuffer, uint aBufferSize);
|
||||
unsafe void FeedAudioContent(MozillaVoiceSttStream stream, short[] aBuffer, uint aBufferSize);
|
||||
|
||||
/// <summary>
|
||||
/// Computes the intermediate decoding of an ongoing streaming inference.
|
||||
/// </summary>
|
||||
/// <param name="stream">Instance of the stream to decode.</param>
|
||||
/// <returns>The STT intermediate result.</returns>
|
||||
unsafe string IntermediateDecode(DeepSpeechStream stream);
|
||||
unsafe string IntermediateDecode(MozillaVoiceSttStream stream);
|
||||
|
||||
/// <summary>
|
||||
/// Computes the intermediate decoding of an ongoing streaming inference, including metadata.
|
||||
@ -110,14 +110,14 @@ namespace DeepSpeechClient.Interfaces
|
||||
/// <param name="stream">Instance of the stream to decode.</param>
|
||||
/// <param name="aNumResults">Maximum number of candidate transcripts to return. Returned list might be smaller than this.</param>
|
||||
/// <returns>The extended metadata result.</returns>
|
||||
unsafe Metadata IntermediateDecodeWithMetadata(DeepSpeechStream stream, uint aNumResults);
|
||||
unsafe Metadata IntermediateDecodeWithMetadata(MozillaVoiceSttStream stream, uint aNumResults);
|
||||
|
||||
/// <summary>
|
||||
/// Closes the ongoing streaming inference, returns the STT result over the whole audio signal.
|
||||
/// </summary>
|
||||
/// <param name="stream">Instance of the stream to finish.</param>
|
||||
/// <returns>The STT result.</returns>
|
||||
unsafe string FinishStream(DeepSpeechStream stream);
|
||||
unsafe string FinishStream(MozillaVoiceSttStream stream);
|
||||
|
||||
/// <summary>
|
||||
/// Closes the ongoing streaming inference, returns the STT result over the whole audio signal, including metadata.
|
||||
@ -125,6 +125,6 @@ namespace DeepSpeechClient.Interfaces
|
||||
/// <param name="stream">Instance of the stream to finish.</param>
|
||||
/// <param name="aNumResults">Maximum number of candidate transcripts to return. Returned list might be smaller than this.</param>
|
||||
/// <returns>The extended metadata result.</returns>
|
||||
unsafe Metadata FinishStreamWithMetadata(DeepSpeechStream stream, uint aNumResults);
|
||||
unsafe Metadata FinishStreamWithMetadata(MozillaVoiceSttStream stream, uint aNumResults);
|
||||
}
|
||||
}
|
@ -0,0 +1,130 @@
|
||||
using MozillaVoiceSttClient.Models;
|
||||
using System;
|
||||
using System.IO;
|
||||
|
||||
namespace MozillaVoiceSttClient.Interfaces
{
    /// <summary>
    /// Public contract of the Mozilla Voice STT client. Groups library/model
    /// queries, external scorer configuration, one-shot speech-to-text and
    /// streaming inference.
    /// </summary>
    public interface IMozillaVoiceSttModel : IDisposable
    {
        /// <summary>
        /// Gets the version of this library, expressed as a semantic version
        /// (SemVer 2.0.0).
        /// </summary>
        /// <returns>The library version.</returns>
        unsafe string Version();

        /// <summary>
        /// Gets the sample rate the model expects its input audio to have.
        /// </summary>
        /// <returns>Sample rate.</returns>
        unsafe int GetModelSampleRate();

        /// <summary>
        /// Gets the beam width currently used by the model. When
        /// <see cref="SetModelBeamWidth(uint)"/> has not been called beforehand,
        /// this is the default value loaded from the model file.
        /// </summary>
        /// <returns>Beam width value used by the model.</returns>
        unsafe uint GetModelBeamWidth();

        /// <summary>
        /// Sets the beam width used by the model.
        /// </summary>
        /// <param name="aBeamWidth">Beam width for the decoder. Larger values give better results at the cost of decoding time.</param>
        /// <exception cref="ArgumentException">Thrown on failure.</exception>
        unsafe void SetModelBeamWidth(uint aBeamWidth);

        /// <summary>
        /// Turns on decoding with an external scorer.
        /// </summary>
        /// <param name="aScorerPath">Path of the external scorer file.</param>
        /// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with an external scorer.</exception>
        /// <exception cref="FileNotFoundException">Thrown when the scorer file cannot be found.</exception>
        unsafe void EnableExternalScorer(string aScorerPath);

        /// <summary>
        /// Turns off decoding with an external scorer.
        /// </summary>
        /// <exception cref="ArgumentException">Thrown when an external scorer is not enabled.</exception>
        unsafe void DisableExternalScorer();

        /// <summary>
        /// Sets the alpha and beta hyperparameters of the external scorer.
        /// </summary>
        /// <param name="aAlpha">Alpha hyperparameter of the decoder (language model weight).</param>
        /// <param name="aBeta">Beta hyperparameter of the decoder (word insertion weight).</param>
        /// <exception cref="ArgumentException">Thrown when an external scorer is not enabled.</exception>
        unsafe void SetScorerAlphaBeta(float aAlpha, float aBeta);

        /// <summary>
        /// Runs Speech-To-Text on a complete audio buffer using the Mozilla Voice STT model.
        /// </summary>
        /// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
        /// <param name="aBufferSize">The number of samples in the audio signal.</param>
        /// <returns>The STT result. Returns NULL on error.</returns>
        unsafe string SpeechToText(short[] aBuffer, uint aBufferSize);

        /// <summary>
        /// Runs Speech-To-Text on a complete audio buffer using the Mozilla Voice STT model
        /// and returns the results together with their metadata.
        /// </summary>
        /// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
        /// <param name="aBufferSize">The number of samples in the audio signal.</param>
        /// <param name="aNumResults">Upper bound on the number of candidate transcripts to return; the returned list may be shorter.</param>
        /// <returns>The extended metadata. Returns NULL on error.</returns>
        unsafe Metadata SpeechToTextWithMetadata(short[] aBuffer, uint aBufferSize, uint aNumResults);

        /// <summary>
        /// Destroys a streaming state without decoding the computed logits.
        /// Useful when the result of an ongoing streaming inference is no longer
        /// needed and the costly decode operation should be avoided.
        /// </summary>
        /// <param name="stream">Instance of the stream to destroy.</param>
        unsafe void FreeStream(MozillaVoiceSttStream stream);

        /// <summary>
        /// Creates a new streaming inference state.
        /// </summary>
        /// <returns>The stream representing the newly created streaming state.</returns>
        unsafe MozillaVoiceSttStream CreateStream();

        /// <summary>
        /// Feeds audio samples to an ongoing streaming inference.
        /// </summary>
        /// <param name="stream">Instance of the stream to feed the data.</param>
        /// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on).</param>
        /// <param name="aBufferSize">The number of samples in <paramref name="aBuffer"/>.</param>
        unsafe void FeedAudioContent(MozillaVoiceSttStream stream, short[] aBuffer, uint aBufferSize);

        /// <summary>
        /// Computes the intermediate decoding of an ongoing streaming inference.
        /// </summary>
        /// <param name="stream">Instance of the stream to decode.</param>
        /// <returns>The STT intermediate result.</returns>
        unsafe string IntermediateDecode(MozillaVoiceSttStream stream);

        /// <summary>
        /// Computes the intermediate decoding of an ongoing streaming inference, including metadata.
        /// </summary>
        /// <param name="stream">Instance of the stream to decode.</param>
        /// <param name="aNumResults">Upper bound on the number of candidate transcripts to return; the returned list may be shorter.</param>
        /// <returns>The extended metadata result.</returns>
        unsafe Metadata IntermediateDecodeWithMetadata(MozillaVoiceSttStream stream, uint aNumResults);

        /// <summary>
        /// Closes the ongoing streaming inference and returns the STT result over the whole audio signal.
        /// </summary>
        /// <param name="stream">Instance of the stream to finish.</param>
        /// <returns>The STT result.</returns>
        unsafe string FinishStream(MozillaVoiceSttStream stream);

        /// <summary>
        /// Closes the ongoing streaming inference and returns the STT result over the whole audio signal, including metadata.
        /// </summary>
        /// <param name="stream">Instance of the stream to finish.</param>
        /// <param name="aNumResults">Upper bound on the number of candidate transcripts to return; the returned list may be shorter.</param>
        /// <returns>The extended metadata result.</returns>
        unsafe Metadata FinishStreamWithMetadata(MozillaVoiceSttStream stream, uint aNumResults);
    }
}
|
@ -1,4 +1,4 @@
|
||||
namespace DeepSpeechClient.Models
|
||||
namespace MozillaVoiceSttClient.Models
|
||||
{
|
||||
/// <summary>
|
||||
/// Stores the entire CTC output as an array of character metadata objects.
|
@ -1,19 +1,19 @@
|
||||
using System;
|
||||
|
||||
namespace DeepSpeechClient.Models
|
||||
namespace MozillaVoiceSttClient.Models
|
||||
{
|
||||
/// <summary>
|
||||
/// Wrapper of the pointer used for the decoding stream.
|
||||
/// </summary>
|
||||
public class DeepSpeechStream : IDisposable
|
||||
public class MozillaVoiceSttStream : IDisposable
|
||||
{
|
||||
private unsafe IntPtr** _streamingStatePp;
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of <see cref="DeepSpeechStream"/>.
|
||||
/// Initializes a new instance of <see cref="MozillaVoiceSttStream"/>.
|
||||
/// </summary>
|
||||
/// <param name="streamingStatePP">Native pointer of the native stream.</param>
|
||||
public unsafe DeepSpeechStream(IntPtr** streamingStatePP)
|
||||
public unsafe MozillaVoiceSttStream(IntPtr** streamingStatePP)
|
||||
{
|
||||
_streamingStatePp = streamingStatePP;
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
namespace DeepSpeechClient.Models
|
||||
namespace MozillaVoiceSttClient.Models
|
||||
{
|
||||
/// <summary>
|
||||
/// Stores the entire CTC output as an array of character metadata objects.
|
@ -1,4 +1,4 @@
|
||||
namespace DeepSpeechClient.Models
|
||||
namespace MozillaVoiceSttClient.Models
|
||||
{
|
||||
/// <summary>
|
||||
/// Stores each individual character, along with its timing information.
|
@ -1,34 +1,34 @@
|
||||
using DeepSpeechClient.Interfaces;
|
||||
using DeepSpeechClient.Extensions;
|
||||
using MozillaVoiceSttClient.Interfaces;
using MozillaVoiceSttClient.Extensions;
|
||||
|
||||
using System;
|
||||
using System.IO;
|
||||
using DeepSpeechClient.Enums;
|
||||
using DeepSpeechClient.Models;
|
||||
using MozillaVoiceSttClient.Enums;
using MozillaVoiceSttClient.Models;
|
||||
|
||||
namespace DeepSpeechClient
|
||||
namespace MozillaVoiceSttClient
|
||||
{
|
||||
/// <summary>
|
||||
/// Concrete implementation of <see cref="DeepSpeechClient.Interfaces.IDeepSpeech"/>.
|
||||
/// Concrete implementation of <see cref="MozillaVoiceSttClient.Interfaces.IMozillaVoiceSttModel"/>.
|
||||
/// </summary>
|
||||
public class DeepSpeech : IDeepSpeech
|
||||
public class MozillaVoiceSttModel : IMozillaVoiceSttModel
|
||||
{
|
||||
private unsafe IntPtr** _modelStatePP;
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of <see cref="DeepSpeech"/> class and creates a new acoustic model.
|
||||
/// Initializes a new instance of <see cref="MozillaVoiceSttModel"/> class and creates a new acoustic model.
|
||||
/// </summary>
|
||||
/// <param name="aModelPath">The path to the frozen model graph.</param>
|
||||
/// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
|
||||
public DeepSpeech(string aModelPath)
|
||||
public MozillaVoiceSttModel(string aModelPath)
|
||||
{
|
||||
CreateModel(aModelPath);
|
||||
}
|
||||
|
||||
#region IDeepSpeech
|
||||
#region IMozillaVoiceSttModel
|
||||
|
||||
/// <summary>
|
||||
/// Create an object providing an interface to a trained DeepSpeech model.
|
||||
/// Create an object providing an interface to a trained Mozilla Voice STT model.
|
||||
/// </summary>
|
||||
/// <param name="aModelPath">The path to the frozen model graph.</param>
|
||||
/// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
|
||||
@ -153,7 +153,7 @@ namespace DeepSpeechClient
|
||||
/// </summary>
|
||||
/// <param name="stream">Instance of the stream to feed the data.</param>
|
||||
/// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on).</param>
|
||||
public unsafe void FeedAudioContent(DeepSpeechStream stream, short[] aBuffer, uint aBufferSize)
|
||||
public unsafe void FeedAudioContent(MozillaVoiceSttStream stream, short[] aBuffer, uint aBufferSize)
|
||||
{
|
||||
NativeImp.STT_FeedAudioContent(stream.GetNativePointer(), aBuffer, aBufferSize);
|
||||
}
|
||||
@ -163,7 +163,7 @@ namespace DeepSpeechClient
|
||||
/// </summary>
|
||||
/// <param name="stream">Instance of the stream to finish.</param>
|
||||
/// <returns>The STT result.</returns>
|
||||
public unsafe string FinishStream(DeepSpeechStream stream)
|
||||
public unsafe string FinishStream(MozillaVoiceSttStream stream)
|
||||
{
|
||||
return NativeImp.STT_FinishStream(stream.GetNativePointer()).PtrToString();
|
||||
}
|
||||
@ -174,7 +174,7 @@ namespace DeepSpeechClient
|
||||
/// <param name="stream">Instance of the stream to finish.</param>
|
||||
/// <param name="aNumResults">Maximum number of candidate transcripts to return. Returned list might be smaller than this.</param>
|
||||
/// <returns>The extended metadata result.</returns>
|
||||
public unsafe Metadata FinishStreamWithMetadata(DeepSpeechStream stream, uint aNumResults)
|
||||
public unsafe Metadata FinishStreamWithMetadata(MozillaVoiceSttStream stream, uint aNumResults)
|
||||
{
|
||||
return NativeImp.STT_FinishStreamWithMetadata(stream.GetNativePointer(), aNumResults).PtrToMetadata();
|
||||
}
|
||||
@ -184,7 +184,7 @@ namespace DeepSpeechClient
|
||||
/// </summary>
|
||||
/// <param name="stream">Instance of the stream to decode.</param>
|
||||
/// <returns>The STT intermediate result.</returns>
|
||||
public unsafe string IntermediateDecode(DeepSpeechStream stream)
|
||||
public unsafe string IntermediateDecode(MozillaVoiceSttStream stream)
|
||||
{
|
||||
return NativeImp.STT_IntermediateDecode(stream.GetNativePointer()).PtrToString();
|
||||
}
|
||||
@ -195,7 +195,7 @@ namespace DeepSpeechClient
|
||||
/// <param name="stream">Instance of the stream to decode.</param>
|
||||
/// <param name="aNumResults">Maximum number of candidate transcripts to return. Returned list might be smaller than this.</param>
|
||||
/// <returns>The extended metadata result.</returns>
|
||||
public unsafe Metadata IntermediateDecodeWithMetadata(DeepSpeechStream stream, uint aNumResults)
|
||||
public unsafe Metadata IntermediateDecodeWithMetadata(MozillaVoiceSttStream stream, uint aNumResults)
|
||||
{
|
||||
return NativeImp.STT_IntermediateDecodeWithMetadata(stream.GetNativePointer(), aNumResults).PtrToMetadata();
|
||||
}
|
||||
@ -212,12 +212,12 @@ namespace DeepSpeechClient
|
||||
/// <summary>
|
||||
/// Creates a new streaming inference state.
|
||||
/// </summary>
|
||||
public unsafe DeepSpeechStream CreateStream()
|
||||
public unsafe MozillaVoiceSttStream CreateStream()
|
||||
{
|
||||
IntPtr** streamingStatePointer = null;
|
||||
var resultCode = NativeImp.STT_CreateStream(_modelStatePP, ref streamingStatePointer);
|
||||
EvaluateResultCode(resultCode);
|
||||
return new DeepSpeechStream(streamingStatePointer);
|
||||
return new MozillaVoiceSttStream(streamingStatePointer);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@ -225,14 +225,14 @@ namespace DeepSpeechClient
|
||||
/// This can be used if you no longer need the result of an ongoing streaming
|
||||
/// inference and don't want to perform a costly decode operation.
|
||||
/// </summary>
|
||||
public unsafe void FreeStream(DeepSpeechStream stream)
|
||||
public unsafe void FreeStream(MozillaVoiceSttStream stream)
|
||||
{
|
||||
NativeImp.STT_FreeStream(stream.GetNativePointer());
|
||||
stream.Dispose();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Use the DeepSpeech model to perform Speech-To-Text.
|
||||
/// Use the Mozilla Voice STT model to perform Speech-To-Text.
|
||||
/// </summary>
|
||||
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
|
||||
/// <param name="aBufferSize">The number of samples in the audio signal.</param>
|
||||
@ -243,7 +243,7 @@ namespace DeepSpeechClient
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Use the DeepSpeech model to perform Speech-To-Text, return results including metadata.
|
||||
/// Use the Mozilla Voice STT model to perform Speech-To-Text, return results including metadata.
|
||||
/// </summary>
|
||||
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
|
||||
/// <param name="aBufferSize">The number of samples in the audio signal.</param>
|
@ -1,9 +1,9 @@
|
||||
using DeepSpeechClient.Enums;
|
||||
using MozillaVoiceSttClient.Enums;
|
||||
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace DeepSpeechClient
|
||||
namespace MozillaVoiceSttClient
|
||||
{
|
||||
/// <summary>
|
||||
/// Wrapper for the native implementation of "libmozilla_voice_stt.so"
|
@ -1,7 +1,7 @@
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace DeepSpeechClient.Structs
|
||||
namespace MozillaVoiceSttClient.Structs
|
||||
{
|
||||
[StructLayout(LayoutKind.Sequential)]
|
||||
internal unsafe struct CandidateTranscript
|
@ -1,7 +1,7 @@
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace DeepSpeechClient.Structs
|
||||
namespace MozillaVoiceSttClient.Structs
|
||||
{
|
||||
[StructLayout(LayoutKind.Sequential)]
|
||||
internal unsafe struct Metadata
|
@ -1,7 +1,7 @@
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace DeepSpeechClient.Structs
|
||||
namespace MozillaVoiceSttClient.Structs
|
||||
{
|
||||
[StructLayout(LayoutKind.Sequential)]
|
||||
internal unsafe struct TokenMetadata
|
@ -6,8 +6,8 @@
|
||||
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
|
||||
<ProjectGuid>{312965E5-C4F6-4D95-BA64-79906B8BC7AC}</ProjectGuid>
|
||||
<OutputType>Exe</OutputType>
|
||||
<RootNamespace>DeepSpeechConsole</RootNamespace>
|
||||
<AssemblyName>DeepSpeechConsole</AssemblyName>
|
||||
<RootNamespace>MozillaVoiceSttConsole</RootNamespace>
|
||||
<AssemblyName>MozillaVoiceSttConsole</AssemblyName>
|
||||
<TargetFrameworkVersion>v4.6.2</TargetFrameworkVersion>
|
||||
<FileAlignment>512</FileAlignment>
|
||||
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
|
||||
@ -56,9 +56,9 @@
|
||||
<None Include="packages.config" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\DeepSpeechClient\DeepSpeechClient.csproj">
|
||||
<ProjectReference Include="..\MozillaVoiceSttClient\MozillaVoiceSttClient.csproj">
|
||||
<Project>{56DE4091-BBBE-47E4-852D-7268B33B971F}</Project>
|
||||
<Name>DeepSpeechClient</Name>
|
||||
<Name>MozillaVoiceSttClient</Name>
|
||||
</ProjectReference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
@ -1,6 +1,6 @@
|
||||
using DeepSpeechClient;
|
||||
using DeepSpeechClient.Interfaces;
|
||||
using DeepSpeechClient.Models;
|
||||
using MozillaVoiceSttClient;
|
||||
using MozillaVoiceSttClient.Interfaces;
|
||||
using MozillaVoiceSttClient.Models;
|
||||
using NAudio.Wave;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
@ -52,7 +52,7 @@ namespace CSharpExamples
|
||||
Console.WriteLine("Loading model...");
|
||||
stopwatch.Start();
|
||||
// sphinx-doc: csharp_ref_model_start
|
||||
using (IDeepSpeech sttClient = new DeepSpeech(model ?? "output_graph.pbmm"))
|
||||
using (IMozillaVoiceSttModel sttClient = new MozillaVoiceSttModel(model ?? "output_graph.pbmm"))
|
||||
{
|
||||
// sphinx-doc: csharp_ref_model_stop
|
||||
stopwatch.Stop();
|
@ -5,7 +5,7 @@ using System.Runtime.InteropServices;
|
||||
// General Information about an assembly is controlled through the following
|
||||
// set of attributes. Change these attribute values to modify the information
|
||||
// associated with an assembly.
|
||||
[assembly: AssemblyTitle("DeepSpeechConsole")]
|
||||
[assembly: AssemblyTitle("MozillaVoiceSttConsole")]
|
||||
[assembly: AssemblyDescription("")]
|
||||
[assembly: AssemblyConfiguration("")]
|
||||
[assembly: AssemblyCompany("")]
|
@ -1,8 +1,8 @@
|
||||
<Application
|
||||
x:Class="DeepSpeechWPF.App"
|
||||
x:Class="MozillaVoiceSttWPF.App"
|
||||
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
|
||||
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
|
||||
xmlns:local="clr-namespace:DeepSpeechWPF"
|
||||
xmlns:local="clr-namespace:MozillaVoiceSttWPF"
|
||||
StartupUri="MainWindow.xaml">
|
||||
<Application.Resources />
|
||||
</Application>
|
@ -1,10 +1,10 @@
|
||||
using CommonServiceLocator;
|
||||
using DeepSpeech.WPF.ViewModels;
|
||||
using DeepSpeechClient.Interfaces;
|
||||
using MozillaVoiceStt.WPF.ViewModels;
|
||||
using MozillaVoiceSttClient.Interfaces;
|
||||
using GalaSoft.MvvmLight.Ioc;
|
||||
using System.Windows;
|
||||
|
||||
namespace DeepSpeechWPF
|
||||
namespace MozillaVoiceSttWPF
|
||||
{
|
||||
/// <summary>
|
||||
/// Interaction logic for App.xaml
|
||||
@ -18,11 +18,11 @@ namespace DeepSpeechWPF
|
||||
|
||||
try
|
||||
{
|
||||
//Register instance of DeepSpeech
|
||||
DeepSpeechClient.DeepSpeech deepSpeechClient =
|
||||
new DeepSpeechClient.DeepSpeech("deepspeech-0.8.0-models.pbmm");
|
||||
//Register instance of Mozilla Voice STT
|
||||
// MozillaVoiceSttModel is the concrete client class that implements IMozillaVoiceSttModel.
MozillaVoiceSttClient.MozillaVoiceSttModel client =
    new MozillaVoiceSttClient.MozillaVoiceSttModel("deepspeech-0.8.0-models.pbmm");
|
||||
|
||||
SimpleIoc.Default.Register<IDeepSpeech>(() => deepSpeechClient);
|
||||
SimpleIoc.Default.Register<IMozillaVoiceSttModel>(() => client);
|
||||
SimpleIoc.Default.Register<MainWindowViewModel>();
|
||||
}
|
||||
catch (System.Exception ex)
|
||||
@ -35,8 +35,8 @@ namespace DeepSpeechWPF
|
||||
protected override void OnExit(ExitEventArgs e)
|
||||
{
|
||||
base.OnExit(e);
|
||||
//Dispose instance of DeepSpeech
|
||||
ServiceLocator.Current.GetInstance<IDeepSpeech>()?.Dispose();
|
||||
//Dispose instance of Mozilla Voice STT
|
||||
ServiceLocator.Current.GetInstance<IMozillaVoiceSttModel>()?.Dispose();
|
||||
}
|
||||
}
|
||||
}
|
@ -1,10 +1,10 @@
|
||||
<Window
|
||||
x:Class="DeepSpeechWPF.MainWindow"
|
||||
x:Class="MozillaVoiceSttWPF.MainWindow"
|
||||
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
|
||||
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
|
||||
xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
|
||||
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
|
||||
Title="Deepspeech client"
|
||||
Title="Mozilla Voice STT Client"
|
||||
Width="800"
|
||||
Height="600"
|
||||
Loaded="Window_Loaded"
|
@ -1,8 +1,8 @@
|
||||
using CommonServiceLocator;
|
||||
using DeepSpeech.WPF.ViewModels;
|
||||
using MozillaVoiceStt.WPF.ViewModels;
|
||||
using System.Windows;
|
||||
|
||||
namespace DeepSpeechWPF
|
||||
namespace MozillaVoiceSttWPF
|
||||
{
|
||||
/// <summary>
|
||||
/// Interaction logic for MainWindow.xaml
|
@ -6,8 +6,8 @@
|
||||
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
|
||||
<ProjectGuid>{54BFD766-4305-4F4C-BA59-AF45505DF3C1}</ProjectGuid>
|
||||
<OutputType>WinExe</OutputType>
|
||||
<RootNamespace>DeepSpeech.WPF</RootNamespace>
|
||||
<AssemblyName>DeepSpeech.WPF</AssemblyName>
|
||||
<RootNamespace>MozillaVoiceStt.WPF</RootNamespace>
|
||||
<AssemblyName>MozillaVoiceStt.WPF</AssemblyName>
|
||||
<TargetFrameworkVersion>v4.6.2</TargetFrameworkVersion>
|
||||
<FileAlignment>512</FileAlignment>
|
||||
<ProjectTypeGuids>{60dc8134-eba5-43b8-bcc9-bb4bc16c2548};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
|
||||
@ -131,9 +131,9 @@
|
||||
<None Include="App.config" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\DeepSpeechClient\DeepSpeechClient.csproj">
|
||||
<ProjectReference Include="..\MozillaVoiceSttClient\MozillaVoiceSttClient.csproj">
|
||||
<Project>{56de4091-bbbe-47e4-852d-7268b33b971f}</Project>
|
||||
<Name>DeepSpeechClient</Name>
|
||||
<Name>MozillaVoiceSttClient</Name>
|
||||
</ProjectReference>
|
||||
</ItemGroup>
|
||||
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
|
@ -3,9 +3,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 15
|
||||
VisualStudioVersion = 15.0.28307.421
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeech.WPF", "DeepSpeech.WPF.csproj", "{54BFD766-4305-4F4C-BA59-AF45505DF3C1}"
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MozillaVoiceStt.WPF", "MozillaVoiceStt.WPF.csproj", "{54BFD766-4305-4F4C-BA59-AF45505DF3C1}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechClient", "..\DeepSpeechClient\DeepSpeechClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}"
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MozillaVoiceSttClient", "..\MozillaVoiceSttClient\MozillaVoiceSttClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
@ -7,11 +7,11 @@ using System.Windows;
|
||||
// General Information about an assembly is controlled through the following
|
||||
// set of attributes. Change these attribute values to modify the information
|
||||
// associated with an assembly.
|
||||
[assembly: AssemblyTitle("DeepSpeech.WPF")]
|
||||
[assembly: AssemblyTitle("MozillaVoiceStt.WPF")]
|
||||
[assembly: AssemblyDescription("")]
|
||||
[assembly: AssemblyConfiguration("")]
|
||||
[assembly: AssemblyCompany("")]
|
||||
[assembly: AssemblyProduct("DeepSpeech.WPF.SingleFiles")]
|
||||
[assembly: AssemblyProduct("MozillaVoiceStt.WPF.SingleFiles")]
|
||||
[assembly: AssemblyCopyright("Copyright © 2018")]
|
||||
[assembly: AssemblyTrademark("")]
|
||||
[assembly: AssemblyCulture("")]
|
@ -8,7 +8,7 @@
|
||||
// </auto-generated>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
namespace DeepSpeech.WPF.Properties {
|
||||
namespace MozillaVoiceStt.WPF.Properties {
|
||||
using System;
|
||||
|
||||
|
||||
@ -39,7 +39,7 @@ namespace DeepSpeech.WPF.Properties {
|
||||
internal static global::System.Resources.ResourceManager ResourceManager {
|
||||
get {
|
||||
if (object.ReferenceEquals(resourceMan, null)) {
|
||||
global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("DeepSpeech.WPF.Properties.Resources", typeof(Resources).Assembly);
|
||||
global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("MozillaVoiceStt.WPF.Properties.Resources", typeof(Resources).Assembly);
|
||||
resourceMan = temp;
|
||||
}
|
||||
return resourceMan;
|
@ -8,7 +8,7 @@
|
||||
// </auto-generated>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
namespace DeepSpeech.WPF.Properties {
|
||||
namespace MozillaVoiceStt.WPF.Properties {
|
||||
|
||||
|
||||
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
|
@ -3,7 +3,7 @@ using System.Collections.Generic;
|
||||
using System.ComponentModel;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace DeepSpeech.WPF.ViewModels
|
||||
namespace MozillaVoiceStt.WPF.ViewModels
|
||||
{
|
||||
/// <summary>
|
||||
/// Implementation of <see cref="INotifyPropertyChanged"/> to simplify models.
|
@ -3,8 +3,8 @@ using CSCore;
|
||||
using CSCore.CoreAudioAPI;
|
||||
using CSCore.SoundIn;
|
||||
using CSCore.Streams;
|
||||
using DeepSpeechClient.Interfaces;
|
||||
using DeepSpeechClient.Models;
|
||||
using MozillaVoiceSttClient.Interfaces;
|
||||
using MozillaVoiceSttClient.Models;
|
||||
using GalaSoft.MvvmLight.CommandWpf;
|
||||
using Microsoft.Win32;
|
||||
using System;
|
||||
@ -15,7 +15,7 @@ using System.IO;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace DeepSpeech.WPF.ViewModels
|
||||
namespace MozillaVoiceStt.WPF.ViewModels
|
||||
{
|
||||
/// <summary>
|
||||
/// View model of the MainWindow View.
|
||||
@ -27,7 +27,7 @@ namespace DeepSpeech.WPF.ViewModels
|
||||
private const string ScorerPath = "kenlm.scorer";
|
||||
#endregion
|
||||
|
||||
private readonly IDeepSpeech _sttClient;
|
||||
private readonly IMozillaVoiceSttModel _sttClient;
|
||||
|
||||
#region Commands
|
||||
/// <summary>
|
||||
@ -62,7 +62,7 @@ namespace DeepSpeech.WPF.ViewModels
|
||||
/// <summary>
|
||||
/// Stream used to feed data into the acoustic model.
|
||||
/// </summary>
|
||||
private DeepSpeechStream _sttStream;
|
||||
private MozillaVoiceSttStream _sttStream;
|
||||
|
||||
/// <summary>
|
||||
/// Records the audio of the selected device.
|
||||
@ -75,7 +75,7 @@ namespace DeepSpeech.WPF.ViewModels
|
||||
private SoundInSource _soundInSource;
|
||||
|
||||
/// <summary>
|
||||
/// Target wave source.(16KHz Mono 16bit for DeepSpeech)
|
||||
/// Target wave source (16 kHz, mono, 16-bit, as required by Mozilla Voice STT).
|
||||
/// </summary>
|
||||
private IWaveSource _convertedSource;
|
||||
|
||||
@ -200,7 +200,7 @@ namespace DeepSpeech.WPF.ViewModels
|
||||
#endregion
|
||||
|
||||
#region Ctors
|
||||
public MainWindowViewModel(IDeepSpeech sttClient)
|
||||
public MainWindowViewModel(IMozillaVoiceSttModel sttClient)
|
||||
{
|
||||
_sttClient = sttClient;
|
||||
|
||||
@ -290,7 +290,8 @@ namespace DeepSpeech.WPF.ViewModels
|
||||
//read data from the convertedSource
|
||||
//important: don't use the e.Data here
|
||||
//the e.Data contains the raw data provided by the
|
||||
//soundInSource which won't have the deepspeech required audio format
|
||||
//soundInSource which won't have the Mozilla Voice STT required
|
||||
// audio format
|
||||
byte[] buffer = new byte[_convertedSource.WaveFormat.BytesPerSecond / 2];
|
||||
|
||||
int read;
|
@ -1,8 +1,8 @@
|
||||
|
||||
Building DeepSpeech native client for Windows
|
||||
Building Mozilla Voice STT native client for Windows
|
||||
====================================================
|
||||
|
||||
Now we can build the native client of DeepSpeech and run inference on Windows using the C# client, to do that we need to compile the ``native_client``.
|
||||
Now we can build the native client of Mozilla Voice STT and run inference on Windows using the C# client. To do that, we need to compile the ``native_client``.
|
||||
|
||||
**Table of Contents**
|
||||
|
||||
@ -59,8 +59,8 @@ There should already be a symbolic link, for this example let's suppose that we
|
||||
|
||||
.
|
||||
├── D:\
|
||||
│ ├── cloned # Contains DeepSpeech and tensorflow side by side
|
||||
│ │ └── DeepSpeech # Root of the cloned DeepSpeech
|
||||
│ ├── cloned # Contains Mozilla Voice STT and tensorflow side by side
|
||||
│ │ └── DeepSpeech # Root of the cloned Mozilla Voice STT
|
||||
│ │ ├── tensorflow # Root of the cloned Mozilla's tensorflow
|
||||
└── ...
|
||||
|
||||
@ -142,4 +142,4 @@ Be patient, if you enabled AVX/AVX2 and CUDA it will take a long time. Finally y
|
||||
Using the generated library
|
||||
---------------------------
|
||||
|
||||
As for now we can only use the generated ``libmozilla_voice_stt.so`` with the C# clients, go to `native_client/dotnet/ <https://github.com/mozilla/DeepSpeech/tree/master/native_client/dotnet>`_ in your DeepSpeech directory and open the Visual Studio solution, then we need to build in debug or release mode, finally we just need to copy ``libmozilla_voice_stt.so`` to the generated ``x64/Debug`` or ``x64/Release`` directory.
|
||||
For now, the generated ``libmozilla_voice_stt.so`` can only be used with the C# clients. Go to `native_client/dotnet/ <https://github.com/mozilla/DeepSpeech/tree/master/native_client/dotnet>`_ in your Mozilla Voice STT directory and open the Visual Studio solution, then build it in debug or release mode. Finally, copy ``libmozilla_voice_stt.so`` to the generated ``x64/Debug`` or ``x64/Release`` directory.
|
||||
|
@ -3,13 +3,13 @@
|
||||
<metadata>
|
||||
<id>$NUPKG_ID</id>
|
||||
<version>$NUPKG_VERSION</version>
|
||||
<title>Mozilla_Voice_STT</title>
|
||||
<title>Mozilla.Voice.STT</title>
|
||||
<authors>Mozilla</authors>
|
||||
<owners>Mozilla</owners>
|
||||
<license type="expression">MPL-2.0</license>
|
||||
<projectUrl>http://github.com/mozilla/DeepSpeech</projectUrl>
|
||||
<requireLicenseAcceptance>false</requireLicenseAcceptance>
|
||||
<description>A library for running inference with a DeepSpeech model</description>
|
||||
<description>A library for running inference with a Mozilla Voice STT model</description>
|
||||
<copyright>Copyright (c) 2019 Mozilla Corporation</copyright>
|
||||
<tags>native speech speech_recognition</tags>
|
||||
</metadata>
|
||||
|
Loading…
x
Reference in New Issue
Block a user