Update .NET bindings and client
parent bc6741cd41
commit a8c53d2154
@@ -32,13 +32,11 @@ namespace DeepSpeechClient
        /// Create an object providing an interface to a trained DeepSpeech model.
        /// </summary>
        /// <param name="aModelPath">The path to the frozen model graph.</param>
-       /// <param name="aNCep">The number of cepstrum the model was trained with.</param>
-       /// <param name="aNContext">The context window the model was trained with.</param>
        /// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
        /// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
        /// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
-       public unsafe void CreateModel(string aModelPath, uint aNCep,
-           uint aNContext, string aAlphabetConfigPath, uint aBeamWidth)
+       public unsafe void CreateModel(string aModelPath,
+           string aAlphabetConfigPath, uint aBeamWidth)
        {
            string exceptionMessage = null;
            if (string.IsNullOrWhiteSpace(aModelPath))
@@ -63,8 +61,6 @@ namespace DeepSpeechClient
                throw new FileNotFoundException(exceptionMessage);
            }
            var resultCode = NativeImp.DS_CreateModel(aModelPath,
-                           aNCep,
-                           aNContext,
                            aAlphabetConfigPath,
                            aBeamWidth,
                            ref _modelStatePP);
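For callers, the two training-geometry arguments simply go away; a minimal before/after sketch against the new signature (sttClient is the client instance used in the example program further down, and the paths and 500 beam width are that example's own defaults):

    // Before this commit:
    // sttClient.CreateModel("output_graph.pbmm", 26, 9, "alphabet.txt", 500);
    // After this commit, using the three-argument signature shown above:
    sttClient.CreateModel("output_graph.pbmm",   // aModelPath
                          "alphabet.txt",        // aAlphabetConfigPath
                          500);                  // aBeamWidth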
@@ -116,20 +112,18 @@ namespace DeepSpeechClient
        /// </summary>
        public unsafe void Dispose()
        {
-           NativeImp.DS_DestroyModel(_modelStatePP);
+           NativeImp.DS_FreeModel(_modelStatePP);
        }

        /// <summary>
        /// Enable decoding using beam scoring with a KenLM language model.
        /// </summary>
-       /// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
        /// <param name="aLMPath">The path to the language model binary file.</param>
        /// <param name="aTriePath">The path to the trie file build from the same vocabulary as the language model binary.</param>
        /// <param name="aLMAlpha">The alpha hyperparameter of the CTC decoder. Language Model weight.</param>
        /// <param name="aLMBeta">The beta hyperparameter of the CTC decoder. Word insertion weight.</param>
        /// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with a language model.</exception>
-       public unsafe void EnableDecoderWithLM(string aAlphabetConfigPath,
-           string aLMPath, string aTriePath,
+       public unsafe void EnableDecoderWithLM(string aLMPath, string aTriePath,
            float aLMAlpha, float aLMBeta)
        {
            string exceptionMessage = null;
@@ -148,7 +142,6 @@ namespace DeepSpeechClient
            }

            var resultCode = NativeImp.DS_EnableDecoderWithLM(_modelStatePP,
-                           aAlphabetConfigPath,
                            aLMPath,
                            aTriePath,
                            aLMAlpha,
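The language-model setup drops its alphabet argument as well, presumably because the model created above already carries the alphabet; a hedged before/after sketch using the constants from the example client further down:

    // Before this commit:
    // sttClient.EnableDecoderWithLM("alphabet.txt", "lm.binary", "trie", 0.75f, 1.85f);
    // After this commit:
    sttClient.EnableDecoderWithLM("lm.binary",   // aLMPath
                                  "trie",        // aTriePath
                                  0.75f,         // aLMAlpha, language model weight
                                  1.85f);        // aLMBeta, word insertion weight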
@@ -206,9 +199,9 @@ namespace DeepSpeechClient
        /// </summary>
        /// <param name="aSampleRate">The sample-rate of the audio signal</param>
        /// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
-       public unsafe void SetupStream(uint aSampleRate)
+       public unsafe void CreateStream(uint aSampleRate)
        {
-           var resultCode = NativeImp.DS_SetupStream(_modelStatePP, aSampleRate, ref _streamingStatePP);
+           var resultCode = NativeImp.DS_CreateStream(_modelStatePP, aSampleRate, ref _streamingStatePP);
            EvaluateResultCode(resultCode);
        }

@@ -217,9 +210,9 @@ namespace DeepSpeechClient
        /// This can be used if you no longer need the result of an ongoing streaming
        /// inference and don't want to perform a costly decode operation.
        /// </summary>
-       public unsafe void DiscardStream()
+       public unsafe void FreeStream()
        {
-           NativeImp.DS_DiscardStream(ref _streamingStatePP);
+           NativeImp.DS_FreeStream(ref _streamingStatePP);
        }

        /// <summary>

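With SetupStream and DiscardStream renamed to CreateStream and FreeStream, a caller that starts and then abandons a stream now reads roughly as follows (a sketch; only the two renamed members come from this diff, and the 16000 Hz sample rate is an assumption):

    sttClient.CreateStream(16000);   // was: sttClient.SetupStream(16000);
    // ... feed audio into the ongoing stream here ...
    sttClient.FreeStream();          // was: sttClient.DiscardStream(); drops the stream without decoding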
@@ -17,27 +17,22 @@ namespace DeepSpeechClient.Interfaces
        /// Create an object providing an interface to a trained DeepSpeech model.
        /// </summary>
        /// <param name="aModelPath">The path to the frozen model graph.</param>
-       /// <param name="aNCep">The number of cepstrum the model was trained with.</param>
-       /// <param name="aNContext">The context window the model was trained with.</param>
        /// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
        /// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
        /// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
-       unsafe void CreateModel(string aModelPath, uint aNCep,
-           uint aNContext,
+       unsafe void CreateModel(string aModelPath,
            string aAlphabetConfigPath,
            uint aBeamWidth);

        /// <summary>
        /// Enable decoding using beam scoring with a KenLM language model.
        /// </summary>
-       /// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
        /// <param name="aLMPath">The path to the language model binary file.</param>
        /// <param name="aTriePath">The path to the trie file build from the same vocabulary as the language model binary.</param>
        /// <param name="aLMAlpha">The alpha hyperparameter of the CTC decoder. Language Model weight.</param>
        /// <param name="aLMBeta">The beta hyperparameter of the CTC decoder. Word insertion weight.</param>
        /// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with a language model.</exception>
-       unsafe void EnableDecoderWithLM(string aAlphabetConfigPath,
-           string aLMPath,
+       unsafe void EnableDecoderWithLM(string aLMPath,
            string aTriePath,
            float aLMAlpha,
            float aLMBeta);
@@ -69,7 +64,7 @@ namespace DeepSpeechClient.Interfaces
        /// This can be used if you no longer need the result of an ongoing streaming
        /// inference and don't want to perform a costly decode operation.
        /// </summary>
-       unsafe void DiscardStream();
+       unsafe void FreeStream();

        /// <summary>
        /// Free a DeepSpeech allocated string
@@ -86,7 +81,7 @@ namespace DeepSpeechClient.Interfaces
        /// </summary>
        /// <param name="aSampleRate">The sample-rate of the audio signal</param>
        /// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
-       unsafe void SetupStream(uint aSampleRate);
+       unsafe void CreateStream(uint aSampleRate);

        /// <summary>
        /// Feeds audio samples to an ongoing streaming inference.

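Code that talks to the binding through the IDeepSpeech interface picks up the same signature change and renames; a small hedged sketch of interface-typed usage (the surrounding class, the paths, and the sample rate are assumptions, while the member names come from the hunks above):

    using DeepSpeechClient.Interfaces;

    // A hypothetical helper inside any caller class:
    static void InitAndProbe(IDeepSpeech client)
    {
        client.CreateModel("output_graph.pbmm", "alphabet.txt", 500);
        client.CreateStream(16000);   // renamed from SetupStream
        client.FreeStream();          // renamed from DiscardStream
    }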
@@ -17,15 +17,12 @@ namespace DeepSpeechClient

        [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
        internal unsafe static extern ErrorCodes DS_CreateModel(string aModelPath,
-                  uint aNCep,
-                  uint aNContext,
                   string aAlphabetConfigPath,
                   uint aBeamWidth,
                   ref ModelState** pint);

        [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
        internal static unsafe extern ErrorCodes DS_EnableDecoderWithLM(ModelState** aCtx,
-                  string aAlphabetConfigPath,
                   string aLMPath,
                   string aTriePath,
                   float aLMAlpha,
@@ -45,14 +42,14 @@ namespace DeepSpeechClient
                   uint aSampleRate);

        [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
-       internal static unsafe extern void DS_DestroyModel(ModelState** aCtx);
+       internal static unsafe extern void DS_FreeModel(ModelState** aCtx);

        [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
-       internal static unsafe extern ErrorCodes DS_SetupStream(ModelState** aCtx,
+       internal static unsafe extern ErrorCodes DS_CreateStream(ModelState** aCtx,
                   uint aSampleRate, ref StreamingState** retval);

        [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
-       internal static unsafe extern void DS_DiscardStream(ref StreamingState** aSctx);
+       internal static unsafe extern void DS_FreeStream(ref StreamingState** aSctx);

        [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
        internal static unsafe extern void DS_FreeMetadata(IntPtr metadata);

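Because these renames only change which exported symbols of libdeepspeech.so the externs bind to, mismatches between the managed wrapper and an older native library will not surface until the first call. A hedged sketch of an early check, run from inside the binding assembly since NativeImp is internal (PrelinkAll and the exception type are standard interop behavior, not something this diff adds):

    using System.Runtime.InteropServices;

    // Forces the CLR to resolve every DllImport on NativeImp up front, so a stale
    // libdeepspeech.so that still exports DS_SetupStream/DS_DiscardStream should
    // fail fast (typically with an EntryPointNotFoundException) rather than later.
    Marshal.PrelinkAll(typeof(DeepSpeechClient.NativeImp));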
@@ -7,6 +7,8 @@ using GraphDef = System.IntPtr;

namespace DeepSpeechClient.Structs
{
+   //FIXME: ModelState is an opaque pointer to the API, why is this code reverse
+   // engineering its contents?
    [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi)]
    public unsafe struct ModelState
    {

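The new FIXME points out that ModelState is opaque to the C API, so its managed mirror should not need to re-declare the native layout. A purely illustrative alternative, following the alias style this file already uses for GraphDef, would be an opaque handle rather than a struct (this is only a sketch of the FIXME's point, not part of the commit):

    // Hypothetical: treat the native model state as an opaque handle.
    using ModelState = System.IntPtr;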
@@ -50,8 +50,6 @@ namespace CSharpExamples
                extended = !string.IsNullOrWhiteSpace(GetArgument(args, "--extended"));
            }

-           const uint N_CEP = 26;
-           const uint N_CONTEXT = 9;
            const uint BEAM_WIDTH = 500;
            const float LM_ALPHA = 0.75f;
            const float LM_BETA = 1.85f;
@@ -66,7 +64,6 @@ namespace CSharpExamples
            stopwatch.Start();
            sttClient.CreateModel(
                model ?? "output_graph.pbmm",
-               N_CEP, N_CONTEXT,
                alphabet ?? "alphabet.txt",
                BEAM_WIDTH);
            stopwatch.Stop();
@@ -77,7 +74,6 @@ namespace CSharpExamples
            {
                Console.WriteLine("Loadin LM...");
                sttClient.EnableDecoderWithLM(
-                   alphabet ?? "alphabet.txt",
                    lm ?? "lm.binary",
                    trie ?? "trie",
                    LM_ALPHA, LM_BETA);
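Put together, the console example's updated initialization, reconstructed from the hunks above (the null-coalesced fallbacks are the example's own defaults), now reads:

    sttClient.CreateModel(
        model ?? "output_graph.pbmm",
        alphabet ?? "alphabet.txt",
        BEAM_WIDTH);

    sttClient.EnableDecoderWithLM(
        lm ?? "lm.binary",
        trie ?? "trie",
        LM_ALPHA, LM_BETA);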