Update .NET bindings and client

Reuben Morais 2019-09-09 11:54:53 +02:00
parent bc6741cd41
commit a8c53d2154
5 changed files with 17 additions and 34 deletions


@@ -32,13 +32,11 @@ namespace DeepSpeechClient
/// Create an object providing an interface to a trained DeepSpeech model.
/// </summary>
/// <param name="aModelPath">The path to the frozen model graph.</param>
/// <param name="aNCep">The number of cepstrum the model was trained with.</param>
/// <param name="aNContext">The context window the model was trained with.</param>
/// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
/// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
public unsafe void CreateModel(string aModelPath, uint aNCep,
uint aNContext, string aAlphabetConfigPath, uint aBeamWidth)
public unsafe void CreateModel(string aModelPath,
string aAlphabetConfigPath, uint aBeamWidth)
{
string exceptionMessage = null;
if (string.IsNullOrWhiteSpace(aModelPath))
@@ -63,8 +61,6 @@ namespace DeepSpeechClient
throw new FileNotFoundException(exceptionMessage);
}
var resultCode = NativeImp.DS_CreateModel(aModelPath,
aNCep,
aNContext,
aAlphabetConfigPath,
aBeamWidth,
ref _modelStatePP);
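
With this change, model creation no longer takes the cepstral-coefficient and context-window counts; only the graph path, alphabet path, and beam width remain. A minimal sketch of the updated call, assuming sttClient is an instance of the binding client used in the example program further down (the paths and beam width are the example's defaults, not required values):

    // Model geometry is now read from the graph itself, so only three
    // arguments are passed to CreateModel.
    sttClient.CreateModel(
        "output_graph.pbmm",   // frozen model graph
        "alphabet.txt",        // alphabet configuration file
        500);                  // decoder beam width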
@@ -116,20 +112,18 @@ namespace DeepSpeechClient
/// </summary>
public unsafe void Dispose()
{
NativeImp.DS_DestroyModel(_modelStatePP);
NativeImp.DS_FreeModel(_modelStatePP);
}
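
Dispose now routes through the renamed DS_FreeModel entry point; callers release the model exactly as before. A one-line sketch, assuming the same sttClient instance as above:

    // Release the native model when finished; this now calls DS_FreeModel.
    sttClient.Dispose();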
/// <summary>
/// Enable decoding using beam scoring with a KenLM language model.
/// </summary>
/// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
/// <param name="aLMPath">The path to the language model binary file.</param>
/// <param name="aTriePath">The path to the trie file build from the same vocabulary as the language model binary.</param>
/// <param name="aLMAlpha">The alpha hyperparameter of the CTC decoder. Language Model weight.</param>
/// <param name="aLMBeta">The beta hyperparameter of the CTC decoder. Word insertion weight.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with a language model.</exception>
public unsafe void EnableDecoderWithLM(string aAlphabetConfigPath,
string aLMPath, string aTriePath,
public unsafe void EnableDecoderWithLM(string aLMPath, string aTriePath,
float aLMAlpha, float aLMBeta)
{
string exceptionMessage = null;
@@ -148,7 +142,6 @@ namespace DeepSpeechClient
}
var resultCode = NativeImp.DS_EnableDecoderWithLM(_modelStatePP,
aAlphabetConfigPath,
aLMPath,
aTriePath,
aLMAlpha,
@@ -206,9 +199,9 @@ namespace DeepSpeechClient
/// </summary>
/// <param name="aSampleRate">The sample-rate of the audio signal</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
public unsafe void SetupStream(uint aSampleRate)
public unsafe void CreateStream(uint aSampleRate)
{
var resultCode = NativeImp.DS_SetupStream(_modelStatePP, aSampleRate, ref _streamingStatePP);
var resultCode = NativeImp.DS_CreateStream(_modelStatePP, aSampleRate, ref _streamingStatePP);
EvaluateResultCode(resultCode);
}
@@ -217,9 +210,9 @@ namespace DeepSpeechClient
/// This can be used if you no longer need the result of an ongoing streaming
/// inference and don't want to perform a costly decode operation.
/// </summary>
public unsafe void DiscardStream()
public unsafe void FreeStream()
{
NativeImp.DS_DiscardStream(ref _streamingStatePP);
NativeImp.DS_FreeStream(ref _streamingStatePP);
}
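
The streaming entry points are renamed to match the native library: SetupStream becomes CreateStream and DiscardStream becomes FreeStream. A minimal lifecycle sketch using only the renamed calls (the feed and finish methods are untouched by this commit and elided here; the 16 kHz sample rate is a placeholder):

    // Begin a streaming inference at the given sample rate.
    sttClient.CreateStream(16000);
    // ... feed audio samples into the ongoing stream here ...
    // Abandon the stream without performing the costly final decode.
    sttClient.FreeStream();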
/// <summary>


@@ -17,27 +17,22 @@ namespace DeepSpeechClient.Interfaces
/// Create an object providing an interface to a trained DeepSpeech model.
/// </summary>
/// <param name="aModelPath">The path to the frozen model graph.</param>
/// <param name="aNCep">The number of cepstrum the model was trained with.</param>
/// <param name="aNContext">The context window the model was trained with.</param>
/// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
/// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
unsafe void CreateModel(string aModelPath, uint aNCep,
uint aNContext,
unsafe void CreateModel(string aModelPath,
string aAlphabetConfigPath,
uint aBeamWidth);
/// <summary>
/// Enable decoding using beam scoring with a KenLM language model.
/// </summary>
/// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
/// <param name="aLMPath">The path to the language model binary file.</param>
/// <param name="aTriePath">The path to the trie file build from the same vocabulary as the language model binary.</param>
/// <param name="aLMAlpha">The alpha hyperparameter of the CTC decoder. Language Model weight.</param>
/// <param name="aLMBeta">The beta hyperparameter of the CTC decoder. Word insertion weight.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with a language model.</exception>
unsafe void EnableDecoderWithLM(string aAlphabetConfigPath,
string aLMPath,
unsafe void EnableDecoderWithLM(string aLMPath,
string aTriePath,
float aLMAlpha,
float aLMBeta);
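
With the alphabet path dropped from EnableDecoderWithLM, enabling beam scoring with a KenLM language model now takes only the LM binary, the trie, and the two decoder weights. A short sketch of the updated call, using the constants from the example program below:

    // Enable decoding with an external KenLM language model.
    sttClient.EnableDecoderWithLM(
        "lm.binary",   // language model binary
        "trie",        // trie built from the same vocabulary as the LM
        0.75f,         // LM_ALPHA: language model weight
        1.85f);        // LM_BETA: word insertion weight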
@@ -69,7 +64,7 @@ namespace DeepSpeechClient.Interfaces
/// This can be used if you no longer need the result of an ongoing streaming
/// inference and don't want to perform a costly decode operation.
/// </summary>
unsafe void DiscardStream();
unsafe void FreeStream();
/// <summary>
/// Free a DeepSpeech allocated string
@@ -86,7 +81,7 @@ namespace DeepSpeechClient.Interfaces
/// </summary>
/// <param name="aSampleRate">The sample-rate of the audio signal</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
unsafe void SetupStream(uint aSampleRate);
unsafe void CreateStream(uint aSampleRate);
/// <summary>
/// Feeds audio samples to an ongoing streaming inference.


@@ -17,15 +17,12 @@ namespace DeepSpeechClient
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal unsafe static extern ErrorCodes DS_CreateModel(string aModelPath,
uint aNCep,
uint aNContext,
string aAlphabetConfigPath,
uint aBeamWidth,
ref ModelState** pint);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal static unsafe extern ErrorCodes DS_EnableDecoderWithLM(ModelState** aCtx,
string aAlphabetConfigPath,
string aLMPath,
string aTriePath,
float aLMAlpha,
@@ -45,14 +42,14 @@ namespace DeepSpeechClient
uint aSampleRate);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal static unsafe extern void DS_DestroyModel(ModelState** aCtx);
internal static unsafe extern void DS_FreeModel(ModelState** aCtx);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal static unsafe extern ErrorCodes DS_SetupStream(ModelState** aCtx,
internal static unsafe extern ErrorCodes DS_CreateStream(ModelState** aCtx,
uint aSampleRate, ref StreamingState** retval);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal static unsafe extern void DS_DiscardStream(ref StreamingState** aSctx);
internal static unsafe extern void DS_FreeStream(ref StreamingState** aSctx);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal static unsafe extern void DS_FreeMetadata(IntPtr metadata);


@@ -7,6 +7,8 @@ using GraphDef = System.IntPtr;
namespace DeepSpeechClient.Structs
{
//FIXME: ModelState is an opaque pointer to the API, why is this code reverse
// engineering its contents?
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi)]
public unsafe struct ModelState
{


@@ -50,8 +50,6 @@ namespace CSharpExamples
extended = !string.IsNullOrWhiteSpace(GetArgument(args, "--extended"));
}
const uint N_CEP = 26;
const uint N_CONTEXT = 9;
const uint BEAM_WIDTH = 500;
const float LM_ALPHA = 0.75f;
const float LM_BETA = 1.85f;
@@ -66,7 +64,6 @@ namespace CSharpExamples
stopwatch.Start();
sttClient.CreateModel(
model ?? "output_graph.pbmm",
N_CEP, N_CONTEXT,
alphabet ?? "alphabet.txt",
BEAM_WIDTH);
stopwatch.Stop();
@@ -77,7 +74,6 @@ namespace CSharpExamples
{
Console.WriteLine("Loadin LM...");
sttClient.EnableDecoderWithLM(
alphabet ?? "alphabet.txt",
lm ?? "lm.binary",
trie ?? "trie",
LM_ALPHA, LM_BETA);