Update .NET bindings and client

This commit is contained in:
Reuben Morais 2019-09-09 11:54:53 +02:00
parent bc6741cd41
commit a8c53d2154
5 changed files with 17 additions and 34 deletions

View File

@ -32,13 +32,11 @@ namespace DeepSpeechClient
/// Create an object providing an interface to a trained DeepSpeech model. /// Create an object providing an interface to a trained DeepSpeech model.
/// </summary> /// </summary>
/// <param name="aModelPath">The path to the frozen model graph.</param> /// <param name="aModelPath">The path to the frozen model graph.</param>
/// <param name="aNCep">The number of cepstrum the model was trained with.</param>
/// <param name="aNContext">The context window the model was trained with.</param>
/// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param> /// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
/// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param> /// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception> /// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
public unsafe void CreateModel(string aModelPath, uint aNCep, public unsafe void CreateModel(string aModelPath,
uint aNContext, string aAlphabetConfigPath, uint aBeamWidth) string aAlphabetConfigPath, uint aBeamWidth)
{ {
string exceptionMessage = null; string exceptionMessage = null;
if (string.IsNullOrWhiteSpace(aModelPath)) if (string.IsNullOrWhiteSpace(aModelPath))
@ -63,8 +61,6 @@ namespace DeepSpeechClient
throw new FileNotFoundException(exceptionMessage); throw new FileNotFoundException(exceptionMessage);
} }
var resultCode = NativeImp.DS_CreateModel(aModelPath, var resultCode = NativeImp.DS_CreateModel(aModelPath,
aNCep,
aNContext,
aAlphabetConfigPath, aAlphabetConfigPath,
aBeamWidth, aBeamWidth,
ref _modelStatePP); ref _modelStatePP);
@ -116,20 +112,18 @@ namespace DeepSpeechClient
/// </summary> /// </summary>
public unsafe void Dispose() public unsafe void Dispose()
{ {
NativeImp.DS_DestroyModel(_modelStatePP); NativeImp.DS_FreeModel(_modelStatePP);
} }
/// <summary> /// <summary>
/// Enable decoding using beam scoring with a KenLM language model. /// Enable decoding using beam scoring with a KenLM language model.
/// </summary> /// </summary>
/// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
/// <param name="aLMPath">The path to the language model binary file.</param> /// <param name="aLMPath">The path to the language model binary file.</param>
/// <param name="aTriePath">The path to the trie file built from the same vocabulary as the language model binary.</param> /// <param name="aTriePath">The path to the trie file built from the same vocabulary as the language model binary.</param>
/// <param name="aLMAlpha">The alpha hyperparameter of the CTC decoder. Language Model weight.</param> /// <param name="aLMAlpha">The alpha hyperparameter of the CTC decoder. Language Model weight.</param>
/// <param name="aLMBeta">The beta hyperparameter of the CTC decoder. Word insertion weight.</param> /// <param name="aLMBeta">The beta hyperparameter of the CTC decoder. Word insertion weight.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with a language model.</exception> /// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with a language model.</exception>
public unsafe void EnableDecoderWithLM(string aAlphabetConfigPath, public unsafe void EnableDecoderWithLM(string aLMPath, string aTriePath,
string aLMPath, string aTriePath,
float aLMAlpha, float aLMBeta) float aLMAlpha, float aLMBeta)
{ {
string exceptionMessage = null; string exceptionMessage = null;
@ -148,7 +142,6 @@ namespace DeepSpeechClient
} }
var resultCode = NativeImp.DS_EnableDecoderWithLM(_modelStatePP, var resultCode = NativeImp.DS_EnableDecoderWithLM(_modelStatePP,
aAlphabetConfigPath,
aLMPath, aLMPath,
aTriePath, aTriePath,
aLMAlpha, aLMAlpha,
@ -206,9 +199,9 @@ namespace DeepSpeechClient
/// </summary> /// </summary>
/// <param name="aSampleRate">The sample-rate of the audio signal</param> /// <param name="aSampleRate">The sample-rate of the audio signal</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception> /// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
public unsafe void SetupStream(uint aSampleRate) public unsafe void CreateStream(uint aSampleRate)
{ {
var resultCode = NativeImp.DS_SetupStream(_modelStatePP, aSampleRate, ref _streamingStatePP); var resultCode = NativeImp.DS_CreateStream(_modelStatePP, aSampleRate, ref _streamingStatePP);
EvaluateResultCode(resultCode); EvaluateResultCode(resultCode);
} }
@ -217,9 +210,9 @@ namespace DeepSpeechClient
/// This can be used if you no longer need the result of an ongoing streaming /// This can be used if you no longer need the result of an ongoing streaming
/// inference and don't want to perform a costly decode operation. /// inference and don't want to perform a costly decode operation.
/// </summary> /// </summary>
public unsafe void DiscardStream() public unsafe void FreeStream()
{ {
NativeImp.DS_DiscardStream(ref _streamingStatePP); NativeImp.DS_FreeStream(ref _streamingStatePP);
} }
/// <summary> /// <summary>

View File

@ -17,27 +17,22 @@ namespace DeepSpeechClient.Interfaces
/// Create an object providing an interface to a trained DeepSpeech model. /// Create an object providing an interface to a trained DeepSpeech model.
/// </summary> /// </summary>
/// <param name="aModelPath">The path to the frozen model graph.</param> /// <param name="aModelPath">The path to the frozen model graph.</param>
/// <param name="aNCep">The number of cepstrum the model was trained with.</param>
/// <param name="aNContext">The context window the model was trained with.</param>
/// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param> /// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
/// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param> /// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception> /// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
unsafe void CreateModel(string aModelPath, uint aNCep, unsafe void CreateModel(string aModelPath,
uint aNContext,
string aAlphabetConfigPath, string aAlphabetConfigPath,
uint aBeamWidth); uint aBeamWidth);
/// <summary> /// <summary>
/// Enable decoding using beam scoring with a KenLM language model. /// Enable decoding using beam scoring with a KenLM language model.
/// </summary> /// </summary>
/// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
/// <param name="aLMPath">The path to the language model binary file.</param> /// <param name="aLMPath">The path to the language model binary file.</param>
/// <param name="aTriePath">The path to the trie file built from the same vocabulary as the language model binary.</param> /// <param name="aTriePath">The path to the trie file built from the same vocabulary as the language model binary.</param>
/// <param name="aLMAlpha">The alpha hyperparameter of the CTC decoder. Language Model weight.</param> /// <param name="aLMAlpha">The alpha hyperparameter of the CTC decoder. Language Model weight.</param>
/// <param name="aLMBeta">The beta hyperparameter of the CTC decoder. Word insertion weight.</param> /// <param name="aLMBeta">The beta hyperparameter of the CTC decoder. Word insertion weight.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with a language model.</exception> /// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with a language model.</exception>
unsafe void EnableDecoderWithLM(string aAlphabetConfigPath, unsafe void EnableDecoderWithLM(string aLMPath,
string aLMPath,
string aTriePath, string aTriePath,
float aLMAlpha, float aLMAlpha,
float aLMBeta); float aLMBeta);
@ -69,7 +64,7 @@ namespace DeepSpeechClient.Interfaces
/// This can be used if you no longer need the result of an ongoing streaming /// This can be used if you no longer need the result of an ongoing streaming
/// inference and don't want to perform a costly decode operation. /// inference and don't want to perform a costly decode operation.
/// </summary> /// </summary>
unsafe void DiscardStream(); unsafe void FreeStream();
/// <summary> /// <summary>
/// Free a DeepSpeech allocated string /// Free a DeepSpeech allocated string
@ -86,7 +81,7 @@ namespace DeepSpeechClient.Interfaces
/// </summary> /// </summary>
/// <param name="aSampleRate">The sample-rate of the audio signal</param> /// <param name="aSampleRate">The sample-rate of the audio signal</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception> /// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
unsafe void SetupStream(uint aSampleRate); unsafe void CreateStream(uint aSampleRate);
/// <summary> /// <summary>
/// Feeds audio samples to an ongoing streaming inference. /// Feeds audio samples to an ongoing streaming inference.

View File

@ -17,15 +17,12 @@ namespace DeepSpeechClient
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal unsafe static extern ErrorCodes DS_CreateModel(string aModelPath, internal unsafe static extern ErrorCodes DS_CreateModel(string aModelPath,
uint aNCep,
uint aNContext,
string aAlphabetConfigPath, string aAlphabetConfigPath,
uint aBeamWidth, uint aBeamWidth,
ref ModelState** pint); ref ModelState** pint);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal static unsafe extern ErrorCodes DS_EnableDecoderWithLM(ModelState** aCtx, internal static unsafe extern ErrorCodes DS_EnableDecoderWithLM(ModelState** aCtx,
string aAlphabetConfigPath,
string aLMPath, string aLMPath,
string aTriePath, string aTriePath,
float aLMAlpha, float aLMAlpha,
@ -45,14 +42,14 @@ namespace DeepSpeechClient
uint aSampleRate); uint aSampleRate);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal static unsafe extern void DS_DestroyModel(ModelState** aCtx); internal static unsafe extern void DS_FreeModel(ModelState** aCtx);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal static unsafe extern ErrorCodes DS_SetupStream(ModelState** aCtx, internal static unsafe extern ErrorCodes DS_CreateStream(ModelState** aCtx,
uint aSampleRate, ref StreamingState** retval); uint aSampleRate, ref StreamingState** retval);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal static unsafe extern void DS_DiscardStream(ref StreamingState** aSctx); internal static unsafe extern void DS_FreeStream(ref StreamingState** aSctx);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal static unsafe extern void DS_FreeMetadata(IntPtr metadata); internal static unsafe extern void DS_FreeMetadata(IntPtr metadata);

View File

@ -7,6 +7,8 @@ using GraphDef = System.IntPtr;
namespace DeepSpeechClient.Structs namespace DeepSpeechClient.Structs
{ {
//FIXME: ModelState is an opaque pointer to the API, why is this code reverse
// engineering its contents?
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi)] [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi)]
public unsafe struct ModelState public unsafe struct ModelState
{ {

View File

@ -50,8 +50,6 @@ namespace CSharpExamples
extended = !string.IsNullOrWhiteSpace(GetArgument(args, "--extended")); extended = !string.IsNullOrWhiteSpace(GetArgument(args, "--extended"));
} }
const uint N_CEP = 26;
const uint N_CONTEXT = 9;
const uint BEAM_WIDTH = 500; const uint BEAM_WIDTH = 500;
const float LM_ALPHA = 0.75f; const float LM_ALPHA = 0.75f;
const float LM_BETA = 1.85f; const float LM_BETA = 1.85f;
@ -66,7 +64,6 @@ namespace CSharpExamples
stopwatch.Start(); stopwatch.Start();
sttClient.CreateModel( sttClient.CreateModel(
model ?? "output_graph.pbmm", model ?? "output_graph.pbmm",
N_CEP, N_CONTEXT,
alphabet ?? "alphabet.txt", alphabet ?? "alphabet.txt",
BEAM_WIDTH); BEAM_WIDTH);
stopwatch.Stop(); stopwatch.Stop();
@ -77,7 +74,6 @@ namespace CSharpExamples
{ {
Console.WriteLine("Loadin LM..."); Console.WriteLine("Loadin LM...");
sttClient.EnableDecoderWithLM( sttClient.EnableDecoderWithLM(
alphabet ?? "alphabet.txt",
lm ?? "lm.binary", lm ?? "lm.binary",
trie ?? "trie", trie ?? "trie",
LM_ALPHA, LM_BETA); LM_ALPHA, LM_BETA);