Update .NET bindings and client
commit a8c53d2154 (parent bc6741cd41)
@@ -32,13 +32,11 @@ namespace DeepSpeechClient
         /// Create an object providing an interface to a trained DeepSpeech model.
         /// </summary>
         /// <param name="aModelPath">The path to the frozen model graph.</param>
-        /// <param name="aNCep">The number of cepstrum the model was trained with.</param>
-        /// <param name="aNContext">The context window the model was trained with.</param>
         /// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
         /// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
         /// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
-        public unsafe void CreateModel(string aModelPath, uint aNCep,
-            uint aNContext, string aAlphabetConfigPath, uint aBeamWidth)
+        public unsafe void CreateModel(string aModelPath,
+            string aAlphabetConfigPath, uint aBeamWidth)
         {
             string exceptionMessage = null;
             if (string.IsNullOrWhiteSpace(aModelPath))
@@ -63,8 +61,6 @@ namespace DeepSpeechClient
                 throw new FileNotFoundException(exceptionMessage);
             }
             var resultCode = NativeImp.DS_CreateModel(aModelPath,
-                            aNCep,
-                            aNContext,
                             aAlphabetConfigPath,
                             aBeamWidth,
                             ref _modelStatePP);
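Net effect of these two hunks: CreateModel no longer takes aNCep or aNContext, so callers pass only the model graph, the alphabet file, and the beam width. A minimal calling sketch against the new signature; the concrete DeepSpeech class name and the file names are assumptions borrowed from the console example further down, not something this hunk defines.

using DeepSpeechClient;            // assumed: concrete client lives here
using DeepSpeechClient.Interfaces;

class CreateModelSketch
{
    static void Main()
    {
        // New three-argument form: cepstrum count and context window are gone.
        IDeepSpeech stt = new DeepSpeech();   // class name assumed
        stt.CreateModel("output_graph.pbmm",  // aModelPath
                        "alphabet.txt",       // aAlphabetConfigPath
                        500);                 // aBeamWidth
    }
}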
@@ -116,20 +112,18 @@ namespace DeepSpeechClient
         /// </summary>
         public unsafe void Dispose()
         {
-            NativeImp.DS_DestroyModel(_modelStatePP);
+            NativeImp.DS_FreeModel(_modelStatePP);
         }

         /// <summary>
         /// Enable decoding using beam scoring with a KenLM language model.
         /// </summary>
-        /// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
         /// <param name="aLMPath">The path to the language model binary file.</param>
         /// <param name="aTriePath">The path to the trie file build from the same vocabulary as the language model binary.</param>
         /// <param name="aLMAlpha">The alpha hyperparameter of the CTC decoder. Language Model weight.</param>
         /// <param name="aLMBeta">The beta hyperparameter of the CTC decoder. Word insertion weight.</param>
         /// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with a language model.</exception>
-        public unsafe void EnableDecoderWithLM(string aAlphabetConfigPath,
-            string aLMPath, string aTriePath,
+        public unsafe void EnableDecoderWithLM(string aLMPath, string aTriePath,
             float aLMAlpha, float aLMBeta)
         {
             string exceptionMessage = null;
@@ -148,7 +142,6 @@ namespace DeepSpeechClient
             }

             var resultCode = NativeImp.DS_EnableDecoderWithLM(_modelStatePP,
-                            aAlphabetConfigPath,
                             aLMPath,
                             aTriePath,
                             aLMAlpha,
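The same pattern applies to EnableDecoderWithLM: the alphabet path is dropped both from the public method and from the DS_EnableDecoderWithLM call it forwards to. A hedged sketch of the updated call; file names are placeholders and the alpha/beta values mirror the console example's defaults.

using DeepSpeechClient.Interfaces;

static class LmSetupSketch
{
    // Enable KenLM beam scoring with the new four-argument signature.
    public static void EnableLm(IDeepSpeech stt)
    {
        stt.EnableDecoderWithLM("lm.binary",   // aLMPath
                                "trie",        // aTriePath
                                0.75f,         // aLMAlpha: language model weight
                                1.85f);        // aLMBeta: word insertion weight
    }
}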
@@ -206,9 +199,9 @@ namespace DeepSpeechClient
         /// </summary>
         /// <param name="aSampleRate">The sample-rate of the audio signal</param>
         /// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
-        public unsafe void SetupStream(uint aSampleRate)
+        public unsafe void CreateStream(uint aSampleRate)
         {
-            var resultCode = NativeImp.DS_SetupStream(_modelStatePP, aSampleRate, ref _streamingStatePP);
+            var resultCode = NativeImp.DS_CreateStream(_modelStatePP, aSampleRate, ref _streamingStatePP);
             EvaluateResultCode(resultCode);
         }

@@ -217,9 +210,9 @@ namespace DeepSpeechClient
         /// This can be used if you no longer need the result of an ongoing streaming
         /// inference and don't want to perform a costly decode operation.
         /// </summary>
-        public unsafe void DiscardStream()
+        public unsafe void FreeStream()
         {
-            NativeImp.DS_DiscardStream(ref _streamingStatePP);
+            NativeImp.DS_FreeStream(ref _streamingStatePP);
         }

         /// <summary>
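The streaming methods are renamed to match the native library: SetupStream becomes CreateStream and DiscardStream becomes FreeStream, forwarding to DS_CreateStream and DS_FreeStream. A sketch of the abandon-a-stream path; how audio is fed is outside this diff and only hinted at in a comment.

using DeepSpeechClient.Interfaces;

static class StreamSketch
{
    // Start a stream at 16 kHz, then drop it without paying for a decode.
    public static void AbandonStream(IDeepSpeech stt)
    {
        stt.CreateStream(16000);   // was SetupStream; wraps DS_CreateStream
        // ... feed audio samples here if a transcript were actually wanted ...
        stt.FreeStream();          // was DiscardStream; wraps DS_FreeStream
    }
}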
@@ -17,27 +17,22 @@ namespace DeepSpeechClient.Interfaces
        /// Create an object providing an interface to a trained DeepSpeech model.
        /// </summary>
        /// <param name="aModelPath">The path to the frozen model graph.</param>
-       /// <param name="aNCep">The number of cepstrum the model was trained with.</param>
-       /// <param name="aNContext">The context window the model was trained with.</param>
        /// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
        /// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
        /// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
-       unsafe void CreateModel(string aModelPath, uint aNCep,
-                       uint aNContext,
+       unsafe void CreateModel(string aModelPath,
                        string aAlphabetConfigPath,
                        uint aBeamWidth);

        /// <summary>
        /// Enable decoding using beam scoring with a KenLM language model.
        /// </summary>
-       /// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
        /// <param name="aLMPath">The path to the language model binary file.</param>
        /// <param name="aTriePath">The path to the trie file build from the same vocabulary as the language model binary.</param>
        /// <param name="aLMAlpha">The alpha hyperparameter of the CTC decoder. Language Model weight.</param>
        /// <param name="aLMBeta">The beta hyperparameter of the CTC decoder. Word insertion weight.</param>
        /// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with a language model.</exception>
-       unsafe void EnableDecoderWithLM(string aAlphabetConfigPath,
-                       string aLMPath,
+       unsafe void EnableDecoderWithLM(string aLMPath,
                        string aTriePath,
                        float aLMAlpha,
                        float aLMBeta);
@@ -69,7 +64,7 @@ namespace DeepSpeechClient.Interfaces
        /// This can be used if you no longer need the result of an ongoing streaming
        /// inference and don't want to perform a costly decode operation.
        /// </summary>
-       unsafe void DiscardStream();
+       unsafe void FreeStream();

        /// <summary>
        /// Free a DeepSpeech allocated string
@@ -86,7 +81,7 @@ namespace DeepSpeechClient.Interfaces
        /// </summary>
        /// <param name="aSampleRate">The sample-rate of the audio signal</param>
        /// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
-       unsafe void SetupStream(uint aSampleRate);
+       unsafe void CreateStream(uint aSampleRate);

        /// <summary>
        /// Feeds audio samples to an ongoing streaming inference.
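The interface changes mirror the implementation one-for-one, so code written against IDeepSpeech only needs its call sites touched, not its type references. A small consumer sketch that compiles against just the members shown above; the paths and the beam/alpha/beta values are placeholders, not defaults defined by the interface.

using DeepSpeechClient.Interfaces;

static class SttBootstrap
{
    // Configure any IDeepSpeech implementation through the narrowed interface:
    // no cepstrum/context counts, no alphabet path on the LM call.
    public static void Configure(IDeepSpeech stt, string modelPath, string alphabetPath,
                                 string lmPath, string triePath)
    {
        stt.CreateModel(modelPath, alphabetPath, 500);
        stt.EnableDecoderWithLM(lmPath, triePath, 0.75f, 1.85f);
    }
}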
@@ -17,15 +17,12 @@ namespace DeepSpeechClient

        [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
        internal unsafe static extern ErrorCodes DS_CreateModel(string aModelPath,
-                       uint aNCep,
-                       uint aNContext,
                        string aAlphabetConfigPath,
                        uint aBeamWidth,
                        ref ModelState** pint);

        [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
        internal static unsafe extern ErrorCodes DS_EnableDecoderWithLM(ModelState** aCtx,
-                       string aAlphabetConfigPath,
                        string aLMPath,
                        string aTriePath,
                        float aLMAlpha,
@@ -45,14 +42,14 @@ namespace DeepSpeechClient
                        uint aSampleRate);

        [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
-       internal static unsafe extern void DS_DestroyModel(ModelState** aCtx);
+       internal static unsafe extern void DS_FreeModel(ModelState** aCtx);

        [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
-       internal static unsafe extern ErrorCodes DS_SetupStream(ModelState** aCtx,
+       internal static unsafe extern ErrorCodes DS_CreateStream(ModelState** aCtx,
                        uint aSampleRate, ref StreamingState** retval);

        [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
-       internal static unsafe extern void DS_DiscardStream(ref StreamingState** aSctx);
+       internal static unsafe extern void DS_FreeStream(ref StreamingState** aSctx);

        [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
        internal static unsafe extern void DS_FreeMetadata(IntPtr metadata);
@@ -7,6 +7,8 @@ using GraphDef = System.IntPtr;

 namespace DeepSpeechClient.Structs
 {
+    //FIXME: ModelState is an opaque pointer to the API, why is this code reverse
+    // engineering its contents?
     [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi)]
     public unsafe struct ModelState
     {
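The FIXME added here flags that ModelState is opaque to the C API, so mirroring its fields with StructLayout is unnecessary. A hedged sketch of what the comment implies instead: binding the renamed entry points against plain IntPtr handles, which marshal the same way as the ModelState**/StreamingState** parameters in NativeImp above. This is an illustration of the comment's point, not code from this commit.

using System;
using System.Runtime.InteropServices;

internal static class OpaqueHandleSketch
{
    // IntPtr stands in for the ModelState** the existing binding passes by value.
    [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
    internal static extern void DS_FreeModel(IntPtr aCtx);

    // int stands in for the binding's ErrorCodes enum; ref IntPtr replaces
    // ref StreamingState**.
    [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
    internal static extern int DS_CreateStream(IntPtr aCtx, uint aSampleRate,
                                               ref IntPtr retval);

    [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
    internal static extern void DS_FreeStream(ref IntPtr aSctx);
}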
@@ -50,8 +50,6 @@ namespace CSharpExamples
                extended = !string.IsNullOrWhiteSpace(GetArgument(args, "--extended"));
            }

-           const uint N_CEP = 26;
-           const uint N_CONTEXT = 9;
            const uint BEAM_WIDTH = 500;
            const float LM_ALPHA = 0.75f;
            const float LM_BETA = 1.85f;
@@ -66,7 +64,6 @@ namespace CSharpExamples
            stopwatch.Start();
            sttClient.CreateModel(
                model ?? "output_graph.pbmm",
-               N_CEP, N_CONTEXT,
                alphabet ?? "alphabet.txt",
                BEAM_WIDTH);
            stopwatch.Stop();
@@ -77,7 +74,6 @@ namespace CSharpExamples
            {
                Console.WriteLine("Loadin LM...");
                sttClient.EnableDecoderWithLM(
-                   alphabet ?? "alphabet.txt",
                    lm ?? "lm.binary",
                    trie ?? "trie",
                    LM_ALPHA, LM_BETA);
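Taken together, the console example now boots the engine with three arguments to CreateModel and four to EnableDecoderWithLM. A condensed, hedged version of the updated flow; the concrete DeepSpeech class, audio loading, and the actual recognition call are outside this diff and only sketched in comments.

using DeepSpeechClient;            // assumed location of the concrete client

class ConsoleFlowSketch
{
    static void Main()
    {
        const uint BEAM_WIDTH = 500;
        const float LM_ALPHA = 0.75f;
        const float LM_BETA = 1.85f;

        var sttClient = new DeepSpeech();   // class name assumed
        sttClient.CreateModel("output_graph.pbmm", "alphabet.txt", BEAM_WIDTH);
        sttClient.EnableDecoderWithLM("lm.binary", "trie", LM_ALPHA, LM_BETA);

        // ... load 16 kHz mono PCM and run inference here (not part of this diff) ...

        sttClient.Dispose();                // releases the model via DS_FreeModel
    }
}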