Update .NET bindings and client

Reuben Morais 2019-09-09 11:54:53 +02:00
parent bc6741cd41
commit a8c53d2154
5 changed files with 17 additions and 34 deletions


@@ -32,13 +32,11 @@ namespace DeepSpeechClient
/// Create an object providing an interface to a trained DeepSpeech model.
/// </summary>
/// <param name="aModelPath">The path to the frozen model graph.</param>
/// <param name="aNCep">The number of cepstrum the model was trained with.</param>
/// <param name="aNContext">The context window the model was trained with.</param>
/// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
/// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
public unsafe void CreateModel(string aModelPath, uint aNCep,
uint aNContext, string aAlphabetConfigPath, uint aBeamWidth)
public unsafe void CreateModel(string aModelPath,
string aAlphabetConfigPath, uint aBeamWidth)
{
string exceptionMessage = null;
if (string.IsNullOrWhiteSpace(aModelPath))
@@ -63,8 +61,6 @@ namespace DeepSpeechClient
throw new FileNotFoundException(exceptionMessage);
}
var resultCode = NativeImp.DS_CreateModel(aModelPath,
aNCep,
aNContext,
aAlphabetConfigPath,
aBeamWidth,
ref _modelStatePP);
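
With this change, model creation no longer takes the cepstral-coefficient and context-window counts; only the graph path, alphabet path, and beam width remain. A minimal sketch of the updated call, assuming sttClient is an instance of the binding client used in the example program further down (the paths and beam width are the example's defaults, not required values):

    // Model geometry is now read from the graph itself, so only three
    // arguments are passed to CreateModel.
    sttClient.CreateModel(
        "output_graph.pbmm",   // frozen model graph
        "alphabet.txt",        // alphabet configuration file
        500);                  // decoder beam width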
@@ -116,20 +112,18 @@ namespace DeepSpeechClient
/// </summary>
public unsafe void Dispose()
{
NativeImp.DS_DestroyModel(_modelStatePP);
NativeImp.DS_FreeModel(_modelStatePP);
}
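
Dispose now routes through the renamed DS_FreeModel entry point; callers release the model exactly as before. A one-line sketch, assuming the same sttClient instance as above:

    // Release the native model when finished; this now calls DS_FreeModel.
    sttClient.Dispose();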
/// <summary>
/// Enable decoding using beam scoring with a KenLM language model.
/// </summary>
/// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
/// <param name="aLMPath">The path to the language model binary file.</param>
/// <param name="aTriePath">The path to the trie file build from the same vocabulary as the language model binary.</param>
/// <param name="aLMAlpha">The alpha hyperparameter of the CTC decoder. Language Model weight.</param>
/// <param name="aLMBeta">The beta hyperparameter of the CTC decoder. Word insertion weight.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with a language model.</exception>
public unsafe void EnableDecoderWithLM(string aAlphabetConfigPath,
string aLMPath, string aTriePath,
public unsafe void EnableDecoderWithLM(string aLMPath, string aTriePath,
float aLMAlpha, float aLMBeta)
{
string exceptionMessage = null;
@@ -148,7 +142,6 @@ namespace DeepSpeechClient
}
var resultCode = NativeImp.DS_EnableDecoderWithLM(_modelStatePP,
aAlphabetConfigPath,
aLMPath,
aTriePath,
aLMAlpha,
@@ -206,9 +199,9 @@ namespace DeepSpeechClient
/// </summary>
/// <param name="aSampleRate">The sample-rate of the audio signal</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
public unsafe void SetupStream(uint aSampleRate)
public unsafe void CreateStream(uint aSampleRate)
{
var resultCode = NativeImp.DS_SetupStream(_modelStatePP, aSampleRate, ref _streamingStatePP);
var resultCode = NativeImp.DS_CreateStream(_modelStatePP, aSampleRate, ref _streamingStatePP);
EvaluateResultCode(resultCode);
}
@@ -217,9 +210,9 @@ namespace DeepSpeechClient
/// This can be used if you no longer need the result of an ongoing streaming
/// inference and don't want to perform a costly decode operation.
/// </summary>
public unsafe void DiscardStream()
public unsafe void FreeStream()
{
NativeImp.DS_DiscardStream(ref _streamingStatePP);
NativeImp.DS_FreeStream(ref _streamingStatePP);
}
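
The streaming entry points are renamed to match the native library: SetupStream becomes CreateStream and DiscardStream becomes FreeStream. A minimal lifecycle sketch using only the renamed calls (the feed and finish methods are untouched by this commit and elided here; the 16 kHz sample rate is a placeholder):

    // Begin a streaming inference at the given sample rate.
    sttClient.CreateStream(16000);
    // ... feed audio samples into the ongoing stream here ...
    // Abandon the stream without performing the costly final decode.
    sttClient.FreeStream();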
/// <summary>


@@ -17,27 +17,22 @@ namespace DeepSpeechClient.Interfaces
/// Create an object providing an interface to a trained DeepSpeech model.
/// </summary>
/// <param name="aModelPath">The path to the frozen model graph.</param>
/// <param name="aNCep">The number of cepstrum the model was trained with.</param>
/// <param name="aNContext">The context window the model was trained with.</param>
/// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
/// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
unsafe void CreateModel(string aModelPath, uint aNCep,
uint aNContext,
unsafe void CreateModel(string aModelPath,
string aAlphabetConfigPath,
uint aBeamWidth);
/// <summary>
/// Enable decoding using beam scoring with a KenLM language model.
/// </summary>
/// <param name="aAlphabetConfigPath">The path to the configuration file specifying the alphabet used by the network.</param>
/// <param name="aLMPath">The path to the language model binary file.</param>
/// <param name="aTriePath">The path to the trie file build from the same vocabulary as the language model binary.</param>
/// <param name="aLMAlpha">The alpha hyperparameter of the CTC decoder. Language Model weight.</param>
/// <param name="aLMBeta">The beta hyperparameter of the CTC decoder. Word insertion weight.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with a language model.</exception>
unsafe void EnableDecoderWithLM(string aAlphabetConfigPath,
string aLMPath,
unsafe void EnableDecoderWithLM(string aLMPath,
string aTriePath,
float aLMAlpha,
float aLMBeta);
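
With the alphabet path dropped from EnableDecoderWithLM, enabling beam scoring with a KenLM language model now takes only the LM binary, the trie, and the two decoder weights. A short sketch of the updated call, using the constants from the example program below:

    // Enable decoding with an external KenLM language model.
    sttClient.EnableDecoderWithLM(
        "lm.binary",   // language model binary
        "trie",        // trie built from the same vocabulary as the LM
        0.75f,         // LM_ALPHA: language model weight
        1.85f);        // LM_BETA: word insertion weight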
@@ -69,7 +64,7 @@ namespace DeepSpeechClient.Interfaces
/// This can be used if you no longer need the result of an ongoing streaming
/// inference and don't want to perform a costly decode operation.
/// </summary>
unsafe void DiscardStream();
unsafe void FreeStream();
/// <summary>
/// Free a DeepSpeech allocated string
@@ -86,7 +81,7 @@ namespace DeepSpeechClient.Interfaces
/// </summary>
/// <param name="aSampleRate">The sample-rate of the audio signal</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
unsafe void SetupStream(uint aSampleRate);
unsafe void CreateStream(uint aSampleRate);
/// <summary>
/// Feeds audio samples to an ongoing streaming inference.


@@ -17,15 +17,12 @@ namespace DeepSpeechClient
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal unsafe static extern ErrorCodes DS_CreateModel(string aModelPath,
uint aNCep,
uint aNContext,
string aAlphabetConfigPath,
uint aBeamWidth,
ref ModelState** pint);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal static unsafe extern ErrorCodes DS_EnableDecoderWithLM(ModelState** aCtx,
string aAlphabetConfigPath,
string aLMPath,
string aTriePath,
float aLMAlpha,
@@ -45,14 +42,14 @@ namespace DeepSpeechClient
uint aSampleRate);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal static unsafe extern void DS_DestroyModel(ModelState** aCtx);
internal static unsafe extern void DS_FreeModel(ModelState** aCtx);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal static unsafe extern ErrorCodes DS_SetupStream(ModelState** aCtx,
internal static unsafe extern ErrorCodes DS_CreateStream(ModelState** aCtx,
uint aSampleRate, ref StreamingState** retval);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal static unsafe extern void DS_DiscardStream(ref StreamingState** aSctx);
internal static unsafe extern void DS_FreeStream(ref StreamingState** aSctx);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal static unsafe extern void DS_FreeMetadata(IntPtr metadata);


@@ -7,6 +7,8 @@ using GraphDef = System.IntPtr;
namespace DeepSpeechClient.Structs
{
//FIXME: ModelState is an opaque pointer to the API, why is this code reverse
// engineering its contents?
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi)]
public unsafe struct ModelState
{


@@ -50,8 +50,6 @@ namespace CSharpExamples
extended = !string.IsNullOrWhiteSpace(GetArgument(args, "--extended"));
}
const uint N_CEP = 26;
const uint N_CONTEXT = 9;
const uint BEAM_WIDTH = 500;
const float LM_ALPHA = 0.75f;
const float LM_BETA = 1.85f;
@@ -66,7 +64,6 @@ namespace CSharpExamples
stopwatch.Start();
sttClient.CreateModel(
model ?? "output_graph.pbmm",
N_CEP, N_CONTEXT,
alphabet ?? "alphabet.txt",
BEAM_WIDTH);
stopwatch.Stop();
@@ -77,7 +74,6 @@ namespace CSharpExamples
{
Console.WriteLine("Loadin LM...");
sttClient.EnableDecoderWithLM(
alphabet ?? "alphabet.txt",
lm ?? "lm.binary",
trie ?? "trie",
LM_ALPHA, LM_BETA);