Merge pull request #2591 from mozilla/revert-2548-net-streams

Revert "Multi-stream support .NET"
This commit is contained in:
Reuben Morais 2019-12-10 16:16:16 +01:00 committed by GitHub
commit 911743a0b8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 99 additions and 136 deletions

View File

@ -15,13 +15,6 @@ DeepSpeech Class
:project: deepspeech-dotnet :project: deepspeech-dotnet
:members: :members:
DeepSpeechStream Class
----------------
.. doxygenclass:: DeepSpeechClient::DeepSpeechStream
:project: deepspeech-dotnet
:members:
ErrorCodes ErrorCodes
---------- ----------

View File

@ -18,20 +18,20 @@ namespace DeepSpeechWPF
const int BEAM_WIDTH = 500; const int BEAM_WIDTH = 500;
//Register instance of DeepSpeech
DeepSpeechClient.DeepSpeech deepSpeechClient = new DeepSpeechClient.DeepSpeech();
try try
{ {
//Register instance of DeepSpeech deepSpeechClient.CreateModel("output_graph.pbmm", BEAM_WIDTH);
DeepSpeechClient.DeepSpeech deepSpeechClient =
new DeepSpeechClient.DeepSpeech("output_graph.pbmm", BEAM_WIDTH);
SimpleIoc.Default.Register<IDeepSpeech>(() => deepSpeechClient);
SimpleIoc.Default.Register<MainWindowViewModel>();
} }
catch (System.Exception ex) catch (System.Exception ex)
{ {
MessageBox.Show(ex.Message); MessageBox.Show(ex.Message);
Current.Shutdown(); Current.Shutdown();
} }
SimpleIoc.Default.Register<IDeepSpeech>(() => deepSpeechClient);
SimpleIoc.Default.Register<MainWindowViewModel>();
} }
protected override void OnExit(ExitEventArgs e) protected override void OnExit(ExitEventArgs e)

View File

@ -4,7 +4,6 @@ using CSCore.CoreAudioAPI;
using CSCore.SoundIn; using CSCore.SoundIn;
using CSCore.Streams; using CSCore.Streams;
using DeepSpeechClient.Interfaces; using DeepSpeechClient.Interfaces;
using DeepSpeechClient.Models;
using GalaSoft.MvvmLight.CommandWpf; using GalaSoft.MvvmLight.CommandWpf;
using Microsoft.Win32; using Microsoft.Win32;
using System; using System;
@ -59,12 +58,6 @@ namespace DeepSpeech.WPF.ViewModels
#endregion #endregion
#region Streaming #region Streaming
/// <summary>
/// Stream used to feed data into the acoustic model.
/// </summary>
private DeepSpeechStream _sttStream;
/// <summary> /// <summary>
/// Records the audio of the selected device. /// Records the audio of the selected device.
/// </summary> /// </summary>
@ -315,7 +308,7 @@ namespace DeepSpeech.WPF.ViewModels
if (_bufferQueue.TryDequeue(out short[] buffer)) if (_bufferQueue.TryDequeue(out short[] buffer))
{ {
StreamingIsBusy = true; StreamingIsBusy = true;
_sttClient.FeedAudioContent(_sttStream, buffer, Convert.ToUInt32(buffer.Length)); _sttClient.FeedAudioContent(buffer, Convert.ToUInt32(buffer.Length));
StreamingIsBusy = false; StreamingIsBusy = false;
} }
} }
@ -393,7 +386,7 @@ namespace DeepSpeech.WPF.ViewModels
{ {
await Task.Delay(90); await Task.Delay(90);
} }
Transcription = _sttClient.FinishStream(_sttStream); Transcription = _sttClient.FinishStream();
EnableStartRecord = true; EnableStartRecord = true;
} }
@ -402,7 +395,7 @@ namespace DeepSpeech.WPF.ViewModels
/// </summary> /// </summary>
private void StartRecording() private void StartRecording()
{ {
_sttStream =_sttClient.CreateStream(); _sttClient.CreateStream();
_audioCapture.Start(); _audioCapture.Start();
EnableStartRecord = false; EnableStartRecord = false;
EnableStopRecord = true; EnableStopRecord = true;

View File

@ -4,7 +4,6 @@ using DeepSpeechClient.Extensions;
using System; using System;
using System.IO; using System.IO;
using DeepSpeechClient.Enums; using DeepSpeechClient.Enums;
using DeepSpeechClient.Models;
namespace DeepSpeechClient namespace DeepSpeechClient
{ {
@ -14,16 +13,14 @@ namespace DeepSpeechClient
public class DeepSpeech : IDeepSpeech public class DeepSpeech : IDeepSpeech
{ {
private unsafe IntPtr** _modelStatePP; private unsafe IntPtr** _modelStatePP;
private unsafe IntPtr** _streamingStatePP;
/// <summary>
/// Initializes a new instance of <see cref="DeepSpeech"/> class and creates a new acoustic model.
/// </summary>
/// <param name="aModelPath">The path to the frozen model graph.</param>
/// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param> public DeepSpeech()
/// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
public DeepSpeech(string aModelPath, uint aBeamWidth)
{ {
CreateModel(aModelPath, aBeamWidth);
} }
#region IDeepSpeech #region IDeepSpeech
@ -34,7 +31,7 @@ namespace DeepSpeechClient
/// <param name="aModelPath">The path to the frozen model graph.</param> /// <param name="aModelPath">The path to the frozen model graph.</param>
/// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param> /// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception> /// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
private unsafe void CreateModel(string aModelPath, public unsafe void CreateModel(string aModelPath,
uint aBeamWidth) uint aBeamWidth)
{ {
string exceptionMessage = null; string exceptionMessage = null;
@ -121,19 +118,10 @@ namespace DeepSpeechClient
/// <param name="aLMAlpha">The alpha hyperparameter of the CTC decoder. Language Model weight.</param> /// <param name="aLMAlpha">The alpha hyperparameter of the CTC decoder. Language Model weight.</param>
/// <param name="aLMBeta">The beta hyperparameter of the CTC decoder. Word insertion weight.</param> /// <param name="aLMBeta">The beta hyperparameter of the CTC decoder. Word insertion weight.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with a language model.</exception> /// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with a language model.</exception>
/// <exception cref="FileNotFoundException">Thrown when cannot find the language model or trie file.</exception>
public unsafe void EnableDecoderWithLM(string aLMPath, string aTriePath, public unsafe void EnableDecoderWithLM(string aLMPath, string aTriePath,
float aLMAlpha, float aLMBeta) float aLMAlpha, float aLMBeta)
{ {
string exceptionMessage = null; string exceptionMessage = null;
if (string.IsNullOrWhiteSpace(aLMPath))
{
exceptionMessage = "Path to the language model file cannot be empty.";
}
if (!File.Exists(aLMPath))
{
exceptionMessage = $"Cannot find the language model file: {aLMPath}";
}
if (string.IsNullOrWhiteSpace(aTriePath)) if (string.IsNullOrWhiteSpace(aTriePath))
{ {
exceptionMessage = "Path to the trie file cannot be empty."; exceptionMessage = "Path to the trie file cannot be empty.";
@ -159,41 +147,37 @@ namespace DeepSpeechClient
/// <summary> /// <summary>
/// Feeds audio samples to an ongoing streaming inference. /// Feeds audio samples to an ongoing streaming inference.
/// </summary> /// </summary>
/// <param name="stream">Instance of the stream to feed the data.</param>
/// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on).</param> /// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on).</param>
public unsafe void FeedAudioContent(DeepSpeechStream stream, short[] aBuffer, uint aBufferSize) public unsafe void FeedAudioContent(short[] aBuffer, uint aBufferSize)
{ {
NativeImp.DS_FeedAudioContent(stream.GetNativePointer(), aBuffer, aBufferSize); NativeImp.DS_FeedAudioContent(_streamingStatePP, aBuffer, aBufferSize);
} }
/// <summary> /// <summary>
/// Closes the ongoing streaming inference, returns the STT result over the whole audio signal. /// Closes the ongoing streaming inference, returns the STT result over the whole audio signal.
/// </summary> /// </summary>
/// <param name="stream">Instance of the stream to finish.</param> /// <returns>The STT result. The user is responsible for freeing the string.</returns>
/// <returns>The STT result.</returns> public unsafe string FinishStream()
public unsafe string FinishStream(DeepSpeechStream stream)
{ {
return NativeImp.DS_FinishStream(stream.GetNativePointer()).PtrToString(); return NativeImp.DS_FinishStream(_streamingStatePP).PtrToString();
} }
/// <summary> /// <summary>
/// Closes the ongoing streaming inference, returns the STT result over the whole audio signal. /// Closes the ongoing streaming inference, returns the STT result over the whole audio signal.
/// </summary> /// </summary>
/// <param name="stream">Instance of the stream to finish.</param> /// <returns>The extended metadata. The user is responsible for freeing the struct.</returns>
/// <returns>The extended metadata result.</returns> public unsafe Models.Metadata FinishStreamWithMetadata()
public unsafe Metadata FinishStreamWithMetadata(DeepSpeechStream stream)
{ {
return NativeImp.DS_FinishStreamWithMetadata(stream.GetNativePointer()).PtrToMetadata(); return NativeImp.DS_FinishStreamWithMetadata(_streamingStatePP).PtrToMetadata();
} }
/// <summary> /// <summary>
/// Computes the intermediate decoding of an ongoing streaming inference. /// Computes the intermediate decoding of an ongoing streaming inference.
/// </summary> /// </summary>
/// <param name="stream">Instance of the stream to decode.</param> /// <returns>The STT intermediate result. The user is responsible for freeing the string.</returns>
/// <returns>The STT intermediate result.</returns> public unsafe string IntermediateDecode()
public unsafe string IntermediateDecode(DeepSpeechStream stream)
{ {
return NativeImp.DS_IntermediateDecode(stream.GetNativePointer()); return NativeImp.DS_IntermediateDecode(_streamingStatePP);
} }
/// <summary> /// <summary>
@ -207,12 +191,11 @@ namespace DeepSpeechClient
/// <summary> /// <summary>
/// Creates a new streaming inference state. /// Creates a new streaming inference state.
/// </summary> /// </summary>
public unsafe DeepSpeechStream CreateStream() /// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
public unsafe void CreateStream()
{ {
IntPtr** streamingStatePointer = null; var resultCode = NativeImp.DS_CreateStream(_modelStatePP, ref _streamingStatePP);
var resultCode = NativeImp.DS_CreateStream(_modelStatePP, ref streamingStatePointer);
EvaluateResultCode(resultCode); EvaluateResultCode(resultCode);
return new DeepSpeechStream(streamingStatePointer);
} }
/// <summary> /// <summary>
@ -220,10 +203,25 @@ namespace DeepSpeechClient
/// This can be used if you no longer need the result of an ongoing streaming /// This can be used if you no longer need the result of an ongoing streaming
/// inference and don't want to perform a costly decode operation. /// inference and don't want to perform a costly decode operation.
/// </summary> /// </summary>
public unsafe void FreeStream(DeepSpeechStream stream) public unsafe void FreeStream()
{ {
NativeImp.DS_FreeStream(stream.GetNativePointer()); NativeImp.DS_FreeStream(ref _streamingStatePP);
stream.Dispose(); }
/// <summary>
/// Free a DeepSpeech allocated string
/// </summary>
public unsafe void FreeString(IntPtr intPtr)
{
NativeImp.DS_FreeString(intPtr);
}
/// <summary>
/// Free a DeepSpeech allocated Metadata struct
/// </summary>
public unsafe void FreeMetadata(IntPtr intPtr)
{
NativeImp.DS_FreeMetadata(intPtr);
} }
/// <summary> /// <summary>
@ -231,7 +229,7 @@ namespace DeepSpeechClient
/// </summary> /// </summary>
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param> /// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
/// <param name="aBufferSize">The number of samples in the audio signal.</param> /// <param name="aBufferSize">The number of samples in the audio signal.</param>
/// <returns>The STT result. Returns NULL on error.</returns> /// <returns>The STT result. The user is responsible for freeing the string. Returns NULL on error.</returns>
public unsafe string SpeechToText(short[] aBuffer, uint aBufferSize) public unsafe string SpeechToText(short[] aBuffer, uint aBufferSize)
{ {
return NativeImp.DS_SpeechToText(_modelStatePP, aBuffer, aBufferSize).PtrToString(); return NativeImp.DS_SpeechToText(_modelStatePP, aBuffer, aBufferSize).PtrToString();
@ -242,8 +240,8 @@ namespace DeepSpeechClient
/// </summary> /// </summary>
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param> /// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
/// <param name="aBufferSize">The number of samples in the audio signal.</param> /// <param name="aBufferSize">The number of samples in the audio signal.</param>
/// <returns>The extended metadata. Returns NULL on error.</returns> /// <returns>The extended metadata. The user is responsible for freeing the struct. Returns NULL on error.</returns>
public unsafe Metadata SpeechToTextWithMetadata(short[] aBuffer, uint aBufferSize) public unsafe Models.Metadata SpeechToTextWithMetadata(short[] aBuffer, uint aBufferSize)
{ {
return NativeImp.DS_SpeechToTextWithMetadata(_modelStatePP, aBuffer, aBufferSize).PtrToMetadata(); return NativeImp.DS_SpeechToTextWithMetadata(_modelStatePP, aBuffer, aBufferSize).PtrToMetadata();
} }

View File

@ -48,7 +48,6 @@
<Compile Include="Enums\ErrorCodes.cs" /> <Compile Include="Enums\ErrorCodes.cs" />
<Compile Include="Interfaces\IDeepSpeech.cs" /> <Compile Include="Interfaces\IDeepSpeech.cs" />
<Compile Include="Extensions\NativeExtensions.cs" /> <Compile Include="Extensions\NativeExtensions.cs" />
<Compile Include="Models\DeepSpeechStream.cs" />
<Compile Include="Models\Metadata.cs" /> <Compile Include="Models\Metadata.cs" />
<Compile Include="Models\MetadataItem.cs" /> <Compile Include="Models\MetadataItem.cs" />
<Compile Include="NativeImp.cs" /> <Compile Include="NativeImp.cs" />

View File

@ -1,11 +1,10 @@
using DeepSpeechClient.Models; using DeepSpeechClient.Models;
using System; using System;
using System.IO;
namespace DeepSpeechClient.Interfaces namespace DeepSpeechClient.Interfaces
{ {
/// <summary> /// <summary>
/// Client interface of the Mozilla's DeepSpeech implementation. /// Client interface of the Mozilla's deepspeech implementation.
/// </summary> /// </summary>
public interface IDeepSpeech : IDisposable public interface IDeepSpeech : IDisposable
{ {
@ -14,6 +13,15 @@ namespace DeepSpeechClient.Interfaces
/// </summary> /// </summary>
void PrintVersions(); void PrintVersions();
/// <summary>
/// Create an object providing an interface to a trained DeepSpeech model.
/// </summary>
/// <param name="aModelPath">The path to the frozen model graph.</param>
/// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width generates better results at the cost of decoding time.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
unsafe void CreateModel(string aModelPath,
uint aBeamWidth);
/// <summary> /// <summary>
/// Return the sample rate expected by the model. /// Return the sample rate expected by the model.
/// </summary> /// </summary>
@ -28,7 +36,6 @@ namespace DeepSpeechClient.Interfaces
/// <param name="aLMAlpha">The alpha hyperparameter of the CTC decoder. Language Model weight.</param> /// <param name="aLMAlpha">The alpha hyperparameter of the CTC decoder. Language Model weight.</param>
/// <param name="aLMBeta">The beta hyperparameter of the CTC decoder. Word insertion weight.</param> /// <param name="aLMBeta">The beta hyperparameter of the CTC decoder. Word insertion weight.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with a language model.</exception> /// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with a language model.</exception>
/// <exception cref="FileNotFoundException">Thrown when cannot find the language model or trie file.</exception>
unsafe void EnableDecoderWithLM(string aLMPath, unsafe void EnableDecoderWithLM(string aLMPath,
string aTriePath, string aTriePath,
float aLMAlpha, float aLMAlpha,
@ -39,7 +46,7 @@ namespace DeepSpeechClient.Interfaces
/// </summary> /// </summary>
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param> /// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
/// <param name="aBufferSize">The number of samples in the audio signal.</param> /// <param name="aBufferSize">The number of samples in the audio signal.</param>
/// <returns>The STT result. Returns NULL on error.</returns> /// <returns>The STT result. The user is responsible for freeing the string. Returns NULL on error.</returns>
unsafe string SpeechToText(short[] aBuffer, unsafe string SpeechToText(short[] aBuffer,
uint aBufferSize); uint aBufferSize);
@ -48,7 +55,7 @@ namespace DeepSpeechClient.Interfaces
/// </summary> /// </summary>
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param> /// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
/// <param name="aBufferSize">The number of samples in the audio signal.</param> /// <param name="aBufferSize">The number of samples in the audio signal.</param>
/// <returns>The extended metadata. Returns NULL on error.</returns> /// <returns>The extended metadata result. The user is responsible for freeing the struct. Returns NULL on error.</returns>
unsafe Metadata SpeechToTextWithMetadata(short[] aBuffer, unsafe Metadata SpeechToTextWithMetadata(short[] aBuffer,
uint aBufferSize); uint aBufferSize);
@ -57,39 +64,46 @@ namespace DeepSpeechClient.Interfaces
/// This can be used if you no longer need the result of an ongoing streaming /// This can be used if you no longer need the result of an ongoing streaming
/// inference and don't want to perform a costly decode operation. /// inference and don't want to perform a costly decode operation.
/// </summary> /// </summary>
unsafe void FreeStream(DeepSpeechStream stream); unsafe void FreeStream();
/// <summary>
/// Free a DeepSpeech allocated string
/// </summary>
unsafe void FreeString(IntPtr intPtr);
/// <summary>
/// Free a DeepSpeech allocated Metadata struct
/// </summary>
unsafe void FreeMetadata(IntPtr intPtr);
/// <summary> /// <summary>
/// Creates a new streaming inference state. /// Creates a new streaming inference state.
/// </summary> /// </summary>
unsafe DeepSpeechStream CreateStream(); /// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
unsafe void CreateStream();
/// <summary> /// <summary>
/// Feeds audio samples to an ongoing streaming inference. /// Feeds audio samples to an ongoing streaming inference.
/// </summary> /// </summary>
/// <param name="stream">Instance of the stream to feed the data.</param>
/// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on).</param> /// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on).</param>
unsafe void FeedAudioContent(DeepSpeechStream stream, short[] aBuffer, uint aBufferSize); unsafe void FeedAudioContent(short[] aBuffer, uint aBufferSize);
/// <summary> /// <summary>
/// Computes the intermediate decoding of an ongoing streaming inference. /// Computes the intermediate decoding of an ongoing streaming inference.
/// </summary> /// </summary>
/// <param name="stream">Instance of the stream to decode.</param> /// <returns>The STT intermediate result. The user is responsible for freeing the string.</returns>
/// <returns>The STT intermediate result.</returns> unsafe string IntermediateDecode();
unsafe string IntermediateDecode(DeepSpeechStream stream);
/// <summary> /// <summary>
/// Closes the ongoing streaming inference, returns the STT result over the whole audio signal. /// Closes the ongoing streaming inference, returns the STT result over the whole audio signal.
/// </summary> /// </summary>
/// <param name="stream">Instance of the stream to finish.</param> /// <returns>The STT result. The user is responsible for freeing the string.</returns>
/// <returns>The STT result.</returns> unsafe string FinishStream();
unsafe string FinishStream(DeepSpeechStream stream);
/// <summary> /// <summary>
/// Closes the ongoing streaming inference, returns the STT result over the whole audio signal. /// Closes the ongoing streaming inference, returns the STT result over the whole audio signal.
/// </summary> /// </summary>
/// <param name="stream">Instance of the stream to finish.</param> /// <returns>The extended metadata result. The user is responsible for freeing the struct.</returns>
/// <returns>The extended metadata result.</returns> unsafe Metadata FinishStreamWithMetadata();
unsafe Metadata FinishStreamWithMetadata(DeepSpeechStream stream);
} }
} }

View File

@ -1,35 +0,0 @@
using System;
namespace DeepSpeechClient.Models
{
    /// <summary>
    /// Managed handle that holds the native pointer identifying a decoding stream.
    /// </summary>
    public class DeepSpeechStream : IDisposable
    {
        // A null value marks the stream as disposed (or never initialized).
        private unsafe IntPtr** _streamingStatePp;

        /// <summary>
        /// Creates a <see cref="DeepSpeechStream"/> wrapping the supplied native stream pointer.
        /// </summary>
        /// <param name="streamingStatePP">Pointer to the native streaming state.</param>
        public unsafe DeepSpeechStream(IntPtr** streamingStatePP)
        {
            this._streamingStatePp = streamingStatePP;
        }

        /// <summary>
        /// Returns the wrapped native pointer.
        /// </summary>
        /// <returns>The native pointer that was passed to the constructor.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the stored pointer is null, i.e. the stream was disposed or never initialized.
        /// </exception>
        internal unsafe IntPtr** GetNativePointer()
        {
            IntPtr** nativePointer = _streamingStatePp;
            if (nativePointer != null)
            {
                return nativePointer;
            }

            throw new InvalidOperationException("Cannot use a disposed or uninitialized stream.");
        }

        /// <summary>
        /// Clears the stored pointer so that later calls to <see cref="GetNativePointer"/> throw.
        /// This only resets the managed field; it makes no native call itself.
        /// </summary>
        public unsafe void Dispose()
        {
            _streamingStatePp = null;
        }
    }
}

View File

@ -48,7 +48,7 @@ namespace DeepSpeechClient
ref IntPtr** retval); ref IntPtr** retval);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal static unsafe extern void DS_FreeStream(IntPtr** aSctx); internal static unsafe extern void DS_FreeStream(ref IntPtr** aSctx);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal static unsafe extern void DS_FreeMetadata(IntPtr metadata); internal static unsafe extern void DS_FreeMetadata(IntPtr metadata);

View File

@ -53,13 +53,16 @@ namespace CSharpExamples
const float LM_BETA = 1.85f; const float LM_BETA = 1.85f;
Stopwatch stopwatch = new Stopwatch(); Stopwatch stopwatch = new Stopwatch();
try
using (IDeepSpeech sttClient = new DeepSpeech())
{ {
Console.WriteLine("Loading model..."); try
stopwatch.Start();
using (IDeepSpeech sttClient = new DeepSpeech(model ?? "output_graph.pbmm",
BEAM_WIDTH))
{ {
Console.WriteLine("Loading model...");
stopwatch.Start();
sttClient.CreateModel(
model ?? "output_graph.pbmm",
BEAM_WIDTH);
stopwatch.Stop(); stopwatch.Stop();
Console.WriteLine($"Model loaded - {stopwatch.Elapsed.Milliseconds} ms"); Console.WriteLine($"Model loaded - {stopwatch.Elapsed.Milliseconds} ms");
@ -85,14 +88,12 @@ namespace CSharpExamples
string speechResult; string speechResult;
if (extended) if (extended)
{ {
Metadata metaResult = sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer, Metadata metaResult = sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer, Convert.ToUInt32(waveBuffer.MaxSize / 2));
Convert.ToUInt32(waveBuffer.MaxSize / 2));
speechResult = MetadataToString(metaResult); speechResult = MetadataToString(metaResult);
} }
else else
{ {
speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer, speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer, Convert.ToUInt32(waveBuffer.MaxSize / 2));
Convert.ToUInt32(waveBuffer.MaxSize / 2));
} }
stopwatch.Stop(); stopwatch.Stop();
@ -103,10 +104,10 @@ namespace CSharpExamples
} }
waveBuffer.Clear(); waveBuffer.Clear();
} }
} catch (Exception ex)
catch (Exception ex) {
{ Console.WriteLine(ex.Message);
Console.WriteLine(ex.Message); }
} }
} }
} }