Remove sample rate parameter usage from .NET binding

This commit is contained in:
Reuben Morais 2019-10-09 16:55:00 +02:00
parent 1007d93da2
commit 11ad23cc1f
4 changed files with 14 additions and 24 deletions

View File

@@ -193,11 +193,10 @@ namespace DeepSpeechClient
/// <summary> /// <summary>
/// Creates a new streaming inference state. /// Creates a new streaming inference state.
/// </summary> /// </summary>
/// <param name="aSampleRate">The sample-rate of the audio signal</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception> /// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
public unsafe void CreateStream(uint aSampleRate) public unsafe void CreateStream()
{ {
var resultCode = NativeImp.DS_CreateStream(_modelStatePP, aSampleRate, ref _streamingStatePP); var resultCode = NativeImp.DS_CreateStream(_modelStatePP, ref _streamingStatePP);
EvaluateResultCode(resultCode); EvaluateResultCode(resultCode);
} }
@@ -232,11 +231,10 @@ namespace DeepSpeechClient
/// </summary> /// </summary>
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate.</param> /// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate.</param>
/// <param name="aBufferSize">The number of samples in the audio signal.</param> /// <param name="aBufferSize">The number of samples in the audio signal.</param>
/// <param name="aSampleRate">The sample-rate of the audio signal.</param>
/// <returns>The STT result. The user is responsible for freeing the string. Returns NULL on error.</returns> /// <returns>The STT result. The user is responsible for freeing the string. Returns NULL on error.</returns>
public unsafe string SpeechToText(short[] aBuffer, uint aBufferSize, uint aSampleRate) public unsafe string SpeechToText(short[] aBuffer, uint aBufferSize)
{ {
return NativeImp.DS_SpeechToText(_modelStatePP, aBuffer, aBufferSize, aSampleRate).PtrToString(); return NativeImp.DS_SpeechToText(_modelStatePP, aBuffer, aBufferSize).PtrToString();
} }
/// <summary> /// <summary>
@@ -244,11 +242,10 @@ namespace DeepSpeechClient
/// </summary> /// </summary>
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate.</param> /// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate.</param>
/// <param name="aBufferSize">The number of samples in the audio signal.</param> /// <param name="aBufferSize">The number of samples in the audio signal.</param>
/// <param name="aSampleRate">The sample-rate of the audio signal.</param>
/// <returns>The extended metadata. The user is responsible for freeing the struct. Returns NULL on error.</returns> /// <returns>The extended metadata. The user is responsible for freeing the struct. Returns NULL on error.</returns>
public unsafe Models.Metadata SpeechToTextWithMetadata(short[] aBuffer, uint aBufferSize, uint aSampleRate) public unsafe Models.Metadata SpeechToTextWithMetadata(short[] aBuffer, uint aBufferSize)
{ {
return NativeImp.DS_SpeechToTextWithMetadata(_modelStatePP, aBuffer, aBufferSize, aSampleRate).PtrToMetadata(); return NativeImp.DS_SpeechToTextWithMetadata(_modelStatePP, aBuffer, aBufferSize).PtrToMetadata();
} }
#endregion #endregion

View File

@@ -42,22 +42,18 @@ namespace DeepSpeechClient.Interfaces
/// </summary> /// </summary>
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate.</param> /// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate.</param>
/// <param name="aBufferSize">The number of samples in the audio signal.</param> /// <param name="aBufferSize">The number of samples in the audio signal.</param>
/// <param name="aSampleRate">The sample-rate of the audio signal.</param>
/// <returns>The STT result. The user is responsible for freeing the string. Returns NULL on error.</returns> /// <returns>The STT result. The user is responsible for freeing the string. Returns NULL on error.</returns>
unsafe string SpeechToText(short[] aBuffer, unsafe string SpeechToText(short[] aBuffer,
uint aBufferSize, uint aBufferSize);
uint aSampleRate);
/// <summary> /// <summary>
/// Use the DeepSpeech model to perform Speech-To-Text. /// Use the DeepSpeech model to perform Speech-To-Text.
/// </summary> /// </summary>
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate.</param> /// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate.</param>
/// <param name="aBufferSize">The number of samples in the audio signal.</param> /// <param name="aBufferSize">The number of samples in the audio signal.</param>
/// <param name="aSampleRate">The sample-rate of the audio signal.</param>
/// <returns>The extended metadata result. The user is responsible for freeing the struct. Returns NULL on error.</returns> /// <returns>The extended metadata result. The user is responsible for freeing the struct. Returns NULL on error.</returns>
unsafe Metadata SpeechToTextWithMetadata(short[] aBuffer, unsafe Metadata SpeechToTextWithMetadata(short[] aBuffer,
uint aBufferSize, uint aBufferSize);
uint aSampleRate);
/// <summary> /// <summary>
/// Destroy a streaming state without decoding the computed logits. /// Destroy a streaming state without decoding the computed logits.
@@ -79,9 +75,8 @@ namespace DeepSpeechClient.Interfaces
/// <summary> /// <summary>
/// Creates a new streaming inference state. /// Creates a new streaming inference state.
/// </summary> /// </summary>
/// <param name="aSampleRate">The sample-rate of the audio signal</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception> /// <exception cref="ArgumentException">Thrown when the native binary failed to initialize the streaming mode.</exception>
unsafe void CreateStream(uint aSampleRate); unsafe void CreateStream();
/// <summary> /// <summary>
/// Feeds audio samples to an ongoing streaming inference. /// Feeds audio samples to an ongoing streaming inference.

View File

@@ -31,21 +31,19 @@ namespace DeepSpeechClient
CharSet = CharSet.Ansi, SetLastError = true)] CharSet = CharSet.Ansi, SetLastError = true)]
internal static unsafe extern IntPtr DS_SpeechToText(IntPtr** aCtx, internal static unsafe extern IntPtr DS_SpeechToText(IntPtr** aCtx,
short[] aBuffer, short[] aBuffer,
uint aBufferSize, uint aBufferSize);
uint aSampleRate);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl, SetLastError = true)] [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl, SetLastError = true)]
internal static unsafe extern IntPtr DS_SpeechToTextWithMetadata(IntPtr** aCtx, internal static unsafe extern IntPtr DS_SpeechToTextWithMetadata(IntPtr** aCtx,
short[] aBuffer, short[] aBuffer,
uint aBufferSize, uint aBufferSize);
uint aSampleRate);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal static unsafe extern void DS_FreeModel(IntPtr** aCtx); internal static unsafe extern void DS_FreeModel(IntPtr** aCtx);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal static unsafe extern ErrorCodes DS_CreateStream(IntPtr** aCtx, internal static unsafe extern ErrorCodes DS_CreateStream(IntPtr** aCtx,
uint aSampleRate, ref IntPtr** retval); ref IntPtr** retval);
[DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)] [DllImport("libdeepspeech.so", CallingConvention = CallingConvention.Cdecl)]
internal static unsafe extern void DS_FreeStream(ref IntPtr** aSctx); internal static unsafe extern void DS_FreeStream(ref IntPtr** aSctx);

View File

@@ -91,12 +91,12 @@ namespace CSharpExamples
string speechResult; string speechResult;
if (extended) if (extended)
{ {
Metadata metaResult = sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer, Convert.ToUInt32(waveBuffer.MaxSize / 2), 16000); Metadata metaResult = sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer, Convert.ToUInt32(waveBuffer.MaxSize / 2));
speechResult = MetadataToString(metaResult); speechResult = MetadataToString(metaResult);
} }
else else
{ {
speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer, Convert.ToUInt32(waveBuffer.MaxSize / 2), 16000); speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer, Convert.ToUInt32(waveBuffer.MaxSize / 2));
} }
stopwatch.Stop(); stopwatch.Stop();