diff --git a/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs b/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs
index e332da6d..31b1f9d4 100644
--- a/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs
+++ b/examples/net_framework/DeepSpeechWPF/MainWindow.xaml.cs
@@ -202,15 +202,13 @@ namespace DeepSpeechWPF
             {
                 _audioCapture.Device = _audioCaptureDevices[cbxAudioInputs.SelectedIndex];
             }
-            InitilizeAudioCapture();
+            InitializeAudioCapture(_sttClient.GetModelSampleRate());
         }
-
-
 
         /// <summary>
         /// Initializes the recorder and setup the native stream.
         /// </summary>
-        private void InitilizeAudioCapture()
+        private void InitializeAudioCapture(int desiredSampleRate)
         {
             _audioCapture.Initialize();
             _audioCapture.DataAvailable += _capture_DataAvailable;
@@ -218,7 +216,7 @@
             //create a source, that converts the data provided by the
             //soundInSource to required by the deepspeech model
             _convertedSource = _soundInSource
-                .ChangeSampleRate(16000) // sample rate
+                .ChangeSampleRate(desiredSampleRate) // sample rate
                 .ToSampleSource()
                 .ToWaveSource(16); //bits per sample
 
diff --git a/examples/nodejs_wav/index.js b/examples/nodejs_wav/index.js
index 20ccb2ab..7883a010 100644
--- a/examples/nodejs_wav/index.js
+++ b/examples/nodejs_wav/index.js
@@ -11,6 +11,8 @@ let alphabetPath = './models/alphabet.txt';
 
 let model = new DeepSpeech.Model(modelPath, alphabetPath, BEAM_WIDTH);
 
+let desiredSampleRate = model.sampleRate();
+
 const LM_ALPHA = 0.75;
 const LM_BETA = 1.85;
 let lmPath = './models/lm.binary';
@@ -28,8 +30,8 @@ if (!Fs.existsSync(audioFile)) {
 const buffer = Fs.readFileSync(audioFile);
 const result = Wav.decode(buffer);
 
-if (result.sampleRate < 16000) {
-  console.error('Warning: original sample rate (' + result.sampleRate + ') is lower than 16kHz. Up-sampling might produce erratic speech recognition.');
+if (result.sampleRate < desiredSampleRate) {
+  console.error('Warning: original sample rate (' + result.sampleRate + ') is lower than ' + desiredSampleRate + 'Hz. Up-sampling might produce erratic speech recognition.');
 }
 
 function bufferToStream(buffer) {
@@ -47,7 +49,7 @@ pipe(Sox({
   },
   output: {
     bits: 16,
-    rate: 16000,
+    rate: desiredSampleRate,
     channels: 1,
     encoding: 'signed-integer',
     endian: 'little',
@@ -58,10 +60,9 @@
 pipe(audioStream);
 
 audioStream.on('finish', () => {
-  let audioBuffer = audioStream.toBuffer();
-  const audioLength = (audioBuffer.length / 2) * ( 1 / 16000);
+  const audioLength = (audioBuffer.length / 2) * (1 / desiredSampleRate);
   console.log('audio length', audioLength);
 
   let result = model.stt(audioBuffer.slice(0, audioBuffer.length / 2));