Expose and use model sample rate in JavaScript

This commit is contained in:
Reuben Morais 2019-10-10 21:55:08 +02:00
parent afea2b4231
commit 0be2787e4e
2 changed files with 31 additions and 18 deletions

View File

@ -62,11 +62,29 @@ function metadataToString(metadata) {
return retval; return retval;
} }
// Load the acoustic model up front, before the input audio is inspected or
// converted, so that the sample rate it reports can drive the checks and the
// resampling configuration further down in this script.
console.error('Loading model from file %s', args['model']);
const model_load_start = process.hrtime();
var model = new Ds.Model(args['model'], args['alphabet'], BEAM_WIDTH);
// Passing the start tuple back to process.hrtime() yields the elapsed time.
const model_load_end = process.hrtime(model_load_start);
console.error('Loaded model in %ds.', totalTime(model_load_end));

// Sample rate reported by the loaded model; used below in place of a
// hard-coded rate.
var desired_sample_rate = model.sampleRate();

// The language model is optional: the decoder is only enabled with it when
// both the 'lm' and the 'trie' arguments were supplied.
if (args['lm'] && args['trie']) {
console.error('Loading language model from files %s %s', args['lm'], args['trie']);
const lm_load_start = process.hrtime();
model.enableDecoderWithLM(args['lm'], args['trie'], LM_ALPHA, LM_BETA);
const lm_load_end = process.hrtime(lm_load_start);
console.error('Loaded language model in %ds.', totalTime(lm_load_end));
}
const buffer = Fs.readFileSync(args['audio']); const buffer = Fs.readFileSync(args['audio']);
const result = Wav.decode(buffer); const result = Wav.decode(buffer);
if (result.sampleRate < 16000) { if (result.sampleRate < desired_sample_rate) {
console.error('Warning: original sample rate (' + result.sampleRate + ') is lower than 16kHz. Up-sampling might produce erratic speech recognition.'); console.error('Warning: original sample rate (' + result.sampleRate + ') ' +
'is lower than ' + desired_sample_rate + 'Hz. ' +
'Up-sampling might produce erratic speech recognition.');
} }
function bufferToStream(buffer) { function bufferToStream(buffer) {
@ -84,7 +102,7 @@ bufferToStream(buffer).
}, },
output: { output: {
bits: 16, bits: 16,
rate: 16000, rate: desired_sample_rate,
channels: 1, channels: 1,
encoding: 'signed-integer', encoding: 'signed-integer',
endian: 'little', endian: 'little',
@ -97,23 +115,9 @@ bufferToStream(buffer).
audioStream.on('finish', () => { audioStream.on('finish', () => {
let audioBuffer = audioStream.toBuffer(); let audioBuffer = audioStream.toBuffer();
console.error('Loading model from file %s', args['model']);
const model_load_start = process.hrtime();
var model = new Ds.Model(args['model'], args['alphabet'], BEAM_WIDTH);
const model_load_end = process.hrtime(model_load_start);
console.error('Loaded model in %ds.', totalTime(model_load_end));
if (args['lm'] && args['trie']) {
console.error('Loading language model from files %s %s', args['lm'], args['trie']);
const lm_load_start = process.hrtime();
model.enableDecoderWithLM(args['lm'], args['trie'], LM_ALPHA, LM_BETA);
const lm_load_end = process.hrtime(lm_load_start);
console.error('Loaded language model in %ds.', totalTime(lm_load_end));
}
const inference_start = process.hrtime(); const inference_start = process.hrtime();
console.error('Running inference.'); console.error('Running inference.');
const audioLength = (audioBuffer.length / 2) * ( 1 / 16000); const audioLength = (audioBuffer.length / 2) * (1 / desired_sample_rate);
// We take half of the buffer_size because buffer is a char* while // We take half of the buffer_size because buffer is a char* while
// LocalDsSTT() expected a short* // LocalDsSTT() expected a short*

View File

@ -45,6 +45,15 @@ function Model() {
this._impl = impl; this._impl = impl;
} }
/**
 * Query the sample rate that this model expects.
 *
 * The value is obtained from the native binding using the handle stored on
 * this instance.
 *
 * @return {number} Sample rate.
 */
Model.prototype.sampleRate = function() {
    const nativeHandle = this._impl;
    return binding.GetModelSampleRate(nativeHandle);
}
/** /**
* Enable decoding using beam scoring with a KenLM language model. * Enable decoding using beam scoring with a KenLM language model.
* *