Use model sample rate in client.cc
This commit is contained in:
parent
0241f725cd
commit
c1ed6d711d
@ -111,7 +111,7 @@ typedef struct {
|
|||||||
} ds_audio_buffer;
|
} ds_audio_buffer;
|
||||||
|
|
||||||
ds_audio_buffer
|
ds_audio_buffer
|
||||||
GetAudioBuffer(const char* path)
|
GetAudioBuffer(const char* path, int desired_sample_rate)
|
||||||
{
|
{
|
||||||
ds_audio_buffer res = {0};
|
ds_audio_buffer res = {0};
|
||||||
|
|
||||||
@ -121,7 +121,7 @@ GetAudioBuffer(const char* path)
|
|||||||
|
|
||||||
// Resample/reformat the audio so we can pass it through the MFCC functions
|
// Resample/reformat the audio so we can pass it through the MFCC functions
|
||||||
sox_signalinfo_t target_signal = {
|
sox_signalinfo_t target_signal = {
|
||||||
16000, // Rate
|
static_cast<sox_rate_t>(desired_sample_rate), // Rate
|
||||||
1, // Channels
|
1, // Channels
|
||||||
16, // Precision
|
16, // Precision
|
||||||
SOX_UNSPEC, // Length
|
SOX_UNSPEC, // Length
|
||||||
@ -158,8 +158,10 @@ GetAudioBuffer(const char* path)
|
|||||||
|
|
||||||
assert(output);
|
assert(output);
|
||||||
|
|
||||||
if ((int)input->signal.rate < 16000) {
|
if ((int)input->signal.rate < desired_sample_rate) {
|
||||||
fprintf(stderr, "Warning: original sample rate (%d) is lower than 16kHz. Up-sampling might produce erratic speech recognition.\n", (int)input->signal.rate);
|
fprintf(stderr, "Warning: original sample rate (%d) is lower than %dHz. "
|
||||||
|
"Up-sampling might produce erratic speech recognition.\n",
|
||||||
|
(int)input->signal.rate, desired_sample_rate);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Setup the effects chain to decode/resample
|
// Setup the effects chain to decode/resample
|
||||||
@ -205,7 +207,7 @@ GetAudioBuffer(const char* path)
|
|||||||
#endif // NO_SOX
|
#endif // NO_SOX
|
||||||
|
|
||||||
#ifdef NO_SOX
|
#ifdef NO_SOX
|
||||||
// FIXME: Hack and support only 16kHz mono 16-bits PCM
|
// FIXME: Hack and support only mono 16-bits PCM with standard SoX header
|
||||||
FILE* wave = fopen(path, "r");
|
FILE* wave = fopen(path, "r");
|
||||||
|
|
||||||
size_t rv;
|
size_t rv;
|
||||||
@ -224,12 +226,12 @@ GetAudioBuffer(const char* path)
|
|||||||
|
|
||||||
assert(audio_format == 1); // 1 is PCM
|
assert(audio_format == 1); // 1 is PCM
|
||||||
assert(num_channels == 1); // MONO
|
assert(num_channels == 1); // MONO
|
||||||
assert(sample_rate == 16000); // 16000 Hz
|
assert(sample_rate == desired_sample_rate); // at desired sample rate
|
||||||
assert(bits_per_sample == 16); // 16 bits per sample
|
assert(bits_per_sample == 16); // 16 bits per sample
|
||||||
|
|
||||||
fprintf(stderr, "audio_format=%d\n", audio_format);
|
fprintf(stderr, "audio_format=%d\n", audio_format);
|
||||||
fprintf(stderr, "num_channels=%d\n", num_channels);
|
fprintf(stderr, "num_channels=%d\n", num_channels);
|
||||||
fprintf(stderr, "sample_rate=%d\n", sample_rate);
|
fprintf(stderr, "sample_rate=%d (desired=%d)\n", sample_rate, desired_sample_rate);
|
||||||
fprintf(stderr, "bits_per_sample=%d\n", bits_per_sample);
|
fprintf(stderr, "bits_per_sample=%d\n", bits_per_sample);
|
||||||
|
|
||||||
fseek(wave, 40, SEEK_SET); rv = fread(&res.buffer_size, 4, 1, wave);
|
fseek(wave, 40, SEEK_SET); rv = fread(&res.buffer_size, 4, 1, wave);
|
||||||
@ -257,7 +259,7 @@ GetAudioBuffer(const char* path)
|
|||||||
void
|
void
|
||||||
ProcessFile(ModelState* context, const char* path, bool show_times)
|
ProcessFile(ModelState* context, const char* path, bool show_times)
|
||||||
{
|
{
|
||||||
ds_audio_buffer audio = GetAudioBuffer(path);
|
ds_audio_buffer audio = GetAudioBuffer(path, DS_GetModelSampleRate(context));
|
||||||
|
|
||||||
// Pass audio to DeepSpeech
|
// Pass audio to DeepSpeech
|
||||||
// We take half of buffer_size because buffer is a char* while
|
// We take half of buffer_size because buffer is a char* while
|
||||||
|
Loading…
Reference in New Issue
Block a user