Merge pull request #2181 from eggonlea/streaming
Add option to native client binary to print intermediate transcriptions while streaming the input file
This commit is contained in:
commit
e136b5299a
@ -30,6 +30,8 @@ bool extended_metadata = false;
|
|||||||
|
|
||||||
bool json_output = false;
|
bool json_output = false;
|
||||||
|
|
||||||
|
int stream_size = 0;
|
||||||
|
|
||||||
void PrintHelp(const char* bin)
|
void PrintHelp(const char* bin)
|
||||||
{
|
{
|
||||||
std::cout <<
|
std::cout <<
|
||||||
@ -45,6 +47,7 @@ void PrintHelp(const char* bin)
|
|||||||
" -t Run in benchmark mode, output mfcc & inference time\n"
|
" -t Run in benchmark mode, output mfcc & inference time\n"
|
||||||
" --extended Output string from extended metadata\n"
|
" --extended Output string from extended metadata\n"
|
||||||
" --json Extended output, shows word timings as JSON\n"
|
" --json Extended output, shows word timings as JSON\n"
|
||||||
|
" --stream size Run in stream mode, output intermediate results\n"
|
||||||
" --help Show help\n"
|
" --help Show help\n"
|
||||||
" --version Print version and exits\n";
|
" --version Print version and exits\n";
|
||||||
DS_PrintVersions();
|
DS_PrintVersions();
|
||||||
@ -64,6 +67,7 @@ bool ProcessArgs(int argc, char** argv)
|
|||||||
{"t", no_argument, nullptr, 't'},
|
{"t", no_argument, nullptr, 't'},
|
||||||
{"extended", no_argument, nullptr, 'e'},
|
{"extended", no_argument, nullptr, 'e'},
|
||||||
{"json", no_argument, nullptr, 'j'},
|
{"json", no_argument, nullptr, 'j'},
|
||||||
|
{"stream", required_argument, nullptr, 's'},
|
||||||
{"help", no_argument, nullptr, 'h'},
|
{"help", no_argument, nullptr, 'h'},
|
||||||
{"version", no_argument, nullptr, 'v'},
|
{"version", no_argument, nullptr, 'v'},
|
||||||
{nullptr, no_argument, nullptr, 0}
|
{nullptr, no_argument, nullptr, 0}
|
||||||
@ -118,6 +122,10 @@ bool ProcessArgs(int argc, char** argv)
|
|||||||
json_output = true;
|
json_output = true;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case 's':
|
||||||
|
stream_size = atoi(optarg);
|
||||||
|
break;
|
||||||
|
|
||||||
case 'h': // -h or --help
|
case 'h': // -h or --help
|
||||||
case '?': // Unrecognized option
|
case '?': // Unrecognized option
|
||||||
default:
|
default:
|
||||||
@ -136,6 +144,12 @@ bool ProcessArgs(int argc, char** argv)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (stream_size < 0 || stream_size % 160 != 0) {
|
||||||
|
std::cout <<
|
||||||
|
"Stream buffer size must be multiples of 160\n";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -70,6 +70,31 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize,
|
|||||||
Metadata *metadata = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, aSampleRate);
|
Metadata *metadata = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, aSampleRate);
|
||||||
res.string = JSONOutput(metadata);
|
res.string = JSONOutput(metadata);
|
||||||
DS_FreeMetadata(metadata);
|
DS_FreeMetadata(metadata);
|
||||||
|
} else if (stream_size > 0) {
|
||||||
|
StreamingState* ctx;
|
||||||
|
int status = DS_SetupStream(aCtx, 0, aSampleRate, &ctx);
|
||||||
|
if (status != DS_ERR_OK) {
|
||||||
|
res.string = strdup("");
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
size_t off = 0;
|
||||||
|
const char *last = nullptr;
|
||||||
|
while (off < aBufferSize) {
|
||||||
|
size_t cur = aBufferSize - off > stream_size ? stream_size : aBufferSize - off;
|
||||||
|
DS_FeedAudioContent(ctx, aBuffer + off, cur);
|
||||||
|
off += cur;
|
||||||
|
const char* partial = DS_IntermediateDecode(ctx);
|
||||||
|
if (last == nullptr || strcmp(last, partial)) {
|
||||||
|
printf("%s\n", partial);
|
||||||
|
last = partial;
|
||||||
|
} else {
|
||||||
|
DS_FreeString((char *) partial);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (last != nullptr) {
|
||||||
|
DS_FreeString((char *) last);
|
||||||
|
}
|
||||||
|
res.string = DS_FinishStream(ctx);
|
||||||
} else {
|
} else {
|
||||||
res.string = DS_SpeechToText(aCtx, aBuffer, aBufferSize, aSampleRate);
|
res.string = DS_SpeechToText(aCtx, aBuffer, aBufferSize, aSampleRate);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user