Adjust client.cc for new API and small cleanup of code and function names
This commit is contained in:
parent
ea8c7d2957
commit
c74dcffe79
@ -44,10 +44,115 @@ struct meta_word {
|
||||
float duration;
|
||||
};
|
||||
|
||||
char* metadataToString(Metadata* metadata);
|
||||
std::vector<meta_word> WordsFromMetadata(Metadata* metadata);
|
||||
char* JSONOutput(Result* result);
|
||||
std::string MetadataOutput(Metadata* metadata);
|
||||
char*
|
||||
CandidateTranscriptToString(CandidateTranscript* transcript)
|
||||
{
|
||||
std::string retval = "";
|
||||
for (int i = 0; i < transcript->num_tokens; i++) {
|
||||
TokenMetadata token = transcript->tokens[i];
|
||||
retval += token.text;
|
||||
}
|
||||
return strdup(retval.c_str());
|
||||
}
|
||||
|
||||
std::vector<meta_word>
|
||||
CandidateTranscriptToWords(CandidateTranscript* transcript)
|
||||
{
|
||||
std::vector<meta_word> word_list;
|
||||
|
||||
std::string word = "";
|
||||
float word_start_time = 0;
|
||||
|
||||
// Loop through each token
|
||||
for (int i = 0; i < transcript->num_tokens; i++) {
|
||||
TokenMetadata token = transcript->tokens[i];
|
||||
|
||||
// Append token to word if it's not a space
|
||||
if (strcmp(token.text, u8" ") != 0) {
|
||||
// Log the start time of the new word
|
||||
if (word.length() == 0) {
|
||||
word_start_time = token.start_time;
|
||||
}
|
||||
word.append(token.text);
|
||||
}
|
||||
|
||||
// Word boundary is either a space or the last token in the array
|
||||
if (strcmp(token.text, u8" ") == 0 || i == transcript->num_tokens-1) {
|
||||
float word_duration = token.start_time - word_start_time;
|
||||
|
||||
if (word_duration < 0) {
|
||||
word_duration = 0;
|
||||
}
|
||||
|
||||
meta_word w;
|
||||
w.word = word;
|
||||
w.start_time = word_start_time;
|
||||
w.duration = word_duration;
|
||||
|
||||
word_list.push_back(w);
|
||||
|
||||
// Reset
|
||||
word = "";
|
||||
word_start_time = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return word_list;
|
||||
}
|
||||
|
||||
std::string
|
||||
CandidateTranscriptToJSON(CandidateTranscript *transcript)
|
||||
{
|
||||
std::ostringstream out_string;
|
||||
|
||||
std::vector<meta_word> words = CandidateTranscriptToWords(transcript);
|
||||
|
||||
out_string << R"("metadata":{"confidence":)" << transcript->confidence << R"(},"words":[)";
|
||||
|
||||
for (int i = 0; i < words.size(); i++) {
|
||||
meta_word w = words[i];
|
||||
out_string << R"({"word":")" << w.word << R"(","time":)" << w.start_time << R"(,"duration":)" << w.duration << "}";
|
||||
|
||||
if (i < words.size() - 1) {
|
||||
out_string << ",";
|
||||
}
|
||||
}
|
||||
|
||||
out_string << "]";
|
||||
|
||||
return out_string.str();
|
||||
}
|
||||
|
||||
char*
|
||||
MetadataToJSON(Metadata* result)
|
||||
{
|
||||
std::ostringstream out_string;
|
||||
out_string << "{\n";
|
||||
|
||||
for (int j=0; j < result->num_transcripts; ++j) {
|
||||
CandidateTranscript *transcript = &result->transcripts[j];
|
||||
|
||||
if (j == 0) {
|
||||
out_string << CandidateTranscriptToJSON(transcript);
|
||||
|
||||
if (result->num_transcripts > 1) {
|
||||
out_string << ",\n" << R"("alternatives")" << ":[\n";
|
||||
}
|
||||
} else {
|
||||
out_string << "{" << CandidateTranscriptToJSON(transcript) << "}";
|
||||
|
||||
if (j < result->num_transcripts - 1) {
|
||||
out_string << ",\n";
|
||||
} else {
|
||||
out_string << "\n]";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out_string << "\n}\n";
|
||||
|
||||
return strdup(out_string.str().c_str());
|
||||
}
|
||||
|
||||
ds_result
|
||||
LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize,
|
||||
@ -58,13 +163,13 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize,
|
||||
clock_t ds_start_time = clock();
|
||||
|
||||
if (extended_output) {
|
||||
Result *result = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, 1);
|
||||
res.string = metadataToString(&result->transcriptions[0]);
|
||||
DS_FreeResult(result);
|
||||
Metadata *result = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, 1);
|
||||
res.string = CandidateTranscriptToString(&result->transcripts[0]);
|
||||
DS_FreeMetadata(result);
|
||||
} else if (json_output) {
|
||||
Result *result = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, 3);
|
||||
res.string = JSONOutput(result);
|
||||
DS_FreeResult(result);
|
||||
Metadata *result = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, 3);
|
||||
res.string = MetadataToJSON(result);
|
||||
DS_FreeMetadata(result);
|
||||
} else if (stream_size > 0) {
|
||||
StreamingState* ctx;
|
||||
int status = DS_CreateStream(aCtx, &ctx);
|
||||
@ -279,119 +384,6 @@ ProcessFile(ModelState* context, const char* path, bool show_times)
|
||||
}
|
||||
}
|
||||
|
||||
char*
|
||||
metadataToString(Metadata* metadata)
|
||||
{
|
||||
std::string retval = "";
|
||||
for (int i = 0; i < metadata->num_items; i++) {
|
||||
MetadataItem item = metadata->items[i];
|
||||
retval += item.character;
|
||||
}
|
||||
return strdup(retval.c_str());
|
||||
}
|
||||
|
||||
std::vector<meta_word>
|
||||
WordsFromMetadata(Metadata* metadata)
|
||||
{
|
||||
std::vector<meta_word> word_list;
|
||||
|
||||
std::string word = "";
|
||||
float word_start_time = 0;
|
||||
|
||||
// Loop through each character
|
||||
for (int i = 0; i < metadata->num_items; i++) {
|
||||
MetadataItem item = metadata->items[i];
|
||||
|
||||
// Append character to word if it's not a space
|
||||
if (strcmp(item.character, u8" ") != 0) {
|
||||
// Log the start time of the new word
|
||||
if (word.length() == 0) {
|
||||
word_start_time = item.start_time;
|
||||
}
|
||||
word.append(item.character);
|
||||
}
|
||||
|
||||
// Word boundary is either a space or the last character in the array
|
||||
if (strcmp(item.character, " ") == 0
|
||||
|| strcmp(item.character, u8" ") == 0
|
||||
|| i == metadata->num_items-1) {
|
||||
|
||||
float word_duration = item.start_time - word_start_time;
|
||||
|
||||
if (word_duration < 0) {
|
||||
word_duration = 0;
|
||||
}
|
||||
|
||||
meta_word w;
|
||||
w.word = word;
|
||||
w.start_time = word_start_time;
|
||||
w.duration = word_duration;
|
||||
|
||||
word_list.push_back(w);
|
||||
|
||||
// Reset
|
||||
word = "";
|
||||
word_start_time = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return word_list;
|
||||
}
|
||||
|
||||
char*
|
||||
JSONOutput(Result* result)
|
||||
{
|
||||
std::ostringstream out_string;
|
||||
out_string << "{\n";
|
||||
|
||||
for (int j=0; j < result->num_transcriptions; ++j) {
|
||||
Metadata *metadata = &result->transcriptions[j];
|
||||
|
||||
if (j == 0) {
|
||||
out_string << MetadataOutput(metadata);
|
||||
|
||||
if (result->num_transcriptions > 1) {
|
||||
out_string << ",\n" << R"("alternatives")" << ":[\n";
|
||||
}
|
||||
} else {
|
||||
out_string << "{" << MetadataOutput(metadata) << "}";
|
||||
|
||||
if (j < result->num_transcriptions - 1) {
|
||||
out_string << ",\n";
|
||||
} else {
|
||||
out_string << "\n]";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out_string << "\n}\n";
|
||||
|
||||
return strdup(out_string.str().c_str());
|
||||
}
|
||||
|
||||
std::string
|
||||
MetadataOutput(Metadata *metadata)
|
||||
{
|
||||
std::ostringstream out_string;
|
||||
|
||||
std::vector<meta_word> words = WordsFromMetadata(metadata);
|
||||
|
||||
out_string << R"("metadata":{"confidence":)" << metadata->confidence << R"(},"words":[)";
|
||||
|
||||
for (int i = 0; i < words.size(); i++) {
|
||||
meta_word w = words[i];
|
||||
out_string << R"({"word":")" << w.word << R"(","time":)" << w.start_time << R"(,"duration":)" << w.duration << "}";
|
||||
|
||||
if (i < words.size() - 1) {
|
||||
out_string << ",";
|
||||
}
|
||||
}
|
||||
|
||||
out_string << "]";
|
||||
|
||||
return out_string.str();
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user