From c74dcffe79dd8346e47a9f9ed5a56e46c2c1810a Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Tue, 25 Feb 2020 12:38:42 +0100 Subject: [PATCH] Adjust client.cc for new API and small cleanup of code and function names --- native_client/client.cc | 238 +++++++++++++++++++--------------------- 1 file changed, 115 insertions(+), 123 deletions(-) diff --git a/native_client/client.cc b/native_client/client.cc index 413be288..9ab47f27 100644 --- a/native_client/client.cc +++ b/native_client/client.cc @@ -44,10 +44,115 @@ struct meta_word { float duration; }; -char* metadataToString(Metadata* metadata); -std::vector WordsFromMetadata(Metadata* metadata); -char* JSONOutput(Result* result); -std::string MetadataOutput(Metadata* metadata); +char* +CandidateTranscriptToString(CandidateTranscript* transcript) +{ + std::string retval = ""; + for (int i = 0; i < transcript->num_tokens; i++) { + TokenMetadata token = transcript->tokens[i]; + retval += token.text; + } + return strdup(retval.c_str()); +} + +std::vector +CandidateTranscriptToWords(CandidateTranscript* transcript) +{ + std::vector word_list; + + std::string word = ""; + float word_start_time = 0; + + // Loop through each token + for (int i = 0; i < transcript->num_tokens; i++) { + TokenMetadata token = transcript->tokens[i]; + + // Append token to word if it's not a space + if (strcmp(token.text, u8" ") != 0) { + // Log the start time of the new word + if (word.length() == 0) { + word_start_time = token.start_time; + } + word.append(token.text); + } + + // Word boundary is either a space or the last token in the array + if (strcmp(token.text, u8" ") == 0 || i == transcript->num_tokens-1) { + float word_duration = token.start_time - word_start_time; + + if (word_duration < 0) { + word_duration = 0; + } + + meta_word w; + w.word = word; + w.start_time = word_start_time; + w.duration = word_duration; + + word_list.push_back(w); + + // Reset + word = ""; + word_start_time = 0; + } + } + + return word_list; +} + +std::string +CandidateTranscriptToJSON(CandidateTranscript *transcript) +{ + std::ostringstream out_string; + + std::vector words = CandidateTranscriptToWords(transcript); + + out_string << R"("metadata":{"confidence":)" << transcript->confidence << R"(},"words":[)"; + + for (int i = 0; i < words.size(); i++) { + meta_word w = words[i]; + out_string << R"({"word":")" << w.word << R"(","time":)" << w.start_time << R"(,"duration":)" << w.duration << "}"; + + if (i < words.size() - 1) { + out_string << ","; + } + } + + out_string << "]"; + + return out_string.str(); +} + +char* +MetadataToJSON(Metadata* result) +{ + std::ostringstream out_string; + out_string << "{\n"; + + for (int j=0; j < result->num_transcripts; ++j) { + CandidateTranscript *transcript = &result->transcripts[j]; + + if (j == 0) { + out_string << CandidateTranscriptToJSON(transcript); + + if (result->num_transcripts > 1) { + out_string << ",\n" << R"("alternatives")" << ":[\n"; + } + } else { + out_string << "{" << CandidateTranscriptToJSON(transcript) << "}"; + + if (j < result->num_transcripts - 1) { + out_string << ",\n"; + } else { + out_string << "\n]"; + } + } + } + + out_string << "\n}\n"; + + return strdup(out_string.str().c_str()); +} ds_result LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize, @@ -58,13 +163,13 @@ LocalDsSTT(ModelState* aCtx, const short* aBuffer, size_t aBufferSize, clock_t ds_start_time = clock(); if (extended_output) { - Result *result = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, 1); - res.string = metadataToString(&result->transcriptions[0]); - DS_FreeResult(result); + Metadata *result = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, 1); + res.string = CandidateTranscriptToString(&result->transcripts[0]); + DS_FreeMetadata(result); } else if (json_output) { - Result *result = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, 3); - res.string = JSONOutput(result); - DS_FreeResult(result); + Metadata *result = DS_SpeechToTextWithMetadata(aCtx, aBuffer, aBufferSize, 3); + res.string = MetadataToJSON(result); + DS_FreeMetadata(result); } else if (stream_size > 0) { StreamingState* ctx; int status = DS_CreateStream(aCtx, &ctx); @@ -279,119 +384,6 @@ ProcessFile(ModelState* context, const char* path, bool show_times) } } -char* -metadataToString(Metadata* metadata) -{ - std::string retval = ""; - for (int i = 0; i < metadata->num_items; i++) { - MetadataItem item = metadata->items[i]; - retval += item.character; - } - return strdup(retval.c_str()); -} - -std::vector -WordsFromMetadata(Metadata* metadata) -{ - std::vector word_list; - - std::string word = ""; - float word_start_time = 0; - - // Loop through each character - for (int i = 0; i < metadata->num_items; i++) { - MetadataItem item = metadata->items[i]; - - // Append character to word if it's not a space - if (strcmp(item.character, u8" ") != 0) { - // Log the start time of the new word - if (word.length() == 0) { - word_start_time = item.start_time; - } - word.append(item.character); - } - - // Word boundary is either a space or the last character in the array - if (strcmp(item.character, " ") == 0 - || strcmp(item.character, u8" ") == 0 - || i == metadata->num_items-1) { - - float word_duration = item.start_time - word_start_time; - - if (word_duration < 0) { - word_duration = 0; - } - - meta_word w; - w.word = word; - w.start_time = word_start_time; - w.duration = word_duration; - - word_list.push_back(w); - - // Reset - word = ""; - word_start_time = 0; - } - } - - return word_list; -} - -char* -JSONOutput(Result* result) -{ - std::ostringstream out_string; - out_string << "{\n"; - - for (int j=0; j < result->num_transcriptions; ++j) { - Metadata *metadata = &result->transcriptions[j]; - - if (j == 0) { - out_string << MetadataOutput(metadata); - - if (result->num_transcriptions > 1) { - out_string << ",\n" << R"("alternatives")" << ":[\n"; - } - } else { - out_string << "{" << MetadataOutput(metadata) << "}"; - - if (j < result->num_transcriptions - 1) { - out_string << ",\n"; - } else { - out_string << "\n]"; - } - } - } - - out_string << "\n}\n"; - - return strdup(out_string.str().c_str()); -} - -std::string -MetadataOutput(Metadata *metadata) -{ - std::ostringstream out_string; - - std::vector words = WordsFromMetadata(metadata); - - out_string << R"("metadata":{"confidence":)" << metadata->confidence << R"(},"words":[)"; - - for (int i = 0; i < words.size(); i++) { - meta_word w = words[i]; - out_string << R"({"word":")" << w.word << R"(","time":)" << w.start_time << R"(,"duration":)" << w.duration << "}"; - - if (i < words.size() - 1) { - out_string << ","; - } - } - - out_string << "]"; - - return out_string.str(); -} - int main(int argc, char **argv) {