From 5b74d9b1ce349a5500f3fad58fe8c259e55b2ed1 Mon Sep 17 00:00:00 2001
From: aayagar001 <46563478+aayagar001@users.noreply.github.com>
Date: Wed, 13 Nov 2019 11:10:52 +0530
Subject: [PATCH] client.py for supporting --json argument for timestamp info

Added functions that convert metadata into JSON with per-word timestamps.
---
 native_client/python/client.py | 46 ++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/native_client/python/client.py b/native_client/python/client.py
index a7194e9a..21bb3474 100644
--- a/native_client/python/client.py
+++ b/native_client/python/client.py
@@ -8,6 +8,7 @@ import shlex
 import subprocess
 import sys
 import wave
+import json
 
 from deepspeech import Model, printVersions
 from timeit import default_timer as timer
@@ -32,6 +33,47 @@ def convert_samplerate(audio_path, desired_sample_rate):
 def metadata_to_string(metadata):
     return ''.join(item.character for item in metadata.items)
 
+def words_from_metadata(metadata):
+    """Group the characters in ``metadata`` into words with timing info.
+
+    Reads ``metadata.num_items`` and, for each ``metadata.items[i]``, its
+    ``character`` and ``start_time``. Returns a list of dicts with keys
+    ``word``, ``start_time`` and ``duration`` (times rounded to 4 places).
+    """
+    word = ""
+    word_list = []
+    word_start_time = 0
+    last_index = metadata.num_items - 1
+    for i in range(metadata.num_items):
+        item = metadata.items[i]
+        is_space = item.character == " "
+        if not is_space:
+            if not word:
+                # First character of a word: log its start time here so a
+                # one-character word at the very end is timed correctly.
+                word_start_time = item.start_time
+            word += item.character
+        # Word boundary is either a space or the last character in the array
+        if (is_space or i == last_index) and word:
+            # Clamp to zero so a duration can never be reported as negative
+            word_duration = max(item.start_time - word_start_time, 0)
+            word_list.append({
+                "word": word,
+                "start_time": round(word_start_time, 4),
+                "duration": round(word_duration, 4),
+            })
+            # Reset; empty runs (leading/repeated spaces) emit no entry
+            word = ""
+    return word_list
+
+
+def metadata_json_output(metadata):
+    """Serialize word timings and ``metadata.confidence`` to a JSON string."""
+    json_result = {"words": words_from_metadata(metadata),
+                   "confidence": metadata.confidence}
+    # Keys: "words" (list of per-word dicts) and "confidence".
+    return json.dumps(json_result)
+
 
 class VersionAction(argparse.Action):
     def __init__(self, *args, **kwargs):
@@ -62,6 +104,8 @@ def main():
                         help='Print version and exits')
     parser.add_argument('--extended', required=False, action='store_true',
                         help='Output string from extended metadata')
+    parser.add_argument('--json', required=False, action='store_true',
+                        help='Output json from metadata with timestamp of each word')
     args = parser.parse_args()
 
     print('Loading model from file {}'.format(args.model), file=sys.stderr)
@@ -94,6 +138,8 @@ def main():
     inference_start = timer()
     if args.extended:
         print(metadata_to_string(ds.sttWithMetadata(audio)))
+    elif args.json:  # elif: keeps --extended from also hitting the else branch
+        print(metadata_json_output(ds.sttWithMetadata(audio)))
     else:
         print(ds.stt(audio))
     inference_end = timer() - inference_start