Small adjustments to avoid hardcoding filenames and avoid generic DeepSpeech name

2020-07-27 20:29:37 +02:00 · 2020-07-27 20:29:37 +02:00 · e3c34b29d6
commit e3c34b29d6
parent 35d2908db9
3 changed files with 26 additions and 29 deletions
--- a/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj
+++ b/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj
@ -7,6 +7,9 @@
 	objects = {

 /* Begin PBXBuildFile section */
+		504EC34324CF4EFD0073C22E /* SpeechRecognitionImpl.swift in Sources */ = {isa = PBXBuildFile; fileRef = 504EC34124CF4EFD0073C22E /* SpeechRecognitionImpl.swift */; };
+		504EC34424CF4EFD0073C22E /* AudioContext.swift in Sources */ = {isa = PBXBuildFile; fileRef = 504EC34224CF4EFD0073C22E /* AudioContext.swift */; };
+		504EC34524CF4F4F0073C22E /* deepspeech_ios.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
 		507CD3A124B61FE400409BBB /* deepspeech_ios.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */; };
 		507CD3A324B61FEB00409BBB /* libdeepspeech.so in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libdeepspeech.so */; };
 		507CD3A424B61FFC00409BBB /* libdeepspeech.so in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libdeepspeech.so */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; };
@ -44,6 +47,7 @@
 			dstPath = "";
 			dstSubfolderSpec = 10;
 			files = (
+				504EC34524CF4F4F0073C22E /* deepspeech_ios.framework in Embed Frameworks */,
 				507CD3A424B61FFC00409BBB /* libdeepspeech.so in Embed Frameworks */,
 			);
 			name = "Embed Frameworks";
@ -52,8 +56,10 @@
 /* End PBXCopyFilesBuildPhase section */

 /* Begin PBXFileReference section */
+		504EC34124CF4EFD0073C22E /* SpeechRecognitionImpl.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SpeechRecognitionImpl.swift; sourceTree = "<group>"; };
+		504EC34224CF4EFD0073C22E /* AudioContext.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AudioContext.swift; sourceTree = "<group>"; };
 		507CD3A024B61FE400409BBB /* deepspeech_ios.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = deepspeech_ios.framework; sourceTree = BUILT_PRODUCTS_DIR; };
-		507CD3A224B61FEA00409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; name = libdeepspeech.so; path = libdeepspeech.so; sourceTree = "<group>"; };
+		507CD3A224B61FEA00409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; path = libdeepspeech.so; sourceTree = "<group>"; };
 		50F787EF2497683900D52237 /* deepspeech_ios_test.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = deepspeech_ios_test.app; sourceTree = BUILT_PRODUCTS_DIR; };
 		50F787F22497683900D52237 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
 		50F787F42497683900D52237 /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = "<group>"; };
@ -130,6 +136,8 @@
 		50F787F12497683900D52237 /* deepspeech_ios_test */ = {
 			isa = PBXGroup;
 			children = (
+				504EC34224CF4EFD0073C22E /* AudioContext.swift */,
+				504EC34124CF4EFD0073C22E /* SpeechRecognitionImpl.swift */,
 				50F787F22497683900D52237 /* AppDelegate.swift */,
 				50F787F42497683900D52237 /* SceneDelegate.swift */,
 				50F787F62497683900D52237 /* ContentView.swift */,
@ -299,7 +307,9 @@
 			isa = PBXSourcesBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
+				504EC34424CF4EFD0073C22E /* AudioContext.swift in Sources */,
 				50F787F32497683900D52237 /* AppDelegate.swift in Sources */,
+				504EC34324CF4EFD0073C22E /* SpeechRecognitionImpl.swift in Sources */,
 				50F787F52497683900D52237 /* SceneDelegate.swift in Sources */,
 				50F787F72497683900D52237 /* ContentView.swift in Sources */,
 			);
--- a/native_client/swift/deepspeech_ios_test/ContentView.swift
+++ b/native_client/swift/deepspeech_ios_test/ContentView.swift
@ -9,7 +9,7 @@
 import SwiftUI

 struct ContentView: View {
-    private var deepspeech = DeepSpeech()
+    private var stt = SpeechRecognitionImpl()
    @State var isRecognizingMicrophone = false
    
    var body: some View {
@ -30,17 +30,17 @@ struct ContentView: View {
    }
    
    func recognizeFiles() {
-        self.deepspeech.recognizeFiles()
+        self.stt.recognizeFiles()
    }
    
    func startMicRecognition() {
        isRecognizingMicrophone = true
-        self.deepspeech.startMicrophoneRecognition()
+        self.stt.startMicrophoneRecognition()
    }
    
    func stopMicRecognition() {
        isRecognizingMicrophone = false
-        self.deepspeech.stopMicrophoneRecognition()
+        self.stt.stopMicrophoneRecognition()
    }
 }

--- a/native_client/swift/deepspeech_ios_test/SpeechRecognitionImpl.swift
+++ b/native_client/swift/deepspeech_ios_test/SpeechRecognitionImpl.swift
@ -18,7 +18,7 @@ struct FillComplexInputParm {
    var sourceSize: UInt32
 };

-class DeepSpeech : NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
+class SpeechRecognitionImpl : NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
    private var model: DeepSpeechModel
    private var stream: DeepSpeechStream?
    
@ -134,8 +134,6 @@ class DeepSpeech : NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
                
                let shorts = UnsafeBufferPointer(start: data.assumingMemoryBound(to: Int16.self), count: Int(byteSize / 2))
                stream!.feedAudioContent(buffer: shorts)
-                let intermediateResult = stream!.intermediateDecode()
-                print("Intermediate result: " + intermediateResult)
                
                // save bytes to audio data for creating a pcm file later for the captured audio
                let ptr = UnsafePointer(data.assumingMemoryBound(to: UInt8.self))
@ -261,34 +259,23 @@ class DeepSpeech : NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
    }
    
    public func recognizeFiles() {
-        let files = [
-            "5639-40744-0008",
-            "1089-134686-0019",
-            "2094-142345-0053",
-            "8463-294825-0010",
-            "121-123852-0001",
-            "7021-79740-0008",
-            "6930-76324-0010",
-            "5105-28240-0001",
-            "1089-134691-0012",
-            "5142-33396-0027",
-            "260-123288-0004",
-            "6930-75918-0008",
-            "8463-294828-0005",
-            "61-70970-0002"
-        ]
+        // Add the base names (without the ".wav" extension) of the audio files you want to test recognition on.
+        // Each file must also be added to the app target's Copy Bundle Resources phase, because it is looked up in Bundle.main.
+        let files: [String] = []

        let serialQueue = DispatchQueue(label: "serialQueue")
        let group = DispatchGroup()
        group.enter()
        
-        serialQueue.async {
-            self.recognizeFile(audioPath: Bundle.main.path(forResource: "1284-134647-0003", ofType: "wav")!) {
-                group.leave()
+        if let first = files.first {
+            serialQueue.async {
+                self.recognizeFile(audioPath: Bundle.main.path(forResource: first, ofType: "wav")!) {
+                    group.leave()
+                }
            }
        }
-        
-        for path in files {
+
+        for path in files.dropFirst() {
            group.wait()
            group.enter()
            self.recognizeFile(audioPath: Bundle.main.path(forResource: path, ofType: "wav")!) {