diff --git a/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj b/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj
index eadc4fae..a57f983c 100644
--- a/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj
+++ b/native_client/swift/deepspeech_ios_test.xcodeproj/project.pbxproj
@@ -7,6 +7,9 @@
     objects = {

 /* Begin PBXBuildFile section */
+        504EC34324CF4EFD0073C22E /* SpeechRecognitionImpl.swift in Sources */ = {isa = PBXBuildFile; fileRef = 504EC34124CF4EFD0073C22E /* SpeechRecognitionImpl.swift */; };
+        504EC34424CF4EFD0073C22E /* AudioContext.swift in Sources */ = {isa = PBXBuildFile; fileRef = 504EC34224CF4EFD0073C22E /* AudioContext.swift */; };
+        504EC34524CF4F4F0073C22E /* deepspeech_ios.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
         507CD3A124B61FE400409BBB /* deepspeech_ios.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */; };
         507CD3A324B61FEB00409BBB /* libdeepspeech.so in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libdeepspeech.so */; };
         507CD3A424B61FFC00409BBB /* libdeepspeech.so in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libdeepspeech.so */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; };
@@ -44,6 +47,7 @@
             dstPath = "";
             dstSubfolderSpec = 10;
             files = (
+                504EC34524CF4F4F0073C22E /* deepspeech_ios.framework in Embed Frameworks */,
                 507CD3A424B61FFC00409BBB /* libdeepspeech.so in Embed Frameworks */,
             );
             name = "Embed Frameworks";
@@ -52,8 +56,10 @@
 /* End PBXCopyFilesBuildPhase section */

 /* Begin PBXFileReference section */
+        504EC34124CF4EFD0073C22E /* SpeechRecognitionImpl.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SpeechRecognitionImpl.swift; sourceTree = "<group>"; };
+        504EC34224CF4EFD0073C22E /* AudioContext.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AudioContext.swift; sourceTree = "<group>"; };
         507CD3A024B61FE400409BBB /* deepspeech_ios.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = deepspeech_ios.framework; sourceTree = BUILT_PRODUCTS_DIR; };
-        507CD3A224B61FEA00409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; name = libdeepspeech.so; path = libdeepspeech.so; sourceTree = "<group>"; };
+        507CD3A224B61FEA00409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; path = libdeepspeech.so; sourceTree = "<group>"; };
         50F787EF2497683900D52237 /* deepspeech_ios_test.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = deepspeech_ios_test.app; sourceTree = BUILT_PRODUCTS_DIR; };
         50F787F22497683900D52237 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
         50F787F42497683900D52237 /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = "<group>"; };
@@ -130,6 +136,8 @@
         50F787F12497683900D52237 /* deepspeech_ios_test */ = {
             isa = PBXGroup;
             children = (
+                504EC34224CF4EFD0073C22E /* AudioContext.swift */,
+                504EC34124CF4EFD0073C22E /* SpeechRecognitionImpl.swift */,
                 50F787F22497683900D52237 /* AppDelegate.swift */,
                 50F787F42497683900D52237 /* SceneDelegate.swift */,
                 50F787F62497683900D52237 /* ContentView.swift */,
@@ -299,7 +307,9 @@
             isa = PBXSourcesBuildPhase;
             buildActionMask = 2147483647;
             files = (
+                504EC34424CF4EFD0073C22E /* AudioContext.swift in Sources */,
                 50F787F32497683900D52237 /* AppDelegate.swift in Sources */,
+                504EC34324CF4EFD0073C22E /* SpeechRecognitionImpl.swift in Sources */,
                 50F787F52497683900D52237 /* SceneDelegate.swift in Sources */,
                 50F787F72497683900D52237 /* ContentView.swift in Sources */,
             );
diff --git a/native_client/swift/deepspeech_ios_test/ContentView.swift b/native_client/swift/deepspeech_ios_test/ContentView.swift
index c33e6365..0eb7c776 100644
--- a/native_client/swift/deepspeech_ios_test/ContentView.swift
+++ b/native_client/swift/deepspeech_ios_test/ContentView.swift
@@ -9,7 +9,7 @@
 import SwiftUI

 struct ContentView: View {
-    private var deepspeech = DeepSpeech()
+    private var stt = SpeechRecognitionImpl()
     @State var isRecognizingMicrophone = false

     var body: some View {
@@ -30,17 +30,17 @@
     }

     func recognizeFiles() {
-        self.deepspeech.recognizeFiles()
+        self.stt.recognizeFiles()
     }

     func startMicRecognition() {
         isRecognizingMicrophone = true
-        self.deepspeech.startMicrophoneRecognition()
+        self.stt.startMicrophoneRecognition()
     }

     func stopMicRecognition() {
         isRecognizingMicrophone = false
-        self.deepspeech.stopMicrophoneRecognition()
+        self.stt.stopMicrophoneRecognition()
     }
 }

diff --git a/native_client/swift/deepspeech_ios_test/DeepSpeech.swift b/native_client/swift/deepspeech_ios_test/SpeechRecognitionImpl.swift
similarity index 92%
rename from native_client/swift/deepspeech_ios_test/DeepSpeech.swift
rename to native_client/swift/deepspeech_ios_test/SpeechRecognitionImpl.swift
index 52124f17..b3a4ac9b 100644
--- a/native_client/swift/deepspeech_ios_test/DeepSpeech.swift
+++ b/native_client/swift/deepspeech_ios_test/SpeechRecognitionImpl.swift
@@ -18,7 +18,7 @@ struct FillComplexInputParm {
     var sourceSize: UInt32
 };

-class DeepSpeech : NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
+class SpeechRecognitionImpl : NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
     private var model: DeepSpeechModel
     private var stream: DeepSpeechStream?

@@ -134,8 +134,6 @@ class DeepSpeech : NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {

         let shorts = UnsafeBufferPointer(start: data.assumingMemoryBound(to: Int16.self), count: Int(byteSize / 2))
         stream!.feedAudioContent(buffer: shorts)
-        let intermediateResult = stream!.intermediateDecode()
-        print("Intermediate result: " + intermediateResult)

         // save bytes to audio data for creating a pcm file later for the captured audio
         let ptr = UnsafePointer(data.assumingMemoryBound(to: UInt8.self))
@@ -261,34 +259,23 @@ class DeepSpeech : NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
     }

     public func recognizeFiles() {
-        let files = [
-            "5639-40744-0008",
-            "1089-134686-0019",
-            "2094-142345-0053",
-            "8463-294825-0010",
-            "121-123852-0001",
-            "7021-79740-0008",
-            "6930-76324-0010",
-            "5105-28240-0001",
-            "1089-134691-0012",
-            "5142-33396-0027",
-            "260-123288-0004",
-            "6930-75918-0008",
-            "8463-294828-0005",
-            "61-70970-0002"
-        ]
+        // Add file names (without extension) here if you want to test recognition from files.
+        // Remember to add them to the project under Copy Bundle Resources.
+        let files: [String] = []

         let serialQueue = DispatchQueue(label: "serialQueue")
         let group = DispatchGroup()
         group.enter()
-        serialQueue.async {
-            self.recognizeFile(audioPath: Bundle.main.path(forResource: "1284-134647-0003", ofType: "wav")!) {
-                group.leave()
+        if let first = files.first {
+            serialQueue.async {
+                self.recognizeFile(audioPath: Bundle.main.path(forResource: first, ofType: "wav")!) {
+                    group.leave()
+                }
             }
         }
-
-        for path in files {
+
+        for path in files.dropFirst() {
             group.wait()
             group.enter()
             self.recognizeFile(audioPath: Bundle.main.path(forResource: path, ofType: "wav")!) {
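Note on the pattern in the last hunk: the rewritten recognizeFiles() serializes per-file recognition by pairing a serial DispatchQueue with a DispatchGroup. The group is entered before each file starts and left in its completion callback, and group.wait() blocks the loop until the previous file has finished. The sketch below isolates that pattern under stated assumptions: recognizeOne and recognizeAll are hypothetical stand-ins for SpeechRecognitionImpl.recognizeFile and recognizeFiles, not code from this patch.

import Foundation

// Hypothetical stand-in for SpeechRecognitionImpl.recognizeFile: any
// asynchronous operation that signals completion through a callback.
func recognizeOne(_ path: String, completion: @escaping () -> Void) {
    DispatchQueue.global().async {
        print("recognizing \(path)")  // a real implementation would run inference here
        completion()
    }
}

// Mirrors the control flow of the patched recognizeFiles(): enter the group
// before a file starts, leave it in the completion callback, and gate each
// subsequent file on the previous one's leave() via group.wait().
func recognizeAll(_ files: [String]) {
    // Guard against an empty list so group.enter() is never left unbalanced.
    guard let first = files.first else { return }

    let serialQueue = DispatchQueue(label: "serialQueue")
    let group = DispatchGroup()

    group.enter()
    serialQueue.async {
        recognizeOne(first) { group.leave() }
    }

    for path in files.dropFirst() {
        group.wait()   // previous file's completion must fire before we continue
        group.enter()
        recognizeOne(path) { group.leave() }
    }
    group.wait()       // wait for the final file
}

recognizeAll(["a.wav", "b.wav", "c.wav"])

A notify-based chain would avoid blocking the calling thread, but the wait-based version keeps the sequential intent of the test app explicit.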