Small adjustments to avoid hardcoding filenames and avoid generic DeepSpeech name
This commit is contained in:
parent
35d2908db9
commit
e3c34b29d6
|
@ -7,6 +7,9 @@
|
|||
objects = {
|
||||
|
||||
/* Begin PBXBuildFile section */
|
||||
504EC34324CF4EFD0073C22E /* SpeechRecognitionImpl.swift in Sources */ = {isa = PBXBuildFile; fileRef = 504EC34124CF4EFD0073C22E /* SpeechRecognitionImpl.swift */; };
|
||||
504EC34424CF4EFD0073C22E /* AudioContext.swift in Sources */ = {isa = PBXBuildFile; fileRef = 504EC34224CF4EFD0073C22E /* AudioContext.swift */; };
|
||||
504EC34524CF4F4F0073C22E /* deepspeech_ios.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
|
||||
507CD3A124B61FE400409BBB /* deepspeech_ios.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */; };
|
||||
507CD3A324B61FEB00409BBB /* libdeepspeech.so in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libdeepspeech.so */; };
|
||||
507CD3A424B61FFC00409BBB /* libdeepspeech.so in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libdeepspeech.so */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; };
|
||||
|
@ -44,6 +47,7 @@
|
|||
dstPath = "";
|
||||
dstSubfolderSpec = 10;
|
||||
files = (
|
||||
504EC34524CF4F4F0073C22E /* deepspeech_ios.framework in Embed Frameworks */,
|
||||
507CD3A424B61FFC00409BBB /* libdeepspeech.so in Embed Frameworks */,
|
||||
);
|
||||
name = "Embed Frameworks";
|
||||
|
@ -52,8 +56,10 @@
|
|||
/* End PBXCopyFilesBuildPhase section */
|
||||
|
||||
/* Begin PBXFileReference section */
|
||||
504EC34124CF4EFD0073C22E /* SpeechRecognitionImpl.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SpeechRecognitionImpl.swift; sourceTree = "<group>"; };
|
||||
504EC34224CF4EFD0073C22E /* AudioContext.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AudioContext.swift; sourceTree = "<group>"; };
|
||||
507CD3A024B61FE400409BBB /* deepspeech_ios.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = deepspeech_ios.framework; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
507CD3A224B61FEA00409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; name = libdeepspeech.so; path = libdeepspeech.so; sourceTree = "<group>"; };
|
||||
507CD3A224B61FEA00409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; path = libdeepspeech.so; sourceTree = "<group>"; };
|
||||
50F787EF2497683900D52237 /* deepspeech_ios_test.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = deepspeech_ios_test.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
50F787F22497683900D52237 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
|
||||
50F787F42497683900D52237 /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = "<group>"; };
|
||||
|
@ -130,6 +136,8 @@
|
|||
50F787F12497683900D52237 /* deepspeech_ios_test */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
504EC34224CF4EFD0073C22E /* AudioContext.swift */,
|
||||
504EC34124CF4EFD0073C22E /* SpeechRecognitionImpl.swift */,
|
||||
50F787F22497683900D52237 /* AppDelegate.swift */,
|
||||
50F787F42497683900D52237 /* SceneDelegate.swift */,
|
||||
50F787F62497683900D52237 /* ContentView.swift */,
|
||||
|
@ -299,7 +307,9 @@
|
|||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
504EC34424CF4EFD0073C22E /* AudioContext.swift in Sources */,
|
||||
50F787F32497683900D52237 /* AppDelegate.swift in Sources */,
|
||||
504EC34324CF4EFD0073C22E /* SpeechRecognitionImpl.swift in Sources */,
|
||||
50F787F52497683900D52237 /* SceneDelegate.swift in Sources */,
|
||||
50F787F72497683900D52237 /* ContentView.swift in Sources */,
|
||||
);
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
import SwiftUI
|
||||
|
||||
struct ContentView: View {
|
||||
private var deepspeech = DeepSpeech()
|
||||
private var stt = SpeechRecognitionImpl()
|
||||
@State var isRecognizingMicrophone = false
|
||||
|
||||
var body: some View {
|
||||
|
@ -30,17 +30,17 @@ struct ContentView: View {
|
|||
}
|
||||
|
||||
func recognizeFiles() {
|
||||
self.deepspeech.recognizeFiles()
|
||||
self.stt.recognizeFiles()
|
||||
}
|
||||
|
||||
func startMicRecognition() {
|
||||
isRecognizingMicrophone = true
|
||||
self.deepspeech.startMicrophoneRecognition()
|
||||
self.stt.startMicrophoneRecognition()
|
||||
}
|
||||
|
||||
func stopMicRecognition() {
|
||||
isRecognizingMicrophone = false
|
||||
self.deepspeech.stopMicrophoneRecognition()
|
||||
self.stt.stopMicrophoneRecognition()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@ struct FillComplexInputParm {
|
|||
var sourceSize: UInt32
|
||||
};
|
||||
|
||||
class DeepSpeech : NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
|
||||
class SpeechRecognitionImpl : NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
|
||||
private var model: DeepSpeechModel
|
||||
private var stream: DeepSpeechStream?
|
||||
|
||||
|
@ -134,8 +134,6 @@ class DeepSpeech : NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
|
|||
|
||||
let shorts = UnsafeBufferPointer(start: data.assumingMemoryBound(to: Int16.self), count: Int(byteSize / 2))
|
||||
stream!.feedAudioContent(buffer: shorts)
|
||||
let intermediateResult = stream!.intermediateDecode()
|
||||
print("Intermediate result: " + intermediateResult)
|
||||
|
||||
// save bytes to audio data for creating a pcm file later for the captured audio
|
||||
let ptr = UnsafePointer(data.assumingMemoryBound(to: UInt8.self))
|
||||
|
@ -261,34 +259,23 @@ class DeepSpeech : NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
|
|||
}
|
||||
|
||||
public func recognizeFiles() {
|
||||
let files = [
|
||||
"5639-40744-0008",
|
||||
"1089-134686-0019",
|
||||
"2094-142345-0053",
|
||||
"8463-294825-0010",
|
||||
"121-123852-0001",
|
||||
"7021-79740-0008",
|
||||
"6930-76324-0010",
|
||||
"5105-28240-0001",
|
||||
"1089-134691-0012",
|
||||
"5142-33396-0027",
|
||||
"260-123288-0004",
|
||||
"6930-75918-0008",
|
||||
"8463-294828-0005",
|
||||
"61-70970-0002"
|
||||
]
|
||||
// Add file names (without extension) here if you want to test recognition from files.
|
||||
// Remember to add them to the project under Copy Bundle Resources.
|
||||
let files: [String] = []
|
||||
|
||||
let serialQueue = DispatchQueue(label: "serialQueue")
|
||||
let group = DispatchGroup()
|
||||
group.enter()
|
||||
|
||||
serialQueue.async {
|
||||
self.recognizeFile(audioPath: Bundle.main.path(forResource: "1284-134647-0003", ofType: "wav")!) {
|
||||
group.leave()
|
||||
if let first = files.first {
|
||||
serialQueue.async {
|
||||
self.recognizeFile(audioPath: Bundle.main.path(forResource: first, ofType: "wav")!) {
|
||||
group.leave()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for path in files {
|
||||
|
||||
for path in files.dropFirst() {
|
||||
group.wait()
|
||||
group.enter()
|
||||
self.recognizeFile(audioPath: Bundle.main.path(forResource: path, ofType: "wav")!) {
|
Loading…
Reference in New Issue