Small adjustments to avoid hardcoding filenames and avoid generic DeepSpeech name
commit e3c34b29d6
parent 35d2908db9
deepspeech_ios_test.xcodeproj/project.pbxproj

@@ -7,6 +7,9 @@
     objects = {
 
 /* Begin PBXBuildFile section */
+        504EC34324CF4EFD0073C22E /* SpeechRecognitionImpl.swift in Sources */ = {isa = PBXBuildFile; fileRef = 504EC34124CF4EFD0073C22E /* SpeechRecognitionImpl.swift */; };
+        504EC34424CF4EFD0073C22E /* AudioContext.swift in Sources */ = {isa = PBXBuildFile; fileRef = 504EC34224CF4EFD0073C22E /* AudioContext.swift */; };
+        504EC34524CF4F4F0073C22E /* deepspeech_ios.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
         507CD3A124B61FE400409BBB /* deepspeech_ios.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */; };
         507CD3A324B61FEB00409BBB /* libdeepspeech.so in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libdeepspeech.so */; };
         507CD3A424B61FFC00409BBB /* libdeepspeech.so in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libdeepspeech.so */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; };
@@ -44,6 +47,7 @@
             dstPath = "";
             dstSubfolderSpec = 10;
             files = (
+                504EC34524CF4F4F0073C22E /* deepspeech_ios.framework in Embed Frameworks */,
                 507CD3A424B61FFC00409BBB /* libdeepspeech.so in Embed Frameworks */,
             );
             name = "Embed Frameworks";
@@ -52,8 +56,10 @@
 /* End PBXCopyFilesBuildPhase section */
 
 /* Begin PBXFileReference section */
+        504EC34124CF4EFD0073C22E /* SpeechRecognitionImpl.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SpeechRecognitionImpl.swift; sourceTree = "<group>"; };
+        504EC34224CF4EFD0073C22E /* AudioContext.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AudioContext.swift; sourceTree = "<group>"; };
         507CD3A024B61FE400409BBB /* deepspeech_ios.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = deepspeech_ios.framework; sourceTree = BUILT_PRODUCTS_DIR; };
-        507CD3A224B61FEA00409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; name = libdeepspeech.so; path = libdeepspeech.so; sourceTree = "<group>"; };
+        507CD3A224B61FEA00409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; path = libdeepspeech.so; sourceTree = "<group>"; };
         50F787EF2497683900D52237 /* deepspeech_ios_test.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = deepspeech_ios_test.app; sourceTree = BUILT_PRODUCTS_DIR; };
         50F787F22497683900D52237 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
         50F787F42497683900D52237 /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = "<group>"; };
@@ -130,6 +136,8 @@
         50F787F12497683900D52237 /* deepspeech_ios_test */ = {
             isa = PBXGroup;
             children = (
+                504EC34224CF4EFD0073C22E /* AudioContext.swift */,
+                504EC34124CF4EFD0073C22E /* SpeechRecognitionImpl.swift */,
                 50F787F22497683900D52237 /* AppDelegate.swift */,
                 50F787F42497683900D52237 /* SceneDelegate.swift */,
                 50F787F62497683900D52237 /* ContentView.swift */,
@@ -299,7 +307,9 @@
             isa = PBXSourcesBuildPhase;
             buildActionMask = 2147483647;
             files = (
+                504EC34424CF4EFD0073C22E /* AudioContext.swift in Sources */,
                 50F787F32497683900D52237 /* AppDelegate.swift in Sources */,
+                504EC34324CF4EFD0073C22E /* SpeechRecognitionImpl.swift in Sources */,
                 50F787F52497683900D52237 /* SceneDelegate.swift in Sources */,
                 50F787F72497683900D52237 /* ContentView.swift in Sources */,
             );
deepspeech_ios_test/ContentView.swift

@@ -9,7 +9,7 @@
 import SwiftUI
 
 struct ContentView: View {
-    private var deepspeech = DeepSpeech()
+    private var stt = SpeechRecognitionImpl()
     @State var isRecognizingMicrophone = false
 
     var body: some View {
@@ -30,17 +30,17 @@ struct ContentView: View {
     }
 
     func recognizeFiles() {
-        self.deepspeech.recognizeFiles()
+        self.stt.recognizeFiles()
     }
 
     func startMicRecognition() {
         isRecognizingMicrophone = true
-        self.deepspeech.startMicrophoneRecognition()
+        self.stt.startMicrophoneRecognition()
     }
 
     func stopMicRecognition() {
         isRecognizingMicrophone = false
-        self.deepspeech.stopMicrophoneRecognition()
+        self.stt.stopMicrophoneRecognition()
     }
 }
 
deepspeech_ios_test/SpeechRecognitionImpl.swift

@@ -18,7 +18,7 @@ struct FillComplexInputParm {
     var sourceSize: UInt32
 };
 
-class DeepSpeech : NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
+class SpeechRecognitionImpl : NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
     private var model: DeepSpeechModel
     private var stream: DeepSpeechStream?
 
@@ -134,8 +134,6 @@ class DeepSpeech : NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
 
         let shorts = UnsafeBufferPointer(start: data.assumingMemoryBound(to: Int16.self), count: Int(byteSize / 2))
         stream!.feedAudioContent(buffer: shorts)
-        let intermediateResult = stream!.intermediateDecode()
-        print("Intermediate result: " + intermediateResult)
 
         // save bytes to audio data for creating a pcm file later for the captured audio
         let ptr = UnsafePointer(data.assumingMemoryBound(to: UInt8.self))
@@ -261,34 +259,23 @@ class DeepSpeech : NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
     }
 
     public func recognizeFiles() {
-        let files = [
-            "5639-40744-0008",
-            "1089-134686-0019",
-            "2094-142345-0053",
-            "8463-294825-0010",
-            "121-123852-0001",
-            "7021-79740-0008",
-            "6930-76324-0010",
-            "5105-28240-0001",
-            "1089-134691-0012",
-            "5142-33396-0027",
-            "260-123288-0004",
-            "6930-75918-0008",
-            "8463-294828-0005",
-            "61-70970-0002"
-        ]
+        // Add file names (without extension) here if you want to test recognition from files.
+        // Remember to add them to the project under Copy Bundle Resources.
+        let files: [String] = []
 
         let serialQueue = DispatchQueue(label: "serialQueue")
         let group = DispatchGroup()
         group.enter()
 
-        serialQueue.async {
-            self.recognizeFile(audioPath: Bundle.main.path(forResource: "1284-134647-0003", ofType: "wav")!) {
-                group.leave()
-            }
-        }
+        if let first = files.first {
+            serialQueue.async {
+                self.recognizeFile(audioPath: Bundle.main.path(forResource: first, ofType: "wav")!) {
+                    group.leave()
+                }
+            }
+        }
 
-        for path in files {
+        for path in files.dropFirst() {
             group.wait()
             group.enter()
             self.recognizeFile(audioPath: Bundle.main.path(forResource: path, ofType: "wav")!) {
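Note on the recognizeFiles() change above: it serializes recognition with a private DispatchQueue and a DispatchGroup so each bundled WAV file is processed only after the previous one completes. The standalone sketch below illustrates that sequencing pattern in isolation; recognizeFile here is a dummy stand-in for the real recognizer, and the file names at the bottom are placeholders, not files from the project.

import Foundation

// Sketch only: recognizeFile stands in for the real, asynchronous recognizer.
func recognizeFile(audioPath: String, completion: @escaping () -> Void) {
    DispatchQueue.global().asyncAfter(deadline: .now() + 0.1) {
        print("finished \(audioPath)")
        completion()
    }
}

// One file at a time: wait for the previous recognition to finish (group.wait())
// before entering the group again and starting the next file.
func recognizeAll(_ files: [String]) {
    let serialQueue = DispatchQueue(label: "serialQueue")
    let group = DispatchGroup()
    group.enter()

    if let first = files.first {
        serialQueue.async {
            recognizeFile(audioPath: first) { group.leave() }
        }
    } else {
        group.leave() // keep enter/leave balanced when the list is empty
    }

    for path in files.dropFirst() {
        group.wait()  // blocks the calling thread until the previous file is done
        group.enter()
        recognizeFile(audioPath: path) { group.leave() }
    }
}

// Placeholder file names, just to exercise the sketch from a command-line run.
recognizeAll(["one.wav", "two.wav", "three.wav"])
Thread.sleep(forTimeInterval: 1)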