Small adjustments to avoid hardcoding filenames and avoid generic DeepSpeech name

This commit is contained in:
Reuben Morais 2020-07-27 20:29:37 +02:00
parent 35d2908db9
commit e3c34b29d6
3 changed files with 26 additions and 29 deletions

View File

@@ -7,6 +7,9 @@
objects = {
/* Begin PBXBuildFile section */
504EC34324CF4EFD0073C22E /* SpeechRecognitionImpl.swift in Sources */ = {isa = PBXBuildFile; fileRef = 504EC34124CF4EFD0073C22E /* SpeechRecognitionImpl.swift */; };
504EC34424CF4EFD0073C22E /* AudioContext.swift in Sources */ = {isa = PBXBuildFile; fileRef = 504EC34224CF4EFD0073C22E /* AudioContext.swift */; };
504EC34524CF4F4F0073C22E /* deepspeech_ios.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
507CD3A124B61FE400409BBB /* deepspeech_ios.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */; };
507CD3A324B61FEB00409BBB /* libdeepspeech.so in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libdeepspeech.so */; };
507CD3A424B61FFC00409BBB /* libdeepspeech.so in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libdeepspeech.so */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; };
@@ -44,6 +47,7 @@
dstPath = "";
dstSubfolderSpec = 10;
files = (
504EC34524CF4F4F0073C22E /* deepspeech_ios.framework in Embed Frameworks */,
507CD3A424B61FFC00409BBB /* libdeepspeech.so in Embed Frameworks */,
);
name = "Embed Frameworks";
@@ -52,8 +56,10 @@
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
504EC34124CF4EFD0073C22E /* SpeechRecognitionImpl.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SpeechRecognitionImpl.swift; sourceTree = "<group>"; };
504EC34224CF4EFD0073C22E /* AudioContext.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AudioContext.swift; sourceTree = "<group>"; };
507CD3A024B61FE400409BBB /* deepspeech_ios.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = deepspeech_ios.framework; sourceTree = BUILT_PRODUCTS_DIR; };
507CD3A224B61FEA00409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; name = libdeepspeech.so; path = libdeepspeech.so; sourceTree = "<group>"; };
507CD3A224B61FEA00409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; path = libdeepspeech.so; sourceTree = "<group>"; };
50F787EF2497683900D52237 /* deepspeech_ios_test.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = deepspeech_ios_test.app; sourceTree = BUILT_PRODUCTS_DIR; };
50F787F22497683900D52237 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
50F787F42497683900D52237 /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = "<group>"; };
@@ -130,6 +136,8 @@
50F787F12497683900D52237 /* deepspeech_ios_test */ = {
isa = PBXGroup;
children = (
504EC34224CF4EFD0073C22E /* AudioContext.swift */,
504EC34124CF4EFD0073C22E /* SpeechRecognitionImpl.swift */,
50F787F22497683900D52237 /* AppDelegate.swift */,
50F787F42497683900D52237 /* SceneDelegate.swift */,
50F787F62497683900D52237 /* ContentView.swift */,
@@ -299,7 +307,9 @@
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
504EC34424CF4EFD0073C22E /* AudioContext.swift in Sources */,
50F787F32497683900D52237 /* AppDelegate.swift in Sources */,
504EC34324CF4EFD0073C22E /* SpeechRecognitionImpl.swift in Sources */,
50F787F52497683900D52237 /* SceneDelegate.swift in Sources */,
50F787F72497683900D52237 /* ContentView.swift in Sources */,
);

View File

@@ -9,7 +9,7 @@
import SwiftUI
struct ContentView: View {
private var deepspeech = DeepSpeech()
private var stt = SpeechRecognitionImpl()
@State var isRecognizingMicrophone = false
var body: some View {
@@ -30,17 +30,17 @@ struct ContentView: View {
}
func recognizeFiles() {
self.deepspeech.recognizeFiles()
self.stt.recognizeFiles()
}
func startMicRecognition() {
isRecognizingMicrophone = true
self.deepspeech.startMicrophoneRecognition()
self.stt.startMicrophoneRecognition()
}
func stopMicRecognition() {
isRecognizingMicrophone = false
self.deepspeech.stopMicrophoneRecognition()
self.stt.stopMicrophoneRecognition()
}
}

View File

@@ -18,7 +18,7 @@ struct FillComplexInputParm {
var sourceSize: UInt32
};
class DeepSpeech : NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
class SpeechRecognitionImpl : NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
private var model: DeepSpeechModel
private var stream: DeepSpeechStream?
@@ -134,8 +134,6 @@ class DeepSpeech : NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
let shorts = UnsafeBufferPointer(start: data.assumingMemoryBound(to: Int16.self), count: Int(byteSize / 2))
stream!.feedAudioContent(buffer: shorts)
let intermediateResult = stream!.intermediateDecode()
print("Intermediate result: " + intermediateResult)
// save bytes to audio data for creating a pcm file later for the captured audio
let ptr = UnsafePointer(data.assumingMemoryBound(to: UInt8.self))
@@ -261,34 +259,23 @@ class DeepSpeech : NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
}
public func recognizeFiles() {
let files = [
"5639-40744-0008",
"1089-134686-0019",
"2094-142345-0053",
"8463-294825-0010",
"121-123852-0001",
"7021-79740-0008",
"6930-76324-0010",
"5105-28240-0001",
"1089-134691-0012",
"5142-33396-0027",
"260-123288-0004",
"6930-75918-0008",
"8463-294828-0005",
"61-70970-0002"
]
// Add file names (without extension) here if you want to test recognition from files.
// Remember to add them to the project under Copy Bundle Resources.
let files: [String] = []
let serialQueue = DispatchQueue(label: "serialQueue")
let group = DispatchGroup()
group.enter()
serialQueue.async {
self.recognizeFile(audioPath: Bundle.main.path(forResource: "1284-134647-0003", ofType: "wav")!) {
group.leave()
if let first = files.first {
serialQueue.async {
self.recognizeFile(audioPath: Bundle.main.path(forResource: first, ofType: "wav")!) {
group.leave()
}
}
}
for path in files {
for path in files.dropFirst() {
group.wait()
group.enter()
self.recognizeFile(audioPath: Bundle.main.path(forResource: path, ofType: "wav")!) {