Merge pull request #3191 from reuben/swift-mic-streaming
iOS microphone streaming
Commit 396504ea07
deepspeech_ios_test.xcodeproj/project.pbxproj

@@ -7,6 +7,9 @@
	objects = {

/* Begin PBXBuildFile section */
		504EC34324CF4EFD0073C22E /* SpeechRecognitionImpl.swift in Sources */ = {isa = PBXBuildFile; fileRef = 504EC34124CF4EFD0073C22E /* SpeechRecognitionImpl.swift */; };
		504EC34424CF4EFD0073C22E /* AudioContext.swift in Sources */ = {isa = PBXBuildFile; fileRef = 504EC34224CF4EFD0073C22E /* AudioContext.swift */; };
		504EC34524CF4F4F0073C22E /* deepspeech_ios.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
		507CD3A124B61FE400409BBB /* deepspeech_ios.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A024B61FE400409BBB /* deepspeech_ios.framework */; };
		507CD3A324B61FEB00409BBB /* libdeepspeech.so in Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libdeepspeech.so */; };
		507CD3A424B61FFC00409BBB /* libdeepspeech.so in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 507CD3A224B61FEA00409BBB /* libdeepspeech.so */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; };

@@ -44,6 +47,7 @@
		dstPath = "";
		dstSubfolderSpec = 10;
		files = (
			504EC34524CF4F4F0073C22E /* deepspeech_ios.framework in Embed Frameworks */,
			507CD3A424B61FFC00409BBB /* libdeepspeech.so in Embed Frameworks */,
		);
		name = "Embed Frameworks";

@@ -52,8 +56,10 @@
/* End PBXCopyFilesBuildPhase section */

/* Begin PBXFileReference section */
		504EC34124CF4EFD0073C22E /* SpeechRecognitionImpl.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SpeechRecognitionImpl.swift; sourceTree = "<group>"; };
		504EC34224CF4EFD0073C22E /* AudioContext.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AudioContext.swift; sourceTree = "<group>"; };
		507CD3A024B61FE400409BBB /* deepspeech_ios.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = deepspeech_ios.framework; sourceTree = BUILT_PRODUCTS_DIR; };
		507CD3A224B61FEA00409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; name = libdeepspeech.so; path = libdeepspeech.so; sourceTree = "<group>"; };
		507CD3A224B61FEA00409BBB /* libdeepspeech.so */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; path = libdeepspeech.so; sourceTree = "<group>"; };
		50F787EF2497683900D52237 /* deepspeech_ios_test.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = deepspeech_ios_test.app; sourceTree = BUILT_PRODUCTS_DIR; };
		50F787F22497683900D52237 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
		50F787F42497683900D52237 /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = "<group>"; };

@@ -130,6 +136,8 @@
		50F787F12497683900D52237 /* deepspeech_ios_test */ = {
			isa = PBXGroup;
			children = (
				504EC34224CF4EFD0073C22E /* AudioContext.swift */,
				504EC34124CF4EFD0073C22E /* SpeechRecognitionImpl.swift */,
				50F787F22497683900D52237 /* AppDelegate.swift */,
				50F787F42497683900D52237 /* SceneDelegate.swift */,
				50F787F62497683900D52237 /* ContentView.swift */,

@@ -299,7 +307,9 @@
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				504EC34424CF4EFD0073C22E /* AudioContext.swift in Sources */,
				50F787F32497683900D52237 /* AppDelegate.swift in Sources */,
				504EC34324CF4EFD0073C22E /* SpeechRecognitionImpl.swift in Sources */,
				50F787F52497683900D52237 /* SceneDelegate.swift in Sources */,
				50F787F72497683900D52237 /* ContentView.swift in Sources */,
			);

AppDelegate.swift

@@ -7,190 +7,10 @@
//

import UIKit
import Foundation
import AVFoundation
import AudioToolbox
import Accelerate

import deepspeech_ios

/// Holds audio information used for building waveforms
final class AudioContext {

    /// The audio asset URL used to load the context
    public let audioURL: URL

    /// Total number of samples in loaded asset
    public let totalSamples: Int

    /// Loaded asset
    public let asset: AVAsset

    // Loaded assetTrack
    public let assetTrack: AVAssetTrack

    private init(audioURL: URL, totalSamples: Int, asset: AVAsset, assetTrack: AVAssetTrack) {
        self.audioURL = audioURL
        self.totalSamples = totalSamples
        self.asset = asset
        self.assetTrack = assetTrack
    }

    public static func load(fromAudioURL audioURL: URL, completionHandler: @escaping (_ audioContext: AudioContext?) -> ()) {
        let asset = AVURLAsset(url: audioURL, options: [AVURLAssetPreferPreciseDurationAndTimingKey: NSNumber(value: true as Bool)])

        guard let assetTrack = asset.tracks(withMediaType: AVMediaType.audio).first else {
            fatalError("Couldn't load AVAssetTrack")
        }

        asset.loadValuesAsynchronously(forKeys: ["duration"]) {
            var error: NSError?
            let status = asset.statusOfValue(forKey: "duration", error: &error)
            switch status {
            case .loaded:
                guard
                    let formatDescriptions = assetTrack.formatDescriptions as? [CMAudioFormatDescription],
                    let audioFormatDesc = formatDescriptions.first,
                    let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(audioFormatDesc)
                    else { break }

                let totalSamples = Int((asbd.pointee.mSampleRate) * Float64(asset.duration.value) / Float64(asset.duration.timescale))
                let audioContext = AudioContext(audioURL: audioURL, totalSamples: totalSamples, asset: asset, assetTrack: assetTrack)
                completionHandler(audioContext)
                return

            case .failed, .cancelled, .loading, .unknown:
                print("Couldn't load asset: \(error?.localizedDescription ?? "Unknown error")")
            }

            completionHandler(nil)
        }
    }
}

func render(audioContext: AudioContext?, stream: DeepSpeechStream) {
    guard let audioContext = audioContext else {
        fatalError("Couldn't create the audioContext")
    }

    let sampleRange: CountableRange<Int> = 0..<audioContext.totalSamples

    guard let reader = try? AVAssetReader(asset: audioContext.asset)
    else {
        fatalError("Couldn't initialize the AVAssetReader")
    }

    reader.timeRange = CMTimeRange(start: CMTime(value: Int64(sampleRange.lowerBound), timescale: audioContext.asset.duration.timescale),
                                   duration: CMTime(value: Int64(sampleRange.count), timescale: audioContext.asset.duration.timescale))

    let outputSettingsDict: [String : Any] = [
        AVFormatIDKey: Int(kAudioFormatLinearPCM),
        AVLinearPCMBitDepthKey: 16,
        AVLinearPCMIsBigEndianKey: false,
        AVLinearPCMIsFloatKey: false,
        AVLinearPCMIsNonInterleaved: false
    ]

    let readerOutput = AVAssetReaderTrackOutput(track: audioContext.assetTrack,
                                                outputSettings: outputSettingsDict)
    readerOutput.alwaysCopiesSampleData = false
    reader.add(readerOutput)

    var sampleBuffer = Data()

    // 16-bit samples
    reader.startReading()
    defer { reader.cancelReading() }

    while reader.status == .reading {
        guard let readSampleBuffer = readerOutput.copyNextSampleBuffer(),
              let readBuffer = CMSampleBufferGetDataBuffer(readSampleBuffer) else {
            break
        }
        // Append audio sample buffer into our current sample buffer
        var readBufferLength = 0
        var readBufferPointer: UnsafeMutablePointer<Int8>?
        CMBlockBufferGetDataPointer(readBuffer,
                                    atOffset: 0,
                                    lengthAtOffsetOut: &readBufferLength,
                                    totalLengthOut: nil,
                                    dataPointerOut: &readBufferPointer)
        sampleBuffer.append(UnsafeBufferPointer(start: readBufferPointer, count: readBufferLength))
        CMSampleBufferInvalidate(readSampleBuffer)

        let totalSamples = sampleBuffer.count / MemoryLayout<Int16>.size
        print("read \(totalSamples) samples")

        sampleBuffer.withUnsafeBytes { (samples: UnsafeRawBufferPointer) in
            let unsafeBufferPointer = samples.bindMemory(to: Int16.self)
            stream.feedAudioContent(buffer: unsafeBufferPointer)
        }

        sampleBuffer.removeAll()
    }

    // if (reader.status == AVAssetReaderStatusFailed || reader.status == AVAssetReaderStatusUnknown)
    guard reader.status == .completed else {
        fatalError("Couldn't read the audio file")
    }
}

func test(model: DeepSpeechModel, audioPath: String, completion: @escaping () -> ()) {
    let url = URL(fileURLWithPath: audioPath)

    let stream = try! model.createStream()
    print("\(audioPath)")
    let start = CFAbsoluteTimeGetCurrent()
    AudioContext.load(fromAudioURL: url, completionHandler: { audioContext in
        guard let audioContext = audioContext else {
            fatalError("Couldn't create the audioContext")
        }
        render(audioContext: audioContext, stream: stream)
        let result = stream.finishStream()
        let end = CFAbsoluteTimeGetCurrent()
        print("\"\(audioPath)\": \(end - start) - \(result)")
        completion()
    })
}

@UIApplicationMain
class AppDelegate: UIResponder, UIApplicationDelegate {
    func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool {
        let model = try! DeepSpeechModel(modelPath: Bundle.main.path(forResource: "output_graph", ofType: "tflite")!)
        try! model.enableExternalScorer(scorerPath: Bundle.main.path(forResource: "librispeech_en_utf8_nonpruned_o6", ofType: "scorer")!)

        let files = [
            "5639-40744-0008",
            "1089-134686-0019",
            "2094-142345-0053",
            "8463-294825-0010",
            "121-123852-0001",
            "7021-79740-0008",
            "6930-76324-0010",
            "5105-28240-0001",
            "1089-134691-0012",
            "5142-33396-0027",
            "260-123288-0004",
            "6930-75918-0008",
            "8463-294828-0005",
            "61-70970-0002"
        ]

        let serialQueue = DispatchQueue(label: "serialQueue")
        let group = DispatchGroup()
        group.enter()
        serialQueue.async {
            test(model: model, audioPath: Bundle.main.path(forResource: "1284-134647-0003", ofType: "wav")!) {
                group.leave()
            }
        }
        for path in files {
            group.wait()
            group.enter()
            test(model: model, audioPath: Bundle.main.path(forResource: path, ofType: "wav")!) {
                group.leave()
            }
        }
        return true
    }

AudioContext.swift

@@ -0,0 +1,68 @@
//
//  AudioContext.swift
//  deepspeech_ios_test
//
//  Created by Erik Ziegler on 27.07.20.
//  Copyright © 2020 Mozilla. All rights reserved.
//

import Foundation
import AVFoundation
import AudioToolbox
import Accelerate

import deepspeech_ios

/// Holds audio information used for building waveforms
final class AudioContext {

    /// The audio asset URL used to load the context
    public let audioURL: URL

    /// Total number of samples in loaded asset
    public let totalSamples: Int

    /// Loaded asset
    public let asset: AVAsset

    // Loaded assetTrack
    public let assetTrack: AVAssetTrack

    private init(audioURL: URL, totalSamples: Int, asset: AVAsset, assetTrack: AVAssetTrack) {
        self.audioURL = audioURL
        self.totalSamples = totalSamples
        self.asset = asset
        self.assetTrack = assetTrack
    }

    public static func load(fromAudioURL audioURL: URL, completionHandler: @escaping (_ audioContext: AudioContext?) -> ()) {
        let asset = AVURLAsset(url: audioURL, options: [AVURLAssetPreferPreciseDurationAndTimingKey: NSNumber(value: true as Bool)])

        guard let assetTrack = asset.tracks(withMediaType: AVMediaType.audio).first else {
            fatalError("Couldn't load AVAssetTrack")
        }

        asset.loadValuesAsynchronously(forKeys: ["duration"]) {
            var error: NSError?
            let status = asset.statusOfValue(forKey: "duration", error: &error)
            switch status {
            case .loaded:
                guard
                    let formatDescriptions = assetTrack.formatDescriptions as? [CMAudioFormatDescription],
                    let audioFormatDesc = formatDescriptions.first,
                    let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(audioFormatDesc)
                    else { break }

                let totalSamples = Int((asbd.pointee.mSampleRate) * Float64(asset.duration.value) / Float64(asset.duration.timescale))
                let audioContext = AudioContext(audioURL: audioURL, totalSamples: totalSamples, asset: asset, assetTrack: assetTrack)
                completionHandler(audioContext)
                return

            case .failed, .cancelled, .loading, .unknown:
                print("Couldn't load asset: \(error?.localizedDescription ?? "Unknown error")")
            }

            completionHandler(nil)
        }
    }
}

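A minimal usage sketch for the new AudioContext (the bundled file name "example" is hypothetical; the real call site is recognizeFile in SpeechRecognitionImpl.swift below):

// Load an asset asynchronously and inspect it once its duration is known.
let url = Bundle.main.url(forResource: "example", withExtension: "wav")!
AudioContext.load(fromAudioURL: url) { audioContext in
    guard let audioContext = audioContext else { return }
    print("\(audioContext.audioURL.lastPathComponent): \(audioContext.totalSamples) samples")
}
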
ContentView.swift

@@ -9,8 +9,38 @@
import SwiftUI

struct ContentView: View {
    private var stt = SpeechRecognitionImpl()
    @State var isRecognizingMicrophone = false

    var body: some View {
        Text("Hello, World!")
        VStack {
            Text("DeepSpeech iOS Demo")
                .font(.system(size: 30))
            Button("Recognize files", action: recognizeFiles)
                .padding(30)
            Button(
                isRecognizingMicrophone
                    ? "Stop Microphone Recognition"
                    : "Start Microphone Recognition",
                action: isRecognizingMicrophone
                    ? stopMicRecognition
                    : startMicRecognition)
                .padding(30)
        }
    }

    func recognizeFiles() {
        self.stt.recognizeFiles()
    }

    func startMicRecognition() {
        isRecognizingMicrophone = true
        self.stt.startMicrophoneRecognition()
    }

    func stopMicRecognition() {
        isRecognizingMicrophone = false
        self.stt.stopMicrophoneRecognition()
    }
}

Info.plist

@@ -4,6 +4,8 @@
<dict>
	<key>CFBundleDevelopmentRegion</key>
	<string>$(DEVELOPMENT_LANGUAGE)</string>
	<key>NSMicrophoneUsageDescription</key>
	<string>Please grant access to the microphone.</string>
	<key>CFBundleExecutable</key>
	<string>$(EXECUTABLE_NAME)</string>
	<key>CFBundleIdentifier</key>

SpeechRecognitionImpl.swift

@@ -0,0 +1,286 @@
//
//  DeepSpeech.swift
//  deepspeech_ios_test
//
//  Created by Erik Ziegler on 27.07.20.
//  Copyright © 2020 Mozilla. All rights reserved.
//

import Foundation
import AVFoundation
import AudioToolbox
import Accelerate

import deepspeech_ios

struct FillComplexInputParm {
    var source: UnsafeMutablePointer<Int8>
    var sourceSize: UInt32
}

class SpeechRecognitionImpl : NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
    private var model: DeepSpeechModel
    private var stream: DeepSpeechStream?

    private var captureSession = AVCaptureSession()
    private var audioData = Data()

    override init() {
        let modelPath = Bundle.main.path(forResource: "deepspeech-0.7.4-models", ofType: "tflite")!
        let scorerPath = Bundle.main.path(forResource: "deepspeech-0.7.4-models", ofType: "scorer")!

        model = try! DeepSpeechModel(modelPath: modelPath)
        try! model.enableExternalScorer(scorerPath: scorerPath)

        super.init()

        // prepare audio capture
        self.configureCaptureSession()
    }

    // MARK: Microphone recognition

    private func configureCaptureSession() {
        captureSession.beginConfiguration()

        let audioDevice = AVCaptureDevice.default(.builtInMicrophone, for: .audio, position: .unspecified)

        let audioDeviceInput = try! AVCaptureDeviceInput(device: audioDevice!)
        guard captureSession.canAddInput(audioDeviceInput) else { return }
        captureSession.addInput(audioDeviceInput)

        let serialQueue = DispatchQueue(label: "serialQueue")
        let audioOutput = AVCaptureAudioDataOutput()
        audioOutput.setSampleBufferDelegate(self, queue: serialQueue)

        guard captureSession.canAddOutput(audioOutput) else { return }
        captureSession.sessionPreset = .inputPriority
        captureSession.addOutput(audioOutput)
        captureSession.commitConfiguration()
    }

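    // Each capture callback below receives a CMSampleBuffer of PCM at the
    // hardware sample rate (typically 44.1 or 48 kHz). The model expects
    // 16 kHz mono 16-bit samples, so every buffer is passed through an
    // AudioConverter before being fed into the DeepSpeech stream.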
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        var sourceFormat = (sampleBuffer.formatDescription?.audioFormatList[0].mASBD)!
        var destinationFormat = sourceFormat
        destinationFormat.mSampleRate = 16000.0

        var audioConverterRef: AudioConverterRef?
        let createConverterStatus = AudioConverterNew(&sourceFormat, &destinationFormat, &audioConverterRef)

        if createConverterStatus != noErr {
            print("Error creating converter")
            // Bail out rather than force-unwrapping a nil converter below.
            return
        }

        var quality = kAudioConverterQuality_Max

        AudioConverterSetProperty(audioConverterRef!, kAudioConverterSampleRateConverterQuality, UInt32(MemoryLayout<UInt32>.size), &quality)

        let blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer)

        var pcmLength: Int = 0
        var pcmData: UnsafeMutablePointer<Int8>?
        let status: OSStatus = CMBlockBufferGetDataPointer(blockBuffer!, atOffset: 0, lengthAtOffsetOut: nil, totalLengthOut: &pcmLength, dataPointerOut: &pcmData)

        if status != noErr {
            print("Error getting data pointer from the sample buffer")
        } else {
            var input = FillComplexInputParm(source: pcmData!, sourceSize: UInt32(pcmLength))

            let outputBuffer = malloc(pcmLength)
            memset(outputBuffer, 0, pcmLength)

            var outputBufferList = AudioBufferList()
            outputBufferList.mNumberBuffers = 1
            outputBufferList.mBuffers.mData = outputBuffer
            outputBufferList.mBuffers.mDataByteSize = UInt32(Double(pcmLength) * destinationFormat.mSampleRate / sourceFormat.mSampleRate)
            outputBufferList.mBuffers.mNumberChannels = 1

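            // Input callback for AudioConverterFillComplexBuffer below: it hands
            // the converter the captured bytes in one shot and reports the packet
            // count (one packet is 2 bytes for 16-bit mono audio).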
            func inputDataProc(
                inAudioConverter: AudioConverterRef,
                ioNumberDataPacket: UnsafeMutablePointer<UInt32>,
                ioData: UnsafeMutablePointer<AudioBufferList>,
                outDataPacketDescription: UnsafeMutablePointer<UnsafeMutablePointer<AudioStreamPacketDescription>?>?,
                inUserData: UnsafeMutableRawPointer?
            ) -> OSStatus {
                var inputPtr = inUserData!.load(as: FillComplexInputParm.self)

                if inputPtr.sourceSize <= 0 {
                    ioNumberDataPacket.pointee = 1
                    return -1
                }

                let rawPtr = UnsafeMutableRawPointer(inputPtr.source)

                ioData.pointee.mNumberBuffers = 1
                ioData.pointee.mBuffers.mData = rawPtr
                ioData.pointee.mBuffers.mDataByteSize = inputPtr.sourceSize
                ioData.pointee.mBuffers.mNumberChannels = 1

                ioNumberDataPacket.pointee = (inputPtr.sourceSize / 2)
                inputPtr.sourceSize = 0

                return noErr
            }

            var packetSize: UInt32 = UInt32(pcmLength / 2)

            let convertStatus: OSStatus = AudioConverterFillComplexBuffer(audioConverterRef!, inputDataProc, &input, &packetSize, &outputBufferList, nil)

            if convertStatus != noErr {
                print("Error: " + convertStatus.description)
            } else {
                let data = outputBufferList.mBuffers.mData!
                let byteSize = outputBufferList.mBuffers.mDataByteSize

                let shorts = UnsafeBufferPointer(start: data.assumingMemoryBound(to: Int16.self), count: Int(byteSize / 2))
                stream!.feedAudioContent(buffer: shorts)

                // save bytes to audio data for creating a pcm file later for the captured audio
                let ptr = UnsafePointer(data.assumingMemoryBound(to: UInt8.self))
                audioData.append(ptr, count: Int(byteSize))
            }

            free(outputBuffer)
            AudioConverterDispose(audioConverterRef!)
        }
    }

    public func startMicrophoneRecognition() {
        audioData = Data()
        stream = try! model.createStream()
        captureSession.startRunning()
        print("Started listening...")
    }

    private func writeAudioDataToPCMFile() {
        let documents = NSSearchPathForDirectoriesInDomains(FileManager.SearchPathDirectory.documentDirectory, FileManager.SearchPathDomainMask.userDomainMask, true)[0]
        let filePath = documents + "/recording.pcm"
        let url = URL(fileURLWithPath: filePath)
        try! audioData.write(to: url)
        print("Saved audio to " + filePath)
    }

    public func stopMicrophoneRecognition() {
        captureSession.stopRunning()

        let result = stream?.finishStream()
        print("Result: " + result!)

        // optional, useful for checking the recorded audio
        writeAudioDataToPCMFile()
    }

    // MARK: Audio file recognition

    private func render(audioContext: AudioContext?, stream: DeepSpeechStream) {
        guard let audioContext = audioContext else {
            fatalError("Couldn't create the audioContext")
        }

        let sampleRange: CountableRange<Int> = 0..<audioContext.totalSamples

        guard let reader = try? AVAssetReader(asset: audioContext.asset)
        else {
            fatalError("Couldn't initialize the AVAssetReader")
        }

        reader.timeRange = CMTimeRange(start: CMTime(value: Int64(sampleRange.lowerBound), timescale: audioContext.asset.duration.timescale),
                                       duration: CMTime(value: Int64(sampleRange.count), timescale: audioContext.asset.duration.timescale))

        let outputSettingsDict: [String : Any] = [
            AVFormatIDKey: Int(kAudioFormatLinearPCM),
            AVLinearPCMBitDepthKey: 16,
            AVLinearPCMIsBigEndianKey: false,
            AVLinearPCMIsFloatKey: false,
            AVLinearPCMIsNonInterleaved: false
        ]

        let readerOutput = AVAssetReaderTrackOutput(track: audioContext.assetTrack,
                                                    outputSettings: outputSettingsDict)
        readerOutput.alwaysCopiesSampleData = false
        reader.add(readerOutput)

        var sampleBuffer = Data()

        // 16-bit samples
        reader.startReading()
        defer { reader.cancelReading() }

        while reader.status == .reading {
            guard let readSampleBuffer = readerOutput.copyNextSampleBuffer(),
                  let readBuffer = CMSampleBufferGetDataBuffer(readSampleBuffer) else {
                break
            }
            // Append audio sample buffer into our current sample buffer
            var readBufferLength = 0
            var readBufferPointer: UnsafeMutablePointer<Int8>?
            CMBlockBufferGetDataPointer(readBuffer,
                                        atOffset: 0,
                                        lengthAtOffsetOut: &readBufferLength,
                                        totalLengthOut: nil,
                                        dataPointerOut: &readBufferPointer)
            sampleBuffer.append(UnsafeBufferPointer(start: readBufferPointer, count: readBufferLength))
            CMSampleBufferInvalidate(readSampleBuffer)

            let totalSamples = sampleBuffer.count / MemoryLayout<Int16>.size
            print("read \(totalSamples) samples")

            sampleBuffer.withUnsafeBytes { (samples: UnsafeRawBufferPointer) in
                let unsafeBufferPointer = samples.bindMemory(to: Int16.self)
                stream.feedAudioContent(buffer: unsafeBufferPointer)
            }

            sampleBuffer.removeAll()
        }

        // if (reader.status == AVAssetReaderStatusFailed || reader.status == AVAssetReaderStatusUnknown)
        guard reader.status == .completed else {
            fatalError("Couldn't read the audio file")
        }
    }

    private func recognizeFile(audioPath: String, completion: @escaping () -> ()) {
        let url = URL(fileURLWithPath: audioPath)

        let stream = try! model.createStream()
        print("\(audioPath)")
        let start = CFAbsoluteTimeGetCurrent()
        AudioContext.load(fromAudioURL: url, completionHandler: { audioContext in
            guard let audioContext = audioContext else {
                fatalError("Couldn't create the audioContext")
            }
            self.render(audioContext: audioContext, stream: stream)
            let result = stream.finishStream()
            let end = CFAbsoluteTimeGetCurrent()
            print("\"\(audioPath)\": \(end - start) - \(result)")
            completion()
        })
    }

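    // Files are recognized one at a time: the DispatchGroup is entered before
    // each file is scheduled, left in its completion handler, and wait()
    // blocks the loop until the previous file has finished.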
    public func recognizeFiles() {
        // Add file names (without extension) here if you want to test recognition from files.
        // Remember to add them to the project under Copy Bundle Resources.
        let files: [String] = []

        let serialQueue = DispatchQueue(label: "serialQueue")
        let group = DispatchGroup()

        if let first = files.first {
            group.enter()
            serialQueue.async {
                self.recognizeFile(audioPath: Bundle.main.path(forResource: first, ofType: "wav")!) {
                    group.leave()
                }
            }
        }

        for path in files.dropFirst() {
            group.wait()
            group.enter()
            self.recognizeFile(audioPath: Bundle.main.path(forResource: path, ofType: "wav")!) {
                group.leave()
            }
        }
    }
}
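
A short sketch of how the new class is driven end to end, assuming a caller that owns the instance (ContentView above wires the same calls to its buttons):

let stt = SpeechRecognitionImpl()

// Live transcription: stream microphone audio into the model...
stt.startMicrophoneRecognition()

// ...then stop; this prints the final transcript and also writes the
// captured 16 kHz audio to Documents/recording.pcm for inspection.
stt.stopMicrophoneRecognition()

// Batch mode: recognize any wav files listed in recognizeFiles().
stt.recognizeFiles()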