STT/native_client/dotnet/DeepSpeechWPF/ViewModels/MainWindowViewModel.cs
Reuben Morais ae0cf8db6a Revert "Merge branch 'rename-real'"
This reverts commit ae9fdb183e, reversing
changes made to 2eb75b6206.
2020-08-26 11:46:09 +02:00

425 lines
14 KiB
C#

using AsyncAwaitBestPractices.MVVM;
using CSCore;
using CSCore.CoreAudioAPI;
using CSCore.SoundIn;
using CSCore.Streams;
using DeepSpeechClient.Interfaces;
using DeepSpeechClient.Models;
using GalaSoft.MvvmLight.CommandWpf;
using Microsoft.Win32;
using System;
using System.Collections.Concurrent;
using System.Collections.ObjectModel;
using System.Diagnostics;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
namespace DeepSpeech.WPF.ViewModels
{
/// <summary>
/// View model of the MainWindow View.
/// </summary>
public class MainWindowViewModel : BindableBase
{
#region Constants
private const int SampleRate = 16000;
private const string ScorerPath = "kenlm.scorer";
#endregion
private readonly IDeepSpeech _sttClient;
#region Commands
/// <summary>
/// Gets or sets the command that enables the external scorer.
/// </summary>
public IAsyncCommand EnableExternalScorerCommand { get; private set; }
/// <summary>
/// Gets or sets the command that runs inference using an audio file.
/// </summary>
public IAsyncCommand InferenceFromFileCommand { get; private set; }
/// <summary>
/// Gets or sets the command that opens a dialog to select an audio file.
/// </summary>
public RelayCommand SelectFileCommand { get; private set; }
/// <summary>
/// Gets or sets the command that starts to record.
/// </summary>
public RelayCommand StartRecordingCommand { get; private set; }
/// <summary>
/// Gets or sets the command that stops the recording and gets the result.
/// </summary>
public IAsyncCommand StopRecordingCommand { get; private set; }
#endregion
#region Streaming
/// <summary>
/// Stream used to feed data into the acoustic model.
/// </summary>
private DeepSpeechStream _sttStream;
/// <summary>
/// Records the audio of the selected device.
/// </summary>
private WasapiCapture _audioCapture;
/// <summary>
/// Converts the device source into a wavesource.
/// </summary>
private SoundInSource _soundInSource;
/// <summary>
/// Target wave source.(16KHz Mono 16bit for DeepSpeech)
/// </summary>
private IWaveSource _convertedSource;
/// <summary>
/// Queue that prevents feeding data to the inference engine if it is busy.
/// </summary>
private ConcurrentQueue<short[]> _bufferQueue = new ConcurrentQueue<short[]>();
private int _threadSafeBoolBackValue = 0;
/// <summary>
/// Lock to process items in the queue one at time.
/// </summary>
public bool StreamingIsBusy
{
get => (Interlocked.CompareExchange(ref _threadSafeBoolBackValue, 1, 1) == 1);
set
{
if (value) Interlocked.CompareExchange(ref _threadSafeBoolBackValue, 1, 0);
else Interlocked.CompareExchange(ref _threadSafeBoolBackValue, 0, 1);
}
}
#endregion
#region ViewProperties
private bool _enableStartRecord;
/// <summary>
/// Gets or sets record status to control the record command.
/// </summary>
public bool EnableStartRecord
{
get => _enableStartRecord;
set => SetProperty(ref _enableStartRecord, value);
}
private bool _stopRecordStopRecord;
/// <summary>
/// Gets or sets record status to control stop command.
/// </summary>
public bool EnableStopRecord
{
get => _stopRecordStopRecord;
set => SetProperty(ref _stopRecordStopRecord, value,
onChanged: ()=> ((AsyncCommand)StopRecordingCommand).RaiseCanExecuteChanged());
}
private MMDevice _selectedDevice;
/// <summary>
/// Gets or sets the selected recording device.
/// </summary>
public MMDevice SelectedDevice
{
get => _selectedDevice;
set => SetProperty(ref _selectedDevice, value,
onChanged: UpdateSelectedDevice);
}
private string _statusMessage;
/// <summary>
/// Gets or sets status message.
/// </summary>
public string StatusMessage
{
get => _statusMessage;
set => SetProperty(ref _statusMessage, value);
}
private bool _externalScorerEnabled;
/// <summary>
/// Gets or sets the external scorer status.
/// </summary>
private bool ExternalScorerEnabled
{
get => _externalScorerEnabled;
set => SetProperty(ref _externalScorerEnabled, value,
onChanged: () => ((AsyncCommand)EnableExternalScorerCommand).RaiseCanExecuteChanged());
}
private bool _isRunningInference;
/// <summary>
/// Gets or sets whenever the model is running inference.
/// </summary>
private bool IsRunningInference
{
get => _isRunningInference;
set => SetProperty(ref _isRunningInference, value,
onChanged: () => ((AsyncCommand)InferenceFromFileCommand).RaiseCanExecuteChanged());
}
private string _transcription;
/// <summary>
/// Gets or sets the current transcription.
/// </summary>
public string Transcription
{
get => _transcription;
set => SetProperty(ref _transcription, value);
}
private string _audioFilePaht;
/// <summary>
/// Gets or sets the selected audio file path.
/// </summary>
public string AudioFilePath
{
get => _audioFilePaht;
set => SetProperty(ref _audioFilePaht, value);
}
private ObservableCollection<MMDevice> _deviceNames;
/// <summary>
/// Gets or sets the available recording devices.
/// </summary>
public ObservableCollection<MMDevice> AvailableRecordDevices
{
get => _deviceNames;
set => SetProperty(ref _deviceNames, value);
}
#endregion
#region Ctors
public MainWindowViewModel(IDeepSpeech sttClient)
{
_sttClient = sttClient;
EnableExternalScorerCommand = new AsyncCommand(()=>EnableExternalScorerAsync(ScorerPath),
_ => !ExternalScorerEnabled);
InferenceFromFileCommand = new AsyncCommand(ExecuteInferenceFromFileAsync,
_ => !IsRunningInference);
SelectFileCommand = new RelayCommand(SelectAudioFile);
StartRecordingCommand = new RelayCommand(StartRecording,
canExecute: CanExecuteStartRecording);
StopRecordingCommand = new AsyncCommand(StopRecordingAsync,
_ => EnableStopRecord);
LoadAvailableCaptureDevices();
}
#endregion
/// <summary>
/// Releases the current capture device and initializes the selected one.
/// </summary>
private void UpdateSelectedDevice()
{
ReleaseCapture();
InitializeAudioCapture();
}
/// <summary>
/// Releases the capture device.
/// </summary>
private void ReleaseCapture()
{
if (_audioCapture != null)
{
_audioCapture.DataAvailable -= Capture_DataAvailable;
_audioCapture.Dispose();
}
}
/// <summary>
/// Command usage to know when the recording can start.
/// </summary>
/// <returns>If the device is not null.</returns>
private bool CanExecuteStartRecording() =>
SelectedDevice != null;
/// <summary>
/// Loads all the available audio capture devices.
/// </summary>
private void LoadAvailableCaptureDevices()
{
AvailableRecordDevices = new ObservableCollection<MMDevice>(
MMDeviceEnumerator.EnumerateDevices(DataFlow.All, DeviceState.Active)); //we get only enabled devices
EnableStartRecord = true;
if (AvailableRecordDevices?.Count != 0)
SelectedDevice = AvailableRecordDevices[0];
}
/// <summary>
/// Initializes the capture source.
/// </summary>
private void InitializeAudioCapture()
{
if (SelectedDevice != null)
{
_audioCapture = SelectedDevice.DataFlow == DataFlow.Capture ?
new WasapiCapture() : new WasapiLoopbackCapture();
_audioCapture.Device = SelectedDevice;
_audioCapture.Initialize();
_audioCapture.DataAvailable += Capture_DataAvailable;
_soundInSource = new SoundInSource(_audioCapture) { FillWithZeros = false };
//create a source, that converts the data provided by the
//soundInSource to required format
_convertedSource = _soundInSource
.ChangeSampleRate(SampleRate) // sample rate
.ToSampleSource()
.ToWaveSource(16); //bits per sample
_convertedSource = _convertedSource.ToMono();
}
}
private void Capture_DataAvailable(object sender, DataAvailableEventArgs e)
{
//read data from the converedSource
//important: don't use the e.Data here
//the e.Data contains the raw data provided by the
//soundInSource which won't have the deepspeech required audio format
byte[] buffer = new byte[_convertedSource.WaveFormat.BytesPerSecond / 2];
int read;
//keep reading as long as we still get some data
while ((read = _convertedSource.Read(buffer, 0, buffer.Length)) > 0)
{
short[] sdata = new short[(int)Math.Ceiling(Convert.ToDecimal(read / 2))];
Buffer.BlockCopy(buffer, 0, sdata, 0, read);
_bufferQueue.Enqueue(sdata);
Task.Run(() => OnNewData());
}
}
/// <summary>
/// Starts processing data from the queue.
/// </summary>
private void OnNewData()
{
while (!StreamingIsBusy && !_bufferQueue.IsEmpty)
{
if (_bufferQueue.TryDequeue(out short[] buffer))
{
StreamingIsBusy = true;
_sttClient.FeedAudioContent(_sttStream, buffer, Convert.ToUInt32(buffer.Length));
StreamingIsBusy = false;
}
}
}
/// <summary>
/// Enables the external scorer.
/// </summary>
/// <param name="scorerPath">External scorer path.</param>
/// <returns>A Task to await.</returns>
public async Task EnableExternalScorerAsync(string scorerPath)
{
try
{
StatusMessage = "Loading external scorer...";
await Task.Run(() => _sttClient.EnableExternalScorer(ScorerPath));
ExternalScorerEnabled = true;
StatusMessage = "External scorer loaded.";
}
catch (Exception ex)
{
StatusMessage = ex.Message;
}
}
/// <summary>
/// Runs inference and sets the transcription of an audio file.
/// </summary>
/// <returns>A Task to await.</returns>
public async Task ExecuteInferenceFromFileAsync()
{
try
{
IsRunningInference = true;
Transcription = string.Empty;
StatusMessage = "Running inference...";
Stopwatch watch = new Stopwatch();
var waveBuffer = new NAudio.Wave.WaveBuffer(File.ReadAllBytes(AudioFilePath));
using (var waveInfo = new NAudio.Wave.WaveFileReader(AudioFilePath))
{
watch.Start();
string speechResult = await Task.Run(() => _sttClient.SpeechToText(
waveBuffer.ShortBuffer,
Convert.ToUInt32(waveBuffer.MaxSize / 2)));
watch.Stop();
Transcription = $"Audio duration: {waveInfo.TotalTime.ToString()} {Environment.NewLine}" +
$"Inference took: {watch.Elapsed.ToString()} {Environment.NewLine}" +
$"Recognized text: {speechResult}";
}
waveBuffer.Clear();
StatusMessage = string.Empty;
}
catch (Exception ex)
{
StatusMessage = ex.Message;
}
finally
{
IsRunningInference = false;
}
}
/// <summary>
/// Stops the recording and sets the transcription of the closed stream.
/// </summary>
/// <returns>A Task to await.</returns>
private async Task StopRecordingAsync()
{
EnableStopRecord = false;
_audioCapture.Stop();
while (!_bufferQueue.IsEmpty && StreamingIsBusy) //we wait for all the queued buffers to be processed
{
await Task.Delay(90);
}
Transcription = _sttClient.FinishStream(_sttStream);
EnableStartRecord = true;
}
/// <summary>
/// Creates a new stream and starts the recording.
/// </summary>
private void StartRecording()
{
_sttStream =_sttClient.CreateStream();
_audioCapture.Start();
EnableStartRecord = false;
EnableStopRecord = true;
}
/// <summary>
/// Opens a dialog to select an audio file.
/// </summary>
private void SelectAudioFile()
{
OpenFileDialog dialog = new OpenFileDialog
{
Filter = "wav Files |*.wav",
Multiselect = false,
Title = "Please select a wav file."
};
if ((bool)dialog.ShowDialog())
{
AudioFilePath = dialog.FileName;
}
}
}
}