.NET rename

This commit is contained in:
Reuben Morais 2020-08-04 11:15:27 +02:00
parent fa21911048
commit ee7bf86460
39 changed files with 234 additions and 103 deletions

View File

@ -2,9 +2,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.30204.135
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DeepSpeechClient", "DeepSpeechClient\DeepSpeechClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MozillaVoiceSttClient", "MozillaVoiceSttClient\MozillaVoiceSttClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechConsole", "DeepSpeechConsole\DeepSpeechConsole.csproj", "{312965E5-C4F6-4D95-BA64-79906B8BC7AC}"
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MozillaVoiceSttConsole", "MozillaVoiceSttConsole\MozillaVoiceSttConsole.csproj", "{312965E5-C4F6-4D95-BA64-79906B8BC7AC}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution

View File

@ -1,7 +1,7 @@
namespace DeepSpeechClient.Enums
namespace MozillaVoiceSttClient.Enums
{
/// <summary>
/// Error codes from the native DeepSpeech binary.
/// Error codes from the native Mozilla Voice STT binary.
/// </summary>
internal enum ErrorCodes
{

View File

@ -1,9 +1,9 @@
using DeepSpeechClient.Structs;
using MozillaVoiceSttClient.Structs;
using System;
using System.Runtime.InteropServices;
using System.Text;
namespace DeepSpeechClient.Extensions
namespace MozillaVoiceSttClient.Extensions
{
internal static class NativeExtensions
{

View File

@ -1,13 +1,13 @@
using DeepSpeechClient.Models;
using MozillaVoiceSttClient.Models;
using System;
using System.IO;
namespace DeepSpeechClient.Interfaces
namespace MozillaVoiceSttClient.Interfaces
{
/// <summary>
/// Client interface of Mozilla's DeepSpeech implementation.
/// Client interface of Mozilla Voice STT.
/// </summary>
public interface IDeepSpeech : IDisposable
public interface IModel : IDisposable
{
/// <summary>
/// Return version of this library. The returned version is a semantic version
@ -59,7 +59,7 @@ namespace DeepSpeechClient.Interfaces
unsafe void SetScorerAlphaBeta(float aAlpha, float aBeta);
/// <summary>
/// Use the DeepSpeech model to perform Speech-To-Text.
/// Use the Mozilla Voice STT model to perform Speech-To-Text.
/// </summary>
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
/// <param name="aBufferSize">The number of samples in the audio signal.</param>
@ -68,7 +68,7 @@ namespace DeepSpeechClient.Interfaces
uint aBufferSize);
/// <summary>
/// Use the DeepSpeech model to perform Speech-To-Text, return results including metadata.
/// Use the Mozilla Voice STT model to perform Speech-To-Text, return results including metadata.
/// </summary>
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
/// <param name="aBufferSize">The number of samples in the audio signal.</param>
@ -83,26 +83,26 @@ namespace DeepSpeechClient.Interfaces
/// This can be used if you no longer need the result of an ongoing streaming
/// inference and don't want to perform a costly decode operation.
/// </summary>
unsafe void FreeStream(DeepSpeechStream stream);
unsafe void FreeStream(MozillaVoiceSttStream stream);
/// <summary>
/// Creates a new streaming inference state.
/// </summary>
unsafe DeepSpeechStream CreateStream();
unsafe MozillaVoiceSttStream CreateStream();
/// <summary>
/// Feeds audio samples to an ongoing streaming inference.
/// </summary>
/// <param name="stream">Instance of the stream to feed the data.</param>
/// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on).</param>
unsafe void FeedAudioContent(DeepSpeechStream stream, short[] aBuffer, uint aBufferSize);
unsafe void FeedAudioContent(MozillaVoiceSttStream stream, short[] aBuffer, uint aBufferSize);
/// <summary>
/// Computes the intermediate decoding of an ongoing streaming inference.
/// </summary>
/// <param name="stream">Instance of the stream to decode.</param>
/// <returns>The STT intermediate result.</returns>
unsafe string IntermediateDecode(DeepSpeechStream stream);
unsafe string IntermediateDecode(MozillaVoiceSttStream stream);
/// <summary>
/// Computes the intermediate decoding of an ongoing streaming inference, including metadata.
@ -110,14 +110,14 @@ namespace DeepSpeechClient.Interfaces
/// <param name="stream">Instance of the stream to decode.</param>
/// <param name="aNumResults">Maximum number of candidate transcripts to return. Returned list might be smaller than this.</param>
/// <returns>The extended metadata result.</returns>
unsafe Metadata IntermediateDecodeWithMetadata(DeepSpeechStream stream, uint aNumResults);
unsafe Metadata IntermediateDecodeWithMetadata(MozillaVoiceSttStream stream, uint aNumResults);
/// <summary>
/// Closes the ongoing streaming inference, returns the STT result over the whole audio signal.
/// </summary>
/// <param name="stream">Instance of the stream to finish.</param>
/// <returns>The STT result.</returns>
unsafe string FinishStream(DeepSpeechStream stream);
unsafe string FinishStream(MozillaVoiceSttStream stream);
/// <summary>
/// Closes the ongoing streaming inference, returns the STT result over the whole audio signal, including metadata.
@ -125,6 +125,6 @@ namespace DeepSpeechClient.Interfaces
/// <param name="stream">Instance of the stream to finish.</param>
/// <param name="aNumResults">Maximum number of candidate transcripts to return. Returned list might be smaller than this.</param>
/// <returns>The extended metadata result.</returns>
unsafe Metadata FinishStreamWithMetadata(DeepSpeechStream stream, uint aNumResults);
unsafe Metadata FinishStreamWithMetadata(MozillaVoiceSttStream stream, uint aNumResults);
}
}

View File

@ -0,0 +1,130 @@
using MozillaVoiceSttClient.Models;
using System;
using System.IO;
namespace MozillaVoiceSttClient.Interfaces
{
/// <summary>
/// Client interface of Mozilla Voice STT.
/// Exposes model configuration (beam width, external scorer), one-shot
/// Speech-To-Text over a complete buffer, and streaming inference through
/// <see cref="MozillaVoiceSttStream"/> instances.
/// </summary>
public interface IMozillaVoiceSttModel : IDisposable
{
/// <summary>
/// Return version of this library. The returned version is a semantic version
/// (SemVer 2.0.0).
/// </summary>
/// <returns>The version string of the library.</returns>
unsafe string Version();
/// <summary>
/// Return the sample rate expected by the model.
/// </summary>
/// <returns>Sample rate.</returns>
unsafe int GetModelSampleRate();
/// <summary>
/// Get beam width value used by the model. If SetModelBeamWidth was not
/// called before, will return the default value loaded from the model
/// file.
/// </summary>
/// <returns>Beam width value used by the model.</returns>
unsafe uint GetModelBeamWidth();
/// <summary>
/// Set beam width value used by the model.
/// </summary>
/// <param name="aBeamWidth">The beam width used by the decoder. A larger beam width value generates better results at the cost of decoding time.</param>
/// <exception cref="ArgumentException">Thrown on failure.</exception>
unsafe void SetModelBeamWidth(uint aBeamWidth);
/// <summary>
/// Enable decoding using an external scorer.
/// </summary>
/// <param name="aScorerPath">The path to the external scorer file.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to enable decoding with an external scorer.</exception>
/// <exception cref="FileNotFoundException">Thrown when cannot find the scorer file.</exception>
unsafe void EnableExternalScorer(string aScorerPath);
/// <summary>
/// Disable decoding using an external scorer.
/// </summary>
/// <exception cref="ArgumentException">Thrown when an external scorer is not enabled.</exception>
unsafe void DisableExternalScorer();
/// <summary>
/// Set hyperparameters alpha and beta of the external scorer.
/// </summary>
/// <param name="aAlpha">The alpha hyperparameter of the decoder. Language model weight.</param>
/// <param name="aBeta">The beta hyperparameter of the decoder. Word insertion weight.</param>
/// <exception cref="ArgumentException">Thrown when an external scorer is not enabled.</exception>
unsafe void SetScorerAlphaBeta(float aAlpha, float aBeta);
/// <summary>
/// Use the Mozilla Voice STT model to perform Speech-To-Text.
/// </summary>
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
/// <param name="aBufferSize">The number of samples in the audio signal.</param>
/// <returns>The STT result. Returns NULL on error.</returns>
unsafe string SpeechToText(short[] aBuffer,
uint aBufferSize);
/// <summary>
/// Use the Mozilla Voice STT model to perform Speech-To-Text, return results including metadata.
/// </summary>
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
/// <param name="aBufferSize">The number of samples in the audio signal.</param>
/// <param name="aNumResults">Maximum number of candidate transcripts to return. Returned list might be smaller than this.</param>
/// <returns>The extended metadata. Returns NULL on error.</returns>
unsafe Metadata SpeechToTextWithMetadata(short[] aBuffer,
uint aBufferSize,
uint aNumResults);
/// <summary>
/// Destroy a streaming state without decoding the computed logits.
/// This can be used if you no longer need the result of an ongoing streaming
/// inference and don't want to perform a costly decode operation.
/// </summary>
/// <param name="stream">Instance of the stream to destroy.</param>
unsafe void FreeStream(MozillaVoiceSttStream stream);
/// <summary>
/// Creates a new streaming inference state.
/// </summary>
/// <returns>The stream wrapping the newly created streaming state.</returns>
unsafe MozillaVoiceSttStream CreateStream();
/// <summary>
/// Feeds audio samples to an ongoing streaming inference.
/// </summary>
/// <param name="stream">Instance of the stream to feed the data.</param>
/// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on).</param>
/// <param name="aBufferSize">The number of samples in the audio buffer.</param>
unsafe void FeedAudioContent(MozillaVoiceSttStream stream, short[] aBuffer, uint aBufferSize);
/// <summary>
/// Computes the intermediate decoding of an ongoing streaming inference.
/// </summary>
/// <param name="stream">Instance of the stream to decode.</param>
/// <returns>The STT intermediate result.</returns>
unsafe string IntermediateDecode(MozillaVoiceSttStream stream);
/// <summary>
/// Computes the intermediate decoding of an ongoing streaming inference, including metadata.
/// </summary>
/// <param name="stream">Instance of the stream to decode.</param>
/// <param name="aNumResults">Maximum number of candidate transcripts to return. Returned list might be smaller than this.</param>
/// <returns>The extended metadata result.</returns>
unsafe Metadata IntermediateDecodeWithMetadata(MozillaVoiceSttStream stream, uint aNumResults);
/// <summary>
/// Closes the ongoing streaming inference, returns the STT result over the whole audio signal.
/// </summary>
/// <param name="stream">Instance of the stream to finish.</param>
/// <returns>The STT result.</returns>
unsafe string FinishStream(MozillaVoiceSttStream stream);
/// <summary>
/// Closes the ongoing streaming inference, returns the STT result over the whole audio signal, including metadata.
/// </summary>
/// <param name="stream">Instance of the stream to finish.</param>
/// <param name="aNumResults">Maximum number of candidate transcripts to return. Returned list might be smaller than this.</param>
/// <returns>The extended metadata result.</returns>
unsafe Metadata FinishStreamWithMetadata(MozillaVoiceSttStream stream, uint aNumResults);
}
}

View File

@ -1,4 +1,4 @@
namespace DeepSpeechClient.Models
namespace MozillaVoiceSttClient.Models
{
/// <summary>
/// Stores the entire CTC output as an array of character metadata objects.

View File

@ -1,19 +1,19 @@
using System;
namespace DeepSpeechClient.Models
namespace MozillaVoiceSttClient.Models
{
/// <summary>
/// Wrapper of the pointer used for the decoding stream.
/// </summary>
public class DeepSpeechStream : IDisposable
public class MozillaVoiceSttStream : IDisposable
{
private unsafe IntPtr** _streamingStatePp;
/// <summary>
/// Initializes a new instance of <see cref="DeepSpeechStream"/>.
/// Initializes a new instance of <see cref="MozillaVoiceSttStream"/>.
/// </summary>
/// <param name="streamingStatePP">Native pointer of the native stream.</param>
public unsafe DeepSpeechStream(IntPtr** streamingStatePP)
public unsafe MozillaVoiceSttStream(IntPtr** streamingStatePP)
{
_streamingStatePp = streamingStatePP;
}

View File

@ -1,4 +1,4 @@
namespace DeepSpeechClient.Models
namespace MozillaVoiceSttClient.Models
{
/// <summary>
/// Stores the entire CTC output as an array of character metadata objects.

View File

@ -1,4 +1,4 @@
namespace DeepSpeechClient.Models
namespace MozillaVoiceSttClient.Models
{
/// <summary>
/// Stores each individual character, along with its timing information.

View File

@ -1,34 +1,34 @@
using DeepSpeechClient.Interfaces;
using DeepSpeechClient.Extensions;
using MozillaVoiceStt.Interfaces;
using MozillaVoiceStt.Extensions;
using System;
using System.IO;
using DeepSpeechClient.Enums;
using DeepSpeechClient.Models;
using MozillaVoiceStt.Enums;
using MozillaVoiceStt.Models;
namespace DeepSpeechClient
namespace MozillaVoiceStt
{
/// <summary>
/// Concrete implementation of <see cref="DeepSpeechClient.Interfaces.IDeepSpeech"/>.
/// Concrete implementation of <see cref="MozillaVoiceSttClient.Interfaces.IMozillaVoiceSttModel"/>.
/// </summary>
public class DeepSpeech : IDeepSpeech
public class MozillaVoiceSttModel : IMozillaVoiceSttModel
{
private unsafe IntPtr** _modelStatePP;
/// <summary>
/// Initializes a new instance of <see cref="DeepSpeech"/> class and creates a new acoustic model.
/// Initializes a new instance of <see cref="MozillaVoiceSttModel"/> class and creates a new acoustic model.
/// </summary>
/// <param name="aModelPath">The path to the frozen model graph.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
public DeepSpeech(string aModelPath)
public MozillaVoiceSttModel(string aModelPath)
{
CreateModel(aModelPath);
}
#region IDeepSpeech
#region IMozillaVoiceSttModel
/// <summary>
/// Create an object providing an interface to a trained DeepSpeech model.
/// Create an object providing an interface to a trained Mozilla Voice STT model.
/// </summary>
/// <param name="aModelPath">The path to the frozen model graph.</param>
/// <exception cref="ArgumentException">Thrown when the native binary failed to create the model.</exception>
@ -153,7 +153,7 @@ namespace DeepSpeechClient
/// </summary>
/// <param name="stream">Instance of the stream to feed the data.</param>
/// <param name="aBuffer">An array of 16-bit, mono raw audio samples at the appropriate sample rate (matching what the model was trained on).</param>
public unsafe void FeedAudioContent(DeepSpeechStream stream, short[] aBuffer, uint aBufferSize)
public unsafe void FeedAudioContent(MozillaVoiceSttStream stream, short[] aBuffer, uint aBufferSize)
{
NativeImp.STT_FeedAudioContent(stream.GetNativePointer(), aBuffer, aBufferSize);
}
@ -163,7 +163,7 @@ namespace DeepSpeechClient
/// </summary>
/// <param name="stream">Instance of the stream to finish.</param>
/// <returns>The STT result.</returns>
public unsafe string FinishStream(DeepSpeechStream stream)
public unsafe string FinishStream(MozillaVoiceSttStream stream)
{
return NativeImp.STT_FinishStream(stream.GetNativePointer()).PtrToString();
}
@ -174,7 +174,7 @@ namespace DeepSpeechClient
/// <param name="stream">Instance of the stream to finish.</param>
/// <param name="aNumResults">Maximum number of candidate transcripts to return. Returned list might be smaller than this.</param>
/// <returns>The extended metadata result.</returns>
public unsafe Metadata FinishStreamWithMetadata(DeepSpeechStream stream, uint aNumResults)
public unsafe Metadata FinishStreamWithMetadata(MozillaVoiceSttStream stream, uint aNumResults)
{
return NativeImp.STT_FinishStreamWithMetadata(stream.GetNativePointer(), aNumResults).PtrToMetadata();
}
@ -184,7 +184,7 @@ namespace DeepSpeechClient
/// </summary>
/// <param name="stream">Instance of the stream to decode.</param>
/// <returns>The STT intermediate result.</returns>
public unsafe string IntermediateDecode(DeepSpeechStream stream)
public unsafe string IntermediateDecode(MozillaVoiceSttStream stream)
{
return NativeImp.STT_IntermediateDecode(stream.GetNativePointer()).PtrToString();
}
@ -195,7 +195,7 @@ namespace DeepSpeechClient
/// <param name="stream">Instance of the stream to decode.</param>
/// <param name="aNumResults">Maximum number of candidate transcripts to return. Returned list might be smaller than this.</param>
/// <returns>The STT intermediate result.</returns>
public unsafe Metadata IntermediateDecodeWithMetadata(DeepSpeechStream stream, uint aNumResults)
public unsafe Metadata IntermediateDecodeWithMetadata(MozillaVoiceSttStream stream, uint aNumResults)
{
return NativeImp.STT_IntermediateDecodeWithMetadata(stream.GetNativePointer(), aNumResults).PtrToMetadata();
}
@ -212,12 +212,12 @@ namespace DeepSpeechClient
/// <summary>
/// Creates a new streaming inference state.
/// </summary>
public unsafe DeepSpeechStream CreateStream()
public unsafe MozillaVoiceSttStream CreateStream()
{
IntPtr** streamingStatePointer = null;
var resultCode = NativeImp.STT_CreateStream(_modelStatePP, ref streamingStatePointer);
EvaluateResultCode(resultCode);
return new DeepSpeechStream(streamingStatePointer);
return new MozillaVoiceSttStream(streamingStatePointer);
}
/// <summary>
@ -225,14 +225,14 @@ namespace DeepSpeechClient
/// This can be used if you no longer need the result of an ongoing streaming
/// inference and don't want to perform a costly decode operation.
/// </summary>
public unsafe void FreeStream(DeepSpeechStream stream)
public unsafe void FreeStream(MozillaVoiceSttStream stream)
{
NativeImp.STT_FreeStream(stream.GetNativePointer());
stream.Dispose();
}
/// <summary>
/// Use the DeepSpeech model to perform Speech-To-Text.
/// Use the Mozilla Voice STT model to perform Speech-To-Text.
/// </summary>
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
/// <param name="aBufferSize">The number of samples in the audio signal.</param>
@ -243,7 +243,7 @@ namespace DeepSpeechClient
}
/// <summary>
/// Use the DeepSpeech model to perform Speech-To-Text, return results including metadata.
/// Use the Mozilla Voice STT model to perform Speech-To-Text, return results including metadata.
/// </summary>
/// <param name="aBuffer">A 16-bit, mono raw audio signal at the appropriate sample rate (matching what the model was trained on).</param>
/// <param name="aBufferSize">The number of samples in the audio signal.</param>

View File

@ -1,9 +1,9 @@
using DeepSpeechClient.Enums;
using MozillaVoiceSttClient.Enums;
using System;
using System.Runtime.InteropServices;
namespace DeepSpeechClient
namespace MozillaVoiceSttClient
{
/// <summary>
/// Wrapper for the native implementation of "libmozilla_voice_stt.so"

View File

@ -1,7 +1,7 @@
using System;
using System.Runtime.InteropServices;
namespace DeepSpeechClient.Structs
namespace MozillaVoiceSttClient.Structs
{
[StructLayout(LayoutKind.Sequential)]
internal unsafe struct CandidateTranscript

View File

@ -1,7 +1,7 @@
using System;
using System.Runtime.InteropServices;
namespace DeepSpeechClient.Structs
namespace MozillaVoiceSttClient.Structs
{
[StructLayout(LayoutKind.Sequential)]
internal unsafe struct Metadata

View File

@ -1,7 +1,7 @@
using System;
using System.Runtime.InteropServices;
namespace DeepSpeechClient.Structs
namespace MozillaVoiceSttClient.Structs
{
[StructLayout(LayoutKind.Sequential)]
internal unsafe struct TokenMetadata

View File

@ -6,8 +6,8 @@
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{312965E5-C4F6-4D95-BA64-79906B8BC7AC}</ProjectGuid>
<OutputType>Exe</OutputType>
<RootNamespace>DeepSpeechConsole</RootNamespace>
<AssemblyName>DeepSpeechConsole</AssemblyName>
<RootNamespace>MozillaVoiceSttConsole</RootNamespace>
<AssemblyName>MozillaVoiceSttConsole</AssemblyName>
<TargetFrameworkVersion>v4.6.2</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
@ -56,9 +56,9 @@
<None Include="packages.config" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\DeepSpeechClient\DeepSpeechClient.csproj">
<ProjectReference Include="..\MozillaVoiceSttClient\MozillaVoiceSttClient.csproj">
<Project>{56DE4091-BBBE-47E4-852D-7268B33B971F}</Project>
<Name>DeepSpeechClient</Name>
<Name>MozillaVoiceSttClient</Name>
</ProjectReference>
</ItemGroup>
<ItemGroup>

View File

@ -1,6 +1,6 @@
using DeepSpeechClient;
using DeepSpeechClient.Interfaces;
using DeepSpeechClient.Models;
using MozillaVoiceSttClient;
using MozillaVoiceSttClient.Interfaces;
using MozillaVoiceSttClient.Models;
using NAudio.Wave;
using System;
using System.Collections.Generic;
@ -52,7 +52,7 @@ namespace CSharpExamples
Console.WriteLine("Loading model...");
stopwatch.Start();
// sphinx-doc: csharp_ref_model_start
using (IDeepSpeech sttClient = new DeepSpeech(model ?? "output_graph.pbmm"))
using (IMozillaVoiceSttModel sttClient = new MozillaVoiceSttModel(model ?? "output_graph.pbmm"))
{
// sphinx-doc: csharp_ref_model_stop
stopwatch.Stop();

View File

@ -5,7 +5,7 @@ using System.Runtime.InteropServices;
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("DeepSpeechConsole")]
[assembly: AssemblyTitle("MozillaVoiceSttConsole")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]

View File

@ -1,8 +1,8 @@
<Application
x:Class="DeepSpeechWPF.App"
x:Class="MozillaVoiceSttWPF.App"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
xmlns:local="clr-namespace:DeepSpeechWPF"
xmlns:local="clr-namespace:MozillaVoiceSttWPF"
StartupUri="MainWindow.xaml">
<Application.Resources />
</Application>

View File

@ -1,10 +1,10 @@
using CommonServiceLocator;
using DeepSpeech.WPF.ViewModels;
using DeepSpeechClient.Interfaces;
using MozillaVoiceStt.WPF.ViewModels;
using MozillaVoiceStt.Interfaces;
using GalaSoft.MvvmLight.Ioc;
using System.Windows;
namespace DeepSpeechWPF
namespace MozillaVoiceSttWPF
{
/// <summary>
/// Interaction logic for App.xaml
@ -18,11 +18,11 @@ namespace DeepSpeechWPF
try
{
//Register instance of DeepSpeech
DeepSpeechClient.DeepSpeech deepSpeechClient =
new DeepSpeechClient.DeepSpeech("deepspeech-0.8.0-models.pbmm");
//Register instance of Mozilla Voice STT
MozillaVoiceSttClient.Model client =
new MozillaVoiceSttClient.Model("deepspeech-0.8.0-models.pbmm");
SimpleIoc.Default.Register<IDeepSpeech>(() => deepSpeechClient);
SimpleIoc.Default.Register<IMozillaVoiceSttModel>(() => client);
SimpleIoc.Default.Register<MainWindowViewModel>();
}
catch (System.Exception ex)
@ -35,8 +35,8 @@ namespace DeepSpeechWPF
protected override void OnExit(ExitEventArgs e)
{
base.OnExit(e);
//Dispose instance of DeepSpeech
ServiceLocator.Current.GetInstance<IDeepSpeech>()?.Dispose();
//Dispose instance of Mozilla Voice STT
ServiceLocator.Current.GetInstance<IMozillaVoiceSttModel>()?.Dispose();
}
}
}

View File

@ -1,10 +1,10 @@
<Window
x:Class="DeepSpeechWPF.MainWindow"
x:Class="MozillaVoiceSttWPF.MainWindow"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
Title="Deepspeech client"
Title="Mozilla Voice STT Client"
Width="800"
Height="600"
Loaded="Window_Loaded"

View File

@ -1,8 +1,8 @@
using CommonServiceLocator;
using DeepSpeech.WPF.ViewModels;
using MozillaVoiceStt.WPF.ViewModels;
using System.Windows;
namespace DeepSpeechWPF
namespace MozillaVoiceSttWPF
{
/// <summary>
/// Interaction logic for MainWindow.xaml

View File

@ -6,8 +6,8 @@
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{54BFD766-4305-4F4C-BA59-AF45505DF3C1}</ProjectGuid>
<OutputType>WinExe</OutputType>
<RootNamespace>DeepSpeech.WPF</RootNamespace>
<AssemblyName>DeepSpeech.WPF</AssemblyName>
<RootNamespace>MozillaVoiceStt.WPF</RootNamespace>
<AssemblyName>MozillaVoiceStt.WPF</AssemblyName>
<TargetFrameworkVersion>v4.6.2</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<ProjectTypeGuids>{60dc8134-eba5-43b8-bcc9-bb4bc16c2548};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
@ -131,9 +131,9 @@
<None Include="App.config" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\DeepSpeechClient\DeepSpeechClient.csproj">
<ProjectReference Include="..\MozillaVoiceSttClient\MozillaVoiceSttClient.csproj">
<Project>{56de4091-bbbe-47e4-852d-7268b33b971f}</Project>
<Name>DeepSpeechClient</Name>
<Name>MozillaVoiceSttClient</Name>
</ProjectReference>
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />

View File

@ -3,9 +3,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
VisualStudioVersion = 15.0.28307.421
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeech.WPF", "DeepSpeech.WPF.csproj", "{54BFD766-4305-4F4C-BA59-AF45505DF3C1}"
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MozillaVoiceStt.WPF", "MozillaVoiceStt.WPF.csproj", "{54BFD766-4305-4F4C-BA59-AF45505DF3C1}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechClient", "..\DeepSpeechClient\DeepSpeechClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}"
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MozillaVoiceSttClient", "..\MozillaVoiceSttClient\MozillaVoiceSttClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution

View File

@ -7,11 +7,11 @@ using System.Windows;
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("DeepSpeech.WPF")]
[assembly: AssemblyTitle("MozillaVoiceStt.WPF")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("DeepSpeech.WPF.SingleFiles")]
[assembly: AssemblyProduct("MozillaVoiceStt.WPF.SingleFiles")]
[assembly: AssemblyCopyright("Copyright © 2018")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]

View File

@ -8,7 +8,7 @@
// </auto-generated>
//------------------------------------------------------------------------------
namespace DeepSpeech.WPF.Properties {
namespace MozillaVoiceStt.WPF.Properties {
using System;
@ -39,7 +39,7 @@ namespace DeepSpeech.WPF.Properties {
internal static global::System.Resources.ResourceManager ResourceManager {
get {
if (object.ReferenceEquals(resourceMan, null)) {
global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("DeepSpeech.WPF.Properties.Resources", typeof(Resources).Assembly);
global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("MozillaVoiceStt.WPF.Properties.Resources", typeof(Resources).Assembly);
resourceMan = temp;
}
return resourceMan;

View File

@ -8,7 +8,7 @@
// </auto-generated>
//------------------------------------------------------------------------------
namespace DeepSpeech.WPF.Properties {
namespace MozillaVoiceStt.WPF.Properties {
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]

View File

@ -3,7 +3,7 @@ using System.Collections.Generic;
using System.ComponentModel;
using System.Runtime.CompilerServices;
namespace DeepSpeech.WPF.ViewModels
namespace MozillaVoiceStt.WPF.ViewModels
{
/// <summary>
/// Implementation of <see cref="INotifyPropertyChanged"/> to simplify models.

View File

@ -3,8 +3,8 @@ using CSCore;
using CSCore.CoreAudioAPI;
using CSCore.SoundIn;
using CSCore.Streams;
using DeepSpeechClient.Interfaces;
using DeepSpeechClient.Models;
using MozillaVoiceSttClient.Interfaces;
using MozillaVoiceSttClient.Models;
using GalaSoft.MvvmLight.CommandWpf;
using Microsoft.Win32;
using System;
@ -15,7 +15,7 @@ using System.IO;
using System.Threading;
using System.Threading.Tasks;
namespace DeepSpeech.WPF.ViewModels
namespace MozillaVoiceStt.WPF.ViewModels
{
/// <summary>
/// View model of the MainWindow View.
@ -27,7 +27,7 @@ namespace DeepSpeech.WPF.ViewModels
private const string ScorerPath = "kenlm.scorer";
#endregion
private readonly IDeepSpeech _sttClient;
private readonly IMozillaVoiceSttModel _sttClient;
#region Commands
/// <summary>
@ -62,7 +62,7 @@ namespace DeepSpeech.WPF.ViewModels
/// <summary>
/// Stream used to feed data into the acoustic model.
/// </summary>
private DeepSpeechStream _sttStream;
private MozillaVoiceSttStream _sttStream;
/// <summary>
/// Records the audio of the selected device.
@ -75,7 +75,7 @@ namespace DeepSpeech.WPF.ViewModels
private SoundInSource _soundInSource;
/// <summary>
/// Target wave source.(16KHz Mono 16bit for DeepSpeech)
/// Target wave source.(16KHz Mono 16bit for Mozilla Voice STT)
/// </summary>
private IWaveSource _convertedSource;
@ -200,7 +200,7 @@ namespace DeepSpeech.WPF.ViewModels
#endregion
#region Ctors
public MainWindowViewModel(IDeepSpeech sttClient)
public MainWindowViewModel(IMozillaVoiceSttModel sttClient)
{
_sttClient = sttClient;
@ -290,7 +290,8 @@ namespace DeepSpeech.WPF.ViewModels
//read data from the convertedSource
//important: don't use the e.Data here
//the e.Data contains the raw data provided by the
//soundInSource which won't have the deepspeech required audio format
//soundInSource which won't have the Mozilla Voice STT required
// audio format
byte[] buffer = new byte[_convertedSource.WaveFormat.BytesPerSecond / 2];
int read;

View File

@ -1,8 +1,8 @@
Building DeepSpeech native client for Windows
Building Mozilla Voice STT native client for Windows
====================================================
Now we can build the native client of DeepSpeech and run inference on Windows using the C# client, to do that we need to compile the ``native_client``.
We can now build the Mozilla Voice STT native client and run inference on Windows using the C# client. To do that, we need to compile the ``native_client``.
**Table of Contents**
@ -59,8 +59,8 @@ There should already be a symbolic link, for this example let's suppose that we
.
├── D:\
│ ├── cloned # Contains DeepSpeech and tensorflow side by side
│ │ └── DeepSpeech # Root of the cloned DeepSpeech
│ ├── cloned # Contains Mozilla Voice STT and tensorflow side by side
│ │ └── DeepSpeech # Root of the cloned Mozilla Voice STT
│ │ ├── tensorflow # Root of the cloned Mozilla's tensorflow
└── ...
@ -142,4 +142,4 @@ Be patient, if you enabled AVX/AVX2 and CUDA it will take a long time. Finally y
Using the generated library
---------------------------
As for now we can only use the generated ``libmozilla_voice_stt.so`` with the C# clients, go to `native_client/dotnet/ <https://github.com/mozilla/DeepSpeech/tree/master/native_client/dotnet>`_ in your DeepSpeech directory and open the Visual Studio solution, then we need to build in debug or release mode, finally we just need to copy ``libmozilla_voice_stt.so`` to the generated ``x64/Debug`` or ``x64/Release`` directory.
For now, the generated ``libmozilla_voice_stt.so`` can only be used with the C# clients. Go to `native_client/dotnet/ <https://github.com/mozilla/DeepSpeech/tree/master/native_client/dotnet>`_ in your Mozilla Voice STT directory and open the Visual Studio solution, build it in Debug or Release mode, and finally copy ``libmozilla_voice_stt.so`` to the generated ``x64/Debug`` or ``x64/Release`` directory.

View File

@ -3,13 +3,13 @@
<metadata>
<id>$NUPKG_ID</id>
<version>$NUPKG_VERSION</version>
<title>Mozilla_Voice_STT</title>
<title>Mozilla.Voice.STT</title>
<authors>Mozilla</authors>
<owners>Mozilla</owners>
<license type="expression">MPL-2.0</license>
<projectUrl>http://github.com/mozilla/DeepSpeech</projectUrl>
<requireLicenseAcceptance>false</requireLicenseAcceptance>
<description>A library for running inference with a DeepSpeech model</description>
<description>A library for running inference with a Mozilla Voice STT model</description>
<copyright>Copyright (c) 2019 Mozilla Corporation</copyright>
<tags>native speech speech_recognition</tags>
</metadata>