diff --git a/examples/README.rst b/examples/README.rst
index 88ebd87c..f5ebb1bd 100644
--- a/examples/README.rst
+++ b/examples/README.rst
@@ -1,17 +1,6 @@
 Examples
 ========
 
-There are various examples on how to use or integrate DeepSpeech on a basic level.
-It is a good way to just try out DeepSpeech, without much knowledge on how DeepSpeech actually works.
-
-**List of examples**
+DeepSpeech examples were moved to a separate repository.
 
-* `Microphone VAD streaming `_
-
-* `FFMPEG VAD streaming `_
-
-* `.NET framework `_
-
-* `Node.JS wav `_
-
-* `VAD transcriber `_
+New location: https://github.com/mozilla/DeepSpeech-examples
diff --git a/examples/ffmpeg_vad_streaming/README.MD b/examples/ffmpeg_vad_streaming/README.MD
deleted file mode 100644
index 7c1de91a..00000000
--- a/examples/ffmpeg_vad_streaming/README.MD
+++ /dev/null
@@ -1,62 +0,0 @@
-# FFmpeg VAD Streaming
-
-Streaming inference from arbitrary source (FFmpeg input) to DeepSpeech, using VAD (voice activity detection). A fairly simple example demonstrating the DeepSpeech streaming API in Node.js.
-
-This example was successfully tested with a mobile phone streaming a live feed to a RTMP server (nginx-rtmp), which then could be used by this script for near real time speech recognition.
-
-## Installation
-
-```bash
-npm install
-```
-
-Moreover FFmpeg must be installed:
-
-```bash
-sudo apt-get install ffmpeg
-```
-
-## Usage
-
-Here is an example for a local audio file:
-```bash
-node ./index.js --audio \
-  --model $HOME/models/output_graph.pbmm \
-```
-
-Here is an example for a remote RTMP-Stream:
-```bash
-node ./index.js --audio rtmp://:1935/live/teststream \
-  --model $HOME/models/output_graph.pbmm \
-```
-
-## Examples
-Real time streaming inference with DeepSpeech's example audio ([audio-0.4.1.tar.gz](https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz)).
-```bash
-node ./index.js --audio $HOME/audio/2830-3980-0043.wav \
-  --lm $HOME/models/lm.binary \
-  --trie $HOME/models/trie \
-  --model $HOME/models/output_graph.pbmm \
-```
-```bash
-node ./index.js --audio $HOME/audio/4507-16021-0012.wav \
-  --lm $HOME/models/lm.binary \
-  --trie $HOME/models/trie \
-  --model $HOME/models/output_graph.pbmm \
-```
-```bash
-node ./index.js --audio $HOME/audio/8455-210777-0068.wav \
-  --lm $HOME/models/lm.binary \
-  --trie $HOME/models/trie \
-  --model $HOME/models/output_graph.pbmm \
-```
-Real time streaming inference in combination with a RTMP server.
-```bash
-node ./index.js --audio rtmp://// \
-  --lm $HOME/models/lm.binary \
-  --trie $HOME/models/trie \
-  --model $HOME/models/output_graph.pbmm \
-```
-
-## Notes
-To get the best result mapped on to your own scenario, it might be helpful to adjust the parameters `VAD_MODE` and `DEBUNCE_TIME`.
\ No newline at end of file
diff --git a/examples/ffmpeg_vad_streaming/index.js b/examples/ffmpeg_vad_streaming/index.js
deleted file mode 100644
index 05d5b49b..00000000
--- a/examples/ffmpeg_vad_streaming/index.js
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/bin/env node
-
-const VAD = require("node-vad");
-const Ds = require('deepspeech');
-const argparse = require('argparse');
-const util = require('util');
-const { spawn } = require('child_process');
-
-// These constants control the beam search decoder
-
-// Beam width used in the CTC decoder when building candidate transcriptions
-const BEAM_WIDTH = 500;
-
-// The alpha hyperparameter of the CTC decoder. Language Model weight
-const LM_ALPHA = 0.75;
-
-// The beta hyperparameter of the CTC decoder. Word insertion bonus.
-const LM_BETA = 1.85;
-
-let VersionAction = function VersionAction(options) {
-  options = options || {};
-  options.nargs = 0;
-  argparse.Action.call(this, options);
-};
-
-util.inherits(VersionAction, argparse.Action);
-
-VersionAction.prototype.call = function(parser) {
-  Ds.printVersions();
-  process.exit(0);
-};
-
-let parser = new argparse.ArgumentParser({addHelp: true, description: 'Running DeepSpeech inference.'});
-parser.addArgument(['--model'], {required: true, help: 'Path to the model (protocol buffer binary file)'});
-parser.addArgument(['--lm'], {help: 'Path to the language model binary file', nargs: '?'});
-parser.addArgument(['--trie'], {help: 'Path to the language model trie file created with native_client/generate_trie', nargs: '?'});
-parser.addArgument(['--audio'], {required: true, help: 'Path to the audio source to run (ffmpeg supported formats)'});
-parser.addArgument(['--version'], {action: VersionAction, help: 'Print version and exits'});
-let args = parser.parseArgs();
-
-function totalTime(hrtimeValue) {
-  return (hrtimeValue[0] + hrtimeValue[1] / 1000000000).toPrecision(4);
-}
-
-console.error('Loading model from file %s', args['model']);
-const model_load_start = process.hrtime();
-let model = new Ds.Model(args['model'], BEAM_WIDTH);
-const model_load_end = process.hrtime(model_load_start);
-console.error('Loaded model in %ds.', totalTime(model_load_end));
-
-if (args['lm'] && args['trie']) {
-  console.error('Loading language model from files %s %s', args['lm'], args['trie']);
-  const lm_load_start = process.hrtime();
-  model.enableDecoderWithLM(args['lm'], args['trie'], LM_ALPHA, LM_BETA);
-  const lm_load_end = process.hrtime(lm_load_start);
-  console.error('Loaded language model in %ds.', totalTime(lm_load_end));
-}
-
-// Default is 16kHz
-const AUDIO_SAMPLE_RATE = 16000;
-
-// Defines different thresholds for voice detection
-// NORMAL: Suitable for high bitrate, low-noise data. May classify noise as voice, too.
-// LOW_BITRATE: Detection mode optimised for low-bitrate audio.
-// AGGRESSIVE: Detection mode best suited for somewhat noisy, lower quality audio.
-// VERY_AGGRESSIVE: Detection mode with lowest miss-rate. Works well for most inputs.
-const VAD_MODE = VAD.Mode.NORMAL; -// const VAD_MODE = VAD.Mode.LOW_BITRATE; -// const VAD_MODE = VAD.Mode.AGGRESSIVE; -// const VAD_MODE = VAD.Mode.VERY_AGGRESSIVE; - -// Time in milliseconds for debouncing speech active state -const DEBOUNCE_TIME = 20; - -// Create voice activity stream -const VAD_STREAM = VAD.createStream({ - mode: VAD_MODE, - audioFrequency: AUDIO_SAMPLE_RATE, - debounceTime: DEBOUNCE_TIME -}); - -// Spawn ffmpeg process -const ffmpeg = spawn('ffmpeg', [ - '-hide_banner', - '-nostats', - '-loglevel', 'fatal', - '-i', args['audio'], - '-vn', - '-acodec', 'pcm_s16le', - '-ac', 1, - '-ar', AUDIO_SAMPLE_RATE, - '-f', 's16le', - 'pipe:' -]); - -let audioLength = 0; -let sctx = model.createStream(); - -function finishStream() { - const model_load_start = process.hrtime(); - console.error('Running inference.'); - console.log('Transcription: ', model.finishStream(sctx)); - const model_load_end = process.hrtime(model_load_start); - console.error('Inference took %ds for %ds audio file.', totalTime(model_load_end), audioLength.toPrecision(4)); - audioLength = 0; -} - -function intermediateDecode() { - finishStream(); - sctx = model.createStream(); -} - -function feedAudioContent(chunk) { - audioLength += (chunk.length / 2) * ( 1 / AUDIO_SAMPLE_RATE); - model.feedAudioContent(sctx, chunk.slice(0, chunk.length / 2)); -} - -function processVad(data) { - if (data.speech.start||data.speech.state) feedAudioContent(data.audioData) - else if (data.speech.end) { feedAudioContent(data.audioData); intermediateDecode() } -} - -ffmpeg.stdout.pipe(VAD_STREAM).on('data', processVad); diff --git a/examples/ffmpeg_vad_streaming/package.json b/examples/ffmpeg_vad_streaming/package.json deleted file mode 100644 index 1693b6f6..00000000 --- a/examples/ffmpeg_vad_streaming/package.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "name": "ffmpeg-vad-streaming", - "version": "1.0.0", - "description": "Streaming inference from arbitrary source with VAD and FFmpeg", - "main": "index.js", - "scripts": { - "start": "node ./index.js" - }, - "dependencies": { - "argparse": "^1.0.10", - "deepspeech": "0.6.0", - "node-vad": "^1.1.1", - "util": "^0.11.1" - }, - "license" : "MIT" -} diff --git a/examples/ffmpeg_vad_streaming/test.sh b/examples/ffmpeg_vad_streaming/test.sh deleted file mode 100755 index 3966ca96..00000000 --- a/examples/ffmpeg_vad_streaming/test.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash - -set -xe - -THIS=$(dirname "$0") - -pushd ${THIS} - source ../tests.sh - - npm install $(get_npm_package_url) - npm install - - node ./index.js --audio $HOME/DeepSpeech/audio/2830-3980-0043.wav \ - --lm $HOME/DeepSpeech/models/lm.binary \ - --trie $HOME/DeepSpeech/models/trie \ - --model $HOME/DeepSpeech/models/output_graph.pbmm - - node ./index.js --audio $HOME/DeepSpeech/audio/4507-16021-0012.wav \ - --lm $HOME/DeepSpeech/models/lm.binary \ - --trie $HOME/DeepSpeech/models/trie \ - --model $HOME/DeepSpeech/models/output_graph.pbmm - - node ./index.js --audio $HOME/DeepSpeech/audio/8455-210777-0068.wav \ - --lm $HOME/DeepSpeech/models/lm.binary \ - --trie $HOME/DeepSpeech/models/trie \ - --model $HOME/DeepSpeech/models/output_graph.pbmm -popd diff --git a/examples/mic_vad_streaming/README.rst b/examples/mic_vad_streaming/README.rst deleted file mode 100644 index 4eace37d..00000000 --- a/examples/mic_vad_streaming/README.rst +++ /dev/null @@ -1,69 +0,0 @@ - -Microphone VAD Streaming -======================== - -Stream from microphone to DeepSpeech, using VAD (voice activity detection). 
A fairly simple example demonstrating the DeepSpeech streaming API in Python. Also useful for quick, real-time testing of models and decoding parameters. - -Installation ------------- - -.. code-block:: bash - - pip install -r requirements.txt - -Uses portaudio for microphone access, so on Linux, you may need to install its header files to compile the ``pyaudio`` package: - -.. code-block:: bash - - sudo apt install portaudio19-dev - -Installation on MacOS may fail due to portaudio, use brew to install it: - -.. code-block:: bash - - brew install portaudio - -Usage ------ - -.. code-block:: - - usage: mic_vad_streaming.py [-h] [-v VAD_AGGRESSIVENESS] [--nospinner] - [-w SAVEWAV] -m MODEL [-l LM] - [-t TRIE] [-nf N_FEATURES] [-nc N_CONTEXT] - [-la LM_ALPHA] [-lb LM_BETA] - [-bw BEAM_WIDTH] - - Stream from microphone to DeepSpeech using VAD - - optional arguments: - -h, --help show this help message and exit - -v VAD_AGGRESSIVENESS, --vad_aggressiveness VAD_AGGRESSIVENESS - Set aggressiveness of VAD: an integer between 0 and 3, - 0 being the least aggressive about filtering out non- - speech, 3 the most aggressive. Default: 3 - --nospinner Disable spinner - -w SAVEWAV, --savewav SAVEWAV - Save .wav files of utterences to given directory - -m MODEL, --model MODEL - Path to the model (protocol buffer binary file, or - entire directory containing all standard-named files - for model) - -l LM, --lm LM Path to the language model binary file. Default: - lm.binary - -t TRIE, --trie TRIE Path to the language model trie file created with - native_client/generate_trie. Default: trie - -nf N_FEATURES, --n_features N_FEATURES - Number of MFCC features to use. Default: 26 - -nc N_CONTEXT, --n_context N_CONTEXT - Size of the context window used for producing - timesteps in the input vector. Default: 9 - -la LM_ALPHA, --lm_alpha LM_ALPHA - The alpha hyperparameter of the CTC decoder. Language - Model weight. Default: 0.75 - -lb LM_BETA, --lm_beta LM_BETA - The beta hyperparameter of the CTC decoder. Word insertion - bonus. Default: 1.85 - -bw BEAM_WIDTH, --beam_width BEAM_WIDTH - Beam width used in the CTC decoder when building - candidate transcriptions. Default: 500 diff --git a/examples/mic_vad_streaming/mic_vad_streaming.py b/examples/mic_vad_streaming/mic_vad_streaming.py deleted file mode 100755 index 6933c0dd..00000000 --- a/examples/mic_vad_streaming/mic_vad_streaming.py +++ /dev/null @@ -1,237 +0,0 @@ -import time, logging -from datetime import datetime -import threading, collections, queue, os, os.path -import deepspeech -import numpy as np -import pyaudio -import wave -import webrtcvad -from halo import Halo -from scipy import signal - -logging.basicConfig(level=20) - -class Audio(object): - """Streams raw audio from microphone. 
Data is received in a separate thread, and stored in a buffer, to be read from.""" - - FORMAT = pyaudio.paInt16 - # Network/VAD rate-space - RATE_PROCESS = 16000 - CHANNELS = 1 - BLOCKS_PER_SECOND = 50 - - def __init__(self, callback=None, device=None, input_rate=RATE_PROCESS, file=None): - def proxy_callback(in_data, frame_count, time_info, status): - #pylint: disable=unused-argument - if self.chunk is not None: - in_data = self.wf.readframes(self.chunk) - callback(in_data) - return (None, pyaudio.paContinue) - if callback is None: callback = lambda in_data: self.buffer_queue.put(in_data) - self.buffer_queue = queue.Queue() - self.device = device - self.input_rate = input_rate - self.sample_rate = self.RATE_PROCESS - self.block_size = int(self.RATE_PROCESS / float(self.BLOCKS_PER_SECOND)) - self.block_size_input = int(self.input_rate / float(self.BLOCKS_PER_SECOND)) - self.pa = pyaudio.PyAudio() - - kwargs = { - 'format': self.FORMAT, - 'channels': self.CHANNELS, - 'rate': self.input_rate, - 'input': True, - 'frames_per_buffer': self.block_size_input, - 'stream_callback': proxy_callback, - } - - self.chunk = None - # if not default device - if self.device: - kwargs['input_device_index'] = self.device - elif file is not None: - self.chunk = 320 - self.wf = wave.open(file, 'rb') - - self.stream = self.pa.open(**kwargs) - self.stream.start_stream() - - def resample(self, data, input_rate): - """ - Microphone may not support our native processing sampling rate, so - resample from input_rate to RATE_PROCESS here for webrtcvad and - deepspeech - - Args: - data (binary): Input audio stream - input_rate (int): Input audio rate to resample from - """ - data16 = np.fromstring(string=data, dtype=np.int16) - resample_size = int(len(data16) / self.input_rate * self.RATE_PROCESS) - resample = signal.resample(data16, resample_size) - resample16 = np.array(resample, dtype=np.int16) - return resample16.tostring() - - def read_resampled(self): - """Return a block of audio data resampled to 16000hz, blocking if necessary.""" - return self.resample(data=self.buffer_queue.get(), - input_rate=self.input_rate) - - def read(self): - """Return a block of audio data, blocking if necessary.""" - return self.buffer_queue.get() - - def destroy(self): - self.stream.stop_stream() - self.stream.close() - self.pa.terminate() - - frame_duration_ms = property(lambda self: 1000 * self.block_size // self.sample_rate) - - def write_wav(self, filename, data): - logging.info("write wav %s", filename) - wf = wave.open(filename, 'wb') - wf.setnchannels(self.CHANNELS) - # wf.setsampwidth(self.pa.get_sample_size(FORMAT)) - assert self.FORMAT == pyaudio.paInt16 - wf.setsampwidth(2) - wf.setframerate(self.sample_rate) - wf.writeframes(data) - wf.close() - - -class VADAudio(Audio): - """Filter & segment audio with voice activity detection.""" - - def __init__(self, aggressiveness=3, device=None, input_rate=None, file=None): - super().__init__(device=device, input_rate=input_rate, file=file) - self.vad = webrtcvad.Vad(aggressiveness) - - def frame_generator(self): - """Generator that yields all audio frames from microphone.""" - if self.input_rate == self.RATE_PROCESS: - while True: - yield self.read() - else: - while True: - yield self.read_resampled() - - def vad_collector(self, padding_ms=300, ratio=0.75, frames=None): - """Generator that yields series of consecutive audio frames comprising each utterence, separated by yielding a single None. - Determines voice activity by ratio of frames in padding_ms. 
Uses a buffer to include padding_ms prior to being triggered. - Example: (frame, ..., frame, None, frame, ..., frame, None, ...) - |---utterence---| |---utterence---| - """ - if frames is None: frames = self.frame_generator() - num_padding_frames = padding_ms // self.frame_duration_ms - ring_buffer = collections.deque(maxlen=num_padding_frames) - triggered = False - - for frame in frames: - if len(frame) < 640: - return - - is_speech = self.vad.is_speech(frame, self.sample_rate) - - if not triggered: - ring_buffer.append((frame, is_speech)) - num_voiced = len([f for f, speech in ring_buffer if speech]) - if num_voiced > ratio * ring_buffer.maxlen: - triggered = True - for f, s in ring_buffer: - yield f - ring_buffer.clear() - - else: - yield frame - ring_buffer.append((frame, is_speech)) - num_unvoiced = len([f for f, speech in ring_buffer if not speech]) - if num_unvoiced > ratio * ring_buffer.maxlen: - triggered = False - yield None - ring_buffer.clear() - -def main(ARGS): - # Load DeepSpeech model - if os.path.isdir(ARGS.model): - model_dir = ARGS.model - ARGS.model = os.path.join(model_dir, 'output_graph.pb') - ARGS.lm = os.path.join(model_dir, ARGS.lm) - ARGS.trie = os.path.join(model_dir, ARGS.trie) - - print('Initializing model...') - logging.info("ARGS.model: %s", ARGS.model) - model = deepspeech.Model(ARGS.model, ARGS.beam_width) - if ARGS.lm and ARGS.trie: - logging.info("ARGS.lm: %s", ARGS.lm) - logging.info("ARGS.trie: %s", ARGS.trie) - model.enableDecoderWithLM(ARGS.lm, ARGS.trie, ARGS.lm_alpha, ARGS.lm_beta) - - # Start audio with VAD - vad_audio = VADAudio(aggressiveness=ARGS.vad_aggressiveness, - device=ARGS.device, - input_rate=ARGS.rate, - file=ARGS.file) - print("Listening (ctrl-C to exit)...") - frames = vad_audio.vad_collector() - - # Stream from microphone to DeepSpeech using VAD - spinner = None - if not ARGS.nospinner: - spinner = Halo(spinner='line') - stream_context = model.createStream() - wav_data = bytearray() - for frame in frames: - if frame is not None: - if spinner: spinner.start() - logging.debug("streaming frame") - model.feedAudioContent(stream_context, np.frombuffer(frame, np.int16)) - if ARGS.savewav: wav_data.extend(frame) - else: - if spinner: spinner.stop() - logging.debug("end utterence") - if ARGS.savewav: - vad_audio.write_wav(os.path.join(ARGS.savewav, datetime.now().strftime("savewav_%Y-%m-%d_%H-%M-%S_%f.wav")), wav_data) - wav_data = bytearray() - text = model.finishStream(stream_context) - print("Recognized: %s" % text) - stream_context = model.createStream() - -if __name__ == '__main__': - BEAM_WIDTH = 500 - DEFAULT_SAMPLE_RATE = 16000 - LM_ALPHA = 0.75 - LM_BETA = 1.85 - - import argparse - parser = argparse.ArgumentParser(description="Stream from microphone to DeepSpeech using VAD") - - parser.add_argument('-v', '--vad_aggressiveness', type=int, default=3, - help="Set aggressiveness of VAD: an integer between 0 and 3, 0 being the least aggressive about filtering out non-speech, 3 the most aggressive. 
Default: 3") - parser.add_argument('--nospinner', action='store_true', - help="Disable spinner") - parser.add_argument('-w', '--savewav', - help="Save .wav files of utterences to given directory") - parser.add_argument('-f', '--file', - help="Read from .wav file instead of microphone") - - parser.add_argument('-m', '--model', required=True, - help="Path to the model (protocol buffer binary file, or entire directory containing all standard-named files for model)") - parser.add_argument('-l', '--lm', default='lm.binary', - help="Path to the language model binary file. Default: lm.binary") - parser.add_argument('-t', '--trie', default='trie', - help="Path to the language model trie file created with native_client/generate_trie. Default: trie") - parser.add_argument('-d', '--device', type=int, default=None, - help="Device input index (Int) as listed by pyaudio.PyAudio.get_device_info_by_index(). If not provided, falls back to PyAudio.get_default_device().") - parser.add_argument('-r', '--rate', type=int, default=DEFAULT_SAMPLE_RATE, - help=f"Input device sample rate. Default: {DEFAULT_SAMPLE_RATE}. Your device may require 44100.") - parser.add_argument('-la', '--lm_alpha', type=float, default=LM_ALPHA, - help=f"The alpha hyperparameter of the CTC decoder. Language Model weight. Default: {LM_ALPHA}") - parser.add_argument('-lb', '--lm_beta', type=float, default=LM_BETA, - help=f"The beta hyperparameter of the CTC decoder. Word insertion bonus. Default: {LM_BETA}") - parser.add_argument('-bw', '--beam_width', type=int, default=BEAM_WIDTH, - help=f"Beam width used in the CTC decoder when building candidate transcriptions. Default: {BEAM_WIDTH}") - - ARGS = parser.parse_args() - if ARGS.savewav: os.makedirs(ARGS.savewav, exist_ok=True) - main(ARGS) diff --git a/examples/mic_vad_streaming/requirements.txt b/examples/mic_vad_streaming/requirements.txt deleted file mode 100644 index c7e290ba..00000000 --- a/examples/mic_vad_streaming/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -deepspeech==0.6.0 -pyaudio~=0.2.11 -webrtcvad~=2.0.10 -halo~=0.0.18 -numpy>=1.15.1 -scipy>=1.1.0 diff --git a/examples/mic_vad_streaming/test.sh b/examples/mic_vad_streaming/test.sh deleted file mode 100755 index 5359d68e..00000000 --- a/examples/mic_vad_streaming/test.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -set -xe - -THIS=$(dirname "$0") - -pushd ${THIS} - source ../tests.sh - - pip install --user $(get_python_wheel_url "$1") - pip install --user -r requirements.txt - - pulseaudio & - - python mic_vad_streaming.py \ - --model $HOME/DeepSpeech/models/output_graph.pbmm \ - --lm $HOME/DeepSpeech/models/lm.binary \ - --trie $HOME/DeepSpeech/models/trie \ - --file $HOME/DeepSpeech/audio/2830-3980-0043.wav -popd diff --git a/examples/net_framework/.gitignore b/examples/net_framework/.gitignore deleted file mode 100644 index 3e759b75..00000000 --- a/examples/net_framework/.gitignore +++ /dev/null @@ -1,330 +0,0 @@ -## Ignore Visual Studio temporary files, build results, and -## files generated by popular Visual Studio add-ons. 
-## -## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore - -# User-specific files -*.suo -*.user -*.userosscache -*.sln.docstates - -# User-specific files (MonoDevelop/Xamarin Studio) -*.userprefs - -# Build results -[Dd]ebug/ -[Dd]ebugPublic/ -[Rr]elease/ -[Rr]eleases/ -x64/ -x86/ -bld/ -[Bb]in/ -[Oo]bj/ -[Ll]og/ - -# Visual Studio 2015/2017 cache/options directory -.vs/ -# Uncomment if you have tasks that create the project's static files in wwwroot -#wwwroot/ - -# Visual Studio 2017 auto generated files -Generated\ Files/ - -# MSTest test Results -[Tt]est[Rr]esult*/ -[Bb]uild[Ll]og.* - -# NUNIT -*.VisualState.xml -TestResult.xml - -# Build Results of an ATL Project -[Dd]ebugPS/ -[Rr]eleasePS/ -dlldata.c - -# Benchmark Results -BenchmarkDotNet.Artifacts/ - -# .NET Core -project.lock.json -project.fragment.lock.json -artifacts/ -**/Properties/launchSettings.json - -# StyleCop -StyleCopReport.xml - -# Files built by Visual Studio -*_i.c -*_p.c -*_i.h -*.ilk -*.meta -*.obj -*.iobj -*.pch -*.pdb -*.ipdb -*.pgc -*.pgd -*.rsp -*.sbr -*.tlb -*.tli -*.tlh -*.tmp -*.tmp_proj -*.log -*.vspscc -*.vssscc -.builds -*.pidb -*.svclog -*.scc - -# Chutzpah Test files -_Chutzpah* - -# Visual C++ cache files -ipch/ -*.aps -*.ncb -*.opendb -*.opensdf -*.sdf -*.cachefile -*.VC.db -*.VC.VC.opendb - -# Visual Studio profiler -*.psess -*.vsp -*.vspx -*.sap - -# Visual Studio Trace Files -*.e2e - -# TFS 2012 Local Workspace -$tf/ - -# Guidance Automation Toolkit -*.gpState - -# ReSharper is a .NET coding add-in -_ReSharper*/ -*.[Rr]e[Ss]harper -*.DotSettings.user - -# JustCode is a .NET coding add-in -.JustCode - -# TeamCity is a build add-in -_TeamCity* - -# DotCover is a Code Coverage Tool -*.dotCover - -# AxoCover is a Code Coverage Tool -.axoCover/* -!.axoCover/settings.json - -# Visual Studio code coverage results -*.coverage -*.coveragexml - -# NCrunch -_NCrunch_* -.*crunch*.local.xml -nCrunchTemp_* - -# MightyMoose -*.mm.* -AutoTest.Net/ - -# Web workbench (sass) -.sass-cache/ - -# Installshield output folder -[Ee]xpress/ - -# DocProject is a documentation generator add-in -DocProject/buildhelp/ -DocProject/Help/*.HxT -DocProject/Help/*.HxC -DocProject/Help/*.hhc -DocProject/Help/*.hhk -DocProject/Help/*.hhp -DocProject/Help/Html2 -DocProject/Help/html - -# Click-Once directory -publish/ - -# Publish Web Output -*.[Pp]ublish.xml -*.azurePubxml -# Note: Comment the next line if you want to checkin your web deploy settings, -# but database connection strings (with potential passwords) will be unencrypted -*.pubxml -*.publishproj - -# Microsoft Azure Web App publish settings. Comment the next line if you want to -# checkin your Azure Web App publish settings, but sensitive information contained -# in these scripts will be unencrypted -PublishScripts/ - -# NuGet Packages -*.nupkg -# The packages folder can be ignored because of Package Restore -**/[Pp]ackages/* -# except build/, which is used as an MSBuild target. 
-!**/[Pp]ackages/build/ -# Uncomment if necessary however generally it will be regenerated when needed -#!**/[Pp]ackages/repositories.config -# NuGet v3's project.json files produces more ignorable files -*.nuget.props -*.nuget.targets - -# Microsoft Azure Build Output -csx/ -*.build.csdef - -# Microsoft Azure Emulator -ecf/ -rcf/ - -# Windows Store app package directories and files -AppPackages/ -BundleArtifacts/ -Package.StoreAssociation.xml -_pkginfo.txt -*.appx - -# Visual Studio cache files -# files ending in .cache can be ignored -*.[Cc]ache -# but keep track of directories ending in .cache -!*.[Cc]ache/ - -# Others -ClientBin/ -~$* -*~ -*.dbmdl -*.dbproj.schemaview -*.jfm -*.pfx -*.publishsettings -orleans.codegen.cs - -# Including strong name files can present a security risk -# (https://github.com/github/gitignore/pull/2483#issue-259490424) -#*.snk - -# Since there are multiple workflows, uncomment next line to ignore bower_components -# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) -#bower_components/ - -# RIA/Silverlight projects -Generated_Code/ - -# Backup & report files from converting an old project file -# to a newer Visual Studio version. Backup files are not needed, -# because we have git ;-) -_UpgradeReport_Files/ -Backup*/ -UpgradeLog*.XML -UpgradeLog*.htm -ServiceFabricBackup/ -*.rptproj.bak - -# SQL Server files -*.mdf -*.ldf -*.ndf - -# Business Intelligence projects -*.rdl.data -*.bim.layout -*.bim_*.settings -*.rptproj.rsuser - -# Microsoft Fakes -FakesAssemblies/ - -# GhostDoc plugin setting file -*.GhostDoc.xml - -# Node.js Tools for Visual Studio -.ntvs_analysis.dat -node_modules/ - -# Visual Studio 6 build log -*.plg - -# Visual Studio 6 workspace options file -*.opt - -# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
-*.vbw - -# Visual Studio LightSwitch build output -**/*.HTMLClient/GeneratedArtifacts -**/*.DesktopClient/GeneratedArtifacts -**/*.DesktopClient/ModelManifest.xml -**/*.Server/GeneratedArtifacts -**/*.Server/ModelManifest.xml -_Pvt_Extensions - -# Paket dependency manager -.paket/paket.exe -paket-files/ - -# FAKE - F# Make -.fake/ - -# JetBrains Rider -.idea/ -*.sln.iml - -# CodeRush -.cr/ - -# Python Tools for Visual Studio (PTVS) -__pycache__/ -*.pyc - -# Cake - Uncomment if you are using it -# tools/** -# !tools/packages.config - -# Tabs Studio -*.tss - -# Telerik's JustMock configuration file -*.jmconfig - -# BizTalk build output -*.btp.cs -*.btm.cs -*.odx.cs -*.xsd.cs - -# OpenCover UI analysis results -OpenCover/ - -# Azure Stream Analytics local run output -ASALocalRun/ - -# MSBuild Binary and Structured Log -*.binlog - -# NVidia Nsight GPU debugger configuration file -*.nvuser - -# MFractors (Xamarin productivity tool) working folder -.mfractor/ diff --git a/examples/net_framework/DeepSpeechWPF/App.config b/examples/net_framework/DeepSpeechWPF/App.config deleted file mode 100644 index b50c74f3..00000000 --- a/examples/net_framework/DeepSpeechWPF/App.config +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/examples/net_framework/DeepSpeechWPF/App.xaml b/examples/net_framework/DeepSpeechWPF/App.xaml deleted file mode 100644 index 16ebb0d4..00000000 --- a/examples/net_framework/DeepSpeechWPF/App.xaml +++ /dev/null @@ -1,8 +0,0 @@ - - - diff --git a/examples/net_framework/DeepSpeechWPF/App.xaml.cs b/examples/net_framework/DeepSpeechWPF/App.xaml.cs deleted file mode 100644 index 67dad8ed..00000000 --- a/examples/net_framework/DeepSpeechWPF/App.xaml.cs +++ /dev/null @@ -1,44 +0,0 @@ -using CommonServiceLocator; -using DeepSpeech.WPF.ViewModels; -using DeepSpeechClient.Interfaces; -using GalaSoft.MvvmLight.Ioc; -using System.Windows; - -namespace DeepSpeechWPF -{ - /// - /// Interaction logic for App.xaml - /// - public partial class App : Application - { - protected override void OnStartup(StartupEventArgs e) - { - base.OnStartup(e); - ServiceLocator.SetLocatorProvider(() => SimpleIoc.Default); - - const int BEAM_WIDTH = 500; - - //Register instance of DeepSpeech - DeepSpeechClient.DeepSpeech deepSpeechClient = new DeepSpeechClient.DeepSpeech(); - try - { - deepSpeechClient.CreateModel("output_graph.pbmm", BEAM_WIDTH); - } - catch (System.Exception ex) - { - MessageBox.Show(ex.Message); - Current.Shutdown(); - } - - SimpleIoc.Default.Register(() => deepSpeechClient); - SimpleIoc.Default.Register(); - } - - protected override void OnExit(ExitEventArgs e) - { - base.OnExit(e); - //Dispose instance of DeepSpeech - ServiceLocator.Current.GetInstance()?.Dispose(); - } - } -} diff --git a/examples/net_framework/DeepSpeechWPF/DeepSpeech.WPF.csproj b/examples/net_framework/DeepSpeechWPF/DeepSpeech.WPF.csproj deleted file mode 100644 index 532c1a9a..00000000 --- a/examples/net_framework/DeepSpeechWPF/DeepSpeech.WPF.csproj +++ /dev/null @@ -1,140 +0,0 @@ - - - - - Debug - AnyCPU - {54BFD766-4305-4F4C-BA59-AF45505DF3C1} - WinExe - DeepSpeech.WPF - DeepSpeech.WPF - v4.6.2 - 512 - {60dc8134-eba5-43b8-bcc9-bb4bc16c2548};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} - 4 - true - true - - - - - true - bin\x64\Debug\ - DEBUG;TRACE - full - AnyCPU - prompt - MinimumRecommendedRules.ruleset - false - true - - - bin\x64\Release\ - TRACE - true - pdbonly - x64 - prompt - MinimumRecommendedRules.ruleset - true - true - - - - 
packages\AsyncAwaitBestPractices.3.1.0\lib\netstandard1.0\AsyncAwaitBestPractices.dll - - - packages\AsyncAwaitBestPractices.MVVM.3.1.0\lib\netstandard1.0\AsyncAwaitBestPractices.MVVM.dll - - - packages\CommonServiceLocator.2.0.2\lib\net45\CommonServiceLocator.dll - - - packages\CSCore.1.2.1.2\lib\net35-client\CSCore.dll - - - packages\MvvmLightLibs.5.4.1.1\lib\net45\GalaSoft.MvvmLight.dll - - - packages\MvvmLightLibs.5.4.1.1\lib\net45\GalaSoft.MvvmLight.Extras.dll - - - packages\MvvmLightLibs.5.4.1.1\lib\net45\GalaSoft.MvvmLight.Platform.dll - - - packages\NAudio.1.9.0\lib\net35\NAudio.dll - - - - - - packages\MvvmLightLibs.5.4.1.1\lib\net45\System.Windows.Interactivity.dll - - - - - - - - - 4.0 - - - - - - - - MSBuild:Compile - Designer - - - - MSBuild:Compile - Designer - - - App.xaml - Code - - - - MainWindow.xaml - Code - - - - - Code - - - True - True - Resources.resx - - - True - Settings.settings - True - - - ResXFileCodeGenerator - Resources.Designer.cs - - - - SettingsSingleFileGenerator - Settings.Designer.cs - - - - - - - - {56de4091-bbbe-47e4-852d-7268b33b971f} - DeepSpeechClient - - - - \ No newline at end of file diff --git a/examples/net_framework/DeepSpeechWPF/DeepSpeech.WPF.sln b/examples/net_framework/DeepSpeechWPF/DeepSpeech.WPF.sln deleted file mode 100644 index 96b4e6bc..00000000 --- a/examples/net_framework/DeepSpeechWPF/DeepSpeech.WPF.sln +++ /dev/null @@ -1,31 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.28307.421 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeech.WPF", "DeepSpeech.WPF.csproj", "{54BFD766-4305-4F4C-BA59-AF45505DF3C1}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepSpeechClient", "..\..\..\native_client\dotnet\DeepSpeechClient\DeepSpeechClient.csproj", "{56DE4091-BBBE-47E4-852D-7268B33B971F}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|x64 = Debug|x64 - Release|x64 = Release|x64 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Debug|x64.ActiveCfg = Debug|x64 - {54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Debug|x64.Build.0 = Debug|x64 - {54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Release|x64.ActiveCfg = Release|x64 - {54BFD766-4305-4F4C-BA59-AF45505DF3C1}.Release|x64.Build.0 = Release|x64 - {56DE4091-BBBE-47E4-852D-7268B33B971F}.Debug|x64.ActiveCfg = Debug|x64 - {56DE4091-BBBE-47E4-852D-7268B33B971F}.Debug|x64.Build.0 = Debug|x64 - {56DE4091-BBBE-47E4-852D-7268B33B971F}.Release|x64.ActiveCfg = Release|x64 - {56DE4091-BBBE-47E4-852D-7268B33B971F}.Release|x64.Build.0 = Release|x64 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {19C58802-CCEC-4FD1-8D17-A6EB766116F7} - EndGlobalSection -EndGlobal diff --git a/examples/net_framework/DeepSpeechWPF/MainWindow.xaml b/examples/net_framework/DeepSpeechWPF/MainWindow.xaml deleted file mode 100644 index 00f91fd7..00000000 --- a/examples/net_framework/DeepSpeechWPF/MainWindow.xaml +++ /dev/null @@ -1,102 +0,0 @@ - - - - - - - -