Merge pull request #2354 from lissyx/run-examples-taskcluster
Run examples on TaskCluster
commit f98bfefc77
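Summary of the API migration applied across the examples in this diff: the DeepSpeech 0.6.0-alpha `Model` constructor drops the `N_FEATURES`/`N_CONTEXT` arguments, `enableDecoderWithLM` drops the alphabet argument, and `setupStream()` is renamed `createStream()`. A minimal sketch of that migration using the Python binding (file paths are placeholders, not taken from this PR):

# Sketch of the 0.4.x -> 0.6.0-alpha API migration this diff applies (placeholder paths).
from deepspeech import Model

BEAM_WIDTH = 500
LM_ALPHA = 0.75
LM_BETA = 1.85

# 0.4.x: Model(model_path, N_FEATURES, N_CONTEXT, alphabet_path, BEAM_WIDTH)
# 0.6.0-alpha: the MFCC geometry arguments are gone.
ds = Model('models/output_graph.pbmm', 'models/alphabet.txt', BEAM_WIDTH)

# 0.4.x: enableDecoderWithLM(alphabet_path, lm_path, trie_path, LM_ALPHA, LM_BETA)
# 0.6.0-alpha: the alphabet argument is dropped.
ds.enableDecoderWithLM('models/lm.binary', 'models/trie', LM_ALPHA, LM_BETA)

# setupStream() is renamed; streams are now created with createStream().
sctx = ds.createStream()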
@@ -17,16 +17,6 @@ const LM_ALPHA = 0.75;
 // The beta hyperparameter of the CTC decoder. Word insertion bonus.
 const LM_BETA = 1.85;
 
-// These constants are tied to the shape of the graph used (changing them changes
-// the geometry of the first layer), so make sure you use the same constants that
-// were used during training
-
-// Number of MFCC features to use
-const N_FEATURES = 26;
-
-// Size of the context window used for producing timesteps in the input vector
-const N_CONTEXT = 9;
-
 let VersionAction = function VersionAction(options) {
   options = options || {};
   options.nargs = 0;
@@ -55,15 +45,14 @@ function totalTime(hrtimeValue) {
 
 console.error('Loading model from file %s', args['model']);
 const model_load_start = process.hrtime();
-let model = new Ds.Model(args['model'], N_FEATURES, N_CONTEXT, args['alphabet'], BEAM_WIDTH);
+let model = new Ds.Model(args['model'], args['alphabet'], BEAM_WIDTH);
 const model_load_end = process.hrtime(model_load_start);
 console.error('Loaded model in %ds.', totalTime(model_load_end));
 
 if (args['lm'] && args['trie']) {
   console.error('Loading language model from files %s %s', args['lm'], args['trie']);
   const lm_load_start = process.hrtime();
-  model.enableDecoderWithLM(args['alphabet'], args['lm'], args['trie'],
-                            LM_ALPHA, LM_BETA);
+  model.enableDecoderWithLM(args['lm'], args['trie'], LM_ALPHA, LM_BETA);
   const lm_load_end = process.hrtime(lm_load_start);
   console.error('Loaded language model in %ds.', totalTime(lm_load_end));
 }
@@ -106,7 +95,7 @@ const ffmpeg = spawn('ffmpeg', [
 ]);
 
 let audioLength = 0;
-let sctx = model.setupStream(AUDIO_SAMPLE_RATE);
+let sctx = model.createStream(AUDIO_SAMPLE_RATE);
 
 function finishStream() {
   const model_load_start = process.hrtime();
@@ -119,7 +108,7 @@ function finishStream() {
 
 function intermediateDecode() {
   finishStream();
-  sctx = model.setupStream(AUDIO_SAMPLE_RATE);
+  sctx = model.createStream(AUDIO_SAMPLE_RATE);
 }
 
 function feedAudioContent(chunk) {
@@ -8,7 +8,7 @@
   },
   "dependencies": {
     "argparse": "^1.0.10",
-    "deepspeech": "^0.4.1",
+    "deepspeech": "^0.6.0-alpha.5",
     "node-vad": "^1.1.1",
     "util": "^0.11.1"
   },
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+set -xe
+
+THIS=$(dirname "$0")
+
+pushd ${THIS}
+  source ../tests.sh
+
+  npm install $(get_npm_package_url)
+  npm install
+
+  node ./index.js --audio $HOME/DeepSpeech/audio/2830-3980-0043.wav \
+    --lm $HOME/DeepSpeech/models/lm.binary \
+    --trie $HOME/DeepSpeech/models/trie \
+    --model $HOME/DeepSpeech/models/output_graph.pbmm \
+    --alphabet $HOME/DeepSpeech/models/alphabet.txt
+
+  node ./index.js --audio $HOME/DeepSpeech/audio/4507-16021-0012.wav \
+    --lm $HOME/DeepSpeech/models/lm.binary \
+    --trie $HOME/DeepSpeech/models/trie \
+    --model $HOME/DeepSpeech/models/output_graph.pbmm \
+    --alphabet $HOME/DeepSpeech/models/alphabet.txt
+
+  node ./index.js --audio $HOME/DeepSpeech/audio/8455-210777-0068.wav \
+    --lm $HOME/DeepSpeech/models/lm.binary \
+    --trie $HOME/DeepSpeech/models/trie \
+    --model $HOME/DeepSpeech/models/output_graph.pbmm \
+    --alphabet $HOME/DeepSpeech/models/alphabet.txt
+popd
@@ -20,8 +20,11 @@ class Audio(object):
     CHANNELS = 1
     BLOCKS_PER_SECOND = 50
 
-    def __init__(self, callback=None, device=None, input_rate=RATE_PROCESS):
+    def __init__(self, callback=None, device=None, input_rate=RATE_PROCESS, file=None):
         def proxy_callback(in_data, frame_count, time_info, status):
+            #pylint: disable=unused-argument
+            if self.chunk is not None:
+                in_data = self.wf.readframes(self.chunk)
             callback(in_data)
             return (None, pyaudio.paContinue)
         if callback is None: callback = lambda in_data: self.buffer_queue.put(in_data)
@@ -42,9 +45,13 @@ class Audio(object):
             'stream_callback': proxy_callback,
         }
 
+        self.chunk = None
         # if not default device
         if self.device:
             kwargs['input_device_index'] = self.device
+        elif file is not None:
+            self.chunk = 320
+            self.wf = wave.open(file, 'rb')
 
         self.stream = self.pa.open(**kwargs)
         self.stream.start_stream()
@@ -96,8 +103,8 @@ class Audio(object):
 class VADAudio(Audio):
     """Filter & segment audio with voice activity detection."""
 
-    def __init__(self, aggressiveness=3, device=None, input_rate=None):
-        super().__init__(device=device, input_rate=input_rate)
+    def __init__(self, aggressiveness=3, device=None, input_rate=None, file=None):
+        super().__init__(device=device, input_rate=input_rate, file=file)
         self.vad = webrtcvad.Vad(aggressiveness)
 
     def frame_generator(self):
@@ -121,6 +128,9 @@ class VADAudio(Audio):
         triggered = False
 
         for frame in frames:
+            if len(frame) < 640:
+                return
+
             is_speech = self.vad.is_speech(frame, self.sample_rate)
 
             if not triggered:
@@ -153,23 +163,25 @@ def main(ARGS):
     print('Initializing model...')
     logging.info("ARGS.model: %s", ARGS.model)
     logging.info("ARGS.alphabet: %s", ARGS.alphabet)
-    model = deepspeech.Model(ARGS.model, ARGS.n_features, ARGS.n_context, ARGS.alphabet, ARGS.beam_width)
+    model = deepspeech.Model(ARGS.model, ARGS.alphabet, ARGS.beam_width)
     if ARGS.lm and ARGS.trie:
         logging.info("ARGS.lm: %s", ARGS.lm)
         logging.info("ARGS.trie: %s", ARGS.trie)
-        model.enableDecoderWithLM(ARGS.alphabet, ARGS.lm, ARGS.trie, ARGS.lm_alpha, ARGS.lm_beta)
+        model.enableDecoderWithLM(ARGS.lm, ARGS.trie, ARGS.lm_alpha, ARGS.lm_beta)
 
     # Start audio with VAD
     vad_audio = VADAudio(aggressiveness=ARGS.vad_aggressiveness,
                          device=ARGS.device,
-                         input_rate=ARGS.rate)
+                         input_rate=ARGS.rate,
+                         file=ARGS.file)
     print("Listening (ctrl-C to exit)...")
     frames = vad_audio.vad_collector()
 
     # Stream from microphone to DeepSpeech using VAD
     spinner = None
-    if not ARGS.nospinner: spinner = Halo(spinner='line')
-    stream_context = model.setupStream()
+    if not ARGS.nospinner:
+        spinner = Halo(spinner='line')
+    stream_context = model.createStream()
     wav_data = bytearray()
     for frame in frames:
         if frame is not None:
@@ -185,25 +197,25 @@ def main(ARGS):
                 wav_data = bytearray()
             text = model.finishStream(stream_context)
             print("Recognized: %s" % text)
-            stream_context = model.setupStream()
+            stream_context = model.createStream()
 
 if __name__ == '__main__':
     BEAM_WIDTH = 500
     DEFAULT_SAMPLE_RATE = 16000
     LM_ALPHA = 0.75
     LM_BETA = 1.85
-    N_FEATURES = 26
-    N_CONTEXT = 9
 
     import argparse
     parser = argparse.ArgumentParser(description="Stream from microphone to DeepSpeech using VAD")
 
     parser.add_argument('-v', '--vad_aggressiveness', type=int, default=3,
                         help="Set aggressiveness of VAD: an integer between 0 and 3, 0 being the least aggressive about filtering out non-speech, 3 the most aggressive. Default: 3")
     parser.add_argument('--nospinner', action='store_true',
                         help="Disable spinner")
     parser.add_argument('-w', '--savewav',
                         help="Save .wav files of utterences to given directory")
+    parser.add_argument('-f', '--file',
+                        help="Read from .wav file instead of microphone")
 
     parser.add_argument('-m', '--model', required=True,
                         help="Path to the model (protocol buffer binary file, or entire directory containing all standard-named files for model)")
@@ -214,13 +226,9 @@ if __name__ == '__main__':
     parser.add_argument('-t', '--trie', default='trie',
                         help="Path to the language model trie file created with native_client/generate_trie. Default: trie")
     parser.add_argument('-d', '--device', type=int, default=None,
-                        help="Device input index (Int) as listed by pyaudio.PyAudio.get_device_info_by_index(). If not provided, falls back to PyAudio.get_default_device()")
+                        help="Device input index (Int) as listed by pyaudio.PyAudio.get_device_info_by_index(). If not provided, falls back to PyAudio.get_default_device().")
     parser.add_argument('-r', '--rate', type=int, default=DEFAULT_SAMPLE_RATE,
                         help=f"Input device sample rate. Default: {DEFAULT_SAMPLE_RATE}. Your device may require 44100.")
-    parser.add_argument('-nf', '--n_features', type=int, default=N_FEATURES,
-                        help=f"Number of MFCC features to use. Default: {N_FEATURES}")
-    parser.add_argument('-nc', '--n_context', type=int, default=N_CONTEXT,
-                        help=f"Size of the context window used for producing timesteps in the input vector. Default: {N_CONTEXT}")
     parser.add_argument('-la', '--lm_alpha', type=float, default=LM_ALPHA,
                         help=f"The alpha hyperparameter of the CTC decoder. Language Model weight. Default: {LM_ALPHA}")
     parser.add_argument('-lb', '--lm_beta', type=float, default=LM_BETA,
@@ -1,5 +1,6 @@
-deepspeech~=0.4.1
+deepspeech~=0.6.0a5
 pyaudio~=0.2.11
 webrtcvad~=2.0.10
 halo~=0.0.18
 numpy~=1.15.1
+scipy~=1.1.0
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+set -xe
+
+THIS=$(dirname "$0")
+
+pushd ${THIS}
+  source ../tests.sh
+
+  pip install --user $(get_python_wheel_url "$1")
+  pip install --user -r requirements.txt
+
+  pulseaudio &
+
+  python mic_vad_streaming.py \
+    --model $HOME/DeepSpeech/models/output_graph.pbmm \
+    --alphabet $HOME/DeepSpeech/models/alphabet.txt \
+    --lm $HOME/DeepSpeech/models/lm.binary \
+    --trie $HOME/DeepSpeech/models/trie \
+    --file $HOME/DeepSpeech/audio/2830-3980-0043.wav
+popd
@@ -6,19 +6,17 @@ const Duplex = require('stream').Duplex;
 const Wav = require('node-wav');
 
 const BEAM_WIDTH = 1024;
-const N_FEATURES = 26;
-const N_CONTEXT = 9;
 let modelPath = './models/output_graph.pbmm';
 let alphabetPath = './models/alphabet.txt';
 
-let model = new DeepSpeech.Model(modelPath, N_FEATURES, N_CONTEXT, alphabetPath, BEAM_WIDTH);
+let model = new DeepSpeech.Model(modelPath, alphabetPath, BEAM_WIDTH);
 
 const LM_ALPHA = 0.75;
 const LM_BETA = 1.85;
 let lmPath = './models/lm.binary';
 let triePath = './models/trie';
 
-model.enableDecoderWithLM(alphabetPath, lmPath, triePath, LM_ALPHA, LM_BETA);
+model.enableDecoderWithLM(lmPath, triePath, LM_ALPHA, LM_BETA);
 
 let audioFile = process.argv[2] || './audio/2830-3980-0043.wav';
@@ -69,4 +67,4 @@ audioStream.on('finish', () => {
   let result = model.stt(audioBuffer.slice(0, audioBuffer.length / 2), 16000);
 
   console.log('result:', result);
 });
@@ -8,7 +8,7 @@
   },
   "dependencies": {
     "argparse": "^1.0.10",
-    "deepspeech": "^0.4.1",
+    "deepspeech": "^0.6.0-alpha.5",
     "node-wav": "0.0.2",
     "sox-stream": "^2.0.3",
     "util": "^0.11.1"
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+set -xe
+
+THIS=$(dirname "$0")
+
+pushd ${THIS}
+  source ../tests.sh
+
+  npm install $(get_npm_package_url)
+  npm install
+
+  ln -s $HOME/DeepSpeech/models models
+
+  node index.js $HOME/DeepSpeech/audio/2830-3980-0043.wav
+  node index.js $HOME/DeepSpeech/audio/8455-210777-0068.wav
+  node index.js $HOME/DeepSpeech/audio/4507-16021-0012.wav
+popd
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+set -xe
+
+THIS=$(dirname "$0")
+
+source ../../taskcluster/tc-tests-utils.sh
+
+DEP_TASK_ID=$(curl -s https://queue.taskcluster.net/v1/task/${TASK_ID} | python -c 'import json; import sys; print(" ".join(json.loads(sys.stdin.read())["dependencies"]));')
+
+get_python_wheel_url()
+{
+  local this_python_version=$1
+
+  extract_python_versions "${this_python_version}" "pyver" "pyver_pkg" "py_unicode_type" "pyconf" "pyalias"
+
+  echo "$(get_python_pkg_url "${pyver_pkg}" "${py_unicode_type}" "deepspeech" https://queue.taskcluster.net/v1/task/${DEP_TASK_ID}/artifacts/public)"
+}
+
+get_npm_package_url()
+{
+  echo "https://queue.taskcluster.net/v1/task/${DEP_TASK_ID}/artifacts/public/deepspeech-${DS_VERSION}.tgz"
+}
@@ -22,10 +22,10 @@ def main(args):
     parser.add_argument('--stream', required=False, action='store_true',
                         help='To use deepspeech streaming interface')
     args = parser.parse_args()
-    if args.stream is True and len(sys.argv[1:]) == 3:
+    if args.stream is True:
        print("Opening mic for streaming")
-    elif args.audio is not None and len(sys.argv[1:]) == 6:
+    elif args.audio is not None:
        logging.debug("Transcribing audio file @ %s" % args.audio)
     else:
         parser.print_help()
         parser.exit()
@@ -72,7 +72,7 @@ def main(args):
         logging.debug("************************************************************************************************************")
         print("%-30s %-20.3f %-20.3f %-20.3f %-0.3f" % (filename + ext, audio_length, inference_time, model_retval[1], model_retval[2]))
     else:
-        sctx = model_retval[0].setupStream()
+        sctx = model_retval[0].createStream()
         subproc = subprocess.Popen(shlex.split('rec -q -V0 -e signed -L -c 1 -b 16 -r 16k -t raw - gain -2'),
                                    stdout=subprocess.PIPE,
                                    bufsize=0)
@@ -283,7 +283,7 @@ class App(QMainWindow):
         logging.debug("Start Recording pressed")
         logging.debug("Preparing for transcription...")
 
-        sctx = self.model[0].setupStream()
+        sctx = self.model[0].createStream()
         subproc = subprocess.Popen(shlex.split('rec -q -V0 -e signed -L -c 1 -b 16 -r 16k -t raw - gain -2'),
                                    stdout=subprocess.PIPE,
                                    bufsize=0)
@@ -1,3 +1,3 @@
-deepspeech==0.4.1
+deepspeech~=0.6.0a5
 webrtcvad
 pyqt5
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+set -xe
+
+THIS=$(dirname "$0")
+
+pushd ${THIS}
+  source ../tests.sh
+
+  pip install --user $(get_python_wheel_url "$1")
+  pip install --user -r requirements.txt
+
+  python audioTranscript_cmd.py \
+    --audio $HOME/DeepSpeech/audio/2830-3980-0043.wav \
+    --aggressive 0 \
+    --model $HOME/DeepSpeech/models/
+
+  python audioTranscript_cmd.py \
+    --audio $HOME/DeepSpeech/audio/2830-3980-0043.wav \
+    --aggressive 0 \
+    --model $HOME/DeepSpeech/models/ \
+    --stream
+popd
@@ -16,19 +16,17 @@ Load the pre-trained model into the memory
 Returns a list [DeepSpeech Object, Model Load Time, LM Load Time]
 '''
 def load_model(models, alphabet, lm, trie):
-    N_FEATURES = 26
-    N_CONTEXT = 9
     BEAM_WIDTH = 500
     LM_ALPHA = 0.75
     LM_BETA = 1.85
 
     model_load_start = timer()
-    ds = Model(models, N_FEATURES, N_CONTEXT, alphabet, BEAM_WIDTH)
+    ds = Model(models, alphabet, BEAM_WIDTH)
     model_load_end = timer() - model_load_start
     logging.debug("Loaded model in %0.3fs." % (model_load_end))
 
     lm_load_start = timer()
-    ds.enableDecoderWithLM(alphabet, lm, trie, LM_ALPHA, LM_BETA)
+    ds.enableDecoderWithLM(lm, trie, LM_ALPHA, LM_BETA)
     lm_load_end = timer() - lm_load_start
     logging.debug('Loaded language model in %0.3fs.' % (lm_load_end))
@@ -0,0 +1,61 @@
+$if: '(event.event != "push") && (event.event != "tag")'
+then:
+  taskId: ${taskcluster.taskId}
+  provisionerId: ${taskcluster.docker.provisionerId}
+  workerType: ${taskcluster.docker.workerType}
+  taskGroupId: ${taskcluster.taskGroupId}
+  schedulerId: ${taskcluster.schedulerId}
+  dependencies:
+    $map: { $eval: build.dependencies }
+    each(b):
+      $eval: as_slugid(b)
+  created: { $fromNow: '0 sec' }
+  deadline: { $fromNow: '1 day' }
+  expires: { $fromNow: '7 days' }
+
+  extra:
+    github:
+      { $eval: taskcluster.github_events.pull_request }
+
+  routes:
+    - "notify.irc-channel.${notifications.irc}.on-exception"
+    - "notify.irc-channel.${notifications.irc}.on-failed"
+
+  scopes: [
+    "queue:route:notify.irc-channel.*"
+  ]
+
+  payload:
+    maxRunTime: { $eval: to_int(build.maxRunTime) }
+    image: ${build.docker_image}
+
+    env:
+      DEEPSPEECH_MODEL: "https://github.com/lissyx/DeepSpeech/releases/download/tc-0.6.0/models.tar.gz"
+      DEEPSPEECH_AUDIO: "https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz"
+      PIP_DEFAULT_TIMEOUT: "60"
+
+    command:
+      - "/bin/bash"
+      - "--login"
+      - "-cxe"
+      - $let:
+          extraSystemSetup: { $eval: strip(str(build.system_setup)) }
+        in: >
+          apt-get -qq update && apt-get -qq -y upgrade && apt-get -qq -y install git sox sudo && ${extraSystemSetup} &&
+          adduser --system --home ${system.homedir.linux} ${system.username} &&
+          cd ${system.homedir.linux} &&
+          echo -e "#!/bin/bash\nset -xe\n env && id && mkdir ~/DeepSpeech/ && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha} && wget -O - $DEEPSPEECH_MODEL | tar -C ~/DeepSpeech/ -xzvf - && wget -O - $DEEPSPEECH_AUDIO | tar -C ~/DeepSpeech/ -xzvf - " > /tmp/clone.sh && chmod +x /tmp/clone.sh &&
+          sudo -H -u ${system.username} /bin/bash /tmp/clone.sh &&
+          sudo -H -u ${system.username} --preserve-env /bin/bash ${build.args.tests_cmdline}
+
+    artifacts:
+      "public":
+        type: "directory"
+        path: "/tmp/artifacts/"
+        expires: { $fromNow: '7 days' }
+
+  metadata:
+    name: ${build.metadata.name}
+    description: ${build.metadata.description}
+    owner: ${event.head.user.email}
+    source: ${event.head.repo.url}
@@ -0,0 +1,13 @@
+build:
+  template_file: examples-base.tyml
+  docker_image: "node:10"
+  dependencies:
+    - "node-package-cpu"
+  system_setup:
+    >
+      apt-get -qq -y install ffmpeg
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/examples/ffmpeg_vad_streaming/test.sh"
+  metadata:
+    name: "DeepSpeech examples: ffmpeg VAD Streaming NodeJS v10.x"
+    description: "DeepSpeech examples: ffmpeg VAD Streaming NodeJS v10.x"
@@ -0,0 +1,13 @@
+build:
+  template_file: examples-base.tyml
+  docker_image: "node:8"
+  dependencies:
+    - "node-package-cpu"
+  system_setup:
+    >
+      apt-get -qq -y install ffmpeg
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/examples/ffmpeg_vad_streaming/test.sh"
+  metadata:
+    name: "DeepSpeech examples: ffmpeg VAD Streaming NodeJS v8.x"
+    description: "DeepSpeech examples: ffmpeg VAD Streaming NodeJS v8.x"
@@ -0,0 +1,13 @@
+build:
+  template_file: examples-base.tyml
+  docker_image: "python:3.6"
+  dependencies:
+    - "linux-amd64-cpu-opt"
+  system_setup:
+    >
+      apt-get -qq -y install portaudio19-dev pulseaudio
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/examples/mic_vad_streaming/test.sh 3.6.0:m"
+  metadata:
+    name: "DeepSpeech examples: mic VAD streaming Py3.6"
+    description: "DeepSpeech examples: mic VAD streaming Python 3.6"
@@ -0,0 +1,13 @@
+build:
+  template_file: examples-base.tyml
+  docker_image: "python:3.7"
+  dependencies:
+    - "linux-amd64-cpu-opt"
+  system_setup:
+    >
+      apt-get -qq -y install portaudio19-dev pulseaudio
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/examples/mic_vad_streaming/test.sh 3.7.0:m"
+  metadata:
+    name: "DeepSpeech examples: mic VAD streaming Py3.7"
+    description: "DeepSpeech examples: mic VAD streaming Python 3.7"
@@ -0,0 +1,10 @@
+build:
+  template_file: examples-base.tyml
+  docker_image: "node:10"
+  dependencies:
+    - "node-package-cpu"
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/examples/nodejs_wav/test.sh"
+  metadata:
+    name: "DeepSpeech examples: NodeJS WAV NodeJS v10.x"
+    description: "DeepSpeech examples: NodeJS WAV NodeJS v10.x"
@@ -0,0 +1,10 @@
+build:
+  template_file: examples-base.tyml
+  docker_image: "node:8"
+  dependencies:
+    - "node-package-cpu"
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/examples/nodejs_wav/test.sh"
+  metadata:
+    name: "DeepSpeech examples: NodeJS WAV NodeJS v8.x"
+    description: "DeepSpeech examples: NodeJS WAV NodeJS v8.x"
@@ -0,0 +1,10 @@
+build:
+  template_file: examples-base.tyml
+  docker_image: "python:3.5"
+  dependencies:
+    - "linux-amd64-cpu-opt"
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/examples/vad_transcriber/test.sh 3.5.0:m"
+  metadata:
+    name: "DeepSpeech examples: VAD transcriber Py3.5"
+    description: "DeepSpeech examples: VAD transcriberaming Python 3.5"
@@ -0,0 +1,10 @@
+build:
+  template_file: examples-base.tyml
+  docker_image: "python:3.6"
+  dependencies:
+    - "linux-amd64-cpu-opt"
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/examples/vad_transcriber/test.sh 3.6.0:m"
+  metadata:
+    name: "DeepSpeech examples: VAD transcriber Py3.6"
+    description: "DeepSpeech examples: VAD transcriberaming Python 3.6"
@@ -0,0 +1,10 @@
+build:
+  template_file: examples-base.tyml
+  docker_image: "python:3.7"
+  dependencies:
+    - "linux-amd64-cpu-opt"
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/examples/vad_transcriber/test.sh 3.7.0:m"
+  metadata:
+    name: "DeepSpeech examples: VAD transcriber Py3.7"
+    description: "DeepSpeech examples: VAD transcriberaming Python 3.7"
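For reference, a rough local equivalent of the nodejs_wav CI task (a sketch only, not part of this PR: the asset URLs come from the env block in examples-base.tyml above, the commands mirror examples/nodejs_wav/test.sh, and it assumes the 0.6.0-alpha.5 package can be pulled from the npm registry instead of the TaskCluster artifact):

# Rough local equivalent of the nodejs_wav CI task (assumptions noted above).
mkdir -p $HOME/DeepSpeech
wget -O - https://github.com/lissyx/DeepSpeech/releases/download/tc-0.6.0/models.tar.gz | tar -C $HOME/DeepSpeech/ -xzvf -
wget -O - https://github.com/mozilla/DeepSpeech/releases/download/v0.4.1/audio-0.4.1.tar.gz | tar -C $HOME/DeepSpeech/ -xzvf -
cd examples/nodejs_wav
npm install            # resolves deepspeech ^0.6.0-alpha.5 per package.json
ln -s $HOME/DeepSpeech/models models
node index.js $HOME/DeepSpeech/audio/2830-3980-0043.wav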