Merge branch 'master' into master

Carlos Fonseca Murillo 2018-12-15 17:45:59 +00:00, committed by GitHub
commit fe2963158d
88 changed files with 1885 additions and 56 deletions

@@ -3,7 +3,7 @@
virtualenv -p python3 ../tmp/venv
source ../tmp/venv/bin/activate
pip install -r <(grep -v tensorflow requirements.txt)
pip install tensorflow-gpu==1.12.0rc2
pip install tensorflow-gpu==1.12.0
python3 util/taskcluster.py --arch gpu --target ../tmp/native_client

@@ -890,6 +890,7 @@ def main(_):
if len(FLAGS.worker_hosts) == 0:
# Only one local task: this process (default case - no cluster)
with tf.Graph().as_default():
tf.set_random_seed(FLAGS.random_seed)
train()
# Now do a final test epoch
if FLAGS.test:

@@ -186,7 +186,7 @@ RUN cp /tensorflow/bazel-bin/native_client/generate_trie /DeepSpeech/native_clie
# Install TensorFlow
WORKDIR /DeepSpeech/
RUN pip install tensorflow-gpu==1.12.0rc2
RUN pip install tensorflow-gpu==1.12.0
# Make DeepSpeech and install Python bindings

@@ -227,7 +227,7 @@ If you have a capable (Nvidia, at least 8GB of VRAM) GPU, it is highly recommend
```bash
pip3 uninstall tensorflow
pip3 install 'tensorflow-gpu==1.12.0rc2'
pip3 install 'tensorflow-gpu==1.12.0'
```
### Common Voice training data
@@ -284,7 +284,7 @@ If you are brave enough, you can also include the `other` dataset, which contain
The central (Python) script is `DeepSpeech.py` in the project's root directory. For its list of command line options, you can call:
```bash
./DeepSpeech.py --help
./DeepSpeech.py --helpfull
```
To get this output in a slightly better-formatted way, you can also look at the option definitions at the top of `DeepSpeech.py`.

@@ -1 +1 @@
0.4.0-alpha.0
0.4.0-alpha.2

@@ -0,0 +1,29 @@
# FFmpeg VAD Streaming
Streaming inference from an arbitrary source (FFmpeg input) to DeepSpeech, using VAD (voice activity detection). A fairly simple example demonstrating the DeepSpeech streaming API in Node.js.
This example was successfully tested with a mobile phone streaming a live feed to an RTMP server (nginx-rtmp), which could then be used by this script for near-real-time speech recognition.
## Installation
```bash
npm install
```
FFmpeg must also be installed:
```bash
sudo apt-get install ffmpeg
```
## Usage
Here is an example for a local audio file:
```bash
node ./index.js --audio <AUDIO_FILE> --model $HOME/models/output_graph.pbmm --alphabet $HOME/models/alphabet.txt
```
Here is an example for a remote RTMP-Stream:
```bash
node ./index.js --audio rtmp://<IP>:1935/live/teststream --model $HOME/models/output_graph.pbmm --alphabet $HOME/models/alphabet.txt
```

@@ -0,0 +1,118 @@
#!/usr/bin/env node
const VAD = require("node-vad");
const Ds = require('deepspeech');
const argparse = require('argparse');
const util = require('util');
// These constants control the beam search decoder
// Beam width used in the CTC decoder when building candidate transcriptions
const BEAM_WIDTH = 1024;
// The alpha hyperparameter of the CTC decoder. Language Model weight
const LM_WEIGHT = 1.50;
// Valid word insertion weight. This is used to lessen the word insertion penalty
// when the inserted word is part of the vocabulary
const VALID_WORD_COUNT_WEIGHT = 2.25;
// These constants are tied to the shape of the graph used (changing them changes
// the geometry of the first layer), so make sure you use the same constants that
// were used during training
// Number of MFCC features to use
const N_FEATURES = 26;
// Size of the context window used for producing timesteps in the input vector
const N_CONTEXT = 9;
let VersionAction = function VersionAction(options) {
options = options || {};
options.nargs = 0;
argparse.Action.call(this, options);
};
util.inherits(VersionAction, argparse.Action);
VersionAction.prototype.call = function(parser) {
Ds.printVersions();
process.exit(0);
};
let parser = new argparse.ArgumentParser({addHelp: true, description: 'Running DeepSpeech inference.'});
parser.addArgument(['--model'], {required: true, help: 'Path to the model (protocol buffer binary file)'});
parser.addArgument(['--alphabet'], {required: true, help: 'Path to the configuration file specifying the alphabet used by the network'});
parser.addArgument(['--lm'], {help: 'Path to the language model binary file', nargs: '?'});
parser.addArgument(['--trie'], {help: 'Path to the language model trie file created with native_client/generate_trie', nargs: '?'});
parser.addArgument(['--audio'], {required: true, help: 'Path to the audio file to run (WAV format)'});
parser.addArgument(['--version'], {action: VersionAction, help: 'Print version and exit'});
let args = parser.parseArgs();
function totalTime(hrtimeValue) {
return (hrtimeValue[0] + hrtimeValue[1] / 1000000000).toPrecision(4);
}
console.error('Loading model from file %s', args['model']);
const model_load_start = process.hrtime();
let model = new Ds.Model(args['model'], N_FEATURES, N_CONTEXT, args['alphabet'], BEAM_WIDTH);
const model_load_end = process.hrtime(model_load_start);
console.error('Loaded model in %ds.', totalTime(model_load_end));
if (args['lm'] && args['trie']) {
console.error('Loading language model from files %s %s', args['lm'], args['trie']);
const lm_load_start = process.hrtime();
model.enableDecoderWithLM(args['alphabet'], args['lm'], args['trie'],
LM_WEIGHT, VALID_WORD_COUNT_WEIGHT);
const lm_load_end = process.hrtime(lm_load_start);
console.error('Loaded language model in %ds.', totalTime(lm_load_end));
}
const vad = new VAD(VAD.Mode.NORMAL);
const voice = {START: true, STOP: false};
let sctx = model.setupStream(150, 16000);
let state = voice.STOP;
function finishStream() {
const model_load_start = process.hrtime();
console.error('Running inference.');
console.log('Transcription: ', model.finishStream(sctx));
const model_load_end = process.hrtime(model_load_start);
console.error('Inference took %ds.', totalTime(model_load_end));
}
let ffmpeg = require('child_process').spawn('ffmpeg', [
'-hide_banner',
'-nostats',
'-loglevel', 'fatal',
'-i', args['audio'],
'-af', 'highpass=f=200,lowpass=f=3000',
'-vn',
'-acodec', 'pcm_s16le',
'-ac', '1',
'-ar', '16000',
'-f', 's16le',
'pipe:'
]);
ffmpeg.stdout.on('data', chunk => {
vad.processAudio(chunk, 16000).then(res => {
switch (res) {
case VAD.Event.SILENCE:
if (state === voice.START) {
state = voice.STOP;
finishStream();
sctx = model.setupStream(150,16000);
}
break;
case VAD.Event.VOICE:
state = voice.START;
model.feedAudioContent(sctx, chunk.slice(0, chunk.length / 2));
break;
}
});
});
ffmpeg.stdout.on('close', code => {
finishStream();
});

@@ -0,0 +1,16 @@
{
"name": "ffmpeg-vad-streaming",
"version": "1.0.0",
"description": "Streaming inference from arbitrary source with VAD and FFmpeg",
"main": "index.js",
"scripts": {
"start": "node ./index.js"
},
"dependencies": {
"argparse": "^1.0.10",
"deepspeech": "^0.3.0",
"node-vad": "^1.1.1",
"util": "^0.11.1"
},
"license" : "MIT"
}

@@ -14,6 +14,12 @@ Uses portaudio for microphone access, so on Linux, you may need to install its h
sudo apt install portaudio19-dev
```
Installation on macOS may fail due to portaudio; use brew to install it first:
```bash
brew install portaudio
```
## Usage
```

native_client/Android.mk (new file)

@@ -0,0 +1,14 @@
LOCAL_PATH := $(call my-dir)
include $(CLEAR_VARS)
LOCAL_MODULE := deepspeech-prebuilt
LOCAL_SRC_FILES := $(TFDIR)/bazel-bin/native_client/libdeepspeech.so
include $(PREBUILT_SHARED_LIBRARY)
include $(CLEAR_VARS)
LOCAL_CPP_EXTENSION := .cc .cxx .cpp
LOCAL_MODULE := deepspeech
LOCAL_SRC_FILES := client.cc
LOCAL_SHARED_LIBRARIES := deepspeech-prebuilt
LOCAL_LDFLAGS := -Wl,--no-as-needed
include $(BUILD_EXECUTABLE)

@@ -90,6 +90,62 @@ cd ../DeepSpeech/native_client
make deepspeech
```
### Cross-building for RPi3 ARMv7 / LePotato ARM64
We do support cross-compilation; please refer to our `mozilla/tensorflow` fork, where we define the following `--config` flags:
- `--config=rpi3` and `--config=rpi3_opt` for Raspbian / ARMv7
- `--config=rpi3-armv8` and `--config=rpi3-armv8_opt` for ARMBian / ARM64
So your command line for RPi3 / ARMv7 should look like:
```
bazel build --config=monolithic --config=rpi3 --config=rpi3_opt -c opt --copt=-O3 --copt=-fvisibility=hidden //native_client:libdeepspeech.so //native_client:generate_trie
```
And your command line for LePotato / ARM64 should look like:
```
bazel build --config=monolithic --config=rpi3-armv8 --config=rpi3-armv8_opt -c opt --copt=-O3 --copt=-fvisibility=hidden //native_client:libdeepspeech.so //native_client:generate_trie
```
While we test only on RPi3 Raspbian Stretch / LePotato ARMBian Stretch, anything compatible with `armv7-a cortex-a53` / `armv8-a cortex-a53` should be fine.
The `deepspeech` binary can also be cross-built, with `TARGET=rpi3` or `TARGET=rpi3-armv8`. This might require you to set up a system tree using the `multistrap` tool and the multistrap configuration files: `native_client/multistrap_armbian64_stretch.conf` and `native_client/multistrap_raspbian_stretch.conf`; a sketch of this setup follows the command below.
The path of the system tree can be overridden from the default values defined in `definitions.mk` through the `RASPBIAN` make variable.
```
cd ../DeepSpeech/native_client
make TARGET=<system> deepspeech
```
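For reference, here is a minimal sketch of preparing such a system tree with `multistrap` and pointing the build at it; the target directory is illustrative, and the default lives in `definitions.mk`:
```
# Build a Raspbian Stretch system tree (path is illustrative)
multistrap -d /tmp/multistrap-raspbian-stretch -f native_client/multistrap_raspbian_stretch.conf
# Override the default system tree location when cross-building
make TARGET=rpi3 RASPBIAN=/tmp/multistrap-raspbian-stretch deepspeech
```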
### Android devices
We have preliminary support for Android relying on TensorFlow Lite, with upcoming Java / JNI bindings. For more details on how to experiment with those, please refer to `native_client/java/README.md`.
Please refer to the TensorFlow documentation on how to set up the environment to build for Android (SDK and NDK required).
You can build the `libdeepspeech.so` using (ARMv7):
```
bazel build --config=monolithic --config=android --config=android_arm --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++11 --copt=-D_GLIBCXX_USE_C99 //native_client:libdeepspeech.so
```
Or (ARM64):
```
bazel build --config=monolithic --config=android --config=android_arm64 --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++11 --copt=-D_GLIBCXX_USE_C99 //native_client:libdeepspeech.so
```
Building the `deepspeech` binary will happen through `ndk-build` (ARMv7):
```
cd ../DeepSpeech/native_client
$ANDROID_NDK_HOME/ndk-build APP_PLATFORM=android-21 APP_BUILD_SCRIPT=$(pwd)/Android.mk NDK_PROJECT_PATH=$(pwd) APP_STL=c++_shared TFDIR=$(pwd)/../../tensorflow/ TARGET_ARCH_ABI=armeabi-v7a
```
And (ARM64):
```
cd ../DeepSpeech/native_client
$ANDROID_NDK_HOME/ndk-build APP_PLATFORM=android-21 APP_BUILD_SCRIPT=$(pwd)/Android.mk NDK_PROJECT_PATH=$(pwd) APP_STL=c++_shared TFDIR=$(pwd)/../../tensorflow/ TARGET_ARCH_ABI=arm64-v8a
```
## Installing
After building, the library files and binary can optionally be installed to a system path for ease of development. This is also a required step for bindings generation.
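As a sketch, assuming the Makefile follows its usual `PREFIX` convention (check `definitions.mk` for the exact variable and default):
```
cd ../DeepSpeech/native_client
PREFIX=/usr/local sudo make install
```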

@@ -6,7 +6,9 @@
#include <errno.h>
#include <math.h>
#include <string.h>
#ifndef __ANDROID__
#include <sox.h>
#endif // __ANDROID__
#include <time.h>
#include <unistd.h>
@@ -59,6 +61,7 @@ GetAudioBuffer(const char* path)
{
ds_audio_buffer res = {0};
#ifndef __ANDROID__
sox_format_t* input = sox_open_read(path, NULL, NULL, NULL);
assert(input);
@@ -147,6 +150,51 @@ GetAudioBuffer(const char* path)
// Close sox handles
sox_close(output);
sox_close(input);
#endif // __ANDROID__
#ifdef __ANDROID__
// FIXME: Hack and support only 16kHz mono 16-bits PCM
FILE* wave = fopen(path, "r");
size_t rv;
unsigned short audio_format;
fseek(wave, 20, SEEK_SET); rv = fread(&audio_format, 2, 1, wave);
assert(rv == 1); // fread returns the number of items read, not bytes
unsigned short num_channels;
fseek(wave, 22, SEEK_SET); rv = fread(&num_channels, 2, 1, wave);
assert(rv == 1);
unsigned int sample_rate;
fseek(wave, 24, SEEK_SET); rv = fread(&sample_rate, 4, 1, wave);
assert(rv == 1);
unsigned short bits_per_sample;
fseek(wave, 34, SEEK_SET); rv = fread(&bits_per_sample, 2, 1, wave);
assert(rv == 1);
assert(audio_format == 1); // 1 is PCM
assert(num_channels == 1); // MONO
assert(sample_rate == 16000); // 16000 Hz
assert(bits_per_sample == 16); // 16 bits per sample
fprintf(stderr, "audio_format=%d\n", audio_format);
fprintf(stderr, "num_channels=%d\n", num_channels);
fprintf(stderr, "sample_rate=%d\n", sample_rate);
fprintf(stderr, "bits_per_sample=%d\n", bits_per_sample);
fseek(wave, 40, SEEK_SET); rv = fread(&res.buffer_size, 4, 1, wave);
assert(rv == 1);
fprintf(stderr, "res.buffer_size=%ld\n", res.buffer_size);
fseek(wave, 44, SEEK_SET);
res.buffer = (char*)malloc(sizeof(char) * res.buffer_size);
rv = fread(res.buffer, sizeof(char), res.buffer_size, wave);
assert(rv == res.buffer_size);
fclose(wave);
#endif // __ANDROID__
#ifdef __APPLE__
res.buffer_size = (size_t)(output->olength * 2);
@@ -255,8 +303,10 @@ main(int argc, char **argv)
break;
}
#ifndef __ANDROID__
// Deinitialise and quit
sox_quit();
#endif // __ANDROID__
DS_DestroyModel(ctx);

@@ -1,6 +1,10 @@
#ifndef DEEPSPEECH_H
#define DEEPSPEECH_H
#ifdef __ANDROID__
#define USE_TFLITE
#endif
#ifndef SWIG
#if defined _MSC_VER
#define DEEPSPEECH_EXPORT extern "C" __declspec(dllexport)

native_client/java/.gitignore (vendored, new file)

@@ -0,0 +1,11 @@
*.iml
.gradle
/local.properties
/.idea/caches/build_file_checksums.ser
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
.DS_Store
/build
/captures
.externalNativeBuild

@@ -0,0 +1,29 @@
<component name="ProjectCodeStyleConfiguration">
<code_scheme name="Project" version="173">
<Objective-C-extensions>
<file>
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="Import" />
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="Macro" />
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="Typedef" />
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="Enum" />
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="Constant" />
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="Global" />
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="Struct" />
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="FunctionPredecl" />
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="Function" />
</file>
<class>
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="Property" />
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="Synthesize" />
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="InitMethod" />
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="StaticMethod" />
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="InstanceMethod" />
<option name="com.jetbrains.cidr.lang.util.OCDeclarationKind" value="DeallocMethod" />
</class>
<extensions>
<pair source="cpp" header="h" fileNamingConvention="NONE" />
<pair source="c" header="h" fileNamingConvention="NONE" />
</extensions>
</Objective-C-extensions>
</code_scheme>
</component>

native_client/java/.idea/gradle.xml (generated, new file)

@@ -0,0 +1,18 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="GradleSettings">
<option name="linkedExternalProjectsSettings">
<GradleProjectSettings>
<option name="distributionType" value="DEFAULT_WRAPPED" />
<option name="externalProjectPath" value="$PROJECT_DIR$" />
<option name="modules">
<set>
<option value="$PROJECT_DIR$" />
<option value="$PROJECT_DIR$/app" />
</set>
</option>
<option name="resolveModulePerSourceSet" value="false" />
</GradleProjectSettings>
</option>
</component>
</project>

native_client/java/.idea/misc.xml (generated, new file)

@@ -0,0 +1,38 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="NullableNotNullManager">
<option name="myDefaultNullable" value="android.support.annotation.Nullable" />
<option name="myDefaultNotNull" value="android.support.annotation.NonNull" />
<option name="myNullables">
<value>
<list size="7">
<item index="0" class="java.lang.String" itemvalue="org.jetbrains.annotations.Nullable" />
<item index="1" class="java.lang.String" itemvalue="javax.annotation.Nullable" />
<item index="2" class="java.lang.String" itemvalue="javax.annotation.CheckForNull" />
<item index="3" class="java.lang.String" itemvalue="edu.umd.cs.findbugs.annotations.Nullable" />
<item index="4" class="java.lang.String" itemvalue="android.support.annotation.Nullable" />
<item index="5" class="java.lang.String" itemvalue="androidx.annotation.Nullable" />
<item index="6" class="java.lang.String" itemvalue="androidx.annotation.RecentlyNullable" />
</list>
</value>
</option>
<option name="myNotNulls">
<value>
<list size="6">
<item index="0" class="java.lang.String" itemvalue="org.jetbrains.annotations.NotNull" />
<item index="1" class="java.lang.String" itemvalue="javax.annotation.Nonnull" />
<item index="2" class="java.lang.String" itemvalue="edu.umd.cs.findbugs.annotations.NonNull" />
<item index="3" class="java.lang.String" itemvalue="android.support.annotation.NonNull" />
<item index="4" class="java.lang.String" itemvalue="androidx.annotation.NonNull" />
<item index="5" class="java.lang.String" itemvalue="androidx.annotation.RecentlyNonNull" />
</list>
</value>
</option>
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" project-jdk-name="1.8" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/build/classes" />
</component>
<component name="ProjectType">
<option name="id" value="Android" />
</component>
</project>

@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="RunConfigurationProducerService">
<option name="ignoredProducers">
<set>
<option value="org.jetbrains.plugins.gradle.execution.test.runner.AllInPackageGradleConfigurationProducer" />
<option value="org.jetbrains.plugins.gradle.execution.test.runner.TestClassGradleConfigurationProducer" />
<option value="org.jetbrains.plugins.gradle.execution.test.runner.TestMethodGradleConfigurationProducer" />
</set>
</option>
</component>
</project>

@@ -0,0 +1,19 @@
.PHONY: clean apk-clean
include ../definitions.mk
LIBDEEPSPEECH_SO ?= ${TFDIR}/bazel-bin/native_client/libdeepspeech.so
all: apk
clean: apk-clean
rm -rf *.java jni/deepspeech_wrap.cpp
apk-clean:
./gradlew clean
apk: apk-clean bindings
LIBDEEPSPEECH_SO=$(LIBDEEPSPEECH_SO) ./gradlew build
bindings: clean
swig -c++ -java -package deepspeech.mozilla.org.deepspeech -outdir app/src/main/java/deepspeech/mozilla/org/deepspeech/ -o jni/deepspeech_wrap.cpp jni/deepspeech.i
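A possible invocation of this Makefile, overriding `LIBDEEPSPEECH_SO` on the command line as the `?=` assignment above allows (the library path is illustrative):
```
cd native_client/java
make LIBDEEPSPEECH_SO=/path/to/libdeepspeech.so apk
```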

@@ -0,0 +1,32 @@
DeepSpeech Java / Android bindings
==================================
This is still preliminary work. Please refer to `native_client/README.md` for
building `libdeepspeech.so` and the `deepspeech` binary for Android on ARMv7 and
ARM64.
Running `deepspeech` via adb
============================
You should use `adb push` to send data to the device (a sketch follows the lists
below); please refer to the Android documentation on how to use it.
Please push DeepSpeech data to `/sdcard/deepspeech/`, including:
- `output_graph.tflite` which is the TF Lite model
- `alphabet.txt`
- `lm.binary` and `trie` files, if you want to use the language model; please
be aware that a language model that is too big will make the device run out of memory
Then, push binaries from `native_client.tar.xz` to `/data/local/tmp/ds`:
- `deepspeech`
- `libdeepspeech.so`
- `libc++_shared.so`
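For example, pushing each file individually (local paths are illustrative):
```
adb push output_graph.tflite /sdcard/deepspeech/
adb push alphabet.txt /sdcard/deepspeech/
adb push lm.binary /sdcard/deepspeech/
adb push trie /sdcard/deepspeech/
adb push deepspeech /data/local/tmp/ds/
adb push libdeepspeech.so /data/local/tmp/ds/
adb push libc++_shared.so /data/local/tmp/ds/
```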
You should then be able to run as usual, using a shell from `adb shell`:
```
user@device$ cd /data/local/tmp/ds/
user@device$ LD_LIBRARY_PATH=$(pwd)/ ./deepspeech [...]
```
Please note that the Android linker does not support `rpath`, so you have to set
`LD_LIBRARY_PATH`. Properly wrapped / packaged bindings embed the library
at a place the linker knows to search, so Android apps will be fine.

native_client/java/app/.gitignore (vendored, new file)

@@ -0,0 +1 @@
/build

@@ -0,0 +1,58 @@
# For more information about using CMake with Android Studio, read the
# documentation: https://d.android.com/studio/projects/add-native-code.html
# Sets the minimum version of CMake required to build the native library.
cmake_minimum_required(VERSION 3.4.1)
# Creates and names a library, sets it as either STATIC
# or SHARED, and provides the relative paths to its source code.
# You can define multiple libraries, and CMake builds them for you.
# Gradle automatically packages shared libraries with your APK.
add_library( # Sets the name of the library.
deepspeech-jni
# Sets the library as a shared library.
SHARED
# Provides a relative path to your source file(s).
../jni/deepspeech_wrap.cpp )
add_library( deepspeech-lib
SHARED
IMPORTED )
set_target_properties( deepspeech-lib
PROPERTIES IMPORTED_LOCATION $ENV{LIBDEEPSPEECH_SO} )
add_custom_command( TARGET deepspeech-jni POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
$ENV{LIBDEEPSPEECH_SO}
${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libdeepspeech.so )
# Searches for a specified prebuilt library and stores the path as a
# variable. Because CMake includes system libraries in the search path by
# default, you only need to specify the name of the public NDK library
# you want to add. CMake verifies that the library exists before
# completing its build.
find_library( # Sets the name of the path variable.
log-lib
# Specifies the name of the NDK library that
# you want CMake to locate.
log )
# Specifies libraries CMake should link to your target library. You
# can link multiple libraries, such as libraries you define in this
# build script, prebuilt third-party libraries, or system libraries.
target_link_libraries( # Specifies the target library.
deepspeech-jni
deepspeech-lib
# Links the target library to the log library
# included in the NDK.
${log-lib} )

@@ -0,0 +1,41 @@
apply plugin: 'com.android.application'
android {
compileSdkVersion 27
defaultConfig {
applicationId "deepspeech.mozilla.org.deepspeech"
minSdkVersion 21
targetSdkVersion 27
versionCode 1
versionName "1.0"
testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner"
ndk {
abiFilters 'armeabi-v7a', 'arm64-v8a'
}
externalNativeBuild {
cmake {
cppFlags ""
}
}
}
buildTypes {
release {
minifyEnabled false
proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro'
}
}
externalNativeBuild {
cmake {
path "CMakeLists.txt"
}
}
}
dependencies {
implementation fileTree(dir: 'libs', include: ['*.jar'])
implementation 'com.android.support:appcompat-v7:27.1.1'
implementation 'com.android.support.constraint:constraint-layout:1.1.3'
testImplementation 'junit:junit:4.12'
androidTestImplementation 'com.android.support.test:runner:1.0.2'
androidTestImplementation 'com.android.support.test.espresso:espresso-core:3.0.2'
}

@@ -0,0 +1,21 @@
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
# http://developer.android.com/guide/developing/tools/proguard.html
# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
# public *;
#}
# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable
# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

@@ -0,0 +1,26 @@
package deepspeech.mozilla.org.deepspeech;
import android.content.Context;
import android.support.test.InstrumentationRegistry;
import android.support.test.runner.AndroidJUnit4;
import org.junit.Test;
import org.junit.runner.RunWith;
import static org.junit.Assert.*;
/**
* Instrumented test, which will execute on an Android device.
*
* @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
*/
@RunWith(AndroidJUnit4.class)
public class ExampleInstrumentedTest {
@Test
public void useAppContext() {
// Context of the app under test.
Context appContext = InstrumentationRegistry.getTargetContext();
assertEquals("deepspeech.mozilla.org.deepspeech", appContext.getPackageName());
}
}

@@ -0,0 +1,23 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="deepspeech.mozilla.org.deepspeech">
<application
android:allowBackup="true"
android:icon="@mipmap/ic_launcher"
android:label="@string/app_name"
android:roundIcon="@mipmap/ic_launcher_round"
android:supportsRtl="true"
android:theme="@style/AppTheme">
<activity android:name=".DeepSpeechActivity">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
</activity>
</application>
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
</manifest>

@@ -0,0 +1,10 @@
#include <jni.h>
#include <string>
extern "C" JNIEXPORT jstring JNICALL
Java_deepspeech_mozilla_org_deepspeech_DeepSpeechActivity_stringFromJNI(
JNIEnv* env,
jobject /* this */) {
std::string hello = "Hello from C++";
return env->NewStringUTF(hello.c_str());
}

@@ -0,0 +1,177 @@
package deepspeech.mozilla.org.deepspeech;
import android.support.v7.app.AppCompatActivity;
import android.os.Bundle;
import android.view.View;
import android.widget.TextView;
import android.widget.EditText;
import android.widget.Button;
import android.media.MediaPlayer;
import java.io.RandomAccessFile;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.ByteOrder;
import java.nio.ByteBuffer;
public class DeepSpeechActivity extends AppCompatActivity {
// Used to load the 'native-lib' library on application startup.
static {
System.loadLibrary("deepspeech-jni");
System.loadLibrary("deepspeech");
}
Model _m = null;
EditText _tfliteModel;
EditText _alphabet;
EditText _audioFile;
TextView _decodedString;
TextView _tfliteStatus;
Button _startInference;
final int N_CEP = 26;
final int N_CONTEXT = 9;
final int BEAM_WIDTH = 50;
final float LM_WEIGHT = 1.50f;
final float VALID_WORD_COUNT_WEIGHT = 2.10f;
private char readLEChar(RandomAccessFile f) throws IOException {
byte b1 = f.readByte();
byte b2 = f.readByte();
return (char)(((b2 & 0xFF) << 8) | (b1 & 0xFF)); // mask to avoid sign extension
}
private int readLEInt(RandomAccessFile f) throws IOException {
byte b1 = f.readByte();
byte b2 = f.readByte();
byte b3 = f.readByte();
byte b4 = f.readByte();
return (int)((b1 & 0xFF) | (b2 & 0xFF) << 8 | (b3 & 0xFF) << 16 | (b4 & 0xFF) << 24);
}
private void newModel(String tfliteModel, String alphabet) {
this._tfliteStatus.setText("Creating model");
if (this._m == null) {
this._m = new Model(tfliteModel, N_CEP, N_CONTEXT, alphabet, BEAM_WIDTH);
}
}
private void doInference(String audioFile) {
long inferenceExecTime = 0;
this._startInference.setEnabled(false);
this.newModel(this._tfliteModel.getText().toString(), this._alphabet.getText().toString());
this._tfliteStatus.setText("Extracting audio features ...");
try {
RandomAccessFile wave = new RandomAccessFile(audioFile, "r");
wave.seek(20); char audioFormat = this.readLEChar(wave);
assert (audioFormat == 1); // 1 is PCM
// tv_audioFormat.setText("audioFormat=" + (audioFormat == 1 ? "PCM" : "!PCM"));
wave.seek(22); char numChannels = this.readLEChar(wave);
assert (numChannels == 1); // MONO
// tv_numChannels.setText("numChannels=" + (numChannels == 1 ? "MONO" : "!MONO"));
wave.seek(24); int sampleRate = this.readLEInt(wave);
assert (sampleRate == 16000); // 16000 Hz
// tv_sampleRate.setText("sampleRate=" + (sampleRate == 16000 ? "16kHz" : "!16kHz"));
wave.seek(34); char bitsPerSample = this.readLEChar(wave);
assert (bitsPerSample == 16); // 16 bits per sample
// tv_bitsPerSample.setText("bitsPerSample=" + (bitsPerSample == 16 ? "16-bits" : "!16-bits" ));
wave.seek(40); int bufferSize = this.readLEInt(wave);
assert (bufferSize > 0);
// tv_bufferSize.setText("bufferSize=" + bufferSize);
wave.seek(44);
byte[] bytes = new byte[bufferSize];
wave.readFully(bytes);
short[] shorts = new short[bytes.length/2];
// to turn bytes to shorts as either big endian or little endian.
ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(shorts);
this._tfliteStatus.setText("Running inference ...");
long inferenceStartTime = System.currentTimeMillis();
String decoded = this._m.stt(shorts, shorts.length, sampleRate);
inferenceExecTime = System.currentTimeMillis() - inferenceStartTime;
this._decodedString.setText(decoded);
} catch (FileNotFoundException ex) {
// FIXME: report the error instead of silently ignoring it
} catch (IOException ex) {
// FIXME: report the error instead of silently ignoring it
} finally {
}
this._tfliteStatus.setText("Finished! Took " + inferenceExecTime + "ms");
this._startInference.setEnabled(true);
}
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_deep_speech);
this._decodedString = (TextView) findViewById(R.id.decodedString);
this._tfliteStatus = (TextView) findViewById(R.id.tfliteStatus);
this._tfliteModel = (EditText) findViewById(R.id.tfliteModel);
this._alphabet = (EditText) findViewById(R.id.alphabet);
this._audioFile = (EditText) findViewById(R.id.audioFile);
this._tfliteModel.setText("/sdcard/deepspeech/output_graph.tflite");
this._tfliteStatus.setText("Ready, waiting ...");
this._alphabet.setText("/sdcard/deepspeech/alphabet.txt");
this._audioFile.setText("/sdcard/deepspeech/audio.wav");
this._startInference = (Button) findViewById(R.id.btnStartInference);
}
public void onClick_inference_handler(View v) {
this.playAudioFile();
this.doInference(this._audioFile.getText().toString());
}
public void playAudioFile() {
try {
MediaPlayer mediaPlayer = new MediaPlayer();
mediaPlayer.setDataSource(this._audioFile.getText().toString());
mediaPlayer.prepare();
mediaPlayer.start();
} catch (IOException ex) {
}
}
public void onClick_audio_handler(View v) {
this.playAudioFile();
}
@Override
protected void onDestroy() {
super.onDestroy();
if (this._m != null) {
this._m.destroyModel();
}
}
}

@@ -0,0 +1,44 @@
package deepspeech.mozilla.org.deepspeech;
public class Model {
// FIXME: We should have something better than those SWIGTYPE_*
SWIGTYPE_p_p_ModelState _mspp;
SWIGTYPE_p_ModelState _msp;
public Model(String modelPath, int n_cep, int n_context, String alphabetPath, int beam_width) {
this._mspp = impl.new_modelstatep();
impl.CreateModel(modelPath, n_cep, n_context, alphabetPath, beam_width, this._mspp);
this._msp = impl.modelstatep_value(this._mspp);
}
public void destroyModel() {
impl.DestroyModel(this._msp);
}
public void enableDecoderWithLM(String alphabet, String lm, String trie, float lm_weight, float valid_word_count_weight) {
impl.EnableDecoderWithLM(this._msp, alphabet, lm, trie, lm_weight, valid_word_count_weight);
}
public String stt(short[] buffer, int buffer_size, int sample_rate) {
return impl.SpeechToText(this._msp, buffer, buffer_size, sample_rate);
}
public SWIGTYPE_p_StreamingState setupStream(int prealloc_frames, int sample_rate) {
SWIGTYPE_p_p_StreamingState ssp = impl.new_streamingstatep();
impl.SetupStream(this._msp, prealloc_frames, sample_rate, ssp);
return impl.streamingstatep_value(ssp);
}
public void feedAudioContent(SWIGTYPE_p_StreamingState ctx, short[] buffer, int buffer_size) {
impl.FeedAudioContent(ctx, buffer, buffer_size);
}
public String intermediateDecode(SWIGTYPE_p_StreamingState ctx) {
return impl.IntermediateDecode(ctx);
}
public String finishStream(SWIGTYPE_p_StreamingState ctx) {
return impl.FinishStream(ctx);
}
}

@@ -0,0 +1,34 @@
<vector xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:aapt="http://schemas.android.com/aapt"
android:width="108dp"
android:height="108dp"
android:viewportWidth="108"
android:viewportHeight="108">
<path
android:fillType="evenOdd"
android:pathData="M32,64C32,64 38.39,52.99 44.13,50.95C51.37,48.37 70.14,49.57 70.14,49.57L108.26,87.69L108,109.01L75.97,107.97L32,64Z"
android:strokeWidth="1"
android:strokeColor="#00000000">
<aapt:attr name="android:fillColor">
<gradient
android:endX="78.5885"
android:endY="90.9159"
android:startX="48.7653"
android:startY="61.0927"
android:type="linear">
<item
android:color="#44000000"
android:offset="0.0" />
<item
android:color="#00000000"
android:offset="1.0" />
</gradient>
</aapt:attr>
</path>
<path
android:fillColor="#FFFFFF"
android:fillType="nonZero"
android:pathData="M66.94,46.02L66.94,46.02C72.44,50.07 76,56.61 76,64L32,64C32,56.61 35.56,50.11 40.98,46.06L36.18,41.19C35.45,40.45 35.45,39.3 36.18,38.56C36.91,37.81 38.05,37.81 38.78,38.56L44.25,44.05C47.18,42.57 50.48,41.71 54,41.71C57.48,41.71 60.78,42.57 63.68,44.05L69.11,38.56C69.84,37.81 70.98,37.81 71.71,38.56C72.44,39.3 72.44,40.45 71.71,41.19L66.94,46.02ZM62.94,56.92C64.08,56.92 65,56.01 65,54.88C65,53.76 64.08,52.85 62.94,52.85C61.8,52.85 60.88,53.76 60.88,54.88C60.88,56.01 61.8,56.92 62.94,56.92ZM45.06,56.92C46.2,56.92 47.13,56.01 47.13,54.88C47.13,53.76 46.2,52.85 45.06,52.85C43.92,52.85 43,53.76 43,54.88C43,56.01 43.92,56.92 45.06,56.92Z"
android:strokeWidth="1"
android:strokeColor="#00000000" />
</vector>

@@ -0,0 +1,170 @@
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
android:width="108dp"
android:height="108dp"
android:viewportWidth="108"
android:viewportHeight="108">
<path
android:fillColor="#008577"
android:pathData="M0,0h108v108h-108z" />
<path
android:fillColor="#00000000"
android:pathData="M9,0L9,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,0L19,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M29,0L29,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M39,0L39,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M49,0L49,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M59,0L59,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M69,0L69,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M79,0L79,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M89,0L89,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M99,0L99,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,9L108,9"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,19L108,19"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,29L108,29"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,39L108,39"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,49L108,49"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,59L108,59"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,69L108,69"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,79L108,79"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,89L108,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,99L108,99"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,29L89,29"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,39L89,39"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,49L89,49"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,59L89,59"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,69L89,69"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,79L89,79"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M29,19L29,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M39,19L39,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M49,19L49,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M59,19L59,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M69,19L69,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M79,19L79,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
</vector>

@@ -0,0 +1,192 @@
<?xml version="1.0" encoding="utf-8"?>
<android.support.constraint.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:app="http://schemas.android.com/apk/res-auto"
xmlns:tools="http://schemas.android.com/tools"
android:layout_width="match_parent"
android:layout_height="match_parent"
tools:context=".DeepSpeechActivity">
<!--
<TextView
android:id="@+id/audioFormat"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:text="Hello World!"
app:layout_constraintBottom_toBottomOf="parent"
app:layout_constraintLeft_toLeftOf="parent"
app:layout_constraintRight_toRightOf="parent"
app:layout_constraintTop_toTopOf="parent" />
<TextView
android:id="@+id/numChannels"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:text="Hello World!"
app:layout_constraintBottom_toBottomOf="parent"
app:layout_constraintLeft_toLeftOf="parent"
app:layout_constraintRight_toRightOf="parent"
app:layout_constraintTop_toTopOf="@+id/audioFormat" />
<TextView
android:id="@+id/sampleRate"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:text="Hello World!"
app:layout_constraintBottom_toBottomOf="parent"
app:layout_constraintLeft_toLeftOf="parent"
app:layout_constraintRight_toRightOf="parent"
app:layout_constraintTop_toTopOf="@+id/numChannels" />
<TextView
android:id="@+id/bitsPerSample"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:text="Hello World!"
app:layout_constraintBottom_toBottomOf="parent"
app:layout_constraintLeft_toLeftOf="parent"
app:layout_constraintRight_toRightOf="parent"
app:layout_constraintTop_toTopOf="@+id/sampleRate" />
<TextView
android:id="@+id/bufferSize"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:text="Hello World!"
app:layout_constraintBottom_toBottomOf="parent"
app:layout_constraintLeft_toLeftOf="parent"
app:layout_constraintRight_toRightOf="parent"
app:layout_constraintTop_toTopOf="@+id/bitsPerSample" />
-->
<android.support.constraint.Guideline
android:id="@+id/guideline"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:layout_marginStart="32dp"
android:layout_marginTop="32dp"
android:layout_marginEnd="32dp"
android:layout_marginBottom="32dp"
android:orientation="horizontal"
app:layout_constraintBottom_toBottomOf="parent"
app:layout_constraintEnd_toEndOf="parent"
app:layout_constraintGuide_end="491dp"
app:layout_constraintStart_toStartOf="parent"
app:layout_constraintTop_toTopOf="parent" />
<LinearLayout
android:layout_width="match_parent"
android:layout_height="match_parent"
android:orientation="vertical">
<LinearLayout
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:orientation="horizontal">
<TextView
android:id="@+id/lblTfliteModel"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:layout_weight="1"
android:text="Model file" />
<EditText
android:id="@+id/tfliteModel"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:inputType="text" />
</LinearLayout>
<LinearLayout
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:orientation="horizontal">
<TextView
android:id="@+id/lblAlphabet"
android:layout_width="263dp"
android:layout_height="wrap_content"
android:layout_weight="1"
android:text="Alphabet" />
<EditText
android:id="@+id/alphabet"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:inputType="text" />
</LinearLayout>
<LinearLayout
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:orientation="horizontal">
<TextView
android:id="@+id/lblAudioFile"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:layout_weight="1"
android:text="Audio file" />
<EditText
android:id="@+id/audioFile"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:inputType="text" />
</LinearLayout>
<Space
android:layout_width="match_parent"
android:layout_height="@android:dimen/app_icon_size" />
<TextView
android:id="@+id/tfliteStatus"
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:text="Hello World!"
app:layout_constraintBottom_toBottomOf="parent"
app:layout_constraintLeft_toLeftOf="parent"
app:layout_constraintRight_toRightOf="parent"
app:layout_constraintTop_toTopOf="parent" />
<Space
android:layout_width="match_parent"
android:layout_height="@android:dimen/app_icon_size" />
<TextView
android:id="@+id/decodedString"
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:text="Hello World!"
app:layout_constraintBottom_toBottomOf="parent"
app:layout_constraintLeft_toLeftOf="parent"
app:layout_constraintRight_toRightOf="parent"
app:layout_constraintTop_toTopOf="parent" />
<Space
android:layout_width="match_parent"
android:layout_height="@android:dimen/app_icon_size" />
<Button
android:id="@+id/btnStartInference"
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:text="Run inference!"
android:onClick="onClick_inference_handler" />
<!--
<Space
android:layout_width="match_parent"
android:layout_height="@android:dimen/app_icon_size" />
<Button
android:id="@+id/btnPlayAudioFile"
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:text="Listen to audio"
android:onClick="onClick_audio_handler" />
-->
</LinearLayout>
</android.support.constraint.ConstraintLayout>

@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

Binary files not shown (10 new image assets, 2.0 KiB to 15 KiB).

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<color name="colorPrimary">#008577</color>
<color name="colorPrimaryDark">#00574B</color>
<color name="colorAccent">#D81B60</color>
</resources>

@@ -0,0 +1,3 @@
<resources>
<string name="app_name">DeepSpeech</string>
</resources>

@@ -0,0 +1,11 @@
<resources>
<!-- Base application theme. -->
<style name="AppTheme" parent="Theme.AppCompat.Light.DarkActionBar">
<!-- Customize your theme here. -->
<item name="colorPrimary">@color/colorPrimary</item>
<item name="colorPrimaryDark">@color/colorPrimaryDark</item>
<item name="colorAccent">@color/colorAccent</item>
</style>
</resources>

@@ -0,0 +1,17 @@
package deepspeech.mozilla.org.deepspeech;
import org.junit.Test;
import static org.junit.Assert.*;
/**
* Example local unit test, which will execute on the development machine (host).
*
* @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
*/
public class ExampleUnitTest {
@Test
public void addition_isCorrect() {
assertEquals(4, 2 + 2);
}
}

@@ -0,0 +1,27 @@
// Top-level build file where you can add configuration options common to all sub-projects/modules.
buildscript {
repositories {
google()
jcenter()
}
dependencies {
classpath 'com.android.tools.build:gradle:3.2.1'
// NOTE: Do not place your application dependencies here; they belong
// in the individual module build.gradle files
}
}
allprojects {
repositories {
google()
jcenter()
}
}
task clean(type: Delete) {
delete rootProject.buildDir
}

@@ -0,0 +1,15 @@
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx1536m
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true

Binary file not shown.

@@ -0,0 +1,5 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-4.6-all.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists

native_client/java/gradlew (vendored, executable, new file)

@@ -0,0 +1,172 @@
#!/usr/bin/env sh
##############################################################################
##
## Gradle start up script for UN*X
##
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
ls=`ls -ld "$PRG"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "$PRG"`"/$link"
fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null
APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS=""
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"
warn () {
echo "$*"
}
die () {
echo
echo "$*"
echo
exit 1
}
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
CYGWIN* )
cygwin=true
;;
Darwin* )
darwin=true
;;
MINGW* )
msys=true
;;
NONSTOP* )
nonstop=true
;;
esac
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
else
JAVACMD="$JAVA_HOME/bin/java"
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD="java"
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
MAX_FD_LIMIT=`ulimit -H -n`
if [ $? -eq 0 ] ; then
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
MAX_FD="$MAX_FD_LIMIT"
fi
ulimit -n $MAX_FD
if [ $? -ne 0 ] ; then
warn "Could not set maximum file descriptor limit: $MAX_FD"
fi
else
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
fi
fi
# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi
# For Cygwin, switch paths to Windows format before running java
if $cygwin ; then
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
JAVACMD=`cygpath --unix "$JAVACMD"`
# We build the pattern for arguments to be converted via cygpath
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
SEP=""
for dir in $ROOTDIRSRAW ; do
ROOTDIRS="$ROOTDIRS$SEP$dir"
SEP="|"
done
OURCYGPATTERN="(^($ROOTDIRS))"
# Add a user-defined pattern to the cygpath arguments
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
fi
# Now convert the arguments - kludge to limit ourselves to /bin/sh
i=0
for arg in "$@" ; do
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
else
eval `echo args$i`="\"$arg\""
fi
i=$((i+1))
done
case $i in
(0) set -- ;;
(1) set -- "$args0" ;;
(2) set -- "$args0" "$args1" ;;
(3) set -- "$args0" "$args1" "$args2" ;;
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
esac
fi
# Escape application args
save () {
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
echo " "
}
APP_ARGS=$(save "$@")
# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
cd "$(dirname "$0")"
fi
exec "$JAVACMD" "$@"

native_client/java/gradlew.bat (vendored, new file)

@@ -0,0 +1,84 @@
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS=
@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init
echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto init
echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:init
@rem Get command-line arguments, handling Windows variants
if not "%OS%" == "Windows_NT" goto win9xME_args
:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
set _SKIP=2
:win9xME_args_slurp
if "x%~1" == "x" goto execute
set CMD_LINE_ARGS=%*
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega

@@ -0,0 +1,20 @@
%module impl
%{
#define SWIG_FILE_WITH_INIT
#include "../../deepspeech.h"
%}
%include "typemaps.i"
%include "arrays_java.i"
// apply to DS_FeedAudioContent and DS_SpeechToText
%apply short[] { short* };
%include "cpointer.i"
%pointer_functions(ModelState*, modelstatep);
%pointer_functions(StreamingState*, streamingstatep);
%rename ("%(strip:[DS_])s") "";
%include "../deepspeech.h"

@@ -0,0 +1 @@
include ':app'

@@ -10,3 +10,19 @@ rm -rf windows include lm/filter lm/builder util/stream util/getopt.* python
This was done in order to ensure uniqueness of double_conversion:
git grep 'double_conversion' | cut -d':' -f1 | sort | uniq | xargs sed -ri 's/double_conversion/kenlm_double_conversion/g'
Please apply this patch to be able to build on Android (one way to apply it is sketched after the hunk):
diff --git a/native_client/kenlm/util/file.cc b/native_client/kenlm/util/file.cc
index d53dc0a..b5e36b2 100644
--- a/native_client/kenlm/util/file.cc
+++ b/native_client/kenlm/util/file.cc
@@ -540,7 +540,7 @@ std::string DefaultTempDirectory() {
const char *const vars[] = {"TMPDIR", "TMP", "TEMPDIR", "TEMP", 0};
for (int i=0; vars[i]; ++i) {
char *val =
-#if defined(_GNU_SOURCE)
+#if defined(_GNU_SOURCE) && defined(__GLIBC_PREREQ)
#if __GLIBC_PREREQ(2,17)
secure_getenv
#else // __GLIBC_PREREQ
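One way to apply it is to save the hunk above to a file and use `git apply` from the DeepSpeech repository root (the patch file name is illustrative):
```
git apply kenlm-android.patch
```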

@@ -540,7 +540,7 @@ std::string DefaultTempDirectory() {
const char *const vars[] = {"TMPDIR", "TMP", "TEMPDIR", "TEMP", 0};
for (int i=0; vars[i]; ++i) {
char *val =
#if defined(_GNU_SOURCE)
#if defined(_GNU_SOURCE) && defined(__GLIBC_PREREQ)
#if __GLIBC_PREREQ(2,17)
secure_getenv
#else // __GLIBC_PREREQ

@@ -1,7 +1,7 @@
pandas
progressbar2
python-utils
tensorflow == 1.12.0rc2
tensorflow == 1.12.0
numpy
matplotlib
scipy

@@ -18,6 +18,7 @@ build:
scripts:
build: ''
package: ''
nc_asset_name: 'native_client.tar.xz'
args:
tests_cmdline: ''
convert_graphdef: ''

@@ -0,0 +1,22 @@
build:
template_file: linux-opt-base.tyml
routes:
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.android-arm64"
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.android-arm64"
- "index.project.deepspeech.deepspeech.native_client.android-arm64.${event.head.sha}"
- "notify.irc-channel.${notifications.irc}.on-exception"
- "notify.irc-channel.${notifications.irc}.on-failed"
system_setup:
>
${swig.packages.install_script}
system_config:
>
${swig.patch_nodejs.linux}
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e232881c5a18204d946d4feba3c5aaa2d2c7dba0.android-arm64/artifacts/public/home.tar.xz"
scripts:
build: "taskcluster/android-build.sh arm64-v8a"
package: "taskcluster/android-package.sh arm64-v8a"
nc_asset_name: "native_client.arm64.cpu.android.tar.xz"
metadata:
name: "DeepSpeech Android ARM64"
description: "Building DeepSpeech for Android ARM64, optimized version"

View File

@ -0,0 +1,22 @@
build:
template_file: linux-opt-base.tyml
routes:
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.android-armv7"
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.android-armv7"
- "index.project.deepspeech.deepspeech.native_client.android-armv7.${event.head.sha}"
- "notify.irc-channel.${notifications.irc}.on-exception"
- "notify.irc-channel.${notifications.irc}.on-failed"
system_setup:
>
${swig.packages.install_script}
system_config:
>
${swig.patch_nodejs.linux}
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e232881c5a18204d946d4feba3c5aaa2d2c7dba0.android-armv7/artifacts/public/home.tar.xz"
scripts:
build: "taskcluster/android-build.sh armeabi-v7a"
package: "taskcluster/android-package.sh armeabi-v7a"
nc_asset_name: "native_client.armv7.cpu.android.tar.xz"
metadata:
name: "DeepSpeech Android ARMv7"
description: "Building DeepSpeech for Android ARMv7, optimized version"

View File

@ -0,0 +1,30 @@
#!/bin/bash
set -xe
arm_flavor=$1
source $(dirname "$0")/../tc-tests-utils.sh
source ${DS_ROOT_TASK}/DeepSpeech/tf/tc-vars.sh
BAZEL_TARGETS="
//native_client:libdeepspeech.so
"
if [ "${arm_flavor}" = "armeabi-v7a" ]; then
LOCAL_ANDROID_FLAGS="${BAZEL_ANDROID_ARM_FLAGS}"
fi
if [ "${arm_flavor}" = "arm64-v8a" ]; then
LOCAL_ANDROID_FLAGS="${BAZEL_ANDROID_ARM64_FLAGS}"
fi
BAZEL_BUILD_FLAGS="${LOCAL_ANDROID_FLAGS} ${BAZEL_EXTRA_FLAGS}"
BAZEL_ENV_FLAGS="TF_NEED_CUDA=0"
SYSTEM_TARGET=
SYSTEM_RASPBIAN=
do_bazel_build
do_deepspeech_ndk_build "${arm_flavor}"

taskcluster/android-package.sh (new executable file)
View File

@ -0,0 +1,13 @@
#!/bin/bash
set -xe
arm_flavor=$1
source $(dirname "$0")/../tc-tests-utils.sh
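# Publish the Bazel build logs and the packaged native client as TaskCluster artifacts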
mkdir -p ${TASKCLUSTER_ARTIFACTS} || true
cp ${DS_ROOT_TASK}/DeepSpeech/tf/bazel*.log ${TASKCLUSTER_ARTIFACTS}/
package_native_client_ndk "native_client.tar.xz" "${arm_flavor}"

View File

@ -6,10 +6,11 @@ build:
- "index.project.deepspeech.deepspeech.native_client.osx.${event.head.sha}"
- "notify.irc-channel.${notifications.irc}.on-exception"
- "notify.irc-channel.${notifications.irc}.on-failed"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.bea86c1e884730cf7f8615eb24d31872c198c766.osx/artifacts/public/home.tar.xz"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e232881c5a18204d946d4feba3c5aaa2d2c7dba0.osx/artifacts/public/home.tar.xz"
scripts:
build: "taskcluster/host-build.sh"
package: "taskcluster/package.sh"
nc_asset_name: "native_client.amd64.cpu.osx.tar.xz"
maxRunTime: 14400
metadata:
name: "DeepSpeech OSX AMD64 CPU"

View File

@ -6,7 +6,7 @@ build:
- "index.project.deepspeech.deepspeech.native_client.osx-ctc.${event.head.sha}"
- "notify.irc-channel.${notifications.irc}.on-exception"
- "notify.irc-channel.${notifications.irc}.on-failed"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.bea86c1e884730cf7f8615eb24d31872c198c766.osx/artifacts/public/home.tar.xz"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e232881c5a18204d946d4feba3c5aaa2d2c7dba0.osx/artifacts/public/home.tar.xz"
maxRunTime: 14400
scripts:
build: 'taskcluster/decoder-build.sh'

View File

@ -15,6 +15,7 @@ expires:
else: { $fromNow: '7 days' }
extra:
+nc_asset_name: { $eval: build.nc_asset_name }
github:
$if: '(event.event == "push") || (event.event == "tag")'
then: { $eval: taskcluster.github_events.merge }

View File

@ -14,10 +14,11 @@ build:
system_config:
>
${swig.patch_nodejs.linux}
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.bea86c1e884730cf7f8615eb24d31872c198c766.cpu/artifacts/public/home.tar.xz"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e232881c5a18204d946d4feba3c5aaa2d2c7dba0.cpu/artifacts/public/home.tar.xz"
scripts:
build: "taskcluster/host-build.sh"
package: "taskcluster/package.sh"
nc_asset_name: "native_client.amd64.cpu.linux.tar.xz"
metadata:
name: "DeepSpeech Linux AMD64 CPU"
description: "Building DeepSpeech for Linux/AMD64, CPU only, optimized version"

View File

@ -14,7 +14,7 @@ build:
system_config:
>
${swig.patch_nodejs.linux}
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.bea86c1e884730cf7f8615eb24d31872c198c766.cpu/artifacts/public/home.tar.xz"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e232881c5a18204d946d4feba3c5aaa2d2c7dba0.cpu/artifacts/public/home.tar.xz"
scripts:
build: 'taskcluster/decoder-build.sh'
package: 'taskcluster/decoder-package.sh'

View File

@ -12,11 +12,12 @@ build:
system_config:
>
${swig.patch_nodejs.linux}
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.bea86c1e884730cf7f8615eb24d31872c198c766.gpu/artifacts/public/home.tar.xz"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e232881c5a18204d946d4feba3c5aaa2d2c7dba0.gpu/artifacts/public/home.tar.xz"
maxRunTime: 14400
scripts:
build: "taskcluster/cuda-build.sh"
package: "taskcluster/package.sh"
nc_asset_name: "native_client.amd64.cuda.linux.tar.xz"
metadata:
name: "DeepSpeech Linux AMD64 CUDA"
description: "Building DeepSpeech for Linux/AMD64, CUDA-enabled, optimized version"

View File

@ -4,7 +4,7 @@ build:
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.arm64"
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.arm64"
- "index.project.deepspeech.deepspeech.native_client.arm64.${event.head.sha}"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.bea86c1e884730cf7f8615eb24d31872c198c766.arm64/artifacts/public/home.tar.xz"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e232881c5a18204d946d4feba3c5aaa2d2c7dba0.arm64/artifacts/public/home.tar.xz"
## multistrap 2.2.0-ubuntu1 is broken in 14.04: https://bugs.launchpad.net/ubuntu/+source/multistrap/+bug/1313787
system_setup:
>
@ -21,6 +21,7 @@ build:
scripts:
build: "taskcluster/arm64-build.sh"
package: "taskcluster/package.sh"
nc_asset_name: "native_client.arm64.cpu.linux.tar.xz"
metadata:
name: "DeepSpeech Linux ARM64 Cortex-A53 CPU"
description: "Building DeepSpeech for Linux ARM64 Cortex-A53, CPU only, optimized version"

View File

@ -17,6 +17,7 @@ then:
else: { $fromNow: '7 days' }
extra:
+nc_asset_name: { $eval: build.nc_asset_name }
github:
$if: '(event.event == "push") || (event.event == "tag")'
then: { $eval: taskcluster.github_events.merge }

View File

@ -4,7 +4,7 @@ build:
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.arm"
- "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.arm"
- "index.project.deepspeech.deepspeech.native_client.arm.${event.head.sha}"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.bea86c1e884730cf7f8615eb24d31872c198c766.arm/artifacts/public/home.tar.xz"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e232881c5a18204d946d4feba3c5aaa2d2c7dba0.arm/artifacts/public/home.tar.xz"
## multistrap 2.2.0-ubuntu1 is broken in 14.04: https://bugs.launchpad.net/ubuntu/+source/multistrap/+bug/1313787
system_setup:
>
@ -21,6 +21,7 @@ build:
scripts:
build: "taskcluster/rpi3-build.sh"
package: "taskcluster/package.sh"
nc_asset_name: "native_client.rpi3.cpu.linux.tar.xz"
metadata:
name: "DeepSpeech Linux RPi3/ARMv7 CPU"
description: "Building DeepSpeech for Linux RPi3 ARMv7, CPU only, optimized version"

View File

@ -16,7 +16,7 @@ build:
system_config:
>
${swig.patch_nodejs.linux}
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.bea86c1e884730cf7f8615eb24d31872c198c766.cpu/artifacts/public/home.tar.xz"
tensorflow: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e232881c5a18204d946d4feba3c5aaa2d2c7dba0.cpu/artifacts/public/home.tar.xz"
scripts:
build: "taskcluster/node-build.sh"
package: "taskcluster/node-package.sh"

View File

@ -2,25 +2,29 @@ build:
template_file: simple-task.tyml
dependencies:
# Make sure builds are ready
# - "linux-arm64-cpu-opt" Aarch64 packages are refused by upload.pypi.org
- "linux-arm64-cpu-opt"
- "darwin-amd64-cpu-opt"
- "linux-amd64-cpu-opt"
- "linux-amd64-gpu-opt"
- "linux-rpi3-cpu-opt"
- "node-package"
- "android-arm64-cpu-opt"
- "android-armv7-cpu-opt"
allowed:
- "tag"
ref_match: "refs/tags/"
routes:
- "notify.irc-channel.${notifications.irc}.on-exception"
- "notify.irc-channel.${notifications.irc}.on-failed"
upload_targets:
- "github"
artifacts_deps:
python:
- "darwin-amd64-cpu-opt"
- "linux-amd64-cpu-opt"
- "linux-amd64-gpu-opt"
- "linux-rpi3-cpu-opt"
# - "linux-arm64-cpu-opt" Aarch64 packages are refused by upload.pypi.org
- "linux-arm64-cpu-opt"
javascript:
# GPU package
- "linux-amd64-gpu-opt"
@ -32,6 +36,8 @@ build:
- "linux-amd64-gpu-opt"
- "linux-rpi3-cpu-opt"
- "linux-arm64-cpu-opt"
- "android-arm64-cpu-opt"
- "android-armv7-cpu-opt"
metadata:
name: "DeepSpeech Packages"
description: "Trigger Uploading of DeepSpeech Packages to the Internets"
name: "DeepSpeech GitHub Packages"
description: "Trigger Uploading of DeepSpeech Packages to GitHub release page"

View File

@ -0,0 +1,25 @@
build:
template_file: simple-task.tyml
dependencies:
# Make sure builds are ready
- "linux-amd64-gpu-opt"
- "node-package"
allowed:
- "tag"
ref_match: "refs/tags/"
routes:
- "notify.irc-channel.${notifications.irc}.on-exception"
- "notify.irc-channel.${notifications.irc}.on-failed"
upload_targets:
- "npm"
artifacts_deps:
python: []
cpp: []
javascript:
# GPU package
- "linux-amd64-gpu-opt"
# CPU package with all archs
- "node-package"
metadata:
name: "DeepSpeech NPM Packages"
description: "Trigger Uploading of DeepSpeech Packages to NPM registry"

View File

@ -0,0 +1,29 @@
build:
template_file: simple-task.tyml
dependencies:
# Make sure builds are ready
# - "linux-arm64-cpu-opt" Aarch64 packages are refused by upload.pypi.org
- "darwin-amd64-cpu-opt"
- "linux-amd64-cpu-opt"
- "linux-amd64-gpu-opt"
- "linux-rpi3-cpu-opt"
allowed:
- "tag"
ref_match: "refs/tags/"
routes:
- "notify.irc-channel.${notifications.irc}.on-exception"
- "notify.irc-channel.${notifications.irc}.on-failed"
upload_targets:
- "pypi"
artifacts_deps:
javascript: []
cpp: []
python:
- "darwin-amd64-cpu-opt"
- "linux-amd64-cpu-opt"
- "linux-amd64-gpu-opt"
- "linux-rpi3-cpu-opt"
# - "linux-arm64-cpu-opt" Aarch64 packages are refused by upload.pypi.org
metadata:
name: "DeepSpeech PyPi Packages"
description: "Trigger Uploading of DeepSpeech Packages to PyPi"

View File

@ -26,6 +26,7 @@ then:
{ $eval: build.routes }
payload:
upload_targets: { $eval: build.upload_targets }
+artifacts_deps:
python:
$map: { $eval: build.artifacts_deps.python }

View File

@ -43,7 +43,7 @@ then:
PIP_DEFAULT_TIMEOUT: "60"
PIP_EXTRA_INDEX_URL: "https://lissyx.github.io/deepspeech-python-wheels/"
EXTRA_PYTHON_CONFIGURE_OPTS: "--with-fpectl" # Required by Debian Stretch
EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v1.12.0-rc2-11-gbea86c1"
EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v1.12.0-10-ge232881"
command:
- "/bin/bash"

View File

@ -41,7 +41,7 @@ then:
DEEPSPEECH_TEST_MODEL: https://queue.taskcluster.net/v1/task/${training}/artifacts/public/output_graph.pb
DEEPSPEECH_PROD_MODEL: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod-ctcdecode/output_graph.pb
DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod-ctcdecode/output_graph.pbmm
EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v1.12.0-rc2-11-gbea86c1"
EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v1.12.0-10-ge232881"
command:
- - "/bin/bash"

View File

@ -45,7 +45,7 @@ then:
DEEPSPEECH_PROD_MODEL_MMAP: https://github.com/reuben/DeepSpeech/releases/download/v0.2.0-prod-ctcdecode/output_graph.pbmm
DECODER_ARTIFACTS_ROOT: https://queue.taskcluster.net/v1/task/${linux_amd64_ctc}/artifacts/public
PIP_DEFAULT_TIMEOUT: "60"
EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v1.12.0-rc2-11-gbea86c1"
EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v1.12.0-10-ge232881"
command:
- "/bin/bash"

View File

@ -4,7 +4,7 @@ build:
- "test-training_upstream-linux-amd64-py27mu-opt"
args:
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/tc-lite_benchmark_model-ds-tests.sh"
-benchmark_model_bin: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.bea86c1e884730cf7f8615eb24d31872c198c766.cpu/artifacts/public/lite_benchmark_model"
+benchmark_model_bin: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e232881c5a18204d946d4feba3c5aaa2d2c7dba0.cpu/artifacts/public/lite_benchmark_model"
metadata:
name: "DeepSpeech Linux AMD64 CPU TF Lite benchmark_model"
description: "Testing DeepSpeech TF Lite benchmark_model for Linux/AMD64, CPU only, optimized version"

View File

@ -43,7 +43,7 @@ then:
PIP_DEFAULT_TIMEOUT: "60"
PIP_EXTRA_INDEX_URL: "https://www.piwheels.org/simple"
EXTRA_PYTHON_CONFIGURE_OPTS: "--with-fpectl" # Required by Raspbian Stretch / PiWheels
EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v1.12.0-rc2-11-gbea86c1"
EXPECTED_TENSORFLOW_VERSION: "TensorFlow: v1.12.0-10-ge232881"
command:
- "/bin/bash"

View File

@ -7,7 +7,7 @@ build:
apt-get -qq -y install ${python.packages_trusty.apt}
args:
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/tc-train-tests.sh 2.7.14:mu"
-convert_graphdef: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.bea86c1e884730cf7f8615eb24d31872c198c766.cpu/artifacts/public/convert_graphdef_memmapped_format"
+convert_graphdef: "https://index.taskcluster.net/v1/task/project.deepspeech.tensorflow.pip.r1.12.e232881c5a18204d946d4feba3c5aaa2d2c7dba0.cpu/artifacts/public/convert_graphdef_memmapped_format"
metadata:
name: "DeepSpeech Linux AMD64 CPU upstream training Py2.7 mu"
description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 using upstream TensorFlow Python 2.7 mu, CPU only, optimized version"

View File

@ -151,12 +151,12 @@ assert_correct_multi_ldc93s1()
assert_correct_ldc93s1_prodmodel()
{
assert_correct_inference "$1" "she had out in greasy wash water all year"
assert_correct_inference "$1" "she had a due in greasy wash water year"
}
assert_correct_ldc93s1_prodmodel_stereo_44k()
{
assert_correct_inference "$1" "she had out and greasy wash water all year"
assert_correct_inference "$1" "she had a due in greasy wash water year"
}
assert_correct_warning_upsampling()
@ -413,6 +413,21 @@ do_deepspeech_binary_build()
deepspeech
}
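# Build the native client with the Android NDK for the ABI given as first argument (e.g. arm64-v8a)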
do_deepspeech_ndk_build()
{
arch_abi=$1
cd ${DS_DSDIR}/native_client/
${ANDROID_NDK_HOME}/ndk-build \
APP_PLATFORM=android-21 \
APP_BUILD_SCRIPT=$(pwd)/Android.mk \
NDK_PROJECT_PATH=$(pwd) \
APP_STL=c++_shared \
TFDIR=${DS_TFDIR} \
TARGET_ARCH_ABI=${arch_abi}
}
# Hack to extract Ubuntu's 16.04 libssl 1.0.2 packages and use them during the
# local build of Python.
#
@ -437,8 +452,8 @@ maybe_ssl102_py37()
mkdir -p ${PY37_OPENSSL_DIR}
wget -P ${TASKCLUSTER_TMP_DIR} \
-http://${TASKCLUSTER_WORKER_GROUP}.ec2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl-dev_1.0.2g-1ubuntu4.13_amd64.deb \
-http://${TASKCLUSTER_WORKER_GROUP}.ec2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.0.0_1.0.2g-1ubuntu4.13_amd64.deb
+http://${TASKCLUSTER_WORKER_GROUP}.ec2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl-dev_1.0.2g-1ubuntu4.14_amd64.deb \
+http://${TASKCLUSTER_WORKER_GROUP}.ec2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.0.0_1.0.2g-1ubuntu4.14_amd64.deb
for deb in ${TASKCLUSTER_TMP_DIR}/libssl*.deb; do
dpkg -x ${deb} ${PY37_OPENSSL_DIR}
@ -636,22 +651,42 @@ package_native_client()
echo "Please specify artifact name."
fi;
if [ -f "${tensorflow_dir}/bazel-bin/native_client/libdeepspeech_model.so" ]; then
tar -cf - \
-C ${tensorflow_dir}/bazel-bin/native_client/ generate_trie \
-C ${tensorflow_dir}/bazel-bin/native_client/ libdeepspeech.so \
-C ${tensorflow_dir}/bazel-bin/native_client/ libdeepspeech_model.so \
-C ${deepspeech_dir}/ LICENSE \
-C ${deepspeech_dir}/native_client/ deepspeech \
-C ${deepspeech_dir}/native_client/kenlm/ README.mozilla \
| pixz -9 > "${artifacts_dir}/${artifact_name}"
else
tar -cf - \
-C ${tensorflow_dir}/bazel-bin/native_client/ generate_trie \
-C ${tensorflow_dir}/bazel-bin/native_client/ libdeepspeech.so \
-C ${deepspeech_dir}/ LICENSE \
-C ${deepspeech_dir}/native_client/ deepspeech \
-C ${deepspeech_dir}/native_client/kenlm/ README.mozilla \
| pixz -9 > "${artifacts_dir}/${artifact_name}"
fi;
tar -cf - \
-C ${tensorflow_dir}/bazel-bin/native_client/ generate_trie \
-C ${tensorflow_dir}/bazel-bin/native_client/ libdeepspeech.so \
-C ${deepspeech_dir}/ LICENSE \
-C ${deepspeech_dir}/native_client/ deepspeech \
-C ${deepspeech_dir}/native_client/kenlm/ README.mozilla \
| pixz -9 > "${artifacts_dir}/${artifact_name}"
}
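# Package the NDK-built deepspeech binary and its shared libraries for one Android ABI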
package_native_client_ndk()
{
deepspeech_dir=${DS_DSDIR}
artifacts_dir=${TASKCLUSTER_ARTIFACTS}
artifact_name=$1
arch_abi=$2
if [ ! -d ${deepspeech_dir} -o ! -d ${artifacts_dir} ]; then
echo "Missing directory. Please check:"
echo "deepspeech_dir=${deepspeech_dir}"
echo "artifacts_dir=${artifacts_dir}"
exit 1
fi;
if [ -z "${artifact_name}" ]; then
echo "Please specify artifact name."
fi;
if [ -z "${arch_abi}" ]; then
echo "Please specify arch abi."
fi;
tar -cf - \
-C ${deepspeech_dir}/native_client/libs/${arch_abi}/ deepspeech \
-C ${deepspeech_dir}/native_client/libs/${arch_abi}/ libdeepspeech.so \
-C ${deepspeech_dir}/native_client/libs/${arch_abi}/ libc++_shared.so \
-C ${deepspeech_dir}/ LICENSE \
-C ${deepspeech_dir}/native_client/kenlm/ README.mozilla \
| pixz -9 > "${artifacts_dir}/${artifact_name}"
}

View File

@ -15,10 +15,7 @@ def audiofile_to_input_vector(audio_filename, numcep, numcontext):
fs, audio = wav.read(audio_filename)
# Get mfcc coefficients
-features = mfcc(audio, samplerate=fs, numcep=numcep)
-# We only keep every second feature (BiRNN stride = 2)
-features = features[::2]
+features = mfcc(audio, samplerate=fs, numcep=numcep, winlen=0.032, winstep=0.02, winfunc=np.hamming)
# Add empty initial and final contexts
empty_context = np.zeros((numcontext, numcep), dtype=features.dtype)
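The wider 32 ms Hamming window with a 20 ms step yields ~50 feature frames per second directly, which is why the explicit [::2] stride above is dropped (the old 10 ms step produced 100 frames per second, halved by the stride). A minimal sketch of the new pipeline, assuming python_speech_features and scipy as used by util/audio.py; the input path and numcep value are illustrative:

import numpy as np
import scipy.io.wavfile as wav
from python_speech_features import mfcc

fs, audio = wav.read('LDC93S1.wav')  # illustrative input file
# 32 ms Hamming window, 20 ms step: ~50 MFCC frames per second of audio,
# equivalent to the old 10 ms step followed by stride-2 subsampling
features = mfcc(audio, samplerate=fs, numcep=26, winlen=0.032, winstep=0.02, winfunc=np.hamming)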

View File

@ -29,14 +29,18 @@ else:
print("### Reading in the following transcript files: ###")
print(inFiles)
allText = set()
for inFile in (inFiles):
    with open(inFile, 'r') as csvFile:
        reader = csv.reader(csvFile)
-        for row in reader:
-            allText |= set(str(row[2]))
-        csvFile.close()
+        try:
+            for row in reader:
+                allText |= set(str(row[2]))
+        except IndexError as ie:
+            print("Your input file",inFile,"is not formatted properly. Check if there are 3 columns with the 3rd containing the transcript")
+            sys.exit(-1)
+        finally:
+            csvFile.close()
print("### The following unique characters were found in your transcripts: ###")
print(list(allText))
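As the new error message indicates, each input CSV needs three columns with the transcript in the third (row[2]); a well-formed file looks like this (path and file size are illustrative):

wav_filename,wav_filesize,transcript
/data/ldc93s1/LDC93S1.wav,93638,she had your dark suit in greasy wash water all year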

View File

@ -111,7 +111,7 @@ def create_flags():
# Initialization
-tf.app.flags.DEFINE_integer ('random_seed', 4567, 'default random seed that is used to initialize variables')
+tf.app.flags.DEFINE_integer ('random_seed', 4568, 'default random seed that is used to initialize variables')
# Early Stopping