diff --git a/native_client/README.md b/native_client/README.md index 5bef4ef1..6748cd40 100644 --- a/native_client/README.md +++ b/native_client/README.md @@ -90,6 +90,62 @@ cd ../DeepSpeech/native_client make deepspeech ``` +### Cross-building for RPi3 ARMv7 / LePotato ARM64 + +We do support cross-compilation; please refer to our `mozilla/tensorflow` fork, where we define the following `--config` flags: + - `--config=rpi3` and `--config=rpi3_opt` for Raspbian / ARMv7 + - `--config=rpi3-armv8` and `--config=rpi3-armv8_opt` for ARMBian / ARM64 + +So your command line for RPi3 / ARMv7 should look like: +``` +bazel build --config=monolithic --config=rpi3 --config=rpi3_opt -c opt --copt=-O3 --copt=-fvisibility=hidden //native_client:libdeepspeech.so //native_client:generate_trie +``` + +And your command line for LePotato / ARM64 should look like: +``` +bazel build --config=monolithic --config=rpi3-armv8 --config=rpi3-armv8_opt -c opt --copt=-O3 --copt=-fvisibility=hidden //native_client:libdeepspeech.so //native_client:generate_trie +``` + +While we test only on RPi3 Raspbian Stretch / LePotato ARMBian stretch, anything compatible with `armv7-a cortex-a53` / `armv8-a cortex-a53` should be fine. + +The `deepspeech` binary can also be cross-built, with `TARGET=rpi3` or `TARGET=rpi3-armv8`. This might require you to setup a system tree using the tool `multistrap` and the multistrap configuration files: `native_client/multistrap_armbian64_stretch.conf` and `native_client/multistrap_raspbian_stretch.conf`. +The path of the system tree can be overridden from the default values defined in `definitions.mk` through `RASPBIAN` make variable. + +``` +cd ../DeepSpeech/native_client +make TARGET=rpi3 deepspeech +``` + +### Android devices + +We have preliminary support for Android relying on TensorFlow Lite, with upcoming Java / JNI bindings. For more details on how to experiment with those, please refer to `native_client/java/README.md`. 
+ +Please refer to TensorFlow documentation on how to setup the environment to build for Android (SDK and NDK required). + +You can build the `libdeepspeech.so` using (ARMv7): + +``` +bazel build --config=monolithic --config=android --config=android_arm --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++11 --copt=-D_GLIBCXX_USE_C99 //native_client:libdeepspeech.so +``` + +Or (ARM64): +``` +bazel build --config=monolithic --config=android --config=android_arm64 --action_env ANDROID_NDK_API_LEVEL=21 --cxxopt=-std=c++11 --copt=-D_GLIBCXX_USE_C99 //native_client:libdeepspeech.so +``` + +Building the `deepspeech` binary will happen through `ndk-build` (ARMv7): + +``` +cd ../DeepSpeech/native_client +$ANDROID_NDK_HOME/ndk-build APP_PLATFORM=android-21 APP_BUILD_SCRIPT=$(pwd)/Android.mk NDK_PROJECT_PATH=$(pwd) APP_STL=c++_shared TFDIR=$(pwd)/../../tensorflow/ TARGET_ARCH_ABI=armeabi-v7a +``` + +And (ARM64): +``` +cd ../DeepSpeech/native_client +$ANDROID_NDK_HOME/ndk-build APP_PLATFORM=android-21 APP_BUILD_SCRIPT=$(pwd)/Android.mk NDK_PROJECT_PATH=$(pwd) APP_STL=c++_shared TFDIR=$(pwd)/../../tensorflow/ TARGET_ARCH_ABI=arm64-v8a +``` + ## Installing After building, the library files and binary can optionally be installed to a system path for ease of development. This is also a required step for bindings generation. 
diff --git a/native_client/java/.gitignore b/native_client/java/.gitignore new file mode 100644 index 00000000..fd45b12f --- /dev/null +++ b/native_client/java/.gitignore @@ -0,0 +1,11 @@ +*.iml +.gradle +/local.properties +/.idea/caches/build_file_checksums.ser +/.idea/libraries +/.idea/modules.xml +/.idea/workspace.xml +.DS_Store +/build +/captures +.externalNativeBuild diff --git a/native_client/java/.idea/codeStyles/Project.xml b/native_client/java/.idea/codeStyles/Project.xml new file mode 100644 index 00000000..30aa626c --- /dev/null +++ b/native_client/java/.idea/codeStyles/Project.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/native_client/java/.idea/gradle.xml b/native_client/java/.idea/gradle.xml new file mode 100644 index 00000000..7ac24c77 --- /dev/null +++ b/native_client/java/.idea/gradle.xml @@ -0,0 +1,18 @@ + + + + + + \ No newline at end of file diff --git a/native_client/java/.idea/misc.xml b/native_client/java/.idea/misc.xml new file mode 100644 index 00000000..b0c7b20c --- /dev/null +++ b/native_client/java/.idea/misc.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/native_client/java/.idea/runConfigurations.xml b/native_client/java/.idea/runConfigurations.xml new file mode 100644 index 00000000..7f68460d --- /dev/null +++ b/native_client/java/.idea/runConfigurations.xml @@ -0,0 +1,12 @@ + + + + + + \ No newline at end of file diff --git a/native_client/java/Makefile b/native_client/java/Makefile new file mode 100644 index 00000000..e84895ab --- /dev/null +++ b/native_client/java/Makefile @@ -0,0 +1,19 @@ +.PHONY: clean apk-clean + +include ../definitions.mk + +LIBDEEPSPEECH_SO ?= ${TFDIR}/bazel-bin/native_client/libdeepspeech.so + +all: apk + +clean: apk-clean + rm -rf *.java jni/deepspeech_wrap.cpp + +apk-clean: + ./gradlew clean + +apk: apk-clean bindings + LIBDEEPSPEECH_SO=$(LIBDEEPSPEECH_SO) ./gradlew build + +bindings: clean + swig -c++ -java -package 
deepspeech.mozilla.org.deepspeech -outdir app/src/main/java/deepspeech/mozilla/org/deepspeech/ -o jni/deepspeech_wrap.cpp jni/deepspeech.i diff --git a/native_client/java/README.md b/native_client/java/README.md new file mode 100644 index 00000000..6b743f5b --- /dev/null +++ b/native_client/java/README.md @@ -0,0 +1,32 @@ +DeepSpeech Java / Android bindings +================================== + +This is still preliminary work. Please refer to `native_client/README.md` for +building `libdeepspeech.so` and `deepspeech` binary for Android on ARMv7 and +ARM64 arch. + +Running `deepspeech` via adb +============================ +You should use `adb push` to send data to device, please refer to Android +documentation on how to use that. + +Please push DeepSpeech data to `/sdcard/deepspeech/`, including: + - `output_graph.tflite` which is the TF Lite model + - `alphabet.txt` + - `lm.binary` and `trie` files, if you want to use the language model; please + be aware that a language model that is too big will make the device run out of memory + +Then, push binaries from `native_client.tar.xz` to `/data/local/tmp/ds`: + - `deepspeech` + - `libdeepspeech.so` + - `libc++_shared.so` + +You should then be able to run as usual, using a shell from `adb shell`: +``` +user@device$ cd /data/local/tmp/ds/ +user@device$ LD_LIBRARY_PATH=$(pwd)/ ./deepspeech [...] +``` + +Please note that Android linker does not support `rpath` so you have to set +`LD_LIBRARY_PATH`. Properly wrapped / packaged bindings do embed the library +at a place where the linker knows to search, so Android apps will be fine. 
diff --git a/native_client/java/app/.gitignore b/native_client/java/app/.gitignore new file mode 100644 index 00000000..796b96d1 --- /dev/null +++ b/native_client/java/app/.gitignore @@ -0,0 +1 @@ +/build diff --git a/native_client/java/app/CMakeLists.txt b/native_client/java/app/CMakeLists.txt new file mode 100644 index 00000000..e4a51f0e --- /dev/null +++ b/native_client/java/app/CMakeLists.txt @@ -0,0 +1,58 @@ +# For more information about using CMake with Android Studio, read the +# documentation: https://d.android.com/studio/projects/add-native-code.html + +# Sets the minimum version of CMake required to build the native library. + +cmake_minimum_required(VERSION 3.4.1) + +# Creates and names a library, sets it as either STATIC +# or SHARED, and provides the relative paths to its source code. +# You can define multiple libraries, and CMake builds them for you. +# Gradle automatically packages shared libraries with your APK. + +add_library( # Sets the name of the library. + deepspeech-jni + + # Sets the library as a shared library. + SHARED + + # Provides a relative path to your source file(s). + ../jni/deepspeech_wrap.cpp ) + +add_library( deepspeech-lib + SHARED + IMPORTED ) + +set_target_properties( deepspeech-lib + PROPERTIES IMPORTED_LOCATION $ENV{LIBDEEPSPEECH_SO} ) + +add_custom_command( TARGET deepspeech-jni POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + $ENV{LIBDEEPSPEECH_SO} + ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libdeepspeech.so ) + +# Searches for a specified prebuilt library and stores the path as a +# variable. Because CMake includes system libraries in the search path by +# default, you only need to specify the name of the public NDK library +# you want to add. CMake verifies that the library exists before +# completing its build. + +find_library( # Sets the name of the path variable. + log-lib + + # Specifies the name of the NDK library that + # you want CMake to locate. + log ) + +# Specifies libraries CMake should link to your target library. 
You +# can link multiple libraries, such as libraries you define in this +# build script, prebuilt third-party libraries, or system libraries. + +target_link_libraries( # Specifies the target library. + deepspeech-jni + + deepspeech-lib + + # Links the target library to the log library + # included in the NDK. + ${log-lib} ) diff --git a/native_client/java/app/build.gradle b/native_client/java/app/build.gradle new file mode 100644 index 00000000..2e8fa66c --- /dev/null +++ b/native_client/java/app/build.gradle @@ -0,0 +1,41 @@ +apply plugin: 'com.android.application' + +android { + compileSdkVersion 27 + defaultConfig { + applicationId "deepspeech.mozilla.org.deepspeech" + minSdkVersion 21 + targetSdkVersion 27 + versionCode 1 + versionName "1.0" + testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner" + ndk { + abiFilters 'armeabi-v7a', 'arm64-v8a' + } + externalNativeBuild { + cmake { + cppFlags "" + } + } + } + buildTypes { + release { + minifyEnabled false + proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro' + } + } + externalNativeBuild { + cmake { + path "CMakeLists.txt" + } + } +} + +dependencies { + implementation fileTree(dir: 'libs', include: ['*.jar']) + implementation 'com.android.support:appcompat-v7:27.1.1' + implementation 'com.android.support.constraint:constraint-layout:1.1.3' + testImplementation 'junit:junit:4.12' + androidTestImplementation 'com.android.support.test:runner:1.0.2' + androidTestImplementation 'com.android.support.test.espresso:espresso-core:3.0.2' +} diff --git a/native_client/java/app/proguard-rules.pro b/native_client/java/app/proguard-rules.pro new file mode 100644 index 00000000..f1b42451 --- /dev/null +++ b/native_client/java/app/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. 
+# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. +#-renamesourcefileattribute SourceFile diff --git a/native_client/java/app/src/androidTest/java/deepspeech/mozilla/org/deepspeech/ExampleInstrumentedTest.java b/native_client/java/app/src/androidTest/java/deepspeech/mozilla/org/deepspeech/ExampleInstrumentedTest.java new file mode 100644 index 00000000..729e7b79 --- /dev/null +++ b/native_client/java/app/src/androidTest/java/deepspeech/mozilla/org/deepspeech/ExampleInstrumentedTest.java @@ -0,0 +1,26 @@ +package deepspeech.mozilla.org.deepspeech; + +import android.content.Context; +import android.support.test.InstrumentationRegistry; +import android.support.test.runner.AndroidJUnit4; + +import org.junit.Test; +import org.junit.runner.RunWith; + +import static org.junit.Assert.*; + +/** + * Instrumented test, which will execute on an Android device. + * + * @see Testing documentation + */ +@RunWith(AndroidJUnit4.class) +public class ExampleInstrumentedTest { + @Test + public void useAppContext() { + // Context of the app under test. 
+ Context appContext = InstrumentationRegistry.getTargetContext(); + + assertEquals("deepspeech.mozilla.org.deepspeech", appContext.getPackageName()); + } +} diff --git a/native_client/java/app/src/main/AndroidManifest.xml b/native_client/java/app/src/main/AndroidManifest.xml new file mode 100644 index 00000000..b55fcb1e --- /dev/null +++ b/native_client/java/app/src/main/AndroidManifest.xml @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + diff --git a/native_client/java/app/src/main/cpp/native-lib.cpp b/native_client/java/app/src/main/cpp/native-lib.cpp new file mode 100644 index 00000000..6e815b4b --- /dev/null +++ b/native_client/java/app/src/main/cpp/native-lib.cpp @@ -0,0 +1,10 @@ +#include +#include + +extern "C" JNIEXPORT jstring JNICALL +Java_deepspeech_mozilla_org_deepspeech_DeepSpeechActivity_stringFromJNI( + JNIEnv* env, + jobject /* this */) { + std::string hello = "Hello from C++"; + return env->NewStringUTF(hello.c_str()); +} diff --git a/native_client/java/app/src/main/java/deepspeech/mozilla/org/deepspeech/DeepSpeechActivity.java b/native_client/java/app/src/main/java/deepspeech/mozilla/org/deepspeech/DeepSpeechActivity.java new file mode 100644 index 00000000..0635f604 --- /dev/null +++ b/native_client/java/app/src/main/java/deepspeech/mozilla/org/deepspeech/DeepSpeechActivity.java @@ -0,0 +1,177 @@ +package deepspeech.mozilla.org.deepspeech; + +import android.support.v7.app.AppCompatActivity; +import android.os.Bundle; + +import android.view.View; +import android.widget.TextView; +import android.widget.EditText; +import android.widget.Button; + +import android.media.MediaPlayer; + +import java.io.RandomAccessFile; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.ByteOrder; +import java.nio.ByteBuffer; + +public class DeepSpeechActivity extends AppCompatActivity { + + // Used to load the 'native-lib' library on application startup. 
+ static { + System.loadLibrary("deepspeech-jni"); + System.loadLibrary("deepspeech"); + } + + Model _m = null; + + EditText _tfliteModel; + EditText _alphabet; + EditText _audioFile; + + TextView _decodedString; + TextView _tfliteStatus; + + Button _startInference; + + final int N_CEP = 26; + final int N_CONTEXT = 9; + final int BEAM_WIDTH = 50; + final float LM_WEIGHT = 1.50f; + final float VALID_WORD_COUNT_WEIGHT = 2.10f; + + private char readLEChar(RandomAccessFile f) throws IOException { + byte b1 = f.readByte(); + byte b2 = f.readByte(); + return (char)((b2 << 8) | b1); + } + + private int readLEInt(RandomAccessFile f) throws IOException { + byte b1 = f.readByte(); + byte b2 = f.readByte(); + byte b3 = f.readByte(); + byte b4 = f.readByte(); + return (int)((b1 & 0xFF) | (b2 & 0xFF) << 8 | (b3 & 0xFF) << 16 | (b4 & 0xFF) << 24); + } + + private void newModel(String tfliteModel, String alphabet) { + this._tfliteStatus.setText("Creating model"); + if (this._m == null) { + this._m = new Model(tfliteModel, N_CEP, N_CONTEXT, alphabet, BEAM_WIDTH); + } + } + + private void doInference(String audioFile) { + long inferenceExecTime = 0; + + this._startInference.setEnabled(false); + + this.newModel(this._tfliteModel.getText().toString(), this._alphabet.getText().toString()); + + this._tfliteStatus.setText("Extracting audio features ..."); + + try { + RandomAccessFile wave = new RandomAccessFile(audioFile, "r"); + + wave.seek(20); char audioFormat = this.readLEChar(wave); + assert (audioFormat == 1); // 1 is PCM + // tv_audioFormat.setText("audioFormat=" + (audioFormat == 1 ? "PCM" : "!PCM")); + + wave.seek(22); char numChannels = this.readLEChar(wave); + assert (numChannels == 1); // MONO + // tv_numChannels.setText("numChannels=" + (numChannels == 1 ? "MONO" : "!MONO")); + + wave.seek(24); int sampleRate = this.readLEInt(wave); + assert (sampleRate == 16000); // 16000 Hz + // tv_sampleRate.setText("sampleRate=" + (sampleRate == 16000 ? 
"16kHz" : "!16kHz")); + + wave.seek(34); char bitsPerSample = this.readLEChar(wave); + assert (bitsPerSample == 16); // 16 bits per sample + // tv_bitsPerSample.setText("bitsPerSample=" + (bitsPerSample == 16 ? "16-bits" : "!16-bits" )); + + wave.seek(40); int bufferSize = this.readLEInt(wave); + assert (bufferSize > 0); + // tv_bufferSize.setText("bufferSize=" + bufferSize); + + wave.seek(44); + byte[] bytes = new byte[bufferSize]; + wave.readFully(bytes); + + short[] shorts = new short[bytes.length/2]; + // to turn bytes to shorts as either big endian or little endian. + ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(shorts); + + this._tfliteStatus.setText("Running inference ..."); + + long inferenceStartTime = System.currentTimeMillis(); + + String decoded = this._m.stt(shorts, shorts.length, sampleRate); + + inferenceExecTime = System.currentTimeMillis() - inferenceStartTime; + + this._decodedString.setText(decoded); + + } catch (FileNotFoundException ex) { + + } catch (IOException ex) { + + } finally { + + } + + this._tfliteStatus.setText("Finished! 
Took " + inferenceExecTime + "ms"); + + this._startInference.setEnabled(true); + } + + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + setContentView(R.layout.activity_deep_speech); + + this._decodedString = (TextView) findViewById(R.id.decodedString); + this._tfliteStatus = (TextView) findViewById(R.id.tfliteStatus); + + this._tfliteModel = (EditText) findViewById(R.id.tfliteModel); + this._alphabet = (EditText) findViewById(R.id.alphabet); + this._audioFile = (EditText) findViewById(R.id.audioFile); + + this._tfliteModel.setText("/sdcard/deepspeech/output_graph.tflite"); + this._tfliteStatus.setText("Ready, waiting ..."); + + this._alphabet.setText("/sdcard/deepspeech/alphabet.txt"); + this._audioFile.setText("/sdcard/deepspeech/audio.wav"); + + this._startInference = (Button) findViewById(R.id.btnStartInference); + } + + public void onClick_inference_handler(View v) { + this.playAudioFile(); + this.doInference(this._audioFile.getText().toString()); + } + + public void playAudioFile() { + try { + MediaPlayer mediaPlayer = new MediaPlayer(); + mediaPlayer.setDataSource(this._audioFile.getText().toString()); + mediaPlayer.prepare(); + mediaPlayer.start(); + } catch (IOException ex) { + + } + } + + public void onClick_audio_handler(View v) { + this.playAudioFile(); + } + + @Override + protected void onDestroy() { + super.onDestroy(); + + if (this._m != null) { + this._m.destroyModel(); + } + } +} diff --git a/native_client/java/app/src/main/java/deepspeech/mozilla/org/deepspeech/Model.java b/native_client/java/app/src/main/java/deepspeech/mozilla/org/deepspeech/Model.java new file mode 100644 index 00000000..303f03d7 --- /dev/null +++ b/native_client/java/app/src/main/java/deepspeech/mozilla/org/deepspeech/Model.java @@ -0,0 +1,44 @@ +package deepspeech.mozilla.org.deepspeech; + +public class Model { + + // FIXME: We should have something better than those SWIGTYPE_* + SWIGTYPE_p_p_ModelState _mspp; + 
SWIGTYPE_p_ModelState _msp; + + public Model(String modelPath, int n_cep, int n_context, String alphabetPath, int beam_width) { + this._mspp = impl.new_modelstatep(); + impl.CreateModel(modelPath, n_cep, n_context, alphabetPath, beam_width, this._mspp); + this._msp = impl.modelstatep_value(this._mspp); + } + + public void destroyModel() { + impl.DestroyModel(this._msp); + } + + public void enableDecoderWihLM(String alphabet, String lm, String trie, float lm_weight, float valid_word_count_weight) { + impl.EnableDecoderWithLM(this._msp, alphabet, lm, trie, lm_weight, valid_word_count_weight); + } + + public String stt(short[] buffer, int buffer_size, int sample_rate) { + return impl.SpeechToText(this._msp, buffer, buffer_size, sample_rate); + } + + public SWIGTYPE_p_StreamingState setupStream(int prealloc_frames, int sample_rate) { + SWIGTYPE_p_p_StreamingState ssp = impl.new_streamingstatep(); + impl.SetupStream(this._msp, prealloc_frames, sample_rate, ssp); + return impl.streamingstatep_value(ssp); + } + + public void feedAudioContent(SWIGTYPE_p_StreamingState ctx, short[] buffer, int buffer_size) { + impl.FeedAudioContent(ctx, buffer, buffer_size); + } + + public String intermediateDecode(SWIGTYPE_p_StreamingState ctx) { + return impl.IntermediateDecode(ctx); + } + + public String finishStream(SWIGTYPE_p_StreamingState ctx) { + return impl.FinishStream(ctx); + } +} diff --git a/native_client/java/app/src/main/res/drawable-v24/ic_launcher_foreground.xml b/native_client/java/app/src/main/res/drawable-v24/ic_launcher_foreground.xml new file mode 100644 index 00000000..1f6bb290 --- /dev/null +++ b/native_client/java/app/src/main/res/drawable-v24/ic_launcher_foreground.xml @@ -0,0 +1,34 @@ + + + + + + + + + + + diff --git a/native_client/java/app/src/main/res/drawable/ic_launcher_background.xml b/native_client/java/app/src/main/res/drawable/ic_launcher_background.xml new file mode 100644 index 00000000..0d025f9b --- /dev/null +++ 
b/native_client/java/app/src/main/res/drawable/ic_launcher_background.xml @@ -0,0 +1,170 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/native_client/java/app/src/main/res/layout/activity_deep_speech.xml b/native_client/java/app/src/main/res/layout/activity_deep_speech.xml new file mode 100644 index 00000000..82fb4fe3 --- /dev/null +++ b/native_client/java/app/src/main/res/layout/activity_deep_speech.xml @@ -0,0 +1,192 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +