Android demo app for speech recognition
PiperOrigin-RevId: 165714459
parent a6729325a3
commit 7d01f89cc3
10  WORKSPACE
@@ -80,3 +80,13 @@ new_http_archive(
        "http://download.tensorflow.org/models/stylize_v1.zip",
    ],
)

new_http_archive(
    name = "speech_commands",
    build_file = "models.BUILD",
    sha256 = "c3ec4fea3158eb111f1d932336351edfe8bd515bb6e87aad4f25dbad0a600d0c",
    urls = [
        "http://storage.googleapis.com/download.tensorflow.org/models/speech_commands_v0.01.zip",
        "http://download.tensorflow.org/models/speech_commands_v0.01.zip",
    ],
)
@@ -73,8 +73,9 @@ HOST_INCLUDES := \
-I. \
-I$(MAKEFILE_DIR)/downloads/ \
-I$(MAKEFILE_DIR)/downloads/eigen \
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(MAKEFILE_DIR)/downloads/nsync/public \
-I$(MAKEFILE_DIR)/downloads/fft2d \
-I$(HOST_GENDIR)
ifeq ($(HAS_GEN_HOST_PROTOC),true)
	HOST_INCLUDES += -I$(MAKEFILE_DIR)/gen/protobuf-host/include
@@ -156,6 +157,7 @@ INCLUDES := \
-I$(MAKEFILE_DIR)/downloads/eigen \
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(MAKEFILE_DIR)/downloads/nsync/public \
-I$(MAKEFILE_DIR)/downloads/fft2d \
-I$(PROTOGENDIR) \
-I$(PBTGENDIR)
ifeq ($(HAS_GEN_HOST_PROTOC),true)
@@ -237,6 +239,7 @@ ifeq ($(TARGET),ANDROID)
	$(error "NDK_ROOT is not defined.")
endif
CXX := $(CC_PREFIX) $(NDK_ROOT)/toolchains/arm-linux-androideabi-4.9/prebuilt/$(OS_PATH)-x86_64/bin/arm-linux-androideabi-g++
CC := $(CC_PREFIX) $(NDK_ROOT)/toolchains/arm-linux-androideabi-4.9/prebuilt/$(OS_PATH)-x86_64/bin/arm-linux-androideabi-gcc
CXXFLAGS +=\
--sysroot $(NDK_ROOT)/platforms/android-21/arch-arm \
-Wno-narrowing \
@@ -244,7 +247,6 @@ ifeq ($(TARGET),ANDROID)
-mfloat-abi=softfp \
-mfpu=neon \
-fPIE

INCLUDES = \
-I$(NDK_ROOT)/sources/android/support/include \
-I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/include \
@@ -254,6 +256,7 @@ ifeq ($(TARGET),ANDROID)
-I$(MAKEFILE_DIR)/downloads/eigen \
-I$(MAKEFILE_DIR)/downloads/gemmlowp \
-I$(MAKEFILE_DIR)/downloads/nsync/public \
-I$(MAKEFILE_DIR)/downloads/fft2d \
-I$(MAKEFILE_DIR)/gen/protobuf/include \
-I$(PROTOGENDIR) \
-I$(PBTGENDIR)
@@ -507,6 +510,7 @@ $(wildcard tensorflow/core/grappler/clusters/single_machine.*)
TF_CC_SRCS := $(filter-out $(CORE_CC_EXCLUDE_SRCS), $(CORE_CC_ALL_SRCS))
# Add in any extra files that don't fit the patterns easily
TF_CC_SRCS += tensorflow/core/platform/default/gpu_tracer.cc
TF_CC_SRCS += tensorflow/contrib/makefile/downloads/fft2d/fftsg.c
# Also include the op and kernel definitions.
TF_CC_SRCS += $(shell cat $(MAKEFILE_DIR)/tf_op_files.txt)
PBT_CC_SRCS := $(shell cat $(MAKEFILE_DIR)/tf_pb_text_files.txt)
@@ -529,7 +533,8 @@ tensorflow/core/kernels/hexagon/hexagon_remote_fused_graph_executor_build.cc
endif

# File names of the intermediate files target compilation generates.
TF_CC_OBJS := $(addprefix $(OBJDIR), $(TF_CC_SRCS:.cc=.o))
TF_CC_OBJS := $(addprefix $(OBJDIR), \
$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(TF_CC_SRCS))))
PBT_GEN_FILES := $(addprefix $(PBTGENDIR), $(PBT_CC_SRCS))
PBT_OBJS := $(addprefix $(OBJDIR), $(PBT_CC_SRCS:.cc=.o))
PROTO_CC_SRCS := $(addprefix $(PROTOGENDIR), $(PROTO_SRCS:.proto=.pb.cc))
@@ -567,6 +572,14 @@ $(OBJDIR)%.o: %.cc | $(PBT_GEN_FILES)
	$(CXX) $(CXXFLAGS) $(DEPFLAGS) $(INCLUDES) -c $< -o $@
	@mv -f $(DEPDIR)/$*.Td $(DEPDIR)/$*.d

# Matches on plain C files.
$(OBJDIR)%.o: %.c
	@mkdir -p $(dir $@)
	@mkdir -p $(dir $(DEPDIR)$*)
	$(CXX) $(patsubst --std=c++11,--std=c99, $(CXXFLAGS)) -x c $(DEPFLAGS) \
	$(INCLUDES) -c $< -o $@
	@mv -f $(DEPDIR)/$*.Td $(DEPDIR)/$*.d

# Compiles C++ source files that have been generated by protoc.
$(OBJDIR)%.pb.o: $(PROTOGENDIR)%.pb.cc
	@mkdir -p $(dir $@)

@@ -25,6 +25,7 @@ GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.g
NSYNC_URL="$(grep -o 'http.*github.com/google/nsync/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
PROTOBUF_URL="$(grep -o 'http.*github.com/google/protobuf/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
RE2_URL="$(grep -o 'http.*github.com/google/re2/.*tar\.gz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"
FFT2D_URL="$(grep -o 'http.*fft\.tgz' "${BZL_FILE_PATH}" | grep -v bazel-mirror | head -n1)"

# TODO(petewarden): Some new code in Eigen triggers a clang bug with iOS arm64,
# so work around it by patching the source.
@@ -60,6 +61,7 @@ download_and_extract "${GOOGLETEST_URL}" "${DOWNLOADS_DIR}/googletest"
download_and_extract "${NSYNC_URL}" "${DOWNLOADS_DIR}/nsync"
download_and_extract "${PROTOBUF_URL}" "${DOWNLOADS_DIR}/protobuf"
download_and_extract "${RE2_URL}" "${DOWNLOADS_DIR}/re2"
download_and_extract "${FFT2D_URL}" "${DOWNLOADS_DIR}/fft2d"

replace_by_sed 's#static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA );#static uint32x4_t p4ui_CONJ_XOR; // = vld1q_u32( conj_XOR_DATA ); - Removed by script#' \
  "${DOWNLOADS_DIR}/eigen/Eigen/src/Core/arch/NEON/Complex.h"

@@ -38,6 +38,8 @@ tensorflow/core/kernels/stack_ops.cc
tensorflow/core/kernels/split_op.cc
tensorflow/core/kernels/split_v_op.cc
tensorflow/core/kernels/split_lib_cpu.cc
tensorflow/core/kernels/spectrogram_op.cc
tensorflow/core/kernels/spectrogram.cc
tensorflow/core/kernels/sparse_to_dense_op.cc
tensorflow/core/kernels/sparse_matmul_op.cc
tensorflow/core/kernels/softsign_op.cc
@@ -100,6 +102,10 @@ tensorflow/core/kernels/mirror_pad_op_cpu_impl_2.cc
tensorflow/core/kernels/mirror_pad_op_cpu_impl_3.cc
tensorflow/core/kernels/mirror_pad_op_cpu_impl_4.cc
tensorflow/core/kernels/mirror_pad_op_cpu_impl_5.cc
tensorflow/core/kernels/mfcc_op.cc
tensorflow/core/kernels/mfcc_mel_filterbank.cc
tensorflow/core/kernels/mfcc_dct.cc
tensorflow/core/kernels/mfcc.cc
tensorflow/core/kernels/maxpooling_op.cc
tensorflow/core/kernels/matmul_op.cc
tensorflow/core/kernels/lrn_op.cc
@@ -117,6 +123,7 @@ tensorflow/core/kernels/fill_functor.cc
tensorflow/core/kernels/fifo_queue.cc
tensorflow/core/kernels/fake_quant_ops.cc
tensorflow/core/kernels/example_parsing_ops.cc
tensorflow/core/kernels/encode_wav_op.cc
tensorflow/core/kernels/dynamic_stitch_op.cc
tensorflow/core/kernels/dynamic_partition_op.cc
tensorflow/core/kernels/decode_bmp_op.cc
@@ -124,6 +131,7 @@ tensorflow/core/kernels/depthtospace_op.cc
tensorflow/core/kernels/spacetodepth_op.cc
tensorflow/core/kernels/dense_update_ops.cc
tensorflow/core/kernels/deep_conv2d.cc
tensorflow/core/kernels/decode_wav_op.cc
tensorflow/core/kernels/xsmm_conv2d.cc
tensorflow/core/kernels/cwise_ops_common.cc
tensorflow/core/kernels/cwise_op_tanh.cc

@@ -981,6 +981,8 @@ cc_library(
    deps = [
        ":protos_cc",
        "//third_party/eigen3",
        "//third_party/fft2d:fft2d_headers",
        "@fft2d//:fft2d",
        "@gemmlowp//:gemmlowp",
        "@nsync//:nsync_cpp",
    ],

@@ -4322,6 +4322,9 @@ filegroup(
        "gemm_functors.h",
        "image_resizer_state.h",
        "maxpooling_op.h",
        "mfcc.h",
        "mfcc_dct.h",
        "mfcc_mel_filterbank.h",
        "mirror_pad_op.h",
        "mirror_pad_op_cpu_impl.h",
        "pad_op.h",
@@ -4338,6 +4341,7 @@ filegroup(
        "softsign_op.h",
        "spacetobatch_functor.h",
        "spacetodepth_op.h",
        "spectrogram.h",
        "tensor_array.h",
        "tile_functor.h",
        "tile_ops_cpu_impl.h",
@@ -4411,10 +4415,12 @@ filegroup(
        "cwise_op_squared_difference.cc",
        "cwise_op_sub.cc",
        "cwise_op_tanh.cc",
        "decode_wav_op.cc",
        "deep_conv2d.cc",
        "deep_conv2d.h",
        "depthwise_conv_op.cc",
        "dynamic_partition_op.cc",
        "encode_wav_op.cc",
        "fake_quant_ops.cc",
        "fifo_queue.cc",
        "fused_batch_norm_op.cc",
@@ -4443,6 +4449,10 @@ filegroup(
        "logging_ops.cc",
        "lrn_op.cc",
        "maxpooling_op.cc",
        "mfcc.cc",
        "mfcc_dct.cc",
        "mfcc_mel_filterbank.cc",
        "mfcc_op.cc",
        "mirror_pad_op.cc",
        "mirror_pad_op_cpu_impl_1.cc",
        "mirror_pad_op_cpu_impl_2.cc",
@@ -4478,6 +4488,8 @@ filegroup(
        "spacetobatch_op.cc",
        "spacetodepth_op.cc",
        "sparse_to_dense_op.cc",
        "spectrogram.cc",
        "spectrogram_op.cc",
        "stack_ops.cc",
        "string_join_op.cc",
        "summary_op.cc",
@@ -4614,6 +4626,8 @@ cc_library(
        "//tensorflow/core:android_tensorflow_lib_lite",
        "//tensorflow/core:protos_cc",
        "//third_party/eigen3",
        "//third_party/fft2d:fft2d_headers",
        "@fft2d//:fft2d",
        "@gemmlowp//:gemmlowp",
    ],
    alwayslink = 1,

@@ -214,6 +214,41 @@ of the other .wav files in that same folder to see how well it does.
The scores are between zero and one, and higher values mean the model is more
confident in its prediction.

## Running the Model in an Android App

The easiest way to see how this model works in a real application is to download
[the prebuilt Android demo
applications](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android#prebuilt-components)
and install them on your phone. You'll see 'TF Speech' appear in your app list,
and opening it will show you the same list of action words we've just trained
our model on, starting with "Yes" and "No". Once you've given the app permission
to use the microphone, you should be able to try saying those words and see them
highlighted in the UI when the model recognizes one of them.

You can also build this application yourself, since it's open source and
[available as part of the TensorFlow repository on
github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android#building-in-android-studio-using-the-tensorflow-aar-from-jcenter).
By default it downloads [a pretrained model from
tensorflow.org](http://download.tensorflow.org/models/speech_commands_v0.01.zip),
but you can easily [replace it with a model you've trained
yourself](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android#install-model-files-optional).
If you do this, you'll need to make sure that the constants in [the main
SpeechActivity Java source
file](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java)
like `SAMPLE_RATE` and `SAMPLE_DURATION` match any changes you've made to the
defaults while training. You'll also see that there's a [Java version of the
RecognizeCommands
module](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android/src/org/tensorflow/demo/RecognizeCommands.java)
that's very similar to the C++ version in this tutorial. If you've tweaked
parameters for that, you can also update them in SpeechActivity to get the same
results as in your server testing.

The demo app updates its UI list of results automatically based on the labels
text file you copy into assets alongside your frozen graph, which means you can
easily try out different models without needing to make any code changes. You
will need to update `LABEL_FILENAME` and `MODEL_FILENAME` to point to the files
you've added if you change the paths though.
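
For reference, these are the relevant constants as they're defined in the new
SpeechActivity source later in this commit; keep them in sync with your
training settings and asset file names:

```java
private static final int SAMPLE_RATE = 16000;
private static final int SAMPLE_DURATION_MS = 1000;
private static final String LABEL_FILENAME = "file:///android_asset/conv_actions_labels.txt";
private static final String MODEL_FILENAME = "file:///android_asset/conv_actions_frozen.pb";
```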

## How does this Model Work?

The architecture used in this tutorial is based on some described in the paper
@@ -341,13 +376,14 @@ aren't detected (high precision). The numbers from the tool give you an idea of
how your model will perform in an application, and you can try tweaking the
signal averaging parameters to tune it to give the kind of performance you want.
To understand what the right parameters are for your application, you can look
at generating an [ROC curve](https://en.wikipedia.org/wiki/Receiver_operating_characteristic)
to help you understand the tradeoffs.
at generating an [ROC
curve](https://en.wikipedia.org/wiki/Receiver_operating_characteristic) to help
you understand the tradeoffs.

## RecognizeCommands

The streaming accuracy tool uses a simple decoder contained in a small
C++ class called
The streaming accuracy tool uses a simple decoder contained in a small C++ class
called
[RecognizeCommands](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/speech_commands/recognize_commands.h).
This class is fed the output of running the TensorFlow model over time, it
averages the signals, and returns information about a label when it has enough
@@ -480,7 +516,8 @@ variations in starting time in the training data, and is controlled with the
`--time_shift_ms` flag, which defaults to 100ms. Increasing this value will
provide more variation, but at the risk of cutting off important parts of the
audio. A related way of augmenting the data with realistic distortions is by
using [time stretching and pitch scaling](https://en.wikipedia.org/wiki/Audio_time_stretching_and_pitch_scaling),
using [time stretching and pitch
scaling](https://en.wikipedia.org/wiki/Audio_time_stretching_and_pitch_scaling),
but that's outside the scope of this tutorial.

## Customizing the Model

@@ -22,6 +22,7 @@
    <uses-feature android:name="android.hardware.camera" />
    <uses-feature android:name="android.hardware.camera.autofocus" />
    <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE"/>
    <uses-permission android:name="android.permission.RECORD_AUDIO" />

    <uses-sdk
        android:minSdkVersion="21"
@@ -59,6 +60,15 @@
                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>
        </activity>

        <activity android:name="org.tensorflow.demo.SpeechActivity"
            android:screenOrientation="portrait"
            android:label="@string/activity_name_speech">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />
                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>
        </activity>
    </application>

</manifest>

@@ -93,6 +93,7 @@ filegroup(
    srcs = [
        "@inception5h//:model_files",
        "@mobile_ssd//:model_files",
        "@speech_commands//:model_files",
        "@stylize//:model_files",
    ],
)

@@ -8,10 +8,11 @@ devices.
The demos in this folder are designed to give straightforward samples of using
TensorFlow in mobile applications.

Inference is done using the [TensorFlow Android Inference Interface](../../../tensorflow/contrib/android),
which may be built separately if you want a standalone library to drop into your
existing application. Object tracking and efficient YUV -> RGB conversion are
handled by `libtensorflow_demo.so`.
Inference is done using the [TensorFlow Android Inference
Interface](../../../tensorflow/contrib/android), which may be built separately
if you want a standalone library to drop into your existing application. Object
tracking and efficient YUV -> RGB conversion are handled by
`libtensorflow_demo.so`.

A device running Android 5.0 (API 21) or higher is required to run the demo due
to the use of the camera2 API, although the native libraries themselves can run
@@ -33,6 +34,12 @@ on API >= 14 devices.
   Uses a model based on [A Learned Representation For Artistic
   Style](https://arxiv.org/abs/1610.07629) to restyle the camera preview
   image to that of a number of different artists.
4. [TF
   Speech](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java):
   Runs a simple speech recognition model built by the [audio training
   tutorial](https://www.tensorflow.org/tutorials/image_retraining). Listens
   for a small set of words, and highlights them in the UI when they are
   recognized.

<img src="sample_images/classify1.jpg" width="30%"><img src="sample_images/stylize1.jpg" width="30%"><img src="sample_images/detect1.jpg" width="30%">

@@ -51,20 +58,22 @@ for more details.

## Running the Demo

Once the app is installed it can be started via the "TF Classify", "TF Detect"
and "TF Stylize" icons, which have the orange TensorFlow logo as their icon.
Once the app is installed it can be started via the "TF Classify", "TF Detect",
"TF Stylize", and "TF Speech" icons, which have the orange TensorFlow logo as
their icon.

While running the activities, pressing the volume keys on your device will
toggle debug visualizations on/off, rendering additional info to the screen
that may be useful for development purposes.
toggle debug visualizations on/off, rendering additional info to the screen that
may be useful for development purposes.

## Building in Android Studio using the TensorFlow AAR from JCenter

The simplest way to compile the demo app yourself, and try out changes to the
project code is to use Android Studio. Simply set this `android` directory as the project root.
project code is to use Android Studio. Simply set this `android` directory as the
project root.

Then edit the `build.gradle` file and change the value of `nativeBuildSystem`
to `'none'` so that the project is built in the simplest way possible:
Then edit the `build.gradle` file and change the value of `nativeBuildSystem` to
`'none'` so that the project is built in the simplest way possible:

```None
def nativeBuildSystem = 'none'
@@ -77,8 +86,8 @@ Note: Currently, in this build mode, YUV -> RGB is done using a less efficient
Java implementation, and object tracking is not available in the "TF Detect"
activity. Setting the build system to `'cmake'` currently only builds
`libtensorflow_demo.so`, which provides fast YUV -> RGB conversion and object
tracking, while still acquiring TensorFlow support via the downloaded AAR, so
it may be a lightweight way to enable these features.
tracking, while still acquiring TensorFlow support via the downloaded AAR, so it
may be a lightweight way to enable these features.

For any project that does not include custom low level TensorFlow code, this is
likely sufficient.
@@ -104,50 +113,51 @@ protobuf compilation.

NOTE: Bazel does not currently support building for Android on Windows. Full
support for gradle/cmake builds is coming soon, but in the meantime we suggest
that Windows users download the
[prebuilt binaries](https://ci.tensorflow.org/view/Nightly/job/nightly-android/)
instead.
that Windows users download the [prebuilt
binaries](https://ci.tensorflow.org/view/Nightly/job/nightly-android/) instead.

##### Install Bazel and Android Prerequisites

Bazel is the primary build system for TensorFlow. To build with Bazel,
it and the Android NDK and SDK must be installed on your system.
Bazel is the primary build system for TensorFlow. To build with Bazel, it and
the Android NDK and SDK must be installed on your system.

1. Install the latest version of Bazel as per the instructions [on the Bazel website](https://bazel.build/versions/master/docs/install.html).
2. The Android NDK is required to build the native (C/C++) TensorFlow code.
   The current recommended version is 12b, which may be found
   [here](https://developer.android.com/ndk/downloads/older_releases.html#ndk-12b-downloads).
3. The Android SDK and build tools may be obtained
   [here](https://developer.android.com/tools/revisions/build-tools.html),
   or alternatively as part of
   [Android Studio](https://developer.android.com/studio/index.html). Build
   tools API >= 23 is required to build the TF Android demo (though it will
   run on API >= 21 devices).
1. Install the latest version of Bazel as per the instructions [on the Bazel
   website](https://bazel.build/versions/master/docs/install.html).
2. The Android NDK is required to build the native (C/C++) TensorFlow code. The
   current recommended version is 12b, which may be found
   [here](https://developer.android.com/ndk/downloads/older_releases.html#ndk-12b-downloads).
3. The Android SDK and build tools may be obtained
   [here](https://developer.android.com/tools/revisions/build-tools.html), or
   alternatively as part of [Android
   Studio](https://developer.android.com/studio/index.html). Build tools API >=
   23 is required to build the TF Android demo (though it will run on API >= 21
   devices).

##### Edit WORKSPACE

The Android entries in [`<workspace_root>/WORKSPACE`](../../../WORKSPACE#L19-L36)
must be uncommented with the paths filled in appropriately depending on where
you installed the NDK and SDK. Otherwise an error such as:
"The external label '//external:android/sdk' is not bound to anything" will
be reported.
The Android entries in
[`<workspace_root>/WORKSPACE`](../../../WORKSPACE#L19-L36) must be uncommented
with the paths filled in appropriately depending on where you installed the NDK
and SDK. Otherwise an error such as: "The external label
'//external:android/sdk' is not bound to anything" will be reported.

Also edit the API levels for the SDK in WORKSPACE to the highest level you
have installed in your SDK. This must be >= 23 (this is completely independent
of the API level of the demo, which is defined in AndroidManifest.xml).
The NDK API level may remain at 14.
Also edit the API levels for the SDK in WORKSPACE to the highest level you have
installed in your SDK. This must be >= 23 (this is completely independent of the
API level of the demo, which is defined in AndroidManifest.xml). The NDK API
level may remain at 14.

##### Install Model Files (optional)

The TensorFlow `GraphDef`s that contain the model definitions and weights
are not packaged in the repo because of their size. They are downloaded
The TensorFlow `GraphDef`s that contain the model definitions and weights are
not packaged in the repo because of their size. They are downloaded
automatically and packaged with the APK by Bazel via a new_http_archive defined
in `WORKSPACE` during the build process, and by Gradle via download-models.gradle.
in `WORKSPACE` during the build process, and by Gradle via
download-models.gradle.

**Optional**: If you wish to place the models in your assets manually,
remove all of the `model_files` entries from the `assets`
list in `tensorflow_demo` found in the `[BUILD](BUILD)` file. Then download
and extract the archives yourself to the `assets` directory in the source tree:
**Optional**: If you wish to place the models in your assets manually, remove
all of the `model_files` entries from the `assets` list in `tensorflow_demo`
found in the `[BUILD](BUILD)` file. Then download and extract the archives
yourself to the `assets` directory in the source tree:

```bash
BASE_URL=https://storage.googleapis.com/download.tensorflow.org/models
@@ -162,27 +172,23 @@ This will extract the models and their associated metadata files to the local
assets/ directory.

If you are using Gradle, make sure to remove download-models.gradle reference
from build.gradle after you manually download models; otherwise gradle
might download models again and overwrite your models.
from build.gradle after you manually download models; otherwise gradle might
download models again and overwrite your models.

##### Build

After editing your WORKSPACE file to update the SDK/NDK configuration,
you may build the APK. Run this from your workspace root:
After editing your WORKSPACE file to update the SDK/NDK configuration, you may
build the APK. Run this from your workspace root:

```bash
bazel build -c opt //tensorflow/examples/android:tensorflow_demo
```

If you get build errors about protocol buffers, run
`git submodule update --init` and make sure that you've modified your WORKSPACE
file as instructed, then try building again.

##### Install

Make sure that adb debugging is enabled on your Android 5.0 (API 21) or
later device, then after building use the following command from your workspace
root to install the APK:
Make sure that adb debugging is enabled on your Android 5.0 (API 21) or later
device, then after building use the following command from your workspace root
to install the APK:

```bash
adb install -r bazel-bin/tensorflow/examples/android/tensorflow_demo.apk

@@ -11,7 +11,8 @@
// LINT.IfChange
def models = ['inception5h.zip',
              'object_detection/ssd_mobilenet_v1_android_export.zip',
              'stylize_v1.zip']
              'stylize_v1.zip',
              'speech_commands_conv_actions.zip']
// LINT.ThenChange(//tensorflow/examples/android/BUILD)

// Root URL for model archives

19  tensorflow/examples/android/res/drawable/border.xml  (Normal file)
@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="utf-8"?><!--
Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<shape xmlns:android="http://schemas.android.com/apk/res/android" android:shape="rectangle" >
    <solid android:color="#00000000" />
    <stroke android:width="1dip" android:color="#cccccc" />
</shape>
55  tensorflow/examples/android/res/layout/activity_speech.xml  (Normal file)
@@ -0,0 +1,55 @@
<?xml version="1.0" encoding="utf-8"?><!--
Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<FrameLayout
    xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context="org.tensorflow.demo.SpeechActivity">

    <TextView
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:text="Say one of the words below!"
        android:id="@+id/textView"
        android:textAlignment="center"
        android:layout_gravity="top"
        android:textSize="24dp"
        android:layout_marginTop="10dp"
        android:layout_marginLeft="10dp"
        />

    <ListView
        android:id="@+id/list_view"
        android:layout_width="240dp"
        android:layout_height="wrap_content"
        android:background="@drawable/border"
        android:layout_gravity="top|center_horizontal"
        android:textAlignment="center"
        android:layout_marginTop="100dp"
        />

    <Button
        android:id="@+id/quit"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:text="Quit"
        android:layout_gravity="bottom|center_horizontal"
        android:layout_marginBottom="10dp"
        />

</FrameLayout>
25  tensorflow/examples/android/res/layout/list_text_item.xml  (Normal file)
@@ -0,0 +1,25 @@
<?xml version="1.0" encoding="utf-8"?><!--
Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<TextView
    xmlns:android="http://schemas.android.com/apk/res/android"
    android:id="@+id/list_text_item"
    android:layout_width="match_parent"
    android:layout_height="wrap_content"
    android:text="TextView"
    android:textSize="24dp"
    android:textAlignment="center"
    android:gravity="center_horizontal"
    />
@@ -20,4 +20,5 @@
    <string name="activity_name_classification">TF Classify</string>
    <string name="activity_name_detection">TF Detect</string>
    <string name="activity_name_stylize">TF Stylize</string>
    <string name="activity_name_speech">TF Speech</string>
</resources>

@@ -0,0 +1,186 @@
/*
 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.tensorflow.demo;

import android.util.Log;
import android.util.Pair;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Deque;
import java.util.List;

/** Reads in results from an instantaneous audio recognition model and smoothes them over time. */
public class RecognizeCommands {
  // Configuration settings.
  private List<String> labels = new ArrayList<String>();
  private long averageWindowDurationMs;
  private float detectionThreshold;
  private int suppressionMs;
  private int minimumCount;
  private long minimumTimeBetweenSamplesMs;

  // Working variables.
  private Deque<Pair<Long, float[]>> previousResults = new ArrayDeque<Pair<Long, float[]>>();
  private String previousTopLabel;
  private int labelsCount;
  private long previousTopLabelTime;
  private float previousTopLabelScore;

  private static final String SILENCE_LABEL = "_silence_";
  private static final long MINIMUM_TIME_FRACTION = 4;

  public RecognizeCommands(
      List<String> inLabels,
      long inAverageWindowDurationMs,
      float inDetectionThreshold,
      int inSuppressionMS,
      int inMinimumCount,
      long inMinimumTimeBetweenSamplesMS) {
    labels = inLabels;
    averageWindowDurationMs = inAverageWindowDurationMs;
    detectionThreshold = inDetectionThreshold;
    suppressionMs = inSuppressionMS;
    minimumCount = inMinimumCount;
    labelsCount = inLabels.size();
    previousTopLabel = SILENCE_LABEL;
    previousTopLabelTime = Long.MIN_VALUE;
    previousTopLabelScore = 0.0f;
    minimumTimeBetweenSamplesMs = inMinimumTimeBetweenSamplesMS;
  }

  /** Holds information about what's been recognized. */
  public static class RecognitionResult {
    public final String foundCommand;
    public final float score;
    public final boolean isNewCommand;

    public RecognitionResult(String inFoundCommand, float inScore, boolean inIsNewCommand) {
      foundCommand = inFoundCommand;
      score = inScore;
      isNewCommand = inIsNewCommand;
    }
  }

  private static class ScoreForSorting implements Comparable<ScoreForSorting> {
    public final float score;
    public final int index;

    public ScoreForSorting(float inScore, int inIndex) {
      score = inScore;
      index = inIndex;
    }

    @Override
    public int compareTo(ScoreForSorting other) {
      if (this.score > other.score) {
        return -1;
      } else if (this.score < other.score) {
        return 1;
      } else {
        return 0;
      }
    }
  }

  public RecognitionResult processLatestResults(float[] currentResults, long currentTimeMS) {
    if (currentResults.length != labelsCount) {
      throw new RuntimeException(
          "The results for recognition should contain "
              + labelsCount
              + " elements, but there are "
              + currentResults.length);
    }

    if ((!previousResults.isEmpty()) && (currentTimeMS < previousResults.getFirst().first)) {
      throw new RuntimeException(
          "You must feed results in increasing time order, but received a timestamp of "
              + currentTimeMS
              + " that was earlier than the previous one of "
              + previousResults.getFirst().first);
    }

    final int howManyResults = previousResults.size();
    // Ignore any results that are coming in too frequently.
    if (howManyResults > 1) {
      final long timeSinceMostRecent = currentTimeMS - previousResults.getLast().first;
      if (timeSinceMostRecent < minimumTimeBetweenSamplesMs) {
        return new RecognitionResult(previousTopLabel, previousTopLabelScore, false);
      }
    }

    // Add the latest results to the end of the queue.
    previousResults.addLast(new Pair<Long, float[]>(currentTimeMS, currentResults));

    // Prune any earlier results that are too old for the averaging window.
    final long timeLimit = currentTimeMS - averageWindowDurationMs;
    while (previousResults.getFirst().first < timeLimit) {
      previousResults.removeFirst();
    }

    // If there are too few results, assume the result will be unreliable and
    // bail.
    final long earliestTime = previousResults.getFirst().first;
    final long samplesDuration = currentTimeMS - earliestTime;
    if ((howManyResults < minimumCount)
        || (samplesDuration < (averageWindowDurationMs / MINIMUM_TIME_FRACTION))) {
      Log.v("RecognizeResult", "Too few results");
      return new RecognitionResult(previousTopLabel, 0.0f, false);
    }

    // Calculate the average score across all the results in the window.
    float[] averageScores = new float[labelsCount];
    for (Pair<Long, float[]> previousResult : previousResults) {
      final float[] scoresTensor = previousResult.second;
      int i = 0;
      while (i < scoresTensor.length) {
        averageScores[i] += scoresTensor[i] / howManyResults;
        ++i;
      }
    }

    // Sort the averaged results in descending score order.
    ScoreForSorting[] sortedAverageScores = new ScoreForSorting[labelsCount];
    for (int i = 0; i < labelsCount; ++i) {
      sortedAverageScores[i] = new ScoreForSorting(averageScores[i], i);
    }
    Arrays.sort(sortedAverageScores);

    // See if the latest top score is enough to trigger a detection.
    final int currentTopIndex = sortedAverageScores[0].index;
    final String currentTopLabel = labels.get(currentTopIndex);
    final float currentTopScore = sortedAverageScores[0].score;
    // If we've recently had another label trigger, assume one that occurs too
    // soon afterwards is a bad result.
    long timeSinceLastTop;
    if (previousTopLabel.equals(SILENCE_LABEL) || (previousTopLabelTime == Long.MIN_VALUE)) {
      timeSinceLastTop = Long.MAX_VALUE;
    } else {
      timeSinceLastTop = currentTimeMS - previousTopLabelTime;
    }
    boolean isNewCommand;
    if ((currentTopScore > detectionThreshold) && (timeSinceLastTop > suppressionMs)) {
      previousTopLabel = currentTopLabel;
      previousTopLabelTime = currentTimeMS;
      previousTopLabelScore = currentTopScore;
      isNewCommand = true;
    } else {
      isNewCommand = false;
    }
    return new RecognitionResult(currentTopLabel, currentTopScore, isNewCommand);
  }
}
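
A minimal usage sketch for this class, assuming the same imports as above
(java.util.*, android.util.Log). The label list and scores here are
hypothetical, and the tuning values mirror the defaults in SpeechActivity
below, which is where the real wiring lives:

```java
List<String> labels = Arrays.asList("_silence_", "_unknown_", "yes", "no");
RecognizeCommands recognizer =
    new RecognizeCommands(
        labels,
        500,   // averageWindowDurationMs
        0.70f, // detectionThreshold
        1500,  // suppressionMs
        3,     // minimumCount
        30);   // minimumTimeBetweenSamplesMs
// One frame of model output; the scores must line up with the label list.
// Feed repeated frames; a command only triggers once enough frames are in
// the averaging window.
float[] scores = new float[] {0.05f, 0.05f, 0.85f, 0.05f};
RecognizeCommands.RecognitionResult result =
    recognizer.processLatestResults(scores, System.currentTimeMillis());
if (result.isNewCommand) {
  Log.i("Demo", "Heard " + result.foundCommand + " (score " + result.score + ")");
}
```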
@@ -0,0 +1,353 @@
/*
 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Demonstrates how to run an audio recognition model in Android.

This example loads a simple speech recognition model trained by the tutorial at
https://www.tensorflow.org/tutorials/audio_training

The model files should be downloaded automatically from the TensorFlow website,
but if you have a custom model you can update the LABEL_FILENAME and
MODEL_FILENAME constants to point to your own files.

The example application displays a list view with all of the known audio labels,
and highlights each one when it thinks it has detected one through the
microphone. The averaging of results to give a more reliable signal happens in
the RecognizeCommands helper class.
*/

package org.tensorflow.demo;

import android.animation.ValueAnimator;
import android.app.Activity;
import android.content.pm.PackageManager;
import android.media.AudioFormat;
import android.media.AudioRecord;
import android.media.MediaRecorder;
import android.os.Bundle;
import android.util.Log;
import android.view.View;
import android.widget.ArrayAdapter;
import android.widget.Button;
import android.widget.ListView;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.locks.ReentrantLock;
import org.tensorflow.contrib.android.TensorFlowInferenceInterface;
import org.tensorflow.demo.R;

/**
 * An activity that listens for audio and then uses a TensorFlow model to detect particular classes,
 * by default a small set of action words.
 */
public class SpeechActivity extends Activity {

  // Constants that control the behavior of the recognition code and model
  // settings. See the audio recognition tutorial for a detailed explanation of
  // all these, but you should customize them to match your training settings if
  // you are running your own model.
  private static final int SAMPLE_RATE = 16000;
  private static final int SAMPLE_DURATION_MS = 1000;
  private static final int RECORDING_LENGTH = (int) (SAMPLE_RATE * SAMPLE_DURATION_MS / 1000);
  private static final long AVERAGE_WINDOW_DURATION_MS = 500;
  private static final float DETECTION_THRESHOLD = 0.70f;
  private static final int SUPPRESSION_MS = 1500;
  private static final int MINIMUM_COUNT = 3;
  private static final long MINIMUM_TIME_BETWEEN_SAMPLES_MS = 30;
  private static final String LABEL_FILENAME = "file:///android_asset/conv_actions_labels.txt";
  private static final String MODEL_FILENAME = "file:///android_asset/conv_actions_frozen.pb";
  private static final String INPUT_DATA_NAME = "decoded_sample_data:0";
  private static final String SAMPLE_RATE_NAME = "decoded_sample_data:1";
  private static final String OUTPUT_SCORES_NAME = "labels_softmax";

  // UI elements.
  private static final int REQUEST_RECORD_AUDIO = 13;
  private Button quitButton;
  private ListView labelsListView;
  private static final String LOG_TAG = SpeechActivity.class.getSimpleName();

  // Working variables.
  short[] recordingBuffer = new short[RECORDING_LENGTH];
  int recordingOffset = 0;
  boolean shouldContinue = true;
  private Thread recordingThread;
  boolean shouldContinueRecognition = true;
  private Thread recognitionThread;
  private final ReentrantLock recordingBufferLock = new ReentrantLock();
  private TensorFlowInferenceInterface inferenceInterface;
  private List<String> labels = new ArrayList<String>();
  private List<String> displayedLabels = new ArrayList<>();
  private RecognizeCommands recognizeCommands = null;

  @Override
  protected void onCreate(Bundle savedInstanceState) {
    // Set up the UI.
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_speech);
    quitButton = (Button) findViewById(R.id.quit);
    quitButton.setOnClickListener(
        new View.OnClickListener() {
          @Override
          public void onClick(View view) {
            moveTaskToBack(true);
            android.os.Process.killProcess(android.os.Process.myPid());
            System.exit(1);
          }
        });
    labelsListView = (ListView) findViewById(R.id.list_view);

    // Load the labels for the model, but only display those that don't start
    // with an underscore.
    String actualFilename = LABEL_FILENAME.split("file:///android_asset/")[1];
    Log.i(LOG_TAG, "Reading labels from: " + actualFilename);
    BufferedReader br = null;
    try {
      br = new BufferedReader(new InputStreamReader(getAssets().open(actualFilename)));
      String line;
      while ((line = br.readLine()) != null) {
        labels.add(line);
        if (line.charAt(0) != '_') {
          displayedLabels.add(line.substring(0, 1).toUpperCase() + line.substring(1));
        }
      }
      br.close();
    } catch (IOException e) {
      throw new RuntimeException("Problem reading label file!", e);
    }

    // Build a list view based on these labels.
    ArrayAdapter<String> arrayAdapter =
        new ArrayAdapter<String>(this, R.layout.list_text_item, displayedLabels);
    labelsListView.setAdapter(arrayAdapter);

    // Set up an object to smooth recognition results to increase accuracy.
    recognizeCommands =
        new RecognizeCommands(
            labels,
            AVERAGE_WINDOW_DURATION_MS,
            DETECTION_THRESHOLD,
            SUPPRESSION_MS,
            MINIMUM_COUNT,
            MINIMUM_TIME_BETWEEN_SAMPLES_MS);

    // Load the TensorFlow model.
    inferenceInterface = new TensorFlowInferenceInterface(getAssets(), MODEL_FILENAME);

    // Start the recording and recognition threads.
    requestMicrophonePermission();
    startRecognition();
  }

  private void requestMicrophonePermission() {
    requestPermissions(
        new String[] {android.Manifest.permission.RECORD_AUDIO}, REQUEST_RECORD_AUDIO);
  }

  @Override
  public void onRequestPermissionsResult(
      int requestCode, String[] permissions, int[] grantResults) {
    if (requestCode == REQUEST_RECORD_AUDIO
        && grantResults.length > 0
        && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
      startRecording();
      startRecognition();
    }
  }

  public synchronized void startRecording() {
    if (recordingThread != null) {
      return;
    }
    shouldContinue = true;
    recordingThread =
        new Thread(
            new Runnable() {
              @Override
              public void run() {
                record();
              }
            });
    recordingThread.start();
  }

  public synchronized void stopRecording() {
    if (recordingThread == null) {
      return;
    }
    shouldContinue = false;
    recordingThread = null;
  }

  private void record() {
    android.os.Process.setThreadPriority(android.os.Process.THREAD_PRIORITY_AUDIO);

    // Estimate the buffer size we'll need for this device.
    int bufferSize =
        AudioRecord.getMinBufferSize(
            SAMPLE_RATE, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT);
    if (bufferSize == AudioRecord.ERROR || bufferSize == AudioRecord.ERROR_BAD_VALUE) {
      bufferSize = SAMPLE_RATE * 2;
    }
    short[] audioBuffer = new short[bufferSize / 2];

    AudioRecord record =
        new AudioRecord(
            MediaRecorder.AudioSource.DEFAULT,
            SAMPLE_RATE,
            AudioFormat.CHANNEL_IN_MONO,
            AudioFormat.ENCODING_PCM_16BIT,
            bufferSize);

    if (record.getState() != AudioRecord.STATE_INITIALIZED) {
      Log.e(LOG_TAG, "Audio Record can't initialize!");
      return;
    }

    record.startRecording();

    Log.v(LOG_TAG, "Start recording");

    // Loop, gathering audio data and copying it to a round-robin buffer.
    while (shouldContinue) {
      int numberRead = record.read(audioBuffer, 0, audioBuffer.length);
      int maxLength = recordingBuffer.length;
      int newRecordingOffset = recordingOffset + numberRead;
      int secondCopyLength = Math.max(0, newRecordingOffset - maxLength);
      int firstCopyLength = numberRead - secondCopyLength;
      // We store off all the data for the recognition thread to access. The ML
      // thread will copy out of this buffer into its own, while holding the
      // lock, so this should be thread safe.
      recordingBufferLock.lock();
      try {
        System.arraycopy(audioBuffer, 0, recordingBuffer, recordingOffset, firstCopyLength);
        System.arraycopy(audioBuffer, firstCopyLength, recordingBuffer, 0, secondCopyLength);
        recordingOffset = newRecordingOffset % maxLength;
      } finally {
        recordingBufferLock.unlock();
      }
    }

    record.stop();
    record.release();
  }

  public synchronized void startRecognition() {
    if (recognitionThread != null) {
      return;
    }
    shouldContinueRecognition = true;
    recognitionThread =
        new Thread(
            new Runnable() {
              @Override
              public void run() {
                recognize();
              }
            });
    recognitionThread.start();
  }

  public synchronized void stopRecognition() {
    if (recognitionThread == null) {
      return;
    }
    shouldContinueRecognition = false;
    recognitionThread = null;
  }

  private void recognize() {
    Log.v(LOG_TAG, "Start recognition");

    short[] inputBuffer = new short[RECORDING_LENGTH];
    float[] floatInputBuffer = new float[RECORDING_LENGTH];
    float[] outputScores = new float[labels.size()];
    String[] outputScoresNames = new String[] {OUTPUT_SCORES_NAME};
    int[] sampleRateList = new int[] {SAMPLE_RATE};

    // Loop, grabbing recorded data and running the recognition model on it.
    while (shouldContinueRecognition) {
      // The recording thread places data in this round-robin buffer, so lock to
      // make sure there's no writing happening and then copy it to our own
      // local version.
      recordingBufferLock.lock();
      try {
        int maxLength = recordingBuffer.length;
        int firstCopyLength = maxLength - recordingOffset;
        int secondCopyLength = recordingOffset;
        System.arraycopy(recordingBuffer, recordingOffset, inputBuffer, 0, firstCopyLength);
        System.arraycopy(recordingBuffer, 0, inputBuffer, firstCopyLength, secondCopyLength);
      } finally {
        recordingBufferLock.unlock();
      }

      // We need to feed in float values between -1.0f and 1.0f, so divide the
      // signed 16-bit inputs.
      for (int i = 0; i < RECORDING_LENGTH; ++i) {
        floatInputBuffer[i] = inputBuffer[i] / 32767.0f;
      }

      // Run the model.
      inferenceInterface.feed(SAMPLE_RATE_NAME, sampleRateList);
      inferenceInterface.feed(INPUT_DATA_NAME, floatInputBuffer, RECORDING_LENGTH, 1);
      inferenceInterface.run(outputScoresNames);
      inferenceInterface.fetch(OUTPUT_SCORES_NAME, outputScores);

      // Use the smoother to figure out if we've had a real recognition event.
      long currentTime = System.currentTimeMillis();
      final RecognizeCommands.RecognitionResult result =
          recognizeCommands.processLatestResults(outputScores, currentTime);

      runOnUiThread(
          new Runnable() {
            @Override
            public void run() {
              // If we do have a new command, highlight the right list entry.
              if (!result.foundCommand.startsWith("_") && result.isNewCommand) {
                int labelIndex = -1;
                for (int i = 0; i < labels.size(); ++i) {
                  if (labels.get(i).equals(result.foundCommand)) {
                    labelIndex = i;
                  }
                }
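                // Note: this assumes the label file starts with two
                // underscore-prefixed entries ("_silence_", "_unknown_") that
                // were filtered out of displayedLabels above, hence the offset.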
                final View labelView = (View) labelsListView.getChildAt(labelIndex - 2);
                ValueAnimator colorAnimation =
                    ValueAnimator.ofArgb(0x00b3ccff, 0xffb3ccff, 0x00b3ccff);
                colorAnimation.setDuration(750);
                colorAnimation.addUpdateListener(
                    new ValueAnimator.AnimatorUpdateListener() {
                      @Override
                      public void onAnimationUpdate(ValueAnimator animator) {
                        labelView.setBackgroundColor((int) animator.getAnimatedValue());
                      }
                    });
                colorAnimation.start();
              }
            }
          });
      try {
        // We don't need to run too frequently, so snooze for a bit.
        Thread.sleep(MINIMUM_TIME_BETWEEN_SAMPLES_MS);
      } catch (InterruptedException e) {
        // Ignore
      }
    }

    Log.v(LOG_TAG, "End recognition");
  }
}