Port TensorFlow Lite micro_speech example to Arduino and add Colab for training

PiperOrigin-RevId: 261360597
This commit is contained in:
Daniel Situnayake 2019-08-02 11:39:46 -07:00 committed by TensorFlower Gardener
parent 4deb12d63f
commit 91bcbd6922
13 changed files with 1014 additions and 148 deletions

View File

@@ -1,4 +1,4 @@
# Speech Commands Example
This is a basic speech recognition example. For more information, see the
tutorial at https://www.tensorflow.org/tutorials/sequences/audio_recognition.

View File

@@ -86,8 +86,8 @@ FLAGS = None
def main(_):
# Set the verbosity based on flags (default is INFO, so we see all messages)
tf.compat.v1.logging.set_verbosity(FLAGS.verbosity)
# Start a new TensorFlow session.
sess = tf.compat.v1.InteractiveSession()
@@ -454,5 +454,33 @@ if __name__ == '__main__':
default='mfcc',
help='Spectrogram processing mode. Can be "mfcc", "average", or "micro"')
# Function used to parse --verbosity argument
def verbosity_arg(value):
"""Parses verbosity argument.
Args:
value: A member of tf.logging.
Raises:
ArgumentTypeError: Not an expected value.
"""
value = value.upper()
if value == 'INFO':
return tf.compat.v1.logging.INFO
elif value == 'DEBUG':
return tf.compat.v1.logging.DEBUG
elif value == 'ERROR':
return tf.compat.v1.logging.ERROR
elif value == 'FATAL':
return tf.compat.v1.logging.FATAL
elif value == 'WARN':
return tf.compat.v1.logging.WARN
else:
raise argparse.ArgumentTypeError('Not an expected value')
parser.add_argument(
'--verbosity',
type=verbosity_arg,
default=tf.compat.v1.logging.INFO,
help='Log verbosity. Can be "INFO", "DEBUG", "ERROR", "FATAL", or "WARN"')
FLAGS, unparsed = parser.parse_known_args()
tf.compat.v1.app.run(main=main, argv=[sys.argv[0]] + unparsed)

View File

@@ -100,6 +100,7 @@ class TrainTest(test.TestCase):
'background_frequency': 0.8,
'eval_step_interval': 1,
'save_step_interval': 1,
'verbosity': tf.compat.v1.logging.INFO
}
return DictStruct(**flags)

View File

@@ -34,5 +34,5 @@ extern "C" void DebugLog(const char* s) {
DEBUG_SERIAL_OBJECT.begin(9600);
is_initialized = true;
}
DEBUG_SERIAL_OBJECT.print(s);
}

View File

@@ -1,81 +1,444 @@
# Micro Speech example
This example shows how you can use TensorFlow Lite to run a 20 kilobyte neural
network model to recognize keywords in speech. It's designed to run on systems
with very small amounts of memory such as microcontrollers and DSPs.
The example application listens to its surroundings with a microphone and
indicates when it has detected a word by lighting an LED or displaying data on a
screen, depending on the capabilities of the device.
The code has a small footprint (for example around 22 kilobytes on a Cortex
M3) and only uses about 10 kilobytes of RAM for working memory, so it's able to
run on systems like an STM32F103 with only 20 kilobytes of total SRAM and 64
kilobytes of Flash.
## Table of contents
- [Getting started](#getting-started)
- [Run on macOS](#run-on-macos)
- [Deploy to Arduino](#deploy-to-arduino)
- [Deploy to SparkFun Edge](#deploy-to-sparkfun-edge)
- [Deploy to STM32F746](#deploy-to-stm32f746)
- [Calculating the input to the neural network](#calculating-the-input-to-the-neural-network)
- [Create your own model](#create-your-own-model)
## Getting started
This code has been tested on the following devices:
* [SparkFun Edge](https://sparkfun.com/products/15170)
* [Arduino Nano 33 BLE Sense](https://store.arduino.cc/usa/nano-33-ble-sense-with-headers)
* [ST Microelectronics STM32F746G Discovery kit](https://os.mbed.com/platforms/ST-Discovery-F746NG/)
This readme contains instructions for building the code on Linux and macOS, and
deploying the code to the above microcontroller platforms and macOS.
### Build the tests
To compile and test this example on a desktop Linux or macOS machine, download
[the TensorFlow source code](https://github.com/tensorflow/tensorflow), `cd`
into the source directory from a terminal, and then run the following command:
```
make -f tensorflow/lite/experimental/micro/tools/make/Makefile test_micro_speech_test
```
This will take a few minutes, and downloads frameworks the code uses like
[CMSIS](https://developer.arm.com/embedded/cmsis) and
[flatbuffers](https://google.github.io/flatbuffers/). Once that process has
finished, you should see a series of files get compiled, followed by some
logging output from a test, which should conclude with `~~~ALL TESTS PASSED~~~`.
If you see this, it means that a small program has been built and run that loads
the trained TensorFlow model, runs some example inputs through it, and got the
expected outputs. This particular test runs spectrograms generated from
recordings of people saying "Yes" and "No", and checks that the network
correctly identifies them.
To understand how TensorFlow Lite does this, you can look at the source in
[micro_speech_test.cc](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc).
It's a fairly small amount of code that creates an interpreter, gets a handle to
a model that's been compiled into the program, and then invokes the interpreter
with the model and sample inputs.
### Run on macOS
The example contains an audio provider compatible with macOS. If you have access
to a Mac, you can run the example on your development machine.
First, use the following command to build it:
```
make -f tensorflow/lite/experimental/micro/tools/make/Makefile micro_speech
```
Once the build completes, you can run the example with the following command:
```
tensorflow/lite/experimental/micro/tools/make/gen/osx_x86_64/bin/micro_speech
```
You might see a pop-up asking for microphone access. If so, grant it, and the
program will start.
Try saying "yes" and "no". You should see output that looks like the following:
```
Heard yes (201) @4056ms
Heard no (205) @6448ms
Heard unknown (201) @13696ms
Heard yes (205) @15000ms
Heard yes (205) @16856ms
Heard unknown (204) @18704ms
Heard no (206) @21000ms
```
The number after each detected word is its score. By default, the recognize
commands component only considers matches as valid if their score is over 200,
so all of the scores you see will be at least 200.
The number after the score is the number of milliseconds since the program was
started.
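For reference, if you want to post-process these log lines while experimenting,
a minimal Python sketch (not part of the example code) that parses the
`Heard <word> (<score>) @<time>ms` format could look like this:
```python
# Hypothetical helper for parsing the example's log output; the line format is
# taken from the sample output shown above.
import re

LINE_RE = re.compile(r"Heard (\w+) \((\d+)\) @(\d+)ms")

def parse_detection(line):
  """Returns (label, score, milliseconds) or None for lines that don't match."""
  match = LINE_RE.match(line.strip())
  if not match:
    return None
  label, score, ms = match.groups()
  return label, int(score), int(ms)

print(parse_detection("Heard yes (201) @4056ms"))  # ('yes', 201, 4056)
```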
If you don't see any output, make sure your Mac's internal microphone is
selected in the Mac's *Sound* menu, and that its input volume is turned up high
enough.
## Deploy to Arduino
The following instructions will help you build and deploy this sample
to [Arduino](https://www.arduino.cc/) devices.
The sample has been tested with the following devices:
- [Arduino Nano 33 BLE Sense](https://store.arduino.cc/usa/nano-33-ble-sense-with-headers)
The Arduino Nano 33 BLE Sense is currently the only Arduino with a built-in
microphone. If you're using a different Arduino board and attaching your own
microphone, you'll need to implement your own `audio_provider.cc`. It also has a
built-in LED, which is used to indicate that a word has been recognized.
### Obtain and import the library
To use this sample application with Arduino, we've created an Arduino library
that includes it as an example that you can open in the Arduino IDE.
Download the current nightly build of the library: [micro_speech.zip](https://storage.googleapis.com/tensorflow-nightly/github/tensorflow/tensorflow/lite/experimental/micro/tools/make/gen/arduino_x86_64/prj/micro_speech/micro_speech.zip)
Next, import this zip file into the Arduino IDE by going to
`Sketch -> Include Library -> Add .ZIP Library...`.
#### Build the library
If you need to build the library from source (for example, if you're making
modifications to the code), run this command to generate a zip file containing
the required source files:
```
make -f tensorflow/lite/experimental/micro/tools/make/Makefile TARGET=arduino TAGS="" generate_micro_speech_arduino_library_zip
```
A zip file will be created at the following location:
```
tensorflow/lite/experimental/micro/tools/make/gen/arduino_x86_64/prj/micro_speech/micro_speech.zip
```
You can then import this zip file into the Arduino IDE by going to
`Sketch -> Include Library -> Add .ZIP Library...`.
### Load and run the example
Once the library has been added, go to `File -> Examples`. You should see an
example near the bottom of the list named `TensorFlowLite:micro_speech`. Select
it and click `micro_speech` to load the example.
Use the Arduino IDE to build and upload the example. Once it is running, you
should see the built-in LED on your device flashing. Saying the word "yes" will
cause the LED to remain on for 3 seconds. The current model has fairly low
accuracy, so you may have to repeat "yes" a few times.
The program also outputs inference results to the serial port, which appear as
follows:
```
Heard yes (201) @4056ms
Heard no (205) @6448ms
Heard unknown (201) @13696ms
Heard yes (205) @15000ms
```
The number after each detected word is its score. By default, the program only
considers matches as valid if their score is over 200, so all of the scores you
see will be at least 200.
When the program is run, it waits 5 seconds for a USB-serial connection to be
available. If there is no connection available, it will not output data. To see
the serial output in the Arduino desktop IDE, do the following:
1. Open the Arduino IDE
1. Connect the Arduino board to your computer via USB
1. Press the reset button on the Arduino board
1. Within 5 seconds, go to `Tools -> Serial Monitor` in the Arduino IDE. You may
have to try several times, since the board will take a moment to connect.
If you don't see any output, repeat the process again.
## Deploy to SparkFun Edge
The following instructions will help you build and deploy this sample on the
[SparkFun Edge development board](https://sparkfun.com/products/15170).
The program will toggle the blue LED on and off with each inference. It will
switch on the yellow LED when a "yes" is heard, the red LED when a "no" is
heard, and the green LED when an unknown command is heard.
The [AI on a microcontroller with TensorFlow Lite and SparkFun Edge](https://codelabs.developers.google.com/codelabs/sparkfun-tensorflow)
codelab walks through the deployment process in detail. The steps are also
summarized below.
### Compile the binary
The following command will download the required dependencies and then compile a
binary for the SparkFun Edge:
```
make -f tensorflow/lite/experimental/micro/tools/make/Makefile TARGET=sparkfun_edge micro_speech_bin
```
The binary will be created in the following location:
```
tensorflow/lite/experimental/micro/tools/make/gen/sparkfun_edge_cortex-m4/bin/micro_speech.bin
```
### Sign the binary
The binary must be signed with cryptographic keys to be deployed to the device.
We'll now run some commands that will sign our binary so it can be flashed to
the SparkFun Edge. The scripts we are using come from the Ambiq SDK, which is
downloaded when the `Makefile` is run.
Enter the following command to set up some dummy cryptographic keys we can use
for development:
```
cp tensorflow/lite/experimental/micro/tools/make/downloads/AmbiqSuite-Rel2.0.0/tools/apollo3_scripts/keys_info0.py \
tensorflow/lite/experimental/micro/tools/make/downloads/AmbiqSuite-Rel2.0.0/tools/apollo3_scripts/keys_info.py
```
Next, run the following command to create a signed binary:
```
python3 tensorflow/lite/experimental/micro/tools/make/downloads/AmbiqSuite-Rel2.0.0/tools/apollo3_scripts/create_cust_image_blob.py \
--bin tensorflow/lite/experimental/micro/tools/make/gen/sparkfun_edge_cortex-m4/bin/micro_speech.bin \
--load-address 0xC000 \
--magic-num 0xCB \
-o main_nonsecure_ota \
--version 0x0
```
This will create the file `main_nonsecure_ota.bin`. We'll now run another
command to create a final version of the file that can be used to flash our
device with the bootloader script we will use in the next step:
```
python3 tensorflow/lite/experimental/micro/tools/make/downloads/AmbiqSuite-Rel2.0.0/tools/apollo3_scripts/create_cust_wireupdate_blob.py \
--load-address 0x20000 \
--bin main_nonsecure_ota.bin \
-i 6 \
-o main_nonsecure_wire \
--options 0x1
```
You should now have a file called `main_nonsecure_wire.bin` in the directory
where you ran the commands. This is the file we'll be flashing to the device.
### Flash the binary
Next, attach the board to your computer via a USB-to-serial adapter.
**Note:** If you're using the [SparkFun Serial Basic Breakout](https://www.sparkfun.com/products/15096),
you should [install the latest drivers](https://learn.sparkfun.com/tutorials/sparkfun-serial-basic-ch340c-hookup-guide#drivers-if-you-need-them)
before you continue.
Once connected, assign the USB device name to an environment variable:
```
export DEVICENAME=put your device name here
```
Set another variable with the baud rate:
```
export BAUD_RATE=921600
```
Now, hold the button marked `14` on the device. While still holding the button,
hit the button marked `RST`. Continue holding the button marked `14` while
running the following command:
```
python3 tensorflow/lite/experimental/micro/tools/make/downloads/AmbiqSuite-Rel2.0.0/tools/apollo3_scripts/uart_wired_update.py \
-b ${BAUD_RATE} ${DEVICENAME} \
-r 1 \
-f main_nonsecure_wire.bin \
-i 6
```
You should see a long stream of output as the binary is flashed to the device.
Once you see the following lines, flashing is complete:
```
Sending Reset Command.
Done.
```
If you don't see these lines, flashing may have failed. Try running through the
steps in [Flash the binary](#flash-the-binary) again (you can skip over setting
the environment variables). If you continue to run into problems, follow the
[AI on a microcontroller with TensorFlow Lite and SparkFun Edge](https://codelabs.developers.google.com/codelabs/sparkfun-tensorflow)
codelab, which includes more comprehensive instructions for the flashing
process.
The binary should now be deployed to the device. Hit the button marked `RST` to
reboot the board.
You should see the device's blue LED flashing. The yellow LED should light when
a "yes" is heard, the red LED when a "no" is heard, and the green LED when an
unknown command is heard. The current model has fairly low accuracy, so you may
have to repeat "yes" a few times.
Debug information is logged by the board while the program is running. To view
it, establish a serial connection to the board using a baud rate of `115200`.
On macOS and Linux, the following command should work:
```
screen ${DEVICENAME} 115200
```
You will see a line output for every word that is detected:
```
Heard yes (201) @4056ms
Heard no (205) @6448ms
Heard unknown (201) @13696ms
Heard yes (205) @15000ms
```
The number after each detected word is its score. By default, the program only
considers matches as valid if their score is over 200, so all of the scores you
see will be at least 200.
To stop viewing the debug output with `screen`, hit `Ctrl+A`, immediately
followed by the `K` key, then hit the `Y` key.
## Deploy to STM32F746
The following instructions will help you build and deploy the sample to the
[STM32F7 discovery kit](https://os.mbed.com/platforms/ST-Discovery-F746NG/)
using [ARM Mbed](https://github.com/ARMmbed/mbed-cli).
Before we begin, you'll need the following:
- STM32F7 discovery kit board
- Mini-USB cable
- ARM Mbed CLI ([installation instructions](https://os.mbed.com/docs/mbed-os/v5.12/tools/installation-and-setup.html))
- Python 2.7 and pip
Since Mbed requires a special folder structure for projects, we'll first run a
command to generate a subfolder containing the required source files in this
structure:
```
make -f tensorflow/lite/experimental/micro/tools/make/Makefile TARGET=mbed TAGS="CMSIS disco_f746ng" generate_micro_speech_mbed_project
```
This will result in the creation of a new folder:
```
tensorflow/lite/experimental/micro/tools/make/gen/mbed_cortex-m4/prj/micro_speech/mbed
```
This folder contains all of the example's dependencies structured in the correct
way for Mbed to be able to build it.
Change into the directory and run the following commands, making sure you are
using Python 2.7.15.
First, tell Mbed that the current directory is the root of an Mbed project:
```
mbed config root .
```
Next, tell Mbed to download the dependencies and prepare to build:
```
mbed deploy
```
By default, Mbed will build the project using C++98. However, TensorFlow Lite
requires C++11. Run the following Python snippet to modify the Mbed
configuration files so that it uses C++11:
```
python -c 'import fileinput, glob;
for filename in glob.glob("mbed-os/tools/profiles/*.json"):
for line in fileinput.input(filename, inplace=True):
print line.replace("\"-std=gnu++98\"","\"-std=c++11\", \"-fpermissive\"")'
```
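The snippet above requires Python 2. If you only have Python 3 available, the
following sketch performs an equivalent edit (this is a convenience alternative,
not the officially documented command):
```python
# Python 3 variant of the profile patch above; assumes the same
# mbed-os/tools/profiles/*.json layout as the Python 2 snippet.
import fileinput
import glob

for filename in glob.glob("mbed-os/tools/profiles/*.json"):
  for line in fileinput.input(filename, inplace=True):
    # fileinput with inplace=True redirects stdout into the file being edited.
    print(line.replace('"-std=gnu++98"', '"-std=c++11", "-fpermissive"'), end="")
```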
Finally, run the following command to compile:
```
mbed compile -m DISCO_F746NG -t GCC_ARM
```
This should result in a binary at the following path:
```
./BUILD/DISCO_F746NG/GCC_ARM/mbed.bin
```
To deploy, plug in your STM board and copy the file to it. On macOS, you can do
this with the following command:
```
cp ./BUILD/DISCO_F746NG/GCC_ARM/mbed.bin /Volumes/DIS_F746NG/
```
Copying the file will initiate the flashing process.
The inference results are logged by the board while the program is running.
To view them, establish a serial connection to the board
using a baud rate of `9600`. On macOS and Linux, the following command should
work, replacing `/dev/tty.devicename` with the name of your device as it appears
in `/dev`:
```
screen /dev/tty.devicename 9600
```
You will see a line output for every word that is detected:
```
Heard yes (201) @4056ms
Heard no (205) @6448ms
Heard unknown (201) @13696ms
Heard yes (205) @15000ms
```
The number after each detected word is its score. By default, the program only
considers matches as valid if their score is over 200, so all of the scores you
see will be at least 200.
To stop viewing the debug output with `screen`, hit `Ctrl+A`, immediately
followed by the `K` key, then hit the `Y` key.
## Calculating the input to the neural network
The TensorFlow Lite model doesn't take in raw audio sample data. Instead it
works with spectrograms, which are two dimensional arrays that are made up of
@@ -88,54 +451,91 @@ yet included in this sample code.
The recipe for creating the spectrogram data is that each frequency slice is
created by running an FFT across a 30ms section of the audio sample data. The
input samples are treated as being between -1 and +1 as real values (encoded as
-32,768 and 32,767 in 16-bit signed integer samples).
This results in an FFT with 256 entries. Every sequence of six entries is
averaged together, giving a total of 43 frequency buckets in the final slice.
The results are stored as unsigned eight-bit values, where 0 represents a real
number of zero, and 255 represents 127.5 as a real number.
Each adjacent frequency entry is stored in ascending memory order (frequency
bucket 0 at data[0], bucket 1 at data[1], etc). The window for the frequency
analysis is then moved forward by 20ms, and the process repeated, storing the
results in the next memory row (for example bucket 0 in this moved window would
be in data[43 + 0], etc). This process happens 49 times in total, producing a
single channel image that is 43 pixels wide, and 49 rows high.
Here's an illustration of the process:
![spectrogram diagram](https://storage.googleapis.com/download.tensorflow.org/example_images/spectrogram_diagram.png)
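For reference, here is a rough NumPy sketch of the "average" recipe described
above. It is only an illustration of the steps; the real features are produced
by the `wav_to_features.py` script shown below, and details such as the FFT
length and scaling here are assumptions based on the description, not the
production code:
```python
# Illustrative sketch of the spectrogram recipe described above (assumptions:
# 16 kHz audio, 30 ms windows, 20 ms stride, 256 FFT entries pooled in sixes).
import numpy as np

SAMPLE_RATE = 16000   # 16 kHz input audio
WINDOW_SAMPLES = 480  # 30 ms window
STRIDE_SAMPLES = 320  # 20 ms stride
FFT_BINS = 256        # entries kept from each FFT
POOL = 6              # average every six entries -> 43 buckets
NUM_ROWS = 49         # 49 windows, roughly one second of audio

def audio_to_features(samples_int16):
  """Turns about one second of int16 audio into a 49x43 uint8 feature image."""
  # Treat 16-bit samples as real values between -1 and +1.
  samples = samples_int16.astype(np.float32) / 32768.0
  rows = []
  for row in range(NUM_ROWS):
    start = row * STRIDE_SAMPLES
    window = samples[start:start + WINDOW_SAMPLES]
    spectrum = np.abs(np.fft.rfft(window, n=512))[:FFT_BINS]
    # Average every sequence of six entries into one frequency bucket.
    buckets = [spectrum[i:i + POOL].mean() for i in range(0, FFT_BINS, POOL)]
    # Quantize so that 0 maps to a real value of 0 and 255 maps to 127.5.
    rows.append(np.clip(np.array(buckets) * 2.0, 0, 255).astype(np.uint8))
  # Rows are stored one after another in memory: bucket 0 of the second window
  # lands at data[43 + 0], matching the layout described above.
  return np.stack(rows)  # shape (49, 43)
```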
The test data files have been generated by running the following commands. See
the training instructions below to learn how to set up the environment to run
them.
```
python tensorflow/tensorflow/examples/speech_commands/wav_to_features.py \
--input_wav=/content/speech_dataset/yes/f2e59fea_nohash_1.wav \
--output_c_file=/content/yes_features_data.cc \
--window_stride=20 --preprocess=average --quantize=1
python tensorflow/tensorflow/examples/speech_commands/wav_to_features.py \
--input_wav=/content/speech_dataset/no/f9643d42_nohash_4.wav \
--output_c_file=/content/no_features_data.cc \
--window_stride=20 --preprocess=average --quantize=1
```
## Train your own model
The neural network model used in this example was built using the
[TensorFlow speech commands tutorial](https://www.tensorflow.org/tutorials/sequences/audio_recognition).
You can retrain it to recognize any combination of words from this list:
```
yes
no
up
down
left
right
on
off
stop
go
```
### Use Google Colaboratory
The easiest way to train your own speech model is by running [`train_speech_model.ipynb`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/micro/examples/micro_speech/train_speech_model.ipynb)
in Google Colaboratory. This avoids the need to install dependencies, and allows
the use of GPUs for training. Total training time will be 1.5-2hrs.
We strongly recommend trying this approach first.
### Use your local machine
You can use the following commands to train the model on your own machine.
It may be easiest to run these commands in a
[TensorFlow Docker container](https://www.tensorflow.org/install/docker). A full
build may take a couple of hours.
You must currently use the TensorFlow Nightly `pip` package. This version is
confirmed to work:
```
tf-nightly-gpu==1.15.0.dev20190729
```
To begin training, run the following:
```
python tensorflow/tensorflow/examples/speech_commands/train.py \
--model_architecture=tiny_conv --window_stride=20 --preprocess=micro \
--wanted_words="yes,no" --silence_percentage=25 --unknown_percentage=25 --quantize=1
--wanted_words="yes,no" --silence_percentage=25 --unknown_percentage=25 \
--quantize=1 --verbosity=WARN --how_many_training_steps="15000,3000" \
--learning_rate="0.001,0.0001" --summaries_dir=/tmp/retrain_logs \
--data_dir=/tmp/speech_dataset --train_dir=/tmp/speech_commands_train
```
If you see a compiling error on older machines, try leaving out the `--copt`
@@ -144,7 +544,7 @@ extensions. The training process is likely to take a couple of hours. Once it
has completed, the next step is to freeze the variables:
```
python tensorflow/tensorflow/examples/speech_commands/freeze.py \
--model_architecture=tiny_conv --window_stride=20 --preprocess=micro \
--wanted_words="yes,no" --quantize=1 --output_file=/tmp/tiny_conv.pb \
--start_checkpoint=/tmp/speech_commands_train/tiny_conv.ckpt-18000
@@ -153,10 +553,10 @@ bazel run tensorflow/examples/speech_commands:freeze -- \
The next step is to create a TensorFlow Lite file from the frozen graph:
```
toco \
--graph_def_file=/tmp/tiny_conv.pb --output_file=/tmp/tiny_conv.tflite \
--input_shapes=1,1960 --input_arrays=Reshape_1 --output_arrays='labels_softmax' \
--inference_type=QUANTIZED_UINT8 --mean_values=0 --std_dev_values=9.8077
```
Finally, convert the file into a C source file that can be compiled into an
@@ -166,45 +566,7 @@ embedded system:
xxd -i /tmp/tiny_conv.tflite > /tmp/tiny_conv_micro_features_model_data.cc
```
Next, we need to update `tiny_conv_micro_features_model_data.cc` so that it is
compatible with the `micro_features` sample code.
First, open the file. The top two lines should look approximately as follows
(the exact hex values may be different):
```cpp
unsigned char _tmp_tiny_conv_tflite[] = {
0x18, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x00, 0x00, 0x0e, 0x00,
```
You need to add the include from the following snippet, and tweak the variable
declaration. Don't change the hex values, though:
```cpp
#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/tiny_conv_micro_features_model_data.h"
const unsigned char g_tiny_conv_micro_features_model_data[] = {
0x18, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x00, 0x00, 0x0e, 0x00,
```
Next, go to the very bottom of the file and find the variable named
`_tmp_tiny_conv_tflite_len`.
```cpp
unsigned int _tmp_tiny_conv_tflite_len = 19800;
```
Change the declaration as follows, but do not change the number assigned to it,
even if your number is different from the one in this guide.
```cpp
const int g_tiny_conv_micro_features_model_data_len = 19800;
```
Finally, save the file, then copy the `tiny_conv_micro_features_model_data.cc`
file into the `micro_features/` subdirectory of your `tf_microspeech/` project.
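If you'd rather script these edits than make them by hand, a small Python sketch
along the following lines can apply them (the file path and variable names here
assume the exact `xxd` output described above; adjust them if yours differ):
```python
# Convenience sketch (not part of the official instructions) that applies the
# include and renaming edits described above to the xxd output file.
SRC = "/tmp/tiny_conv_micro_features_model_data.cc"
INCLUDE = ('#include "tensorflow/lite/experimental/micro/examples/micro_speech/'
           'micro_features/tiny_conv_micro_features_model_data.h"\n\n')

with open(SRC) as f:
  text = f.read()

# Rename the array and length variables without touching the hex values.
text = text.replace(
    "unsigned char _tmp_tiny_conv_tflite[]",
    "const unsigned char g_tiny_conv_micro_features_model_data[]")
text = text.replace(
    "unsigned int _tmp_tiny_conv_tflite_len",
    "const int g_tiny_conv_micro_features_model_data_len")

with open(SRC, "w") as f:
  f.write(INCLUDE + text)
```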
### Use Google Cloud
If you want to train your model in Google Cloud you can do so by using
pre-configured Deep Learning images.
@@ -231,28 +593,8 @@ As soon as the instance has been created you can SSH to it (as a jupyter user!):
gcloud compute ssh "jupyter@${INSTANCE_NAME}"
```
Finally, follow the instructions in the previous section to train the model. Do
not forget to remove the instance when training is done:
```
gcloud compute instances delete "${INSTANCE_NAME}" --zone="${ZONE}"

View File

@@ -0,0 +1,118 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.h"
#include "PDM.h"
#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h"
namespace {
bool g_is_audio_initialized = false;
// An internal buffer able to fit 16x our sample size
constexpr int kAudioCaptureBufferSize = DEFAULT_PDM_BUFFER_SIZE * 16;
int16_t g_audio_capture_buffer[kAudioCaptureBufferSize];
// A buffer that holds our output
int16_t g_audio_output_buffer[kMaxAudioSampleSize];
// Mark as volatile so we can check in a while loop to see if
// any samples have arrived yet.
volatile int32_t g_latest_audio_timestamp = 0;
} // namespace
void CaptureSamples() {
// This is how many bytes of new data we have each time this is called
const int number_of_samples = DEFAULT_PDM_BUFFER_SIZE;
// Calculate what timestamp the last audio sample represents
const int32_t time_in_ms =
g_latest_audio_timestamp +
(number_of_samples / (kAudioSampleFrequency / 1000));
// Determine the index, in the history of all samples, of the last sample
const int32_t start_sample_offset =
g_latest_audio_timestamp * (kAudioSampleFrequency / 1000);
// Determine the index of this sample in our ring buffer
const int capture_index = start_sample_offset % kAudioCaptureBufferSize;
// Read the data to the correct place in our buffer
PDM.read(g_audio_capture_buffer + capture_index, DEFAULT_PDM_BUFFER_SIZE);
// This is how we let the outside world know that new audio data has arrived.
g_latest_audio_timestamp = time_in_ms;
}
TfLiteStatus InitAudioRecording(tflite::ErrorReporter* error_reporter) {
// Hook up the callback that will be called with each sample
PDM.onReceive(CaptureSamples);
// Start listening for audio: MONO @ 16KHz with gain at 20
PDM.begin(1, kAudioSampleFrequency);
PDM.setGain(20);
// Block until we have our first audio sample
while (!g_latest_audio_timestamp) {
}
return kTfLiteOk;
}
TfLiteStatus GetAudioSamples(tflite::ErrorReporter* error_reporter,
int start_ms, int duration_ms,
int* audio_samples_size, int16_t** audio_samples) {
// Set everything up to start receiving audio
if (!g_is_audio_initialized) {
TfLiteStatus init_status = InitAudioRecording(error_reporter);
if (init_status != kTfLiteOk) {
return init_status;
}
g_is_audio_initialized = true;
}
// This next part should only be called when the main thread notices that the
// latest audio sample data timestamp has changed, so that there's new data
// in the capture ring buffer. The ring buffer will eventually wrap around and
// overwrite the data, but the assumption is that the main thread is checking
// often enough and the buffer is large enough that this call will be made
// before that happens.
// Determine the index, in the history of all samples, of the first
// sample we want
const int start_offset = start_ms * (kAudioSampleFrequency / 1000);
// Determine how many samples we want in total
const int duration_sample_count =
duration_ms * (kAudioSampleFrequency / 1000);
for (int i = 0; i < duration_sample_count; ++i) {
// For each sample, transform its index in the history of all samples into
// its index in g_audio_capture_buffer
const int capture_index = (start_offset + i) % kAudioCaptureBufferSize;
// Write the sample to the output buffer
g_audio_output_buffer[i] = g_audio_capture_buffer[capture_index];
}
// Set pointers to provide access to the audio
*audio_samples_size = kMaxAudioSampleSize;
*audio_samples = g_audio_output_buffer;
return kTfLiteOk;
}
int32_t LatestAudioTimestamp() { return g_latest_audio_timestamp; }

View File

@@ -0,0 +1,61 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/micro/examples/micro_speech/command_responder.h"
#include "Arduino.h"
// Toggles the LED every inference, and keeps it on for ~3 seconds if a "yes"
// was heard
void RespondToCommand(tflite::ErrorReporter* error_reporter,
int32_t current_time, const char* found_command,
uint8_t score, bool is_new_command) {
static bool is_initialized = false;
if (!is_initialized) {
pinMode(LED_BUILTIN, OUTPUT);
is_initialized = true;
}
static int32_t last_yes_time = 0;
static int count = 0;
if (is_new_command) {
error_reporter->Report("Heard %s (%d) @%dms", found_command, score,
current_time);
// If we heard a "yes", switch on an LED and store the time.
if (found_command[0] == 'y') {
last_yes_time = current_time;
digitalWrite(LED_BUILTIN, HIGH);
}
}
// If last_yes_time is non-zero but was >3 seconds ago, zero it
// and switch off the LED.
if (last_yes_time != 0) {
if (last_yes_time < (current_time - 3000)) {
last_yes_time = 0;
digitalWrite(LED_BUILTIN, LOW);
}
// If it is non-zero but <3 seconds ago, do nothing.
return;
}
// Otherwise, toggle the LED every time an inference is performed.
++count;
if (count & 1) {
digitalWrite(LED_BUILTIN, HIGH);
} else {
digitalWrite(LED_BUILTIN, LOW);
}
}

View File

@@ -95,8 +95,10 @@ TfLiteStatus FeatureProvider::PopulateFeatureData(
const int32_t slice_start_ms = (new_step * kFeatureSliceStrideMs);
int16_t* audio_samples = nullptr;
int audio_samples_size = 0;
// TODO(petewarden): Fix bug that leads to non-zero slice_start_ms
GetAudioSamples(error_reporter, (slice_start_ms > 0 ? slice_start_ms : 0),
kFeatureSliceDurationMs, &audio_samples_size,
&audio_samples);
if (audio_samples_size < kMaxAudioSampleSize) {
error_reporter->Report("Audio data size %d too small, want %d",
audio_samples_size, kMaxAudioSampleSize);

View File

@@ -0,0 +1,293 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "speech_commands training",
"version": "0.3.2",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "pO4-CY_TCZZS",
"colab_type": "text"
},
"source": [
"# Train a Simple Audio Recognition model for microcontroller use"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "BaFfr7DHRmGF",
"colab_type": "text"
},
"source": [
"This notebook demonstrates how to train a 20kb [Simple Audio Recognition](https://www.tensorflow.org/tutorials/sequences/audio_recognition) model for [TensorFlow Lite for Microcontrollers](https://tensorflow.org/lite/microcontrollers/overview). It will produce the same model used in the [micro_speech](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/experimental/micro/examples/micro_speech) example application.\n",
"\n",
"The model is designed to be used with [Google Colaboratory](https://colab.research.google.com).\n",
"\n",
"<table class=\\\"tfo-notebook-buttons\\\" align=\\\"left\\\">\n",
" <td>\n",
" <a target=\\\"_blank\\\" href=\\\"https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/micro/examples/micro_speech/train_speech_model.ipynb\\\"><img src=\\\"https://www.tensorflow.org/images/colab_logo_32px.png\\\" />Run in Google Colab</a>\n",
" </td>\n",
" <td>\n",
" <a target=\\\"_blank\\\" href=\\\"https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/micro/examples/micro_speech/train_speech_model.ipynb\\\"><img src=\\\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\\\" />View source on GitHub</a>\n",
" </td>\n",
"</table>\n",
"\n",
"\n",
"\n",
"The notebook runs Python scripts to train and freeze the model, and uses the TensorFlow Lite converter to convert it for use with TensorFlow Lite for Microcontrollers.\n",
"\n",
"**Training is much faster using GPU acceleration.** Before you proceed, ensure you are using a GPU runtime by going to **Runtime -> Change runtime type** and selecting **GPU**. Training 18,000 iterations will take 1.5-2 hours on a GPU runtime.\n",
"\n",
"## Configure training\n",
"\n",
"The following `os.environ` lines can be customized to set the words that will be trained for, and the steps and learning rate of the training. The default values will result in the same model that is used in the micro_speech example. Run the cell to set the configuration:"
]
},
{
"cell_type": "code",
"metadata": {
"id": "ludfxbNIaegy",
"colab_type": "code",
"colab": {}
},
"source": [
"import os\n",
"\n",
"# A comma-delimited list of the words you want to train for.\n",
"# The options are: yes,no,up,down,left,right,on,off,stop,go\n",
"# All other words will be used to train an \"unknown\" category.\n",
"os.environ[\"WANTED_WORDS\"] = \"yes,no\"\n",
"\n",
"# The number of steps and learning rates can be specified as comma-separated\n",
"# lists to define the rate at each stage. For example,\n",
"# TRAINING_STEPS=15000,3000 and LEARNING_RATE=0.001,0.0001\n",
"# will run 18,000 training loops in total, with a rate of 0.001 for the first\n",
"# 15,000, and 0.0001 for the final 3,000.\n",
"os.environ[\"TRAINING_STEPS\"]=\"15000,3000\"\n",
"os.environ[\"LEARNING_RATE\"]=\"0.001,0.0001\"\n",
"\n",
"# Calculate the total number of steps, which is used to identify the checkpoint\n",
"# file name.\n",
"total_steps = sum(map(lambda string: int(string),\n",
" os.environ[\"TRAINING_STEPS\"].split(\",\")))\n",
"os.environ[\"TOTAL_STEPS\"] = str(total_steps)\n",
"\n",
"# Print the configuration to confirm it\n",
"!echo \"Training these words: ${WANTED_WORDS}\"\n",
"!echo \"Training steps in each stage: ${TRAINING_STEPS}\"\n",
"!echo \"Learning rate in each stage: ${LEARNING_RATE}\"\n",
"!echo \"Total number of training steps: ${TOTAL_STEPS}\"\n"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "gCgeOpvY9pAi",
"colab_type": "text"
},
"source": [
"## Install dependencies\n",
"\n",
"Next, we'll install a GPU build of TensorFlow, so we can use GPU acceleration for training. We also clone the TensorFlow repository, which contains the scripts that train and freeze the model."
]
},
{
"cell_type": "code",
"metadata": {
"id": "Nd1iM1o2ymvA",
"colab_type": "code",
"colab": {}
},
"source": [
"# Install the nightly build\n",
"!pip install -q tf-nightly-gpu==1.15.0.dev20190729\n",
"!git clone https://github.com/dansitu/tensorflow"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "aV_0qkYh98LD",
"colab_type": "text"
},
"source": [
"## Load TensorBoard\n",
"\n",
"Now, set up TensorBoard so that we can graph our accuracy and loss as training proceeds."
]
},
{
"cell_type": "code",
"metadata": {
"id": "yZArmzT85SLq",
"colab_type": "code",
"colab": {}
},
"source": [
"# Delete any old logs from previous runs\n",
"!rm -rf /content/retrain_logs\n",
"# Load TensorBoard\n",
"%load_ext tensorboard\n",
"%tensorboard --logdir /content/retrain_logs"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "x1J96Ron-O4R",
"colab_type": "text"
},
"source": [
"## Begin training\n",
"\n",
"Next, run the following script to begin training. The script will first download the training data:"
]
},
{
"cell_type": "code",
"metadata": {
"id": "VJsEZx6lynbY",
"colab_type": "code",
"colab": {}
},
"source": [
"!python tensorflow/tensorflow/examples/speech_commands/train.py \\\n",
"--model_architecture=tiny_conv --window_stride=20 --preprocess=micro \\\n",
"--wanted_words=${WANTED_WORDS} --silence_percentage=25 --unknown_percentage=25 \\\n",
"--quantize=1 --verbosity=WARN --how_many_training_steps=${TRAINING_STEPS} \\\n",
"--learning_rate=${LEARNING_RATE} --summaries_dir=/content/retrain_logs \\\n",
"--data_dir=/content/speech_dataset --train_dir=/content/speech_commands_train \\\n"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "XQUJLrdS-ftl",
"colab_type": "text"
},
"source": [
"## Freeze the graph\n",
"\n",
"Once training is complete, run the following cell to freeze the graph."
]
},
{
"cell_type": "code",
"metadata": {
"id": "xyc3_eLh9sAg",
"colab_type": "code",
"colab": {}
},
"source": [
"!python tensorflow/tensorflow/examples/speech_commands/freeze.py \\\n",
"--model_architecture=tiny_conv --window_stride=20 --preprocess=micro \\\n",
"--wanted_words=${WANTED_WORDS} --quantize=1 --output_file=/content/tiny_conv.pb \\\n",
"--start_checkpoint=/content/speech_commands_train/tiny_conv.ckpt-${TOTAL_STEPS}"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "_DBGDxVI-nKG",
"colab_type": "text"
},
"source": [
"## Convert the model\n",
"\n",
"Run this cell to use the TensorFlow Lite converter to convert the frozen graph into the TensorFlow Lite format, fully quantized for use with embedded devices."
]
},
{
"cell_type": "code",
"metadata": {
"id": "lBj_AyCh1cC0",
"colab_type": "code",
"colab": {}
},
"source": [
"!toco \\\n",
"--graph_def_file=/content/tiny_conv.pb --output_file=/content/tiny_conv.tflite \\\n",
"--input_shapes=1,1960 --input_arrays=Reshape_1 --output_arrays='labels_softmax' \\\n",
"--inference_type=QUANTIZED_UINT8 --mean_values=0 --std_dev_values=9.8077"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "dt6Zqbxu-wIi",
"colab_type": "text"
},
"source": [
"The following cell will print the model size, which will be under 20 kilobytes."
]
},
{
"cell_type": "code",
"metadata": {
"id": "XohZOTjR8ZyE",
"colab_type": "code",
"colab": {}
},
"source": [
"import os\n",
"model_size = os.path.getsize(\"/content/tiny_conv.tflite\")\n",
"print(\"Model is %d bytes\" % model_size)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "2pQnN0i_-0L2",
"colab_type": "text"
},
"source": [
"Finally, we use xxd to transform the model into a source file that can be included in a C++ project and loaded by TensorFlow Lite for Microcontrollers."
]
},
{
"cell_type": "code",
"metadata": {
"id": "eoYyh0VU8pca",
"colab_type": "code",
"colab": {}
},
"source": [
"# Install xxd if it is not available\n",
"!apt-get -qq install xxd\n",
"# Save the file as a C source file\n",
"!xxd -i /content/tiny_conv.tflite > /content/tiny_conv.cc\n",
"# Print the source file\n",
"!cat /content/tiny_conv.cc"
],
"execution_count": 0,
"outputs": []
}
]
}

View File

@@ -25,4 +25,4 @@ curl -L -O "https://downloads.arduino.cc/arduino-cli/arduino-cli-latest-linux64.
tar xjf arduino-cli-latest-linux64.tar.bz2
/tmp/arduino-cli core update-index
/tmp/arduino-cli core install arduino:mbed

View File

@@ -23,15 +23,22 @@ set -e
ARDUINO_HOME_DIR=${HOME}/Arduino
ARDUINO_LIBRARIES_DIR=${ARDUINO_HOME_DIR}/libraries
ARDUINO_CLI_TOOL=/tmp/arduino-cli
# Necessary due to a bug in arduino-cli that causes it to build files in pwd
TEMP_BUILD_DIR=/tmp/tflite-arduino-build
LIBRARY_ZIP=${1}
rm -rf ${ARDUINO_LIBRARIES_DIR}
rm -rf ${TEMP_BUILD_DIR}
mkdir -p ${ARDUINO_HOME_DIR}/libraries
mkdir -p ${TEMP_BUILD_DIR}
unzip -q ${LIBRARY_ZIP} -d ${ARDUINO_LIBRARIES_DIR}
# Change into this dir before running the tests
cd ${TEMP_BUILD_DIR}
for f in ${ARDUINO_LIBRARIES_DIR}/*/examples/*/*.ino; do
${ARDUINO_CLI_TOOL} compile --fqbn arduino:mbed:nano33ble $f
done

View File

@@ -137,7 +137,7 @@ $(PRJDIR)$(2)/arduino/src/third_party/kissfft/kiss_fft.h: tensorflow/lite/experi
@mkdir -p $$(dir $$@)
@python tensorflow/lite/experimental/micro/tools/make/transform_arduino_source.py \
--third_party_headers="$(4)" < $$< | \
sed -E 's@#include <string.h>@//#include <string.h> /* Patched by helper_functions.inc for Arduino compatibility */@g' > $$@
$(PRJDIR)$(2)/arduino/%: tensorflow/lite/experimental/micro/tools/make/templates/%
@mkdir -p $$(dir $$@)

View File

@@ -18,11 +18,25 @@ limitations under the License.
// Include an empty header so that Arduino knows to build the TF Lite library.
#include <TensorFlowLite.h>
// TensorFlow Lite defines its own main function
extern int tflite_micro_main(int argc, char* argv[]);
// So that the example works with or without a serial connection,
// wait to see one for 5 seconds before giving up.
void waitForSerial() {
int start = millis();
while(!Serial) {
int diff = millis() - start;
if (diff > 5000) break;
}
}
// Runs once when the program starts
void setup() {
waitForSerial();
tflite_micro_main(0, NULL);
}
// Leave the loop unused
void loop() {
}