Port TensorFlow Lite micro_speech example to Arduino and add Colab for training
PiperOrigin-RevId: 261360597
This commit is contained in:
parent
4deb12d63f
commit
91bcbd6922
@ -1,4 +1,4 @@
|
||||
# Speech Commands Example
|
||||
|
||||
This is a basic speech recognition example. For more information, see the
|
||||
tutorial at https://www.tensorflow.org/versions/master/tutorials/audio_recognition.
|
||||
tutorial at https://www.tensorflow.org/tutorials/sequences/audio_recognition.
|
||||
|
@ -86,8 +86,8 @@ FLAGS = None
|
||||
|
||||
|
||||
def main(_):
|
||||
# We want to see all the logging messages for this tutorial.
|
||||
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
|
||||
# Set the verbosity based on flags (default is INFO, so we see all messages)
|
||||
tf.compat.v1.logging.set_verbosity(FLAGS.verbosity)
|
||||
|
||||
# Start a new TensorFlow session.
|
||||
sess = tf.compat.v1.InteractiveSession()
|
||||
@ -454,5 +454,33 @@ if __name__ == '__main__':
|
||||
default='mfcc',
|
||||
help='Spectrogram processing mode. Can be "mfcc", "average", or "micro"')
|
||||
|
||||
# Function used to parse --verbosity argument
|
||||
def verbosity_arg(value):
|
||||
"""Parses verbosity argument.
|
||||
|
||||
Args:
|
||||
value: A member of tf.logging.
|
||||
Raises:
|
||||
ArgumentTypeError: Not an expected value.
|
||||
"""
|
||||
value = value.upper()
|
||||
if value == 'INFO':
|
||||
return tf.compat.v1.logging.INFO
|
||||
elif value == 'DEBUG':
|
||||
return tf.compat.v1.logging.DEBUG
|
||||
elif value == 'ERROR':
|
||||
return tf.compat.v1.logging.ERROR
|
||||
elif value == 'FATAL':
|
||||
return tf.compat.v1.logging.FATAL
|
||||
elif value == 'WARN':
|
||||
return tf.compat.v1.logging.WARN
|
||||
else:
|
||||
raise argparse.ArgumentTypeError('Not an expected value')
|
||||
parser.add_argument(
|
||||
'--verbosity',
|
||||
type=verbosity_arg,
|
||||
default=tf.compat.v1.logging.INFO,
|
||||
help='Log verbosity. Can be "INFO", "DEBUG", "ERROR", "FATAL", or "WARN"')
|
||||
|
||||
FLAGS, unparsed = parser.parse_known_args()
|
||||
tf.compat.v1.app.run(main=main, argv=[sys.argv[0]] + unparsed)
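# Example of the new flag (hypothetical invocation; paths depend on your checkout):
#   python tensorflow/examples/speech_commands/train.py --verbosity=DEBUG
# verbosity_arg() maps the string "DEBUG" to tf.compat.v1.logging.DEBUG, which
# main() then passes to tf.compat.v1.logging.set_verbosity().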
|
||||
|
@ -100,6 +100,7 @@ class TrainTest(test.TestCase):
|
||||
'background_frequency': 0.8,
|
||||
'eval_step_interval': 1,
|
||||
'save_step_interval': 1,
|
||||
'verbosity': tf.compat.v1.logging.INFO
|
||||
}
|
||||
return DictStruct(**flags)
|
||||
|
||||
|
@ -34,5 +34,5 @@ extern "C" void DebugLog(const char* s) {
|
||||
DEBUG_SERIAL_OBJECT.begin(9600);
|
||||
is_initialized = true;
|
||||
}
|
||||
DEBUG_SERIAL_OBJECT.println(s);
|
||||
DEBUG_SERIAL_OBJECT.print(s);
|
||||
}
|
||||
|
@ -1,81 +1,444 @@
|
||||
# Micro Speech Example
|
||||
# Micro Speech example
|
||||
|
||||
This examples shows how you can use TensorFlow Lite to run a 20 kilobyte neural
|
||||
This example shows how you can use TensorFlow Lite to run a 20 kilobyte neural
|
||||
network model to recognize keywords in speech. It's designed to run on systems
|
||||
with very small amounts of memory such as microcontrollers and DSPs. The code
|
||||
itself also has a small footprint (for example around 22 kilobytes on a Cortex
|
||||
with very small amounts of memory such as microcontrollers and DSPs.
|
||||
|
||||
The example application listens to its surroundings with a microphone and
|
||||
indicates when it has detected a word by lighting an LED or displaying data on a
|
||||
screen, depending on the capabilities of the device.
|
||||
|
||||
The code has a small footprint (for example around 22 kilobytes on a Cortex
|
||||
M3) and only uses about 10 kilobytes of RAM for working memory, so it's able to
|
||||
run on systems like an STM32F103 with only 20 kilobytes of total SRAM and 64
|
||||
kilobytes of Flash.
|
||||
|
||||
## Table of Contents
|
||||
## Table of contents
|
||||
|
||||
- [Getting Started](#getting-started)
|
||||
- [Getting Started on a Microcontroller](#getting-started-on-a-microcontroller)
|
||||
- [Calculating the Input to the Neural Network](#calculating-the-input-to-the-neural-network)
|
||||
- [Creating Your Own Model](#creating-your-own-model)
|
||||
- [Getting started](#getting-started)
|
||||
- [Run on macOS](#run-on-macos)
|
||||
- [Deploy to Arduino](#deploy-to-arduino)
|
||||
- [Deploy to SparkFun Edge](#deploy-to-sparkfun-edge)
|
||||
- [Deploy to STM32F746](#deploy-to-STM32F746)
|
||||
- [Calculating the input to the neural network](#calculating-the-input-to-the-neural-network)
|
||||
- [Create your own model](#create-your-own-model)
|
||||
|
||||
## Getting Started
|
||||
|
||||
To compile and test this example on a desktop Linux or MacOS machine, download
|
||||
## Getting started
|
||||
|
||||
This code has been tested on the following devices:
|
||||
|
||||
* [SparkFun Edge](https://sparkfun.com/products/15170)
|
||||
* [Arduino Nano 33 BLE Sense](https://store.arduino.cc/usa/nano-33-ble-sense-with-headers)
|
||||
* [ST Microelectronics STM32F746G Discovery kit](https://os.mbed.com/platforms/ST-Discovery-F746NG/)
|
||||
|
||||
This readme contains instructions for building the code on Linux and macOS, and
|
||||
deploying the code to the microcontroller platforms above, as well as running it on macOS.
|
||||
|
||||
### Build the tests
|
||||
|
||||
To compile and test this example on a desktop Linux or macOS machine, download
|
||||
[the TensorFlow source code](https://github.com/tensorflow/tensorflow), `cd`
|
||||
into the source directory from a terminal, and then run the following command:
|
||||
|
||||
```
|
||||
make -f tensorflow/lite/experimental/micro/tools/make/Makefile
|
||||
make -f tensorflow/lite/experimental/micro/tools/make/Makefile test_micro_speech_test
|
||||
```
|
||||
|
||||
This will take a few minutes, and downloads frameworks the code uses like
|
||||
[CMSIS](https://developer.arm.com/embedded/cmsis) and
|
||||
[flatbuffers](https://google.github.io/flatbuffers/). Once that process has
|
||||
finished, run:
|
||||
finished, you should see a series of files get compiled, followed by some
|
||||
logging output from a test, which should conclude with `~~~ALL TESTS PASSED~~~`.
|
||||
|
||||
```
|
||||
make -f tensorflow/lite/experimental/micro/tools/make/Makefile test_micro_speech
|
||||
```
|
||||
If you see this, it means that a small program has been built and run that loads
|
||||
the trained TensorFlow model, runs some example inputs through it, and gets the
|
||||
expected outputs.
|
||||
|
||||
You should see a series of files get compiled, followed by some logging output
|
||||
from a test, which should conclude with `~~~ALL TESTS PASSED~~~`. If you see
|
||||
this, it means that a small program has been built and run that loads a trained
|
||||
TensorFlow model, runs some example inputs through it, and got the expected
|
||||
outputs. This particular test runs spectrograms generated from recordings of
|
||||
people saying "Yes" and "No", and checks that the network correctly identifies
|
||||
them.
|
||||
|
||||
To understand how TensorFlow Lite does this, you can look at the `TestInvoke()`
|
||||
function in
|
||||
[micro_speech_test.cc](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc).
|
||||
It's a fairly small amount of code, creating an interpreter, getting a handle to
|
||||
a model that's been compiled into the program, and then invoking the interpreter
|
||||
To understand how TensorFlow Lite does this, you can look at the source in
|
||||
[micro_speech_test.cc](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc).
|
||||
It's a fairly small amount of code that creates an interpreter, gets a handle to
|
||||
a model that's been compiled into the program, and then invokes the interpreter
|
||||
with the model and sample inputs.
|
||||
|
||||
## Getting Started on a Microcontroller
|
||||
### Run on macOS
|
||||
|
||||
Once you have downloaded the dependencies and got the x86/Linux build working,
|
||||
you can try building a version for the STM32F103 'bluepill' device. The
|
||||
following command will build the test and then run it on an emulator, assuming
|
||||
you have Docker installed:
|
||||
The example contains an audio provider compatible with macOS. If you have access
|
||||
to a Mac, you can run the example on your development machine.
|
||||
|
||||
*On Mac OS you need to have ARM compiler installed, one way of doing so is with
|
||||
brew: brew install caskroom/cask/gcc-arm-embedded*
|
||||
First, use the following command to build it:
|
||||
|
||||
```
|
||||
make -f tensorflow/lite/experimental/micro/tools/make/Makefile TARGET=bluepill test_micro_speech
|
||||
make -f tensorflow/lite/experimental/micro/tools/make/Makefile micro_speech
|
||||
```
|
||||
|
||||
If you have a real device
|
||||
[(see here for how to set one up)](https://github.com/google/stm32_bare_lib/tree/master/README.md)
|
||||
you can then convert the ELF file into a a `.bin` format executable to load onto
|
||||
it by running:
|
||||
Once the build completes, you can run the example with the following command:
|
||||
|
||||
```
|
||||
arm-none-eabi-objcopy \
|
||||
tensorflow/lite/experimental/micro/tools/make/gen/bluepill_cortex-m3/bin/micro_speech_test \
|
||||
tensorflow/lite/experimental/micro/tools/make/gen/bluepill_cortex-m3/bin/micro_speech_test.bin \
|
||||
--output binary
|
||||
tensorflow/lite/experimental/micro/tools/make/gen/osx_x86_64/bin/micro_speech
|
||||
```
|
||||
|
||||
## Calculating the Input to the Neural Network
|
||||
You might see a pop-up asking for microphone access. If so, grant it, and the
|
||||
program will start.
|
||||
|
||||
Try saying "yes" and "no". You should see output that looks like the following:
|
||||
|
||||
```
|
||||
Heard yes (201) @4056ms
|
||||
Heard no (205) @6448ms
|
||||
Heard unknown (201) @13696ms
|
||||
Heard yes (205) @15000ms
|
||||
Heard yes (205) @16856ms
|
||||
Heard unknown (204) @18704ms
|
||||
Heard no (206) @21000ms
|
||||
```
|
||||
|
||||
The number after each detected word is its score. By default, the recognize
|
||||
commands component only considers matches as valid if their score is over 200,
|
||||
so all of the scores you see will be at least 200.
|
||||
|
||||
The number after the score is the number of milliseconds since the program was
|
||||
started.
|
||||
|
||||
If you don't see any output, make sure your Mac's internal microphone is
|
||||
selected in the Mac's *Sound* menu, and that its input volume is turned up high
|
||||
enough.
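
Since every detection is printed in the fixed `Heard <label> (<score>) @<time>ms`
format, the output is easy to post-process. Here's a minimal parsing sketch:

```python
import re

LINE_RE = re.compile(r"Heard (\w+) \((\d+)\) @(\d+)ms")

def parse_detection(line):
    """Returns (label, score, timestamp_ms), or None if the line doesn't match."""
    match = LINE_RE.match(line.strip())
    if not match:
        return None
    label, score, timestamp_ms = match.groups()
    return label, int(score), int(timestamp_ms)

print(parse_detection("Heard yes (201) @4056ms"))  # ('yes', 201, 4056)
```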
|
||||
|
||||
## Deploy to Arduino
|
||||
|
||||
The following instructions will help you build and deploy this sample
|
||||
to [Arduino](https://www.arduino.cc/) devices.
|
||||
|
||||
The sample has been tested with the following devices:
|
||||
|
||||
- [Arduino Nano 33 BLE Sense](https://store.arduino.cc/usa/nano-33-ble-sense-with-headers)
|
||||
|
||||
|
||||
The Arduino Nano 33 BLE Sense is currently the only Arduino with a built-in
|
||||
microphone. If you're using a different Arduino board and attaching your own
|
||||
microphone, you'll need to implement your own `audio_provider.cc`. It also has a
|
||||
built-in LED, which is used to indicate that a word has been recognized.
|
||||
|
||||
### Obtain and import the library
|
||||
|
||||
To use this sample application with Arduino, we've created an Arduino library
|
||||
that includes it as an example that you can open in the Arduino IDE.
|
||||
|
||||
Download the current nightly build of the library: [micro_speech.zip](https://storage.googleapis.com/tensorflow-nightly/github/tensorflow/tensorflow/lite/experimental/micro/tools/make/gen/arduino_x86_64/prj/micro_speech/micro_speech.zip)
|
||||
|
||||
Next, import this zip file into the Arduino IDE by going to
|
||||
`Sketch -> Include Library -> Add .ZIP Library...`.
|
||||
|
||||
#### Build the library
|
||||
|
||||
If you need to build the library from source (for example, if you're making
|
||||
modifications to the code), run this command to generate a zip file containing
|
||||
the required source files:
|
||||
|
||||
```
|
||||
make -f tensorflow/lite/experimental/micro/tools/make/Makefile TARGET=arduino TAGS="" generate_micro_speech_arduino_library_zip
|
||||
```
|
||||
|
||||
A zip file will be created at the following location:
|
||||
|
||||
```
|
||||
tensorflow/lite/experimental/micro/tools/make/gen/arduino_x86_64/prj/micro_speech/micro_speech.zip
|
||||
```
|
||||
|
||||
You can then import this zip file into the Arduino IDE by going to
|
||||
`Sketch -> Include Library -> Add .ZIP Library...`.
|
||||
|
||||
### Load and run the example
|
||||
|
||||
Once the library has been added, go to `File -> Examples`. You should see an
|
||||
example near the bottom of the list named `TensorFlowLite:micro_speech`. Select
|
||||
it and click `micro_speech` to load the example.
|
||||
|
||||
Use the Arduino IDE to build and upload the example. Once it is running, you
|
||||
should see the built-in LED on your device flashing. Saying the word "yes" will
|
||||
cause the LED to remain on for 3 seconds. The current model has fairly low
|
||||
accuracy, so you may have to repeat "yes" a few times.
|
||||
|
||||
The program also outputs inference results to the serial port, which appear as
|
||||
follows:
|
||||
|
||||
```
|
||||
Heard yes (201) @4056ms
|
||||
Heard no (205) @6448ms
|
||||
Heard unknown (201) @13696ms
|
||||
Heard yes (205) @15000ms
|
||||
```
|
||||
|
||||
The number after each detected word is its score. By default, the program only
|
||||
considers matches as valid if their score is over 200, so all of the scores you
|
||||
see will be at least 200.
|
||||
|
||||
When the program is run, it waits 5 seconds for a USB-serial connection to be
|
||||
available. If there is no connection available, it will not output data. To see
|
||||
the serial output in the Arduino desktop IDE, do the following:
|
||||
|
||||
1. Open the Arduino IDE
|
||||
1. Connect the Arduino board to your computer via USB
|
||||
1. Press the reset button on the Arduino board
|
||||
1. Within 5 seconds, go to `Tools -> Serial Monitor` in the Arduino IDE. You may
|
||||
have to try several times, since the board will take a moment to connect.
|
||||
|
||||
If you don't see any output, repeat the process again.
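
If you'd rather capture the serial output from a script instead of the IDE's
Serial Monitor, a small sketch using the `pyserial` package can do it. The port
name below is an assumption; check the IDE's `Tools -> Port` menu for the real
one, and remember to open the port within the 5 second window after reset:

```python
# Requires the pyserial package: pip install pyserial
import serial

# The port name is an assumption; on Linux it is often /dev/ttyACM0 and on
# macOS something like /dev/cu.usbmodem14101.
PORT = "/dev/ttyACM0"

# The sketch waits ~5 seconds for a serial connection after reset, so open
# the port promptly after pressing the reset button.
with serial.Serial(PORT, 9600, timeout=1) as ser:
    while True:
        line = ser.readline().decode("utf-8", errors="replace").strip()
        if line:
            print(line)
```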
|
||||
|
||||
## Deploy to SparkFun Edge
|
||||
|
||||
The following instructions will help you build and deploy this sample on the
|
||||
[SparkFun Edge development board](https://sparkfun.com/products/15170).
|
||||
|
||||
The program will toggle the blue LED on and off with each inference. It will
|
||||
switch on the yellow LED when a "yes" is heard, the red LED when a "no" is
|
||||
heard, and the green LED when an unknown command is heard.
|
||||
|
||||
The [AI on a microcontroller with TensorFlow Lite and SparkFun Edge](https://codelabs.developers.google.com/codelabs/sparkfun-tensorflow)
|
||||
codelab walks through the deployment process in detail. The steps are also
|
||||
summarized below.
|
||||
|
||||
### Compile the binary
|
||||
|
||||
The following command will download the required dependencies and then compile a
|
||||
binary for the SparkFun Edge:
|
||||
|
||||
```
|
||||
make -f tensorflow/lite/experimental/micro/tools/make/Makefile TARGET=sparkfun_edge micro_speech_bin
|
||||
```
|
||||
|
||||
The binary will be created in the following location:
|
||||
|
||||
```
|
||||
tensorflow/lite/experimental/micro/tools/make/gen/sparkfun_edge_cortex-m4/bin/micro_speech.bin
|
||||
```
|
||||
|
||||
### Sign the binary
|
||||
|
||||
The binary must be signed with cryptographic keys to be deployed to the device.
|
||||
We'll now run some commands that will sign our binary so it can be flashed to
|
||||
the SparkFun Edge. The scripts we are using come from the Ambiq SDK, which is
|
||||
downloaded when the `Makefile` is run.
|
||||
|
||||
Enter the following command to set up some dummy cryptographic keys we can use
|
||||
for development:
|
||||
|
||||
```
|
||||
cp tensorflow/lite/experimental/micro/tools/make/downloads/AmbiqSuite-Rel2.0.0/tools/apollo3_scripts/keys_info0.py \
|
||||
tensorflow/lite/experimental/micro/tools/make/downloads/AmbiqSuite-Rel2.0.0/tools/apollo3_scripts/keys_info.py
|
||||
```
|
||||
|
||||
Next, run the following command to create a signed binary:
|
||||
|
||||
```
|
||||
python3 tensorflow/lite/experimental/micro/tools/make/downloads/AmbiqSuite-Rel2.0.0/tools/apollo3_scripts/create_cust_image_blob.py \
|
||||
--bin tensorflow/lite/experimental/micro/tools/make/gen/sparkfun_edge_cortex-m4/bin/micro_speech.bin \
|
||||
--load-address 0xC000 \
|
||||
--magic-num 0xCB \
|
||||
-o main_nonsecure_ota \
|
||||
--version 0x0
|
||||
```
|
||||
|
||||
This will create the file `main_nonsecure_ota.bin`. We'll now run another
|
||||
command to create a final version of the file that can be used to flash our
|
||||
device with the bootloader script we will use in the next step:
|
||||
|
||||
```
|
||||
python3 tensorflow/lite/experimental/micro/tools/make/downloads/AmbiqSuite-Rel2.0.0/tools/apollo3_scripts/create_cust_wireupdate_blob.py \
|
||||
--load-address 0x20000 \
|
||||
--bin main_nonsecure_ota.bin \
|
||||
-i 6 \
|
||||
-o main_nonsecure_wire \
|
||||
--options 0x1
|
||||
```
|
||||
|
||||
You should now have a file called `main_nonsecure_wire.bin` in the directory
|
||||
where you ran the commands. This is the file we'll be flashing to the device.
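
If you expect to rebuild and re-sign often, the two commands above can be
wrapped in a short script. This is only a convenience sketch that shells out to
the same Ambiq scripts; all paths mirror the commands shown earlier:

```python
import subprocess

SDK = ("tensorflow/lite/experimental/micro/tools/make/downloads/"
       "AmbiqSuite-Rel2.0.0/tools/apollo3_scripts")
BIN = ("tensorflow/lite/experimental/micro/tools/make/gen/"
       "sparkfun_edge_cortex-m4/bin/micro_speech.bin")

# Step 1: create the signed OTA image.
subprocess.run(
    ["python3", f"{SDK}/create_cust_image_blob.py",
     "--bin", BIN, "--load-address", "0xC000", "--magic-num", "0xCB",
     "-o", "main_nonsecure_ota", "--version", "0x0"],
    check=True)

# Step 2: wrap it for the wired bootloader update.
subprocess.run(
    ["python3", f"{SDK}/create_cust_wireupdate_blob.py",
     "--load-address", "0x20000", "--bin", "main_nonsecure_ota.bin",
     "-i", "6", "-o", "main_nonsecure_wire", "--options", "0x1"],
    check=True)

print("Created main_nonsecure_wire.bin")
```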
|
||||
|
||||
### Flash the binary
|
||||
|
||||
Next, attach the board to your computer via a USB-to-serial adapter.
|
||||
|
||||
**Note:** If you're using the [SparkFun Serial Basic Breakout](https://www.sparkfun.com/products/15096),
|
||||
you should [install the latest drivers](https://learn.sparkfun.com/tutorials/sparkfun-serial-basic-ch340c-hookup-guide#drivers-if-you-need-them)
|
||||
before you continue.
|
||||
|
||||
Once connected, assign the USB device name to an environment variable:
|
||||
|
||||
```
|
||||
export DEVICENAME=put your device name here
|
||||
```
|
||||
|
||||
Set another variable with the baud rate:
|
||||
|
||||
```
|
||||
export BAUD_RATE=921600
|
||||
```
|
||||
|
||||
Now, hold the button marked `14` on the device. While still holding the button,
|
||||
hit the button marked `RST`. Continue holding the button marked `14` while
|
||||
running the following command:
|
||||
|
||||
```
|
||||
python3 tensorflow/lite/experimental/micro/tools/make/downloads/AmbiqSuite-Rel2.0.0/tools/apollo3_scripts/uart_wired_update.py \
|
||||
-b ${BAUD_RATE} ${DEVICENAME} \
|
||||
-r 1 \
|
||||
-f main_nonsecure_wire.bin \
|
||||
-i 6
|
||||
```
|
||||
|
||||
You should see a long stream of output as the binary is flashed to the device.
|
||||
Once you see the following lines, flashing is complete:
|
||||
|
||||
```
|
||||
Sending Reset Command.
|
||||
Done.
|
||||
```
|
||||
|
||||
If you don't see these lines, flashing may have failed. Try running through the
|
||||
steps in [Flash the binary](#flash-the-binary) again (you can skip over setting
|
||||
the environment variables). If you continue to run into problems, follow the
|
||||
[AI on a microcontroller with TensorFlow Lite and SparkFun Edge](https://codelabs.developers.google.com/codelabs/sparkfun-tensorflow)
|
||||
codelab, which includes more comprehensive instructions for the flashing
|
||||
process.
|
||||
|
||||
The binary should now be deployed to the device. Hit the button marked `RST` to
|
||||
reboot the board.
|
||||
|
||||
You should see the device's blue LED flashing. The yellow LED should light when
|
||||
a "yes" is heard, the red LED when a "no" is heard, and the green LED when an
|
||||
unknown command is heard. The current model has fairly low accuracy, so you may
|
||||
have to repeat "yes" a few times.
|
||||
|
||||
Debug information is logged by the board while the program is running. To view
|
||||
it, establish a serial connection to the board using a baud rate of `115200`.
|
||||
On macOS and Linux, the following command should work:
|
||||
|
||||
```
|
||||
screen ${DEVICENAME} 115200
|
||||
```
|
||||
|
||||
You will see a line output for every word that is detected:
|
||||
|
||||
```
|
||||
Heard yes (201) @4056ms
|
||||
Heard no (205) @6448ms
|
||||
Heard unknown (201) @13696ms
|
||||
Heard yes (205) @15000ms
|
||||
```
|
||||
|
||||
The number after each detected word is its score. By default, the program only
|
||||
considers matches as valid if their score is over 200, so all of the scores you
|
||||
see will be at least 200.
|
||||
|
||||
To stop viewing the debug output with `screen`, hit `Ctrl+A`, immediately
|
||||
followed by the `K` key, then hit the `Y` key.
|
||||
|
||||
## Deploy to STM32F746
|
||||
|
||||
The following instructions will help you build and deploy the sample to the
|
||||
[STM32F7 discovery kit](https://os.mbed.com/platforms/ST-Discovery-F746NG/)
|
||||
using [ARM Mbed](https://github.com/ARMmbed/mbed-cli).
|
||||
|
||||
Before we begin, you'll need the following:
|
||||
|
||||
- STM32F7 discovery kit board
|
||||
- Mini-USB cable
|
||||
- ARM Mbed CLI ([installation instructions](https://os.mbed.com/docs/mbed-os/v5.12/tools/installation-and-setup.html))
|
||||
- Python 2.7 and pip
|
||||
|
||||
Since Mbed requires a special folder structure for projects, we'll first run a
|
||||
command to generate a subfolder containing the required source files in this
|
||||
structure:
|
||||
|
||||
```
|
||||
make -f tensorflow/lite/experimental/micro/tools/make/Makefile TARGET=mbed TAGS="CMSIS disco_f746ng" generate_micro_speech_mbed_project
|
||||
```
|
||||
|
||||
This will result in the creation of a new folder:
|
||||
|
||||
```
|
||||
tensorflow/lite/experimental/micro/tools/make/gen/mbed_cortex-m4/prj/micro_speech/mbed
|
||||
```
|
||||
|
||||
This folder contains all of the example's dependencies structured in the correct
|
||||
way for Mbed to be able to build it.
|
||||
|
||||
Change into the directory and run the following commands, making sure you are
|
||||
using Python 2.7.15.
|
||||
|
||||
First, tell Mbed that the current directory is the root of an Mbed project:
|
||||
|
||||
```
|
||||
mbed config root .
|
||||
```
|
||||
|
||||
Next, tell Mbed to download the dependencies and prepare to build:
|
||||
|
||||
```
|
||||
mbed deploy
|
||||
```
|
||||
|
||||
By default, Mbed will build the project using C++98. However, TensorFlow Lite
|
||||
requires C++11. Run the following Python snippet to modify the Mbed
|
||||
configuration files so that it uses C++11:
|
||||
|
||||
```
|
||||
python -c 'import fileinput, glob;
|
||||
for filename in glob.glob("mbed-os/tools/profiles/*.json"):
|
||||
for line in fileinput.input(filename, inplace=True):
|
||||
print line.replace("\"-std=gnu++98\"","\"-std=c++11\", \"-fpermissive\"")'
|
||||
|
||||
```
|
||||
|
||||
Finally, run the following command to compile:
|
||||
|
||||
```
|
||||
mbed compile -m DISCO_F746NG -t GCC_ARM
|
||||
```
|
||||
|
||||
This should result in a binary at the following path:
|
||||
|
||||
```
|
||||
./BUILD/DISCO_F746NG/GCC_ARM/mbed.bin
|
||||
```
|
||||
|
||||
To deploy, plug in your STM board and copy the file to it. On macOS, you can do
|
||||
this with the following command:
|
||||
|
||||
```
|
||||
cp ./BUILD/DISCO_F746NG/GCC_ARM/mbed.bin /Volumes/DIS_F746NG/
|
||||
```
|
||||
|
||||
Copying the file will initiate the flashing process.
|
||||
|
||||
The inference results are logged by the board while the program is running.
|
||||
To view them, establish a serial connection to the board
|
||||
using a baud rate of `9600`. On macOS and Linux, the following command should
|
||||
work, replacing `/dev/tty.devicename` with the name of your device as it appears
|
||||
in `/dev`:
|
||||
|
||||
```
|
||||
screen /dev/tty.devicename 9600
|
||||
```
|
||||
|
||||
You will see a line output for every word that is detected:
|
||||
|
||||
```
|
||||
Heard yes (201) @4056ms
|
||||
Heard no (205) @6448ms
|
||||
Heard unknown (201) @13696ms
|
||||
Heard yes (205) @15000ms
|
||||
```
|
||||
|
||||
The number after each detected word is its score. By default, the program only
|
||||
considers matches as valid if their score is over 200, so all of the scores you
|
||||
see will be at least 200.
|
||||
|
||||
To stop viewing the debug output with `screen`, hit `Ctrl+A`, immediately
|
||||
followed by the `K` key, then hit the `Y` key.
|
||||
|
||||
## Calculating the input to the neural network
|
||||
|
||||
The TensorFlow Lite model doesn't take in raw audio sample data. Instead it
|
||||
works with spectrograms, which are two dimensional arrays that are made up of
|
||||
@ -88,54 +451,91 @@ yet included in this sample code.
|
||||
The recipe for creating the spectrogram data is that each frequency slice is
|
||||
created by running an FFT across a 30ms section of the audio sample data. The
|
||||
input samples are treated as being between -1 and +1 as real values (encoded as
|
||||
-32,768 and 32,767 in 16-bit signed integer samples). This results in an FFT
|
||||
with 256 entries. Every sequence of six entries is averaged together, giving a
|
||||
total of 43 frequency buckets in the final slice. The results are stored as
|
||||
unsigned eight-bit values, where 0 represents a real number of zero, and 255
|
||||
represents 127.5 as a real number. Each adjacent frequency entry is stored in
|
||||
ascending memory order (frequency bucket 0 at data[0], bucket 1 at data [1],
|
||||
etc). The window for the frequency analysis is then moved forward by 20ms, and
|
||||
the process repeated, storing the results in the next memory row (for example
|
||||
bucket 0 in this moved window would be in data[43 + 0], etc). This process
|
||||
happens 49 times in total, producing a single channel image that is 43 pixels
|
||||
wide, and 49 rows high. Here's an illustration of the process:
|
||||
-32,768 and 32,767 in 16-bit signed integer samples).
|
||||
|
||||
This results in an FFT with 256 entries. Every sequence of six entries is
|
||||
averaged together, giving a total of 43 frequency buckets in the final slice.
|
||||
The results are stored as unsigned eight-bit values, where 0 represents a real
|
||||
number of zero, and 255 represents 127.5 as a real number.
|
||||
|
||||
Each adjacent frequency entry is stored in ascending memory order (frequency
|
||||
bucket 0 at data[0], bucket 1 at data [1], etc). The window for the frequency
|
||||
analysis is then moved forward by 20ms, and the process repeated, storing the
|
||||
results in the next memory row (for example bucket 0 in this moved window would
|
||||
be in data[43 + 0], etc). This process happens 49 times in total, producing a
|
||||
single channel image that is 43 pixels wide, and 49 rows high.
|
||||
|
||||
Here's an illustration of the process:
|
||||
|
||||

|
||||
|
||||
The test data files have been generated by running the following commands:
|
||||
The test data files have been generated by running the following commands. See
|
||||
the training instructions below to learn how to set up the environment to run
|
||||
them.
|
||||
|
||||
```
|
||||
bazel run tensorflow/examples/speech_commands:wav_to_features -- \
|
||||
--input_wav=${HOME}/speech_commands_test_set_v0.02/yes/f2e59fea_nohash_1.wav \
|
||||
--output_c_file=yes_features_data.cc \
|
||||
python tensorflow/tensorflow/examples/speech_commands/wav_to_features.py \
|
||||
--input_wav=/content/speech_dataset/yes/f2e59fea_nohash_1.wav \
|
||||
--output_c_file=/content/yes_features_data.cc \
|
||||
--window_stride=20 --preprocess=average --quantize=1
|
||||
|
||||
bazel run tensorflow/examples/speech_commands:wav_to_features -- \
|
||||
--input_wav=${HOME}/speech_commands_test_set_v0.02/no/f9643d42_nohash_4.wav \
|
||||
--output_c_file=no_features_data.cc \
|
||||
python tensorflow/tensorflow/examples/speech_commands/wav_to_features.py \
|
||||
--input_wav=/content/speech_dataset/no/f9643d42_nohash_4.wav \
|
||||
--output_c_file=/content/no_features_data.cc \
|
||||
--window_stride=20 --preprocess=average --quantize=1
|
||||
```
|
||||
|
||||
## Creating Your Own Model
|
||||
## Train your own model
|
||||
|
||||
The neural network model used in this example was built using the
|
||||
[TensorFlow speech commands tutorial](https://www.tensorflow.org/tutorials/sequences/audio_recognition).
|
||||
You can retrain it to recognize any combination of words from this list:
|
||||
|
||||
If you would like to create your own, you can start by training a model with the
|
||||
following commands. Note that this will begin a full build of TensorFlow from
|
||||
source; it is not currently possible to use the TensorFlow pip package. Due to
|
||||
the complexity of setting up a build environment, it's easiest to run these
|
||||
commands in a
|
||||
```
|
||||
yes
|
||||
no
|
||||
up
|
||||
down
|
||||
left
|
||||
right
|
||||
on
|
||||
off
|
||||
stop
|
||||
go
|
||||
```
|
||||
|
||||
### Use Google Colaboratory
|
||||
|
||||
The easiest way to train your own speech model is by running [`train_speech_model.ipynb`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/micro/examples/micro_speech/train_speech_model.ipynb)
|
||||
in Google Colaboratory. This avoids the need to install dependencies, and allows
|
||||
the use of GPUs for training. Total training time will be 1.5-2hrs.
|
||||
|
||||
We strongly recommend trying this approach first.
|
||||
|
||||
### Use your local machine
|
||||
|
||||
You can use the following commands to train the model on your own machine.
|
||||
|
||||
It may be easiest to run these commands in a
|
||||
[TensorFlow Docker container](https://www.tensorflow.org/install/docker). A full
|
||||
build may take a couple of hours.
|
||||
|
||||
You must currently use the TensorFlow Nightly `pip` package. This version is
|
||||
confirmed to work:
|
||||
|
||||
```
|
||||
tf-nightly-gpu==1.15.0.dev20190729
|
||||
```
|
||||
|
||||
To begin training, run the following:
|
||||
|
||||
```
|
||||
bazel run -c opt --copt=-mavx2 --copt=-mfma \
|
||||
tensorflow/examples/speech_commands:train -- \
|
||||
python tensorflow/tensorflow/examples/speech_commands/train.py \
|
||||
--model_architecture=tiny_conv --window_stride=20 --preprocess=micro \
|
||||
--wanted_words="yes,no" --silence_percentage=25 --unknown_percentage=25 --quantize=1
|
||||
--wanted_words="yes,no" --silence_percentage=25 --unknown_percentage=25 \
|
||||
--quantize=1 --verbosity=WARN --how_many_training_steps="15000,3000" \
|
||||
--learning_rate="0.001,0.0001" --summaries_dir=/tmp/retrain_logs \
|
||||
--data_dir=/tmp/speech_dataset --train_dir=/tmp/speech_commands_train
|
||||
```
|
||||
|
||||
If you see a compiling error on older machines, try leaving out the `--copt`
|
||||
@ -144,7 +544,7 @@ extensions. The training process is likely to take a couple of hours. Once it
|
||||
has completed, the next step is to freeze the variables:
|
||||
|
||||
```
|
||||
bazel run tensorflow/examples/speech_commands:freeze -- \
|
||||
python tensorflow/tensorflow/examples/speech_commands/freeze.py \
|
||||
--model_architecture=tiny_conv --window_stride=20 --preprocess=micro \
|
||||
--wanted_words="yes,no" --quantize=1 --output_file=/tmp/tiny_conv.pb \
|
||||
--start_checkpoint=/tmp/speech_commands_train/tiny_conv.ckpt-18000
|
||||
@ -153,10 +553,10 @@ bazel run tensorflow/examples/speech_commands:freeze -- \
|
||||
The next step is to create a TensorFlow Lite file from the frozen graph:
|
||||
|
||||
```
|
||||
bazel run tensorflow/lite/toco:toco -- \
|
||||
--input_file=/tmp/tiny_conv.pb --output_file=/tmp/tiny_conv.tflite \
|
||||
--input_shapes=1,49,40,1 --input_arrays=Reshape_1 --output_arrays='labels_softmax' \
|
||||
--inference_type=QUANTIZED_UINT8 --mean_values=0 --std_values=9.8077
|
||||
toco \
|
||||
--graph_def_file=/content/tiny_conv.pb --output_file=/tmp/tiny_conv.tflite \
|
||||
--input_shapes=1,1960 --input_arrays=Reshape_1 --output_arrays='labels_softmax' \
|
||||
--inference_type=QUANTIZED_UINT8 --mean_values=0 --std_dev_values=9.8077
|
||||
```
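
If you prefer the Python API over the `toco` command line, the equivalent
conversion can be sketched with the TF 1.x `TFLiteConverter`. This assumes the
1.x converter API available in the nightly build pinned above, and mirrors the
flags of the command just shown:

```python
import tensorflow as tf

converter = tf.lite.TFLiteConverter.from_frozen_graph(
    graph_def_file="/content/tiny_conv.pb",
    input_arrays=["Reshape_1"],
    output_arrays=["labels_softmax"],
    input_shapes={"Reshape_1": [1, 1960]})
converter.inference_type = tf.lite.constants.QUANTIZED_UINT8
# (mean, std_dev) used to map float inputs onto the quantized uint8 range.
converter.quantized_input_stats = {"Reshape_1": (0., 9.8077)}

tflite_model = converter.convert()
with open("/tmp/tiny_conv.tflite", "wb") as f:
    f.write(tflite_model)
```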
|
||||
|
||||
Finally, convert the file into a C source file that can be compiled into an
|
||||
@ -166,45 +566,7 @@ embedded system:
|
||||
xxd -i /tmp/tiny_conv.tflite > /tmp/tiny_conv_micro_features_model_data.cc
|
||||
```
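
If `xxd` isn't installed on your system, the same output can be approximated
with a few lines of Python. The variable names below are the ones `xxd -i`
derives from the file path, which is why the renaming described next is still
needed:

```python
# Rough equivalent of `xxd -i /tmp/tiny_conv.tflite`, for systems without xxd.
with open("/tmp/tiny_conv.tflite", "rb") as f:
    data = f.read()

rows = []
for offset in range(0, len(data), 12):
    row = data[offset:offset + 12]
    rows.append("  " + ", ".join("0x%02x" % b for b in row))

with open("/tmp/tiny_conv_micro_features_model_data.cc", "w") as f:
    f.write("unsigned char _tmp_tiny_conv_tflite[] = {\n")
    f.write(",\n".join(rows))
    f.write("\n};\n")
    f.write("unsigned int _tmp_tiny_conv_tflite_len = %d;\n" % len(data))
```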
|
||||
|
||||
Next, we need to update `tiny_conv_micro_features_model_data.cc` so that it is
|
||||
compatible with the `micro_features` sample code.
|
||||
|
||||
First, open the file. The top two lines should look approximately as follows
|
||||
(the exact hex values may be different):
|
||||
|
||||
```cpp
|
||||
unsigned char _tmp_tiny_conv_tflite[] = {
|
||||
0x18, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x00, 0x00, 0x0e, 0x00,
|
||||
```
|
||||
|
||||
You need to add the include from the following snippet, and tweak the variable
|
||||
declaration. Don’t change the hex values, though:
|
||||
|
||||
```cpp
|
||||
#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/tiny_conv_micro_features_model_data.h"
|
||||
|
||||
const unsigned char g_tiny_conv_micro_features_model_data[] = {
|
||||
0x18, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x00, 0x00, 0x0e, 0x00,
|
||||
```
|
||||
|
||||
Next, go to the very bottom of the file and find the variable named
|
||||
`_tmp_tiny_conv_tflite_len`.
|
||||
|
||||
```cpp
|
||||
unsigned int _tmp_tiny_conv_tflite_len = 19800;
|
||||
```
|
||||
|
||||
Change the declaration as follows, but do not change the number assigned to it,
|
||||
even if your number is different from the one in this guide.
|
||||
|
||||
```cpp
|
||||
const int g_tiny_conv_micro_features_model_data_len = 19800;
|
||||
```
|
||||
|
||||
Finally, save the file, then copy the `tiny_conv_micro_features_model_data.cc`
|
||||
file into the `micro_features/` subdirectory of your `tf_microspeech/` project.
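
These edits are mechanical, so if you retrain frequently you may want to script
them. Here is a minimal sketch that assumes the file was generated by `xxd -i`
exactly as above:

```python
PATH = "/tmp/tiny_conv_micro_features_model_data.cc"
INCLUDE = ('#include "tensorflow/lite/experimental/micro/examples/'
           'micro_speech/micro_features/tiny_conv_micro_features_model_data.h"\n\n')

with open(PATH) as f:
    source = f.read()

# Rename the array and length variables without touching the hex values.
source = source.replace(
    "unsigned char _tmp_tiny_conv_tflite[]",
    "const unsigned char g_tiny_conv_micro_features_model_data[]")
source = source.replace(
    "unsigned int _tmp_tiny_conv_tflite_len",
    "const int g_tiny_conv_micro_features_model_data_len")

with open(PATH, "w") as f:
    f.write(INCLUDE + source)
```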
|
||||
|
||||
### Creating Your Own Model With Google Cloud
|
||||
### Use Google Cloud
|
||||
|
||||
If you want to train your model in Google Cloud, you can do so by using
|
||||
pre-configured Deep Learning images.
|
||||
@ -231,28 +593,8 @@ As soon as instance has been created you can SSH to it(as a jupyter user!):
|
||||
gcloud compute ssh "jupyter@${INSTANCE_NAME}"
|
||||
```
|
||||
|
||||
now install Bazel:
|
||||
|
||||
```
|
||||
wget https://github.com/bazelbuild/bazel/releases/download/0.15.0/bazel-0.15.0-installer-linux-x86_64.sh
|
||||
sudo bash ./bazel-0.15.0-installer-linux-x86_64.sh
|
||||
source /usr/local/lib/bazel/bin/bazel-complete.bash
|
||||
sudo ln /usr/local/bin/bazel /usr/bin/bazel
|
||||
```
|
||||
|
||||
and finally run the build:
|
||||
|
||||
```
|
||||
# TensorFlow already pre-baked on the image
|
||||
cd src/tensorflow
|
||||
bazel run -c opt --copt=-mavx2 --copt=-mfma \
|
||||
tensorflow/examples/speech_commands:train -- \
|
||||
--model_architecture=tiny_conv --window_stride=20 --preprocess=average \
|
||||
--wanted_words="yes,no" --silence_percentage=25 --unknown_percentage=25 --quantize=1
|
||||
```
|
||||
|
||||
After build is over follow the rest of the instructions from this tutorial. And
|
||||
finally do not forget to remove the instance when training is done:
|
||||
Finally, follow the instructions in the previous section to train the model. Do
|
||||
not forget to remove the instance when training is done:
|
||||
|
||||
```
|
||||
gcloud compute instances delete "${INSTANCE_NAME}" --zone="${ZONE}"
|
||||
|
@ -0,0 +1,118 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.h"
|
||||
|
||||
#include "PDM.h"
|
||||
#include "tensorflow/lite/experimental/micro/examples/micro_speech/micro_features/micro_model_settings.h"
|
||||
|
||||
namespace {
|
||||
bool g_is_audio_initialized = false;
|
||||
// An internal buffer able to fit 16x our sample size
|
||||
constexpr int kAudioCaptureBufferSize = DEFAULT_PDM_BUFFER_SIZE * 16;
|
||||
int16_t g_audio_capture_buffer[kAudioCaptureBufferSize];
|
||||
// A buffer that holds our output
|
||||
int16_t g_audio_output_buffer[kMaxAudioSampleSize];
|
||||
// Mark as volatile so we can check in a while loop to see if
|
||||
// any samples have arrived yet.
|
||||
volatile int32_t g_latest_audio_timestamp = 0;
|
||||
} // namespace
|
||||
|
||||
void CaptureSamples() {
|
||||
// This is how many bytes of new data we have each time this is called
|
||||
const int number_of_samples = DEFAULT_PDM_BUFFER_SIZE;
|
||||
// Calculate what timestamp the last audio sample represents
|
||||
const int32_t time_in_ms =
|
||||
g_latest_audio_timestamp +
|
||||
(number_of_samples / (kAudioSampleFrequency / 1000));
|
||||
// Determine the index, in the history of all samples, of the last sample
|
||||
const int32_t start_sample_offset =
|
||||
g_latest_audio_timestamp * (kAudioSampleFrequency / 1000);
|
||||
// Determine the index of this sample in our ring buffer
|
||||
const int capture_index = start_sample_offset % kAudioCaptureBufferSize;
|
||||
// Read the data to the correct place in our buffer
|
||||
PDM.read(g_audio_capture_buffer + capture_index, DEFAULT_PDM_BUFFER_SIZE);
|
||||
// This is how we let the outside world know that new audio data has arrived.
|
||||
g_latest_audio_timestamp = time_in_ms;
|
||||
}
|
||||
|
||||
TfLiteStatus InitAudioRecording(tflite::ErrorReporter* error_reporter) {
|
||||
// Hook up the callback that will be called with each sample
|
||||
PDM.onReceive(CaptureSamples);
|
||||
// Start listening for audio: MONO @ 16KHz with gain at 20
|
||||
PDM.begin(1, kAudioSampleFrequency);
|
||||
PDM.setGain(20);
|
||||
// Block until we have our first audio sample
|
||||
while (!g_latest_audio_timestamp) {
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus GetAudioSamples(tflite::ErrorReporter* error_reporter,
|
||||
int start_ms, int duration_ms,
|
||||
int* audio_samples_size, int16_t** audio_samples) {
|
||||
// Set everything up to start receiving audio
|
||||
if (!g_is_audio_initialized) {
|
||||
TfLiteStatus init_status = InitAudioRecording(error_reporter);
|
||||
if (init_status != kTfLiteOk) {
|
||||
return init_status;
|
||||
}
|
||||
g_is_audio_initialized = true;
|
||||
}
|
||||
// This next part should only be called when the main thread notices that the
|
||||
// latest audio sample data timestamp has changed, so that there's new data
|
||||
// in the capture ring buffer. The ring buffer will eventually wrap around and
|
||||
// overwrite the data, but the assumption is that the main thread is checking
|
||||
// often enough and the buffer is large enough that this call will be made
|
||||
// before that happens.
|
||||
|
||||
// Determine the index, in the history of all samples, of the first
|
||||
// sample we want
|
||||
const int start_offset = start_ms * (kAudioSampleFrequency / 1000);
|
||||
// Determine how many samples we want in total
|
||||
const int duration_sample_count =
|
||||
duration_ms * (kAudioSampleFrequency / 1000);
|
||||
for (int i = 0; i < duration_sample_count; ++i) {
|
||||
// For each sample, transform its index in the history of all samples into
|
||||
// its index in g_audio_capture_buffer
|
||||
const int capture_index = (start_offset + i) % kAudioCaptureBufferSize;
|
||||
// Write the sample to the output buffer
|
||||
g_audio_output_buffer[i] = g_audio_capture_buffer[capture_index];
|
||||
}
|
||||
|
||||
// Set pointers to provide access to the audio
|
||||
*audio_samples_size = kMaxAudioSampleSize;
|
||||
*audio_samples = g_audio_output_buffer;
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
int32_t LatestAudioTimestamp() { return g_latest_audio_timestamp; }
|
@ -0,0 +1,61 @@
|
||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/experimental/micro/examples/micro_speech/command_responder.h"
|
||||
|
||||
#include "Arduino.h"
|
||||
|
||||
// Toggles the LED every inference, and keeps it on for ~3 seconds if a "yes"
|
||||
// was heard
|
||||
void RespondToCommand(tflite::ErrorReporter* error_reporter,
|
||||
int32_t current_time, const char* found_command,
|
||||
uint8_t score, bool is_new_command) {
|
||||
static bool is_initialized = false;
|
||||
if (!is_initialized) {
|
||||
pinMode(LED_BUILTIN, OUTPUT);
|
||||
is_initialized = true;
|
||||
}
|
||||
static int32_t last_yes_time = 0;
|
||||
static int count = 0;
|
||||
|
||||
if (is_new_command) {
|
||||
error_reporter->Report("Heard %s (%d) @%dms", found_command, score,
|
||||
current_time);
|
||||
// If we heard a "yes", switch on an LED and store the time.
|
||||
if (found_command[0] == 'y') {
|
||||
last_yes_time = current_time;
|
||||
digitalWrite(LED_BUILTIN, HIGH);
|
||||
}
|
||||
}
|
||||
|
||||
// If last_yes_time is non-zero but was >3 seconds ago, zero it
|
||||
// and switch off the LED.
|
||||
if (last_yes_time != 0) {
|
||||
if (last_yes_time < (current_time - 3000)) {
|
||||
last_yes_time = 0;
|
||||
digitalWrite(LED_BUILTIN, LOW);
|
||||
}
|
||||
// If it is non-zero but <3 seconds ago, do nothing.
|
||||
return;
|
||||
}
|
||||
|
||||
// Otherwise, toggle the LED every time an inference is performed.
|
||||
++count;
|
||||
if (count & 1) {
|
||||
digitalWrite(LED_BUILTIN, HIGH);
|
||||
} else {
|
||||
digitalWrite(LED_BUILTIN, LOW);
|
||||
}
|
||||
}
|
@ -95,8 +95,10 @@ TfLiteStatus FeatureProvider::PopulateFeatureData(
|
||||
const int32_t slice_start_ms = (new_step * kFeatureSliceStrideMs);
|
||||
int16_t* audio_samples = nullptr;
|
||||
int audio_samples_size = 0;
|
||||
GetAudioSamples(error_reporter, slice_start_ms, kFeatureSliceDurationMs,
|
||||
&audio_samples_size, &audio_samples);
|
||||
// TODO(petewarden): Fix bug that leads to non-zero slice_start_ms
|
||||
GetAudioSamples(error_reporter, (slice_start_ms > 0 ? slice_start_ms : 0),
|
||||
kFeatureSliceDurationMs, &audio_samples_size,
|
||||
&audio_samples);
|
||||
if (audio_samples_size < kMaxAudioSampleSize) {
|
||||
error_reporter->Report("Audio data size %d too small, want %d",
|
||||
audio_samples_size, kMaxAudioSampleSize);
|
||||
|
@ -0,0 +1,293 @@
|
||||
{
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"name": "speech_commands training",
|
||||
"version": "0.3.2",
|
||||
"provenance": [],
|
||||
"collapsed_sections": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"name": "python3",
|
||||
"display_name": "Python 3"
|
||||
},
|
||||
"accelerator": "GPU"
|
||||
},
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "pO4-CY_TCZZS",
|
||||
"colab_type": "text"
|
||||
},
|
||||
"source": [
|
||||
"# Train a Simple Audio Recognition model for microcontroller use"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "BaFfr7DHRmGF",
|
||||
"colab_type": "text"
|
||||
},
|
||||
"source": [
|
||||
"This notebook demonstrates how to train a 20kb [Simple Audio Recognition](https://www.tensorflow.org/tutorials/sequences/audio_recognition) model for [TensorFlow Lite for Microcontrollers](https://tensorflow.org/lite/microcontrollers/overview). It will produce the same model used in the [micro_speech](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/experimental/micro/examples/micro_speech) example application.\n",
|
||||
"\n",
|
||||
"The model is designed to be used with [Google Colaboratory](https://colab.research.google.com).\n",
|
||||
"\n",
|
||||
"<table class=\\\"tfo-notebook-buttons\\\" align=\\\"left\\\">\n",
|
||||
" <td>\n",
|
||||
" <a target=\\\"_blank\\\" href=\\\"https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/micro/examples/micro_speech/train_speech_model.ipynb\\\"><img src=\\\"https://www.tensorflow.org/images/colab_logo_32px.png\\\" />Run in Google Colab</a>\n",
|
||||
" </td>\n",
|
||||
" <td>\n",
|
||||
" <a target=\\\"_blank\\\" href=\\\"https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/micro/examples/micro_speech/train_speech_model.ipynb\\\"><img src=\\\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\\\" />View source on GitHub</a>\n",
|
||||
" </td>\n",
|
||||
"</table>\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"The notebook runs Python scripts to train and freeze the model, and uses the TensorFlow Lite converter to convert it for use with TensorFlow Lite for Microcontrollers.\n",
|
||||
"\n",
|
||||
"**Training is much faster using GPU acceleration.** Before you proceed, ensure you are using a GPU runtime by going to **Runtime -> Change runtime type** and selecting **GPU**. Training 18,000 iterations will take 1.5-2 hours on a GPU runtime.\n",
|
||||
"\n",
|
||||
"## Configure training\n",
|
||||
"\n",
|
||||
"The following `os.environ` lines can be customized to set the words that will be trained for, and the steps and learning rate of the training. The default values will result in the same model that is used in the micro_speech example. Run the cell to set the configuration:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "ludfxbNIaegy",
|
||||
"colab_type": "code",
|
||||
"colab": {}
|
||||
},
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# A comma-delimited list of the words you want to train for.\n",
|
||||
"# The options are: yes,no,up,down,left,right,on,off,stop,go\n",
|
||||
"# All other words will be used to train an \"unknown\" category.\n",
|
||||
"os.environ[\"WANTED_WORDS\"] = \"yes,no\"\n",
|
||||
"\n",
|
||||
"# The number of steps and learning rates can be specified as comma-separated\n",
|
||||
"# lists to define the rate at each stage. For example,\n",
|
||||
"# TRAINING_STEPS=15000,3000 and LEARNING_RATE=0.001,0.0001\n",
|
||||
"# will run 18,000 training loops in total, with a rate of 0.001 for the first\n",
|
||||
"# 15,000, and 0.0001 for the final 3,000.\n",
|
||||
"os.environ[\"TRAINING_STEPS\"]=\"15000,3000\"\n",
|
||||
"os.environ[\"LEARNING_RATE\"]=\"0.001,0.0001\"\n",
|
||||
"\n",
|
||||
"# Calculate the total number of steps, which is used to identify the checkpoint\n",
|
||||
"# file name.\n",
|
||||
"total_steps = sum(map(lambda string: int(string),\n",
|
||||
" os.environ[\"TRAINING_STEPS\"].split(\",\")))\n",
|
||||
"os.environ[\"TOTAL_STEPS\"] = str(total_steps)\n",
|
||||
"\n",
|
||||
"# Print the configuration to confirm it\n",
|
||||
"!echo \"Training these words: ${WANTED_WORDS}\"\n",
|
||||
"!echo \"Training steps in each stage: ${TRAINING_STEPS}\"\n",
|
||||
"!echo \"Learning rate in each stage: ${LEARNING_RATE}\"\n",
|
||||
"!echo \"Total number of training steps: ${TOTAL_STEPS}\"\n"
|
||||
],
|
||||
"execution_count": 0,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "gCgeOpvY9pAi",
|
||||
"colab_type": "text"
|
||||
},
|
||||
"source": [
|
||||
"## Install dependencies\n",
|
||||
"\n",
|
||||
"Next, we'll install a GPU build of TensorFlow, so we can use GPU acceleration for training. We also clone the TensorFlow repository, which contains the scripts that train and freeze the model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "Nd1iM1o2ymvA",
|
||||
"colab_type": "code",
|
||||
"colab": {}
|
||||
},
|
||||
"source": [
|
||||
"# Install the nightly build\n",
|
||||
"!pip install -q tf-nightly-gpu==1.15.0.dev20190729\n",
|
||||
"!git clone https://github.com/dansitu/tensorflow"
|
||||
],
|
||||
"execution_count": 0,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "aV_0qkYh98LD",
|
||||
"colab_type": "text"
|
||||
},
|
||||
"source": [
|
||||
"## Load TensorBoard\n",
|
||||
"\n",
|
||||
"Now, set up TensorBoard so that we can graph our accuracy and loss as training proceeds."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "yZArmzT85SLq",
|
||||
"colab_type": "code",
|
||||
"colab": {}
|
||||
},
|
||||
"source": [
|
||||
"# Delete any old logs from previous runs\n",
|
||||
"!rm -rf /content/retrain_logs\n",
|
||||
"# Load TensorBoard\n",
|
||||
"%load_ext tensorboard\n",
|
||||
"%tensorboard --logdir /content/retrain_logs"
|
||||
],
|
||||
"execution_count": 0,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "x1J96Ron-O4R",
|
||||
"colab_type": "text"
|
||||
},
|
||||
"source": [
|
||||
"## Begin training\n",
|
||||
"\n",
|
||||
"Next, run the following script to begin training. The script will first download the training data:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "VJsEZx6lynbY",
|
||||
"colab_type": "code",
|
||||
"colab": {}
|
||||
},
|
||||
"source": [
|
||||
"!python tensorflow/tensorflow/examples/speech_commands/train.py \\\n",
|
||||
"--model_architecture=tiny_conv --window_stride=20 --preprocess=micro \\\n",
|
||||
"--wanted_words=${WANTED_WORDS} --silence_percentage=25 --unknown_percentage=25 \\\n",
|
||||
"--quantize=1 --verbosity=WARN --how_many_training_steps=${TRAINING_STEPS} \\\n",
|
||||
"--learning_rate=${LEARNING_RATE} --summaries_dir=/content/retrain_logs \\\n",
|
||||
"--data_dir=/content/speech_dataset --train_dir=/content/speech_commands_train \\\n"
|
||||
],
|
||||
"execution_count": 0,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "XQUJLrdS-ftl",
|
||||
"colab_type": "text"
|
||||
},
|
||||
"source": [
|
||||
"## Freeze the graph\n",
|
||||
"\n",
|
||||
"Once training is complete, run the following cell to freeze the graph."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "xyc3_eLh9sAg",
|
||||
"colab_type": "code",
|
||||
"colab": {}
|
||||
},
|
||||
"source": [
|
||||
"!python tensorflow/tensorflow/examples/speech_commands/freeze.py \\\n",
|
||||
"--model_architecture=tiny_conv --window_stride=20 --preprocess=micro \\\n",
|
||||
"--wanted_words=${WANTED_WORDS} --quantize=1 --output_file=/content/tiny_conv.pb \\\n",
|
||||
"--start_checkpoint=/content/speech_commands_train/tiny_conv.ckpt-${TOTAL_STEPS}"
|
||||
],
|
||||
"execution_count": 0,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "_DBGDxVI-nKG",
|
||||
"colab_type": "text"
|
||||
},
|
||||
"source": [
|
||||
"## Convert the model\n",
|
||||
"\n",
|
||||
"Run this cell to use the TensorFlow Lite converter to convert the frozen graph into the TensorFlow Lite format, fully quantized for use with embedded devices."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "lBj_AyCh1cC0",
|
||||
"colab_type": "code",
|
||||
"colab": {}
|
||||
},
|
||||
"source": [
|
||||
"!toco \\\n",
|
||||
"--graph_def_file=/content/tiny_conv.pb --output_file=/content/tiny_conv.tflite \\\n",
|
||||
"--input_shapes=1,1960 --input_arrays=Reshape_1 --output_arrays='labels_softmax' \\\n",
|
||||
"--inference_type=QUANTIZED_UINT8 --mean_values=0 --std_dev_values=9.8077"
|
||||
],
|
||||
"execution_count": 0,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "dt6Zqbxu-wIi",
|
||||
"colab_type": "text"
|
||||
},
|
||||
"source": [
|
||||
"The following cell will print the model size, which will be under 20 kilobytes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "XohZOTjR8ZyE",
|
||||
"colab_type": "code",
|
||||
"colab": {}
|
||||
},
|
||||
"source": [
|
||||
"import os\n",
|
||||
"model_size = os.path.getsize(\"/content/tiny_conv.tflite\")\n",
|
||||
"print(\"Model is %d bytes\" % model_size)"
|
||||
],
|
||||
"execution_count": 0,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "2pQnN0i_-0L2",
|
||||
"colab_type": "text"
|
||||
},
|
||||
"source": [
|
||||
"Finally, we use xxd to transform the model into a source file that can be included in a C++ project and loaded by TensorFlow Lite for Microcontrollers."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"metadata": {
|
||||
"id": "eoYyh0VU8pca",
|
||||
"colab_type": "code",
|
||||
"colab": {}
|
||||
},
|
||||
"source": [
|
||||
"# Install xxd if it is not available\n",
|
||||
"!apt-get -qq install xxd\n",
|
||||
"# Save the file as a C source file\n",
|
||||
"!xxd -i /content/tiny_conv.tflite > /content/tiny_conv.cc\n",
|
||||
"# Print the source file\n",
|
||||
"!cat /content/tiny_conv.cc"
|
||||
],
|
||||
"execution_count": 0,
|
||||
"outputs": []
|
||||
}
|
||||
]
|
||||
}
|
@ -25,4 +25,4 @@ curl -L -O "https://downloads.arduino.cc/arduino-cli/arduino-cli-latest-linux64.
|
||||
tar xjf arduino-cli-latest-linux64.tar.bz2
|
||||
|
||||
/tmp/arduino-cli core update-index
|
||||
/tmp/arduino-cli core install arduino:sam
|
||||
/tmp/arduino-cli core install arduino:mbed
|
||||
|
@ -23,15 +23,22 @@ set -e
|
||||
ARDUINO_HOME_DIR=${HOME}/Arduino
|
||||
ARDUINO_LIBRARIES_DIR=${ARDUINO_HOME_DIR}/libraries
|
||||
ARDUINO_CLI_TOOL=/tmp/arduino-cli
|
||||
# Necessary due to bug in arduino-cli that allows it to build files in pwd
|
||||
TEMP_BUILD_DIR=/tmp/tflite-arduino-build
|
||||
|
||||
LIBRARY_ZIP=${1}
|
||||
|
||||
rm -rf ${ARDUINO_LIBRARIES_DIR}
|
||||
rm -rf ${TEMP_BUILD_DIR}
|
||||
|
||||
mkdir -p ${ARDUINO_HOME_DIR}/libraries
|
||||
mkdir -p ${TEMP_BUILD_DIR}
|
||||
|
||||
unzip -q ${LIBRARY_ZIP} -d ${ARDUINO_LIBRARIES_DIR}
|
||||
|
||||
# Change into this dir before running the tests
|
||||
cd ${TEMP_BUILD_DIR}
|
||||
|
||||
for f in ${ARDUINO_LIBRARIES_DIR}/*/examples/*/*.ino; do
|
||||
${ARDUINO_CLI_TOOL} compile --fqbn arduino:sam:arduino_due_x $f
|
||||
${ARDUINO_CLI_TOOL} compile --fqbn arduino:mbed:nano33ble $f
|
||||
done
|
||||
|
@ -137,7 +137,7 @@ $(PRJDIR)$(2)/arduino/src/third_party/kissfft/kiss_fft.h: tensorflow/lite/experi
|
||||
@mkdir -p $$(dir $$@)
|
||||
@python tensorflow/lite/experimental/micro/tools/make/transform_arduino_source.py \
|
||||
--third_party_headers="$(4)" < $$< | \
|
||||
sed -E 's/<string.h>/<string.h>\n#include <stdint.h>/g' > $$@
|
||||
sed -E 's@#include <string.h>@//#include <string.h> /* Patched by helper_functions.inc for Arduino compatibility */@g' > $$@
|
||||
|
||||
$(PRJDIR)$(2)/arduino/%: tensorflow/lite/experimental/micro/tools/make/templates/%
|
||||
@mkdir -p $$(dir $$@)
|
||||
|
@ -18,11 +18,25 @@ limitations under the License.
|
||||
// Include an empty header so that Arduino knows to build the TF Lite library.
|
||||
#include <TensorFlowLite.h>
|
||||
|
||||
// TensorFlow Lite defines its own main function
|
||||
extern int tflite_micro_main(int argc, char* argv[]);
|
||||
|
||||
// So the example works with or without a serial connection,
|
||||
// wait to see one for 5 seconds before giving up.
|
||||
void waitForSerial() {
|
||||
int start = millis();
|
||||
while(!Serial) {
|
||||
int diff = millis() - start;
|
||||
if (diff > 5000) break;
|
||||
}
|
||||
}
|
||||
|
||||
// Runs once when the program starts
|
||||
void setup() {
|
||||
waitForSerial();
|
||||
tflite_micro_main(0, NULL);
|
||||
}
|
||||
|
||||
// Leave the loop unused
|
||||
void loop() {
|
||||
}
|