Fixed point implementation for audio preprocessing in TF Lite Micro
PiperOrigin-RevId: 220534796
This commit is contained in:
parent
487a8d7fb2
commit
7003be098c
@ -32,14 +32,36 @@ tflite_micro_cc_test(
|
||||
)
|
||||
|
||||
tflite_micro_cc_test(
|
||||
name = "preprocessor_test",
|
||||
name = "preprocessor_float_test",
|
||||
srcs = [
|
||||
"no_30ms_sample_data.cc",
|
||||
"no_30ms_sample_data.h",
|
||||
"no_power_spectrum_data.cc",
|
||||
"no_power_spectrum_data.h",
|
||||
"preprocessor.cc",
|
||||
"preprocessor.h",
|
||||
"preprocessor_float.cc",
|
||||
"preprocessor_test.cc",
|
||||
"yes_30ms_sample_data.cc",
|
||||
"yes_30ms_sample_data.h",
|
||||
"yes_power_spectrum_data.cc",
|
||||
"yes_power_spectrum_data.h",
|
||||
],
|
||||
deps = [
|
||||
"//tensorflow/lite/c:c_api_internal",
|
||||
"//tensorflow/lite/experimental/micro:micro_framework",
|
||||
"//tensorflow/lite/experimental/micro/testing:micro_test",
|
||||
],
|
||||
)
|
||||
|
||||
tflite_micro_cc_test(
|
||||
name = "preprocessor_fixed_test",
|
||||
srcs = [
|
||||
"no_30ms_sample_data.cc",
|
||||
"no_30ms_sample_data.h",
|
||||
"no_power_spectrum_data.cc",
|
||||
"no_power_spectrum_data.h",
|
||||
"preprocessor.h",
|
||||
"preprocessor_fixed.cc",
|
||||
"preprocessor_test.cc",
|
||||
"yes_30ms_sample_data.cc",
|
||||
"yes_30ms_sample_data.h",
|
||||
|
@ -0,0 +1,218 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// Reference implementation of the preprocessing pipeline, with the same
|
||||
// results as the audio tutorial at
|
||||
// https://www.tensorflow.org/tutorials/sequences/audio_recognition
|
||||
// This module takes 30ms of PCM-encoded signed 16-bit audio samples (at 16KHz,
|
||||
// so 480 values), and extracts a power spectrum of frequencies. There are 43
|
||||
// frequency bands in the result, derived from the original 256 output from the
|
||||
// discrete Fourier transform, and averaged together in groups of 6.
|
||||
// It's expected that most platforms will have optimized versions of the
|
||||
// functions used here, for example replacing the DFT with an FFT, so this
|
||||
// version shouldn't be used where performance is critical.
|
||||
// This implementation uses fixed point for any non-constant calculations,
|
||||
// instead of floating point, to help show how this can work on platforms that
|
||||
// don't have good float support.
|
||||
|
||||
#include "tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
namespace {
|
||||
|
||||
// q format notation: qx.y => 1 sign bit, x-1 integer bits, y fraction bits.
|
||||
// Use standard (non-saturating) arithmetic with signed ints of size x+y bits.
|
||||
// Sacrifice some precision to avoid use of 64-bit ints.
|
||||
|
||||
// q1.15 * q1.15 => q2.30
|
||||
inline int32_t Q1_15_FixedMultiply_Q2_30(int16_t a, int16_t b) {
  // Widen both operands to 32 bits first so the full-precision product is
  // kept. Each operand carries 15 fraction bits, so the product carries 30.
  return static_cast<int32_t>(a) * static_cast<int32_t>(b);
}
|
||||
|
||||
// q2.30 * q2.30 => q10.22
|
||||
inline int32_t Q2_30_FixedMultiply_Q10_22(int32_t a, int32_t b) {
  // Drop 15 fraction bits from each operand up front, so the 32-bit product
  // below carries 15 + 15 = 30 fraction bits.
  const int32_t a_reduced = a >> 15;
  const int32_t b_reduced = b >> 15;
  const int32_t product_30_fraction_bits = a_reduced * b_reduced;
  // Discard 8 more fraction bits to leave the 22 that q10.22 expects.
  return product_30_fraction_bits >> 8;
}
|
||||
|
||||
// q10.22 * q10.22 => q10.22
// The true product does not fit in q10.22 once its magnitude reaches 512
// (the largest product in the small test set is 465.25). Plain 32-bit
// multiplication would then overflow, which is undefined behaviour for signed
// integers, so this function saturates to the largest or smallest int32_t
// value instead. Only 32-bit arithmetic is used.
inline int32_t Q10_22_FixedMultiply_Q10_22(int32_t a, int32_t b) {
  const int32_t kInt32Max = 0x7fffffff;
  const int32_t kInt32Min = -kInt32Max - 1;

  // Drop 11 fraction bits from each operand so the product carries
  // 11 + 11 = 22 fraction bits.
  const int32_t a_reduced = a >> 11;
  const int32_t b_reduced = b >> 11;
  if (a_reduced == 0 || b_reduced == 0) {
    return 0;
  }

  // After the shift both operands lie in [-2^20, 2^20), so negating them
  // cannot overflow.
  const int32_t a_magnitude = (a_reduced < 0) ? -a_reduced : a_reduced;
  const int32_t b_magnitude = (b_reduced < 0) ? -b_reduced : b_reduced;

  // Detect overflow with a division rather than by forming the product.
  if (a_magnitude > kInt32Max / b_magnitude) {
    const bool negative_result = (a_reduced < 0) != (b_reduced < 0);
    return negative_result ? kInt32Min : kInt32Max;
  }

  // q10.22 result
  return a_reduced * b_reduced;
}
|
||||
|
||||
// float => q2.30
|
||||
// No checking for saturation. Only used for inputs in range [-1, 1].
|
||||
inline int32_t FloatToFixed_Q2_30(float input) {
  // Scale by 2^30 to move the 30 fraction bits into the integer part, then
  // round to the nearest whole number before converting.
  const float scaled = input * (1 << 30);
  const float rounded = roundf(scaled);
  return static_cast<int32_t>(rounded);
}
|
||||
|
||||
// These constants allow us to allocate fixed-sized arrays on the stack for our
|
||||
// working memory.
|
||||
constexpr int kInputSize = 512;
|
||||
constexpr int kAverageWindowSize = 6;
|
||||
constexpr int kOutputSize =
|
||||
((kInputSize / 2) + (kAverageWindowSize - 1)) / kAverageWindowSize;
|
||||
|
||||
// Performs a discrete Fourier transform on the real inputs. This corresponds to
|
||||
// rdft() in the FFT package at http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html,
|
||||
// and to kiss_fftr() in KISSFFT at https://github.com/mborgerding/kissfft.
|
||||
// It takes in an array of q2.30 fixed point real values, and returns a result of the same
|
||||
// length with q10.22 fixed point real and imaginary components interleaved, so
|
||||
// fourier_output[0] is the first real value, fourier_output[1] is the first
|
||||
// imaginary, fourier_output[2] is the second real, and so on.
|
||||
// The calling function should ensure that the array passed in as fourier_output
|
||||
// is at least time_series_size in length. Most optimized FFT implementations
|
||||
// require the length to be a power of two as well, but this version doesn't
|
||||
// enforce that.
|
||||
|
||||
// input: q2.30 fixed point. output: q10.22 fixed point.
|
||||
// Outputs interpreted as q10.22 fixed point are un-scaled.
|
||||
void CalculateDiscreteFourierTransform(int32_t* time_series,
|
||||
int time_series_size,
|
||||
int32_t* fourier_output) {
|
||||
for (int i = 0; i < time_series_size / 2; ++i) {
|
||||
int32_t real = 0;
|
||||
for (int j = 0; j < time_series_size; ++j) {
|
||||
const int32_t real_scale =
|
||||
FloatToFixed_Q2_30(cos(j * i * M_PI * 2 / time_series_size));
|
||||
real += Q2_30_FixedMultiply_Q10_22(time_series[j], real_scale);
|
||||
}
|
||||
int32_t imaginary = 0;
|
||||
for (int j = 0; j < time_series_size; ++j) {
|
||||
const int32_t imaginary_scale =
|
||||
FloatToFixed_Q2_30(sin(j * i * M_PI * 2 / time_series_size));
|
||||
imaginary -= Q2_30_FixedMultiply_Q10_22(time_series[j], imaginary_scale);
|
||||
}
|
||||
fourier_output[(i * 2) + 0] = real;
|
||||
fourier_output[(i * 2) + 1] = imaginary;
|
||||
}
|
||||
}
|
||||
|
||||
// Produces a raised-cosine (Hann) curve that is used to ensure samples at the
// center of the current sample window are weighted more heavily than those at
// the ends.
|
||||
// q1.15 output format.
|
||||
void CalculatePeriodicHann(int window_length, int16_t* window_function) {
  // Largest positive value that fits in the q1.15 output.
  const int kQ15Max = 0x7fff;
  for (int n = 0; n < window_length; ++n) {
    const float weight = (0.5 - 0.5 * cos((2 * M_PI * n) / window_length));
    const int scaled = static_cast<int32_t>(roundf(weight * (1 << 15)));
    // A weight of 1.0 scales to 0x8000, which is out of range, so clamp it.
    window_function[n] = (scaled > kQ15Max) ? kQ15Max : scaled;
  }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Converts one frame of 16-bit PCM samples into averaged, 8-bit quantized
// power spectrum values, using fixed point arithmetic throughout.
//
//   error_reporter: receives a message when one of the size checks fails.
//   input:          input_size signed 16-bit samples, treated as q1.15.
//   input_size:     number of samples; must not exceed kInputSize.
//   output_size:    must equal kOutputSize.
//   output:         receives kOutputSize values, each clamped to [0, 255].
//
// Returns kTfLiteOk on success, or kTfLiteError when a size check fails.
TfLiteStatus Preprocess(tflite::ErrorReporter* error_reporter,
                        const int16_t* input, int input_size, int output_size,
                        uint8_t* output) {
  const int32_t kInt32Max = 0x7fffffff;
  const int32_t kInt32Min = -kInt32Max - 1;

  // Ensure our input and output data arrays are valid.
  if (input_size > kInputSize) {
    error_reporter->Report("Input size %d larger than %d", input_size,
                           kInputSize);
    return kTfLiteError;
  }
  if (output_size != kOutputSize) {
    error_reporter->Report("Requested output size %d doesn't match %d",
                           output_size, kOutputSize);
    return kTfLiteError;
  }

  // Pre-calculate the window function we'll be applying to the input data.
  // In a real application, we'd calculate this table once in an initialization
  // function and store it for repeated reuse.
  // q1.15 format.
  int16_t window_function[kInputSize];
  CalculatePeriodicHann(input_size, window_function);

  // Apply the window function to our time series input, and pad it with zeroes
  // to the next power of two.
  int32_t fixed_input[kInputSize];
  for (int i = 0; i < kInputSize; ++i) {
    if (i < input_size) {
      // input is int16_t. Treat as q1.15 fixed point value in range [-1,1)
      // window_function is also q1.15 fixed point number
      fixed_input[i] =
          Q1_15_FixedMultiply_Q2_30(input[i], window_function[i]);
    } else {
      fixed_input[i] = 0;
    }
  }

  // Pull the frequency data from the time series sample.
  // Calculated in q10.22 format from q2.30 inputs.
  int32_t fourier_values[kInputSize];
  CalculateDiscreteFourierTransform(fixed_input, kInputSize, fourier_values);

  // We have the complex numbers giving us information about each frequency
  // band, but all we want to know is how strong each frequency is, so calculate
  // the squared magnitude by adding together the squares of each component.
  int32_t power_spectrum[kInputSize / 2];
  for (int i = 0; i < (kInputSize / 2); ++i) {
    const int32_t real = fourier_values[(i * 2) + 0];
    const int32_t imaginary = fourier_values[(i * 2) + 1];
    // q10.22 results
    const int32_t real_squared = Q10_22_FixedMultiply_Q10_22(real, real);
    const int32_t imaginary_squared =
        Q10_22_FixedMultiply_Q10_22(imaginary, imaginary);
    // The sum of two q10.22 values can exceed the int32_t range even when each
    // term fits, and signed overflow is undefined behaviour. Saturate instead,
    // so a very loud band reads as the maximum power.
    if (imaginary_squared > 0 &&
        real_squared > kInt32Max - imaginary_squared) {
      power_spectrum[i] = kInt32Max;
    } else if (imaginary_squared < 0 &&
               real_squared < kInt32Min - imaginary_squared) {
      power_spectrum[i] = kInt32Min;
    } else {
      power_spectrum[i] = real_squared + imaginary_squared;
    }
  }

  // Finally, reduce the size of the output by averaging together six adjacent
  // frequencies into each slot, producing an array of 43 values.
  // Power_spectrum numbers are q10.22. Divide by kAverageWindowSize inside
  // loop to prevent overflow.
  for (int i = 0; i < kOutputSize; ++i) {
    int32_t average = 0;
    for (int j = 0; j < kAverageWindowSize; ++j) {
      const int index = (i * kAverageWindowSize) + j;
      if (index < (kInputSize / 2)) {
        average += power_spectrum[index] / kAverageWindowSize;
      }
    }
    // Quantize the result into eight bits, effectively multiplying by two.
    // The 127.5 constant here has to match the features_max value defined in
    // tensorflow/examples/speech_commands/input_data.py, and this also assumes
    // that features_min is zero.
    //
    // q10.22 input
    // integer output
    //
    // output = (input - features_min) *
    //     (output_max - output_min) / (features_max - features_min)
    // == (input) * (255) / (127.5)
    // == input * 2
    // == input << 1
    // Also want to round to nearest integer and only keep integer bits
    // => ((input << 1) + 0x200000) >> 22
    // == (input + 0x100000) >> 21
    const int32_t kRoundingOffset = 0x100000;
    int32_t quantized_average;
    if (average > kInt32Max - kRoundingOffset) {
      // Adding the rounding offset would overflow. An average this large
      // quantizes far above 255, so clamp it directly.
      quantized_average = 255;
    } else {
      quantized_average = (average + kRoundingOffset) >> 21;
    }
    if (quantized_average < 0) {
      quantized_average = 0;
    }
    if (quantized_average > 255) {
      quantized_average = 255;
    }
    output[i] = quantized_average;
  }
  return kTfLiteOk;
}
|
@ -62,12 +62,19 @@ tensorflow/lite/experimental/micro/examples/micro_speech/yes_features_data.cc
|
||||
# Test binary for the microcontroller speech model.
|
||||
PREPROCESSOR_TEST_SRCS := \
|
||||
tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_test.cc \
|
||||
tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.cc \
|
||||
tensorflow/lite/experimental/micro/examples/micro_speech/no_30ms_sample_data.cc \
|
||||
tensorflow/lite/experimental/micro/examples/micro_speech/yes_30ms_sample_data.cc \
|
||||
tensorflow/lite/experimental/micro/examples/micro_speech/no_power_spectrum_data.cc \
|
||||
tensorflow/lite/experimental/micro/examples/micro_speech/yes_power_spectrum_data.cc
|
||||
|
||||
# Sources for the floating point preprocessor test: the shared test sources
# plus the float implementation.
PREPROCESSOR_FLOAT_TEST_SRCS := \
$(PREPROCESSOR_TEST_SRCS) \
tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_float.cc

# Sources for the fixed point preprocessor test: the shared test sources plus
# the fixed point implementation. This is a plain definition rather than an
# append, so a value inherited from the environment cannot leak into the list.
PREPROCESSOR_FIXED_TEST_SRCS := \
$(PREPROCESSOR_TEST_SRCS) \
tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_fixed.cc
|
||||
|
||||
MICROLITE_TEST_SRCS := \
|
||||
$(wildcard tensorflow/lite/experimental/micro/*test.cc) \
|
||||
$(wildcard tensorflow/lite/experimental/micro/kernels/*test.cc)
|
||||
@ -91,7 +98,8 @@ include $(wildcard $(MAKEFILE_DIR)/targets/*_makefile.inc)
|
||||
|
||||
ALL_SRCS := \
|
||||
$(MICRO_SPEECH_TEST_SRCS) \
|
||||
$(PREPROCESSOR_TEST_SRCS) \
|
||||
$(PREPROCESSOR_FLOAT_TEST_SRCS) \
|
||||
$(PREPROCESSOR_FIXED_TEST_SRCS) \
|
||||
$(MICROLITE_CC_SRCS) \
|
||||
$(MICROLITE_TEST_SRCS)
|
||||
|
||||
@ -104,7 +112,8 @@ LIBDIR := $(GENDIR)lib/
|
||||
MICROLITE_LIB_PATH := $(LIBDIR)$(MICROLITE_LIB_NAME)
|
||||
|
||||
MICRO_SPEECH_TEST_BINARY := $(BINDIR)micro_speech_test
|
||||
PREPROCESSOR_TEST_BINARY := $(BINDIR)preprocessor_test
|
||||
PREPROCESSOR_FLOAT_TEST_BINARY := $(BINDIR)preprocessor_float_test
|
||||
PREPROCESSOR_FIXED_TEST_BINARY := $(BINDIR)preprocessor_fixed_test
|
||||
|
||||
CXX := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}g++
|
||||
CC := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}gcc
|
||||
@ -113,8 +122,11 @@ AR := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}ar
|
||||
MICRO_SPEECH_TEST_OBJS := $(addprefix $(OBJDIR), \
|
||||
$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MICRO_SPEECH_TEST_SRCS))))
|
||||
|
||||
PREPROCESSOR_TEST_OBJS := $(addprefix $(OBJDIR), \
|
||||
$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(PREPROCESSOR_TEST_SRCS))))
|
||||
PREPROCESSOR_FLOAT_TEST_OBJS := $(addprefix $(OBJDIR), \
|
||||
$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(PREPROCESSOR_FLOAT_TEST_SRCS))))
|
||||
|
||||
# Object files for the fixed point preprocessor test: every .c and .cc source
# is mapped to a .o path under $(OBJDIR).
PREPROCESSOR_FIXED_TEST_OBJS := $(addprefix $(OBJDIR), \
$(patsubst %.cc,%.o,$(PREPROCESSOR_FIXED_TEST_SRCS:.c=.o)))
|
||||
|
||||
MICROLITE_LIB_OBJS := $(addprefix $(OBJDIR), \
|
||||
$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MICROLITE_CC_SRCS))))
|
||||
@ -158,18 +170,29 @@ micro_speech_test_bin: $(MICRO_SPEECH_TEST_BINARY).bin
|
||||
test_micro_speech: $(MICRO_SPEECH_TEST_BINARY)
|
||||
$(TEST_SCRIPT) $(MICRO_SPEECH_TEST_BINARY) '~~~ALL TESTS PASSED~~~'
|
||||
|
||||
$(PREPROCESSOR_TEST_BINARY): $(PREPROCESSOR_TEST_OBJS) $(MICROLITE_LIB_PATH)
|
||||
$(PREPROCESSOR_FLOAT_TEST_BINARY): $(PREPROCESSOR_FLOAT_TEST_OBJS) $(MICROLITE_LIB_PATH)
|
||||
@mkdir -p $(dir $@)
|
||||
$(CXX) $(CXXFLAGS) $(INCLUDES) \
|
||||
-o $(PREPROCESSOR_TEST_BINARY) $(PREPROCESSOR_TEST_OBJS) \
|
||||
-o $(PREPROCESSOR_FLOAT_TEST_BINARY) $(PREPROCESSOR_FLOAT_TEST_OBJS) \
|
||||
$(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS)
|
||||
|
||||
preprocessor_test: $(PREPROCESSOR_TEST_BINARY)
|
||||
preprocessor_test_bin: $(PREPROCESSOR_TEST_BINARY).bin
|
||||
preprocessor_float_test: $(PREPROCESSOR_FLOAT_TEST_BINARY)
|
||||
preprocessor_float_test_bin: $(PREPROCESSOR_FLOAT_TEST_BINARY).bin
|
||||
|
||||
test_preprocessor: $(PREPROCESSOR_TEST_BINARY)
|
||||
$(TEST_SCRIPT) $(PREPROCESSOR_TEST_BINARY) '~~~ALL TESTS PASSED~~~'
|
||||
test_preprocessor_float: $(PREPROCESSOR_FLOAT_TEST_BINARY)
|
||||
$(TEST_SCRIPT) $(PREPROCESSOR_FLOAT_TEST_BINARY) '~~~ALL TESTS PASSED~~~'
|
||||
|
||||
# These targets are commands, not files. Declaring them phony keeps a stray
# file of the same name from silently turning them into no-ops.
.PHONY: preprocessor_fixed_test preprocessor_fixed_test_bin test_preprocessor_fixed

# Link the fixed point preprocessor test from its objects and the micro library.
$(PREPROCESSOR_FIXED_TEST_BINARY): $(PREPROCESSOR_FIXED_TEST_OBJS) $(MICROLITE_LIB_PATH)
	@mkdir -p $(dir $@)
	$(CXX) $(CXXFLAGS) $(INCLUDES) \
	-o $@ $(PREPROCESSOR_FIXED_TEST_OBJS) \
	$(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS)

# Convenience aliases for building the test binary and its .bin image.
preprocessor_fixed_test: $(PREPROCESSOR_FIXED_TEST_BINARY)
preprocessor_fixed_test_bin: $(PREPROCESSOR_FIXED_TEST_BINARY).bin

# Run the test binary through the test script with the expected pass marker.
test_preprocessor_fixed: $(PREPROCESSOR_FIXED_TEST_BINARY)
	$(TEST_SCRIPT) $< '~~~ALL TESTS PASSED~~~'
|
||||
|
||||
$(BINDIR)%_test : $(OBJDIR)%_test.o $(MICROLITE_LIB_PATH)
|
||||
@mkdir -p $(dir $@)
|
||||
|
Loading…
Reference in New Issue
Block a user