From 7003be098cc8d10191e78f61dae417c7353b03c7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Wed, 7 Nov 2018 14:57:31 -0800
Subject: [PATCH] Fixed point implementation for audio preprocessing in TF Lite Micro

PiperOrigin-RevId: 220534796
---
 .../micro/examples/micro_speech/BUILD         |  26 ++-
 .../micro_speech/preprocessor_fixed.cc        | 218 ++++++++++++++++++
 ...{preprocessor.cc => preprocessor_float.cc} |   0
 .../experimental/micro/tools/make/Makefile    |  45 +++-
 4 files changed, 276 insertions(+), 13 deletions(-)
 create mode 100644 tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_fixed.cc
 rename tensorflow/lite/experimental/micro/examples/micro_speech/{preprocessor.cc => preprocessor_float.cc} (100%)

diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/BUILD b/tensorflow/lite/experimental/micro/examples/micro_speech/BUILD
index 69022b611ed..638ae1467a5 100644
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/BUILD
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/BUILD
@@ -32,14 +32,36 @@ tflite_micro_cc_test(
 )
 
 tflite_micro_cc_test(
-    name = "preprocessor_test",
+    name = "preprocessor_float_test",
     srcs = [
         "no_30ms_sample_data.cc",
         "no_30ms_sample_data.h",
         "no_power_spectrum_data.cc",
         "no_power_spectrum_data.h",
-        "preprocessor.cc",
         "preprocessor.h",
+        "preprocessor_float.cc",
+        "preprocessor_test.cc",
+        "yes_30ms_sample_data.cc",
+        "yes_30ms_sample_data.h",
+        "yes_power_spectrum_data.cc",
+        "yes_power_spectrum_data.h",
+    ],
+    deps = [
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/experimental/micro:micro_framework",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "preprocessor_fixed_test",
+    srcs = [
+        "no_30ms_sample_data.cc",
+        "no_30ms_sample_data.h",
+        "no_power_spectrum_data.cc",
+        "no_power_spectrum_data.h",
+        "preprocessor.h",
+        "preprocessor_fixed.cc",
         "preprocessor_test.cc",
         "yes_30ms_sample_data.cc",
         "yes_30ms_sample_data.h",
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_fixed.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_fixed.cc
new file mode 100644
index 00000000000..de60c982f3a
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_fixed.cc
@@ -0,0 +1,218 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Reference implementation of the preprocessing pipeline, with the same
+// results as the audio tutorial at
+// https://www.tensorflow.org/tutorials/sequences/audio_recognition
+// This module takes 30ms of PCM-encoded signed 16-bit audio samples (at 16KHz,
+// so 480 values), and extracts a power spectrum of frequencies. There are 43
+// frequency bands in the result, derived from the original 256 output from the
+// discrete Fourier transform, and averaged together in groups of 6.
+// It's expected that most platforms will have optimized versions of the
+// functions used here, for example replacing the DFT with an FFT, so this
+// version shouldn't be used where performance is critical.
+// This implementation uses fixed point for any non-constant calculations,
+// instead of floating point, to help show how this can work on platforms that
+// don't have good float support.
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.h"
+
+#include <cmath>
+
+namespace {
+
+// q format notation: qx.y => 1 sign bit, x-1 integer bits, y fraction bits.
+// Use standard (non-saturating) arithmetic with signed ints of size x+y bits.
+// Sacrifice some precision to avoid use of 64-bit ints.
+
+// q1.15 * q1.15 => q2.30
+inline int32_t Q1_15_FixedMultiply_Q2_30(int16_t a, int16_t b) {
+  int32_t big_a = a;
+  int32_t big_b = b;
+  return big_a * big_b;
+}
+
+// q2.30 * q2.30 => q10.22
+inline int32_t Q2_30_FixedMultiply_Q10_22(int32_t a, int32_t b) {
+  // q2.30 result
+  int32_t tmp = (a >> 15) * (b >> 15);
+  // q10.22 result
+  return tmp >> 8;
+}
+
+// q10.22 * q10.22 => q10.22
+// Will overflow if product is >= 512.
+// Largest product in small test set is 465.25
+inline int32_t Q10_22_FixedMultiply_Q10_22(int32_t a, int32_t b) {
+  // q10.22 result
+  return (a >> 11) * (b >> 11);
+}
+
+// float => q2.30
+// No checking for saturation.  Only used for inputs in range [-1, 1].
+inline int32_t FloatToFixed_Q2_30(float input) {
+  return static_cast<int32_t>(roundf(input * (1 << 30)));
+}
+
+// These constants allow us to allocate fixed-sized arrays on the stack for our
+// working memory.
+constexpr int kInputSize = 512;
+constexpr int kAverageWindowSize = 6;
+constexpr int kOutputSize =
+    ((kInputSize / 2) + (kAverageWindowSize - 1)) / kAverageWindowSize;
+
+// Performs a discrete Fourier transform on the real inputs. This corresponds to
+// rdft() in the FFT package at http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html,
+// and to kiss_fftr() in KISSFFT at https://github.com/mborgerding/kissfft.
+// It takes in an array of q2.30 fixed point real values, and returns a result
+// of the same length with q10.22 fixed point real and imaginary components
+// interleaved, so fourier_output[0] is the first real value, fourier_output[1]
+// is the first imaginary, fourier_output[2] is the second real, and so on.
+// The calling function should ensure that the array passed in as fourier_output
+// is at least time_series_size in length. Most optimized FFT implementations
+// require the length to be a power of two as well, but this version doesn't
+// enforce that.
+
+// input: q2.30 fixed point.  output: q10.22 fixed point.
+// Outputs interpreted as q10.22 fixed point are un-scaled.
+void CalculateDiscreteFourierTransform(int32_t* time_series,
+                                       int time_series_size,
+                                       int32_t* fourier_output) {
+  for (int i = 0; i < time_series_size / 2; ++i) {
+    int32_t real = 0;
+    for (int j = 0; j < time_series_size; ++j) {
+      const int32_t real_scale =
+          FloatToFixed_Q2_30(cos(j * i * M_PI * 2 / time_series_size));
+      real += Q2_30_FixedMultiply_Q10_22(time_series[j], real_scale);
+    }
+    int32_t imaginary = 0;
+    for (int j = 0; j < time_series_size; ++j) {
+      const int32_t imaginary_scale =
+          FloatToFixed_Q2_30(sin(j * i * M_PI * 2 / time_series_size));
+      imaginary -= Q2_30_FixedMultiply_Q10_22(time_series[j], imaginary_scale);
+    }
+    fourier_output[(i * 2) + 0] = real;
+    fourier_output[(i * 2) + 1] = imaginary;
+  }
+}
+
+// Produces a simple sine curve that is used to ensure frequencies at the center
+// of the current sample window are weighted more heavily than those at the end.
+// q1.15 output format.
+void CalculatePeriodicHann(int window_length, int16_t* window_function) {
+  for (int i = 0; i < window_length; ++i) {
+    const float real_value = (0.5 - 0.5 * cos((2 * M_PI * i) / window_length));
+    int tmp = static_cast<int>(roundf(real_value * (1 << 15)));
+    // Saturate the 0x8000 value to 0x7fff
+    if (tmp > 0x7fff) tmp = 0x7fff;
+    window_function[i] = tmp;
+  }
+}
+
+}  // namespace
+
+TfLiteStatus Preprocess(tflite::ErrorReporter* error_reporter,
+                        const int16_t* input, int input_size, int output_size,
+                        uint8_t* output) {
+  // Ensure our input and output data arrays are valid.
+  if (input_size > kInputSize) {
+    error_reporter->Report("Input size %d larger than %d", input_size,
+                           kInputSize);
+    return kTfLiteError;
+  }
+  if (output_size != kOutputSize) {
+    error_reporter->Report("Requested output size %d doesn't match %d",
+                           output_size, kOutputSize);
+    return kTfLiteError;
+  }
+
+  // Pre-calculate the window function we'll be applying to the input data.
+  // In a real application, we'd calculate this table once in an initialization
+  // function and store it for repeated reuse.
+  // q1.15 format.
+  int16_t window_function[kInputSize];
+  CalculatePeriodicHann(input_size, window_function);
+
+  // Apply the window function to our time series input, and pad it with zeroes
+  // to the next power of two.
+  int32_t fixed_input[kInputSize];
+  for (int i = 0; i < kInputSize; ++i) {
+    if (i < input_size) {
+      // input is int16_t.  Treat as q1.15 fixed point value in range [-1,1)
+      // window_function is also q1.15 fixed point number
+      fixed_input[i] =
+          Q1_15_FixedMultiply_Q2_30(input[i], window_function[i]);
+    } else {
+      fixed_input[i] = 0;
+    }
+  }
+
+  // Pull the frequency data from the time series sample.
+  // Calculated in q10.22 format from q2.30 inputs.
+  int32_t fourier_values[kInputSize];
+  CalculateDiscreteFourierTransform(fixed_input, kInputSize, fourier_values);
+
+  // We have the complex numbers giving us information about each frequency
+  // band, but all we want to know is how strong each frequency is, so calculate
+  // the squared magnitude by adding together the squares of each component.
+  int32_t power_spectrum[kInputSize / 2];
+  for (int i = 0; i < (kInputSize / 2); ++i) {
+    const int32_t real = fourier_values[(i * 2) + 0];
+    const int32_t imaginary = fourier_values[(i * 2) + 1];
+    // q10.22 results
+    power_spectrum[i] =
+        Q10_22_FixedMultiply_Q10_22(real, real) +
+        Q10_22_FixedMultiply_Q10_22(imaginary, imaginary);
+  }
+
+  // Finally, reduce the size of the output by averaging together six adjacent
+  // frequencies into each slot, producing an array of 43 values.
+  // Power_spectrum numbers are q10.22.  Divide by kAverageWindowSize inside
+  // loop to prevent overflow.
+  for (int i = 0; i < kOutputSize; ++i) {
+    int32_t average = 0;
+    for (int j = 0; j < kAverageWindowSize; ++j) {
+      const int index = (i * kAverageWindowSize) + j;
+      if (index < (kInputSize / 2)) {
+        average += power_spectrum[index] / kAverageWindowSize;
+      }
+    }
+    // Quantize the result into eight bits, effectively multiplying by two.
+    // The 127.5 constant here has to match the features_max value defined in
+    // tensorflow/examples/speech_commands/input_data.py, and this also assumes
+    // that features_min is zero.
+    //
+    // q10.22 input
+    // integer output
+    //
+    // output = (input - features_min) *
+    //     (output_max - output_min) / (features_max - features_min)
+    // == (input) * (255) / (127.5)
+    // == input * 2
+    // == input << 1
+    // Also want to round to nearest integer and only keep integer bits
+    // => ((input << 1) + 0x200000) >> 22
+    // == (input + 0x100000) >> 21
+    int32_t quantized_average = (average + 0x100000) >> 21;
+    if (quantized_average < 0) {
+      quantized_average = 0;
+    }
+    if (quantized_average > 255) {
+      quantized_average = 255;
+    }
+    output[i] = quantized_average;
+  }
+  return kTfLiteOk;
+}
diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.cc b/tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_float.cc
similarity index 100%
rename from tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.cc
rename to tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_float.cc
diff --git a/tensorflow/lite/experimental/micro/tools/make/Makefile b/tensorflow/lite/experimental/micro/tools/make/Makefile
index 5492003e5af..b182c120d2a 100644
--- a/tensorflow/lite/experimental/micro/tools/make/Makefile
+++ b/tensorflow/lite/experimental/micro/tools/make/Makefile
@@ -62,12 +62,19 @@ tensorflow/lite/experimental/micro/examples/micro_speech/yes_features_data.cc
 # Test binary for the microcontroller speech model.
PREPROCESSOR_TEST_SRCS := \ tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_test.cc \ -tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.cc \ tensorflow/lite/experimental/micro/examples/micro_speech/no_30ms_sample_data.cc \ tensorflow/lite/experimental/micro/examples/micro_speech/yes_30ms_sample_data.cc \ tensorflow/lite/experimental/micro/examples/micro_speech/no_power_spectrum_data.cc \ tensorflow/lite/experimental/micro/examples/micro_speech/yes_power_spectrum_data.cc +PREPROCESSOR_FLOAT_TEST_SRCS = \ +$(PREPROCESSOR_TEST_SRCS) \ +tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_float.cc + +PREPROCESSOR_FIXED_TEST_SRCS += \ +$(PREPROCESSOR_TEST_SRCS) \ +tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_fixed.cc + MICROLITE_TEST_SRCS := \ $(wildcard tensorflow/lite/experimental/micro/*test.cc) \ $(wildcard tensorflow/lite/experimental/micro/kernels/*test.cc) @@ -91,7 +98,8 @@ include $(wildcard $(MAKEFILE_DIR)/targets/*_makefile.inc) ALL_SRCS := \ $(MICRO_SPEECH_TEST_SRCS) \ - $(PREPROCESSOR_TEST_SRCS) \ + $(PREPROCESSOR_FLOAT_TEST_SRCS) \ + $(PREPROCESSOR_FIXED_TEST_SRCS) \ $(MICROLITE_CC_SRCS) \ $(MICROLITE_TEST_SRCS) @@ -104,7 +112,8 @@ LIBDIR := $(GENDIR)lib/ MICROLITE_LIB_PATH := $(LIBDIR)$(MICROLITE_LIB_NAME) MICRO_SPEECH_TEST_BINARY := $(BINDIR)micro_speech_test -PREPROCESSOR_TEST_BINARY := $(BINDIR)preprocessor_test +PREPROCESSOR_FLOAT_TEST_BINARY := $(BINDIR)preprocessor_float_test +PREPROCESSOR_FIXED_TEST_BINARY := $(BINDIR)preprocessor_fixed_test CXX := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}g++ CC := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}gcc @@ -113,8 +122,11 @@ AR := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}ar MICRO_SPEECH_TEST_OBJS := $(addprefix $(OBJDIR), \ $(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MICRO_SPEECH_TEST_SRCS)))) -PREPROCESSOR_TEST_OBJS := $(addprefix $(OBJDIR), \ -$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(PREPROCESSOR_TEST_SRCS)))) 
+PREPROCESSOR_FLOAT_TEST_OBJS := $(addprefix $(OBJDIR), \
+$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(PREPROCESSOR_FLOAT_TEST_SRCS))))
+
+PREPROCESSOR_FIXED_TEST_OBJS := $(addprefix $(OBJDIR), \
+$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(PREPROCESSOR_FIXED_TEST_SRCS))))
 
 MICROLITE_LIB_OBJS := $(addprefix $(OBJDIR), \
 $(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MICROLITE_CC_SRCS))))
@@ -158,18 +170,29 @@ micro_speech_test_bin: $(MICRO_SPEECH_TEST_BINARY).bin
 test_micro_speech: $(MICRO_SPEECH_TEST_BINARY)
 	$(TEST_SCRIPT) $(MICRO_SPEECH_TEST_BINARY) '~~~ALL TESTS PASSED~~~'
 
-$(PREPROCESSOR_TEST_BINARY): $(PREPROCESSOR_TEST_OBJS) $(MICROLITE_LIB_PATH)
+$(PREPROCESSOR_FLOAT_TEST_BINARY): $(PREPROCESSOR_FLOAT_TEST_OBJS) $(MICROLITE_LIB_PATH)
 	@mkdir -p $(dir $@)
 	$(CXX) $(CXXFLAGS) $(INCLUDES) \
-	-o $(PREPROCESSOR_TEST_BINARY) $(PREPROCESSOR_TEST_OBJS) \
+	-o $(PREPROCESSOR_FLOAT_TEST_BINARY) $(PREPROCESSOR_FLOAT_TEST_OBJS) \
 	$(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS)
 
-preprocessor_test: $(PREPROCESSOR_TEST_BINARY)
-preprocessor_test_bin: $(PREPROCESSOR_TEST_BINARY).bin
+preprocessor_float_test: $(PREPROCESSOR_FLOAT_TEST_BINARY)
+preprocessor_float_test_bin: $(PREPROCESSOR_FLOAT_TEST_BINARY).bin
 
-test_preprocessor: $(PREPROCESSOR_TEST_BINARY)
-	$(TEST_SCRIPT) $(PREPROCESSOR_TEST_BINARY) '~~~ALL TESTS PASSED~~~'
+test_preprocessor_float: $(PREPROCESSOR_FLOAT_TEST_BINARY)
+	$(TEST_SCRIPT) $(PREPROCESSOR_FLOAT_TEST_BINARY) '~~~ALL TESTS PASSED~~~'
 
+$(PREPROCESSOR_FIXED_TEST_BINARY): $(PREPROCESSOR_FIXED_TEST_OBJS) $(MICROLITE_LIB_PATH)
+	@mkdir -p $(dir $@)
+	$(CXX) $(CXXFLAGS) $(INCLUDES) \
+	-o $(PREPROCESSOR_FIXED_TEST_BINARY) $(PREPROCESSOR_FIXED_TEST_OBJS) \
+	$(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS)
+
+preprocessor_fixed_test: $(PREPROCESSOR_FIXED_TEST_BINARY)
+preprocessor_fixed_test_bin: $(PREPROCESSOR_FIXED_TEST_BINARY).bin
+
+test_preprocessor_fixed: $(PREPROCESSOR_FIXED_TEST_BINARY)
+	$(TEST_SCRIPT) $(PREPROCESSOR_FIXED_TEST_BINARY) '~~~ALL TESTS PASSED~~~'
 
 $(BINDIR)%_test : $(OBJDIR)%_test.o $(MICROLITE_LIB_PATH)
 	@mkdir -p $(dir $@)