Fixed point implementation for audio preprocessing in TF Lite Micro

PiperOrigin-RevId: 220534796
2018-11-07 14:57:31 -08:00 · 2018-11-07 14:57:31 -08:00 · 7003be098c
commit 7003be098c
parent 487a8d7fb2
4 changed files with 276 additions and 13 deletions
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/BUILD
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/BUILD
@ -32,14 +32,36 @@ tflite_micro_cc_test(
 )

 tflite_micro_cc_test(
-    name = "preprocessor_test",
+    name = "preprocessor_float_test",
+    # Builds the shared preprocessor_test.cc against the floating point
+    # implementation in preprocessor_float.cc.
    srcs = [
        "no_30ms_sample_data.cc",
        "no_30ms_sample_data.h",
        "no_power_spectrum_data.cc",
        "no_power_spectrum_data.h",
-        "preprocessor.cc",
        "preprocessor.h",
+        "preprocessor_float.cc",
+        "preprocessor_test.cc",
+        "yes_30ms_sample_data.cc",
+        "yes_30ms_sample_data.h",
+        "yes_power_spectrum_data.cc",
+        "yes_power_spectrum_data.h",
+    ],
+    deps = [
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/experimental/micro:micro_framework",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "preprocessor_fixed_test",
+    srcs = [
+        "no_30ms_sample_data.cc",
+        "no_30ms_sample_data.h",
+        "no_power_spectrum_data.cc",
+        "no_power_spectrum_data.h",
+        "preprocessor.h",
+        "preprocessor_fixed.cc",
        "preprocessor_test.cc",
        "yes_30ms_sample_data.cc",
        "yes_30ms_sample_data.h",
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_fixed.cc
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_fixed.cc
@ -0,0 +1,218 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Reference implementation of the preprocessing pipeline, with the same
+// results as the audio tutorial at
+// https://www.tensorflow.org/tutorials/sequences/audio_recognition
+// This module takes 30ms of PCM-encoded signed 16-bit audio samples (480
+// values at 16 kHz), and extracts a power spectrum of frequencies. There are 43
+// frequency bands in the result, derived from the 256 outputs of the discrete
+// Fourier transform, averaged together in groups of 6.
+// It's expected that most platforms will have optimized versions of the
+// functions used here, for example replacing the DFT with an FFT, so this
+// version shouldn't be used where performance is critical.
+// This implementation uses fixed point for any non-constant calculations,
+// instead of floating point, to help show how this can work on platforms that
+// don't have good float support.
+
+#include "tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.h"
+
+#include <cmath>
+
+namespace {
+
+// q format notation: qx.y => 1 sign bit, x-1 integer bits, y fraction bits.
+// Use standard (non-saturating) arithmetic with signed ints of size x+y bits.
+// Sacrifice some precision to avoid use of 64-bit ints.
+
+// q1.15 * q1.15 => q2.30
+// Multiplies two 16-bit fixed point values and returns the full 32-bit
+// product. Each operand carries 15 fraction bits, so the result carries 30.
+inline int32_t Q1_15_FixedMultiply_Q2_30(int16_t a, int16_t b) {
+  // Widen to 32 bits before multiplying so the product is computed as int32_t.
+  int32_t big_a = a;
+  int32_t big_b = b;
+  return big_a * big_b;
+}
+
+// q2.30 * q2.30 => q10.22
+// Multiplies two q2.30 values using only 32-bit arithmetic. The low 15 bits of
+// each operand are discarded before the multiply, which is where precision is
+// traded for avoiding a 64-bit intermediate.
+inline int32_t Q2_30_FixedMultiply_Q10_22(int32_t a, int32_t b) {
+  // q2.30 result
+  // (15 fraction bits remain in each operand, so the product has 30.)
+  int32_t tmp = (a >> 15) * (b >> 15);
+  // q10.22 result
+  // (Dropping 8 more fraction bits leaves 22.)
+  return tmp >> 8;
+}
+
+// q10.22 * q10.22 => q10.22
+// Will overflow if product is >= 512.
+// Largest product in small test set is 465.25
+// The low 11 fraction bits of each operand are discarded before multiplying,
+// so each factor carries 11 fraction bits and the product carries 22.
+inline int32_t Q10_22_FixedMultiply_Q10_22(int32_t a, int32_t b) {
+  // q10.22 result
+  return (a >> 11) * (b >> 11);
+}
+
+// float => q2.30
+// No checking for saturation.  Only used for inputs in range [-1, 1].
+// Scales the input by 2^30 and rounds to the nearest integer with roundf().
+inline int32_t FloatToFixed_Q2_30(float input) {
+  return static_cast<int32_t>(roundf(input * (1 << 30)));
+}
+
+// These constants allow us to allocate fixed-sized arrays on the stack for our
+// working memory.
+// kInputSize is the zero-padded time series length that is fed to the DFT.
+constexpr int kInputSize = 512;
+// Number of adjacent power spectrum values averaged into each output slot.
+constexpr int kAverageWindowSize = 6;
+// Number of output slots: (kInputSize / 2) divided by kAverageWindowSize,
+// rounded up, i.e. ceil(256 / 6) = 43.
+constexpr int kOutputSize =
+    ((kInputSize / 2) + (kAverageWindowSize - 1)) / kAverageWindowSize;
+
+// Performs a discrete Fourier transform on the real inputs. This corresponds to
+// rdft() in the FFT package at http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html,
+// and to kiss_fftr() in KISSFFT at https://github.com/mborgerding/kissfft.
+// It takes in an array of q2.30 fixed point real values, and returns a result
+// of the same length with q10.22 fixed point real and imaginary components
+// interleaved, so fourier_output[0] is the first real value, fourier_output[1]
+// is the first imaginary, fourier_output[2] is the second real, and so on.
+// The calling function should ensure that the array passed in as fourier_output
+// is at least time_series_size in length. Most optimized FFT implementations
+// require the length to be a power of two as well, but this version doesn't
+// enforce that.
+
+// input: q2.30 fixed point.  output: q10.22 fixed point.
+// Outputs interpreted as q10.22 fixed point are un-scaled.
+void CalculateDiscreteFourierTransform(int32_t* time_series,
+                                       int time_series_size,
+                                       int32_t* fourier_output) {
+  // Only the first time_series_size / 2 frequency bins are computed; each bin
+  // produces one real and one imaginary output value.
+  for (int i = 0; i < time_series_size / 2; ++i) {
+    int32_t real = 0;
+    for (int j = 0; j < time_series_size; ++j) {
+      // The coefficient is evaluated in floating point and converted to q2.30
+      // for every (i, j) pair; nothing is cached between iterations.
+      const int32_t real_scale =
+          FloatToFixed_Q2_30(cos(j * i * M_PI * 2 / time_series_size));
+      real += Q2_30_FixedMultiply_Q10_22(time_series[j], real_scale);
+    }
+    int32_t imaginary = 0;
+    for (int j = 0; j < time_series_size; ++j) {
+      const int32_t imaginary_scale =
+          FloatToFixed_Q2_30(sin(j * i * M_PI * 2 / time_series_size));
+      // The sine terms are subtracted, so the imaginary sum carries a negative
+      // sign relative to the sine projection.
+      imaginary -= Q2_30_FixedMultiply_Q10_22(time_series[j], imaginary_scale);
+    }
+    fourier_output[(i * 2) + 0] = real;
+    fourier_output[(i * 2) + 1] = imaginary;
+  }
+}
+
+// Produces a simple sine curve that is used to ensure frequencies at the center
+// of the current sample window are weighted more heavily than those at the end.
+// q1.15 output format.
+// Writes window_length values to window_function, each computed as
+// 0.5 - 0.5 * cos(2 * pi * i / window_length) and then scaled by 2^15.
+void CalculatePeriodicHann(int window_length, int16_t* window_function) {
+  for (int i = 0; i < window_length; ++i) {
+    const float real_value = (0.5 - 0.5 * cos((2 * M_PI * i) / window_length));
+    int tmp = static_cast<int32_t>(roundf(real_value * (1 << 15)));
+    // Saturate the 0x8000 value to 0x7fff
+    // (A window value of 1.0 scales to 0x8000, one past the int16_t maximum.)
+    if (tmp > 0x7fff) tmp = 0x7fff;
+    window_function[i] = tmp;
+  }
+}
+
+}  // namespace
+
+// Converts a window of 16-bit PCM samples into kOutputSize 8-bit averaged
+// power spectrum values, using fixed point for the non-constant calculations.
+// - error_reporter: receives a message when one of the size checks fails.
+// - input / input_size: the samples; input_size must not exceed kInputSize.
+// - output / output_size: destination; output_size must equal kOutputSize.
+// Returns kTfLiteError if either size check fails, kTfLiteOk otherwise.
+TfLiteStatus Preprocess(tflite::ErrorReporter* error_reporter,
+                        const int16_t* input, int input_size, int output_size,
+                        uint8_t* output) {
+  // Ensure our input and output data arrays are valid.
+  if (input_size > kInputSize) {
+    error_reporter->Report("Input size %d larger than %d", input_size,
+                           kInputSize);
+    return kTfLiteError;
+  }
+  if (output_size != kOutputSize) {
+    error_reporter->Report("Requested output size %d doesn't match %d",
+                           output_size, kOutputSize);
+    return kTfLiteError;
+  }
+
+  // Pre-calculate the window function we'll be applying to the input data.
+  // In a real application, we'd calculate this table once in an initialization
+  // function and store it for repeated reuse.
+  // q1.15 format.
+  // Only the first input_size entries are written, and only those are read
+  // below.
+  int16_t window_function[kInputSize];
+  CalculatePeriodicHann(input_size, window_function);
+
+  // Apply the window function to our time series input, and pad it with zeroes
+  // to the next power of two.
+  int32_t fixed_input[kInputSize];
+  for (int i = 0; i < kInputSize; ++i) {
+    if (i < input_size) {
+      // input is int16_t.  Treat as q1.15 fixed point value in range [-1,1)
+      // window_function is also q1.15 fixed point number
+      fixed_input[i] =
+          Q1_15_FixedMultiply_Q2_30(input[i], window_function[i]);
+    } else {
+      fixed_input[i] = 0;
+    }
+  }
+
+  // Pull the frequency data from the time series sample.
+  // Calculated in q10.22 format from q2.30 inputs.
+  int32_t fourier_values[kInputSize];
+  CalculateDiscreteFourierTransform(fixed_input, kInputSize, fourier_values);
+
+  // We have the complex numbers giving us information about each frequency
+  // band, but all we want to know is how strong each frequency is, so calculate
+  // the squared magnitude by adding together the squares of each component.
+  int32_t power_spectrum[kInputSize / 2];
+  for (int i = 0; i < (kInputSize / 2); ++i) {
+    const int32_t real = fourier_values[(i * 2) + 0];
+    const int32_t imaginary = fourier_values[(i * 2) + 1];
+    // q10.22 results
+    power_spectrum[i] =
+        Q10_22_FixedMultiply_Q10_22(real, real) +
+        Q10_22_FixedMultiply_Q10_22(imaginary, imaginary);
+  }
+
+  // Finally, reduce the size of the output by averaging together six adjacent
+  // frequencies into each slot, producing an array of 43 values.
+  // Power_spectrum numbers are q10.22.  Divide by kAverageWindowSize inside
+  // loop to prevent overflow.
+  for (int i = 0; i < kOutputSize; ++i) {
+    int32_t average = 0;
+    for (int j = 0; j < kAverageWindowSize; ++j) {
+      const int index = (i * kAverageWindowSize) + j;
+      // The last slot has fewer than kAverageWindowSize values in range; its
+      // values are still each divided by kAverageWindowSize.
+      if (index < (kInputSize / 2)) {
+        average += power_spectrum[index] / kAverageWindowSize;
+      }
+    }
+    // Quantize the result into eight bits, effectively multiplying by two.
+    // The 127.5 constant here has to match the features_max value defined in
+    // tensorflow/examples/speech_commands/input_data.py, and this also assumes
+    // that features_min is zero.
+    //
+    // q10.22 input
+    // integer output
+    //
+    // output = (input - features_min) *
+    //     (output_max - output_min) / (features_max - features_min)
+    // == (input) * (255) / (127.5)
+    // == input * 2
+    // == input << 1
+    // Also want to round to nearest integer and only keep integer bits
+    // => ((input << 1) + 0x200000) >> 22
+    // == (input + 0x100000) >> 21
+    int32_t quantized_average = (average + 0x100000) >> 21;
+    // Clamp to the range that fits in the uint8_t output.
+    if (quantized_average < 0) {
+      quantized_average = 0;
+    }
+    if (quantized_average > 255) {
+      quantized_average = 255;
+    }
+    output[i] = quantized_average;
+  }
+  return kTfLiteOk;
+}
--- a/tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_float.cc
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_float.cc
--- a/tensorflow/lite/experimental/micro/tools/make/Makefile
+++ b/tensorflow/lite/experimental/micro/tools/make/Makefile
@ -62,12 +62,19 @@ tensorflow/lite/experimental/micro/examples/micro_speech/yes_features_data.cc
 # Test binary for the microcontroller speech model.
 PREPROCESSOR_TEST_SRCS := \
 tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_test.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.cc \
 tensorflow/lite/experimental/micro/examples/micro_speech/no_30ms_sample_data.cc \
 tensorflow/lite/experimental/micro/examples/micro_speech/yes_30ms_sample_data.cc \
 tensorflow/lite/experimental/micro/examples/micro_speech/no_power_spectrum_data.cc \
 tensorflow/lite/experimental/micro/examples/micro_speech/yes_power_spectrum_data.cc

+# Each preprocessor test links the shared test sources with exactly one
+# implementation of the preprocessor (float or fixed point). Both lists use
+# the same recursive `=` assignment so they expand PREPROCESSOR_TEST_SRCS in
+# the same way.
+PREPROCESSOR_FLOAT_TEST_SRCS = \
+$(PREPROCESSOR_TEST_SRCS) \
+tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_float.cc
+
+# Plain assignment rather than `+=`: appending would silently keep any value
+# this variable already had (for example one inherited from the environment).
+PREPROCESSOR_FIXED_TEST_SRCS = \
+$(PREPROCESSOR_TEST_SRCS) \
+tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_fixed.cc
+
 MICROLITE_TEST_SRCS := \
 $(wildcard tensorflow/lite/experimental/micro/*test.cc) \
 $(wildcard tensorflow/lite/experimental/micro/kernels/*test.cc)
@ -91,7 +98,8 @@ include $(wildcard $(MAKEFILE_DIR)/targets/*_makefile.inc)

+# Combined list of the test and library source lists.
+# NOTE(review): the float and fixed preprocessor lists both expand
+# PREPROCESSOR_TEST_SRCS, so those files appear twice here; confirm that
+# whatever consumes ALL_SRCS tolerates duplicates.
 ALL_SRCS := \
 	$(MICRO_SPEECH_TEST_SRCS) \
-	$(PREPROCESSOR_TEST_SRCS) \
+	$(PREPROCESSOR_FLOAT_TEST_SRCS) \
+	$(PREPROCESSOR_FIXED_TEST_SRCS) \
 	$(MICROLITE_CC_SRCS) \
 	$(MICROLITE_TEST_SRCS)

@ -104,7 +112,8 @@ LIBDIR := $(GENDIR)lib/
 MICROLITE_LIB_PATH := $(LIBDIR)$(MICROLITE_LIB_NAME)

+# Paths of the test executables, each placed directly under $(BINDIR).
 MICRO_SPEECH_TEST_BINARY := $(BINDIR)micro_speech_test
-PREPROCESSOR_TEST_BINARY := $(BINDIR)preprocessor_test
+PREPROCESSOR_FLOAT_TEST_BINARY := $(BINDIR)preprocessor_float_test
+PREPROCESSOR_FIXED_TEST_BINARY := $(BINDIR)preprocessor_fixed_test

 CXX := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}g++
 CC := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}gcc
@ -113,8 +122,11 @@ AR := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}ar
 MICRO_SPEECH_TEST_OBJS := $(addprefix $(OBJDIR), \
 $(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MICRO_SPEECH_TEST_SRCS))))

-PREPROCESSOR_TEST_OBJS := $(addprefix $(OBJDIR), \
-$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(PREPROCESSOR_TEST_SRCS))))
+# Object lists for the two preprocessor test binaries: each .c/.cc source path
+# is rewritten to a .o path and then prefixed with $(OBJDIR).
+PREPROCESSOR_FLOAT_TEST_OBJS := $(addprefix $(OBJDIR), \
+$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(PREPROCESSOR_FLOAT_TEST_SRCS))))
+
+PREPROCESSOR_FIXED_TEST_OBJS := $(addprefix $(OBJDIR), \
+$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(PREPROCESSOR_FIXED_TEST_SRCS))))

 MICROLITE_LIB_OBJS := $(addprefix $(OBJDIR), \
 $(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MICROLITE_CC_SRCS))))
@ -158,18 +170,29 @@ micro_speech_test_bin: $(MICRO_SPEECH_TEST_BINARY).bin
 test_micro_speech: $(MICRO_SPEECH_TEST_BINARY)
 	$(TEST_SCRIPT) $(MICRO_SPEECH_TEST_BINARY) '~~~ALL TESTS PASSED~~~'

-$(PREPROCESSOR_TEST_BINARY): $(PREPROCESSOR_TEST_OBJS) $(MICROLITE_LIB_PATH)
+# Links the float preprocessor test from its objects and the micro library.
+$(PREPROCESSOR_FLOAT_TEST_BINARY): $(PREPROCESSOR_FLOAT_TEST_OBJS) $(MICROLITE_LIB_PATH)
 	@mkdir -p $(dir $@)
 	$(CXX) $(CXXFLAGS) $(INCLUDES) \
-	-o $(PREPROCESSOR_TEST_BINARY) $(PREPROCESSOR_TEST_OBJS) \
+	-o $(PREPROCESSOR_FLOAT_TEST_BINARY) $(PREPROCESSOR_FLOAT_TEST_OBJS) \
 	$(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS)

-preprocessor_test: $(PREPROCESSOR_TEST_BINARY)
-preprocessor_test_bin: $(PREPROCESSOR_TEST_BINARY).bin
+# These targets name commands, not files; declaring them phony keeps a stray
+# file of the same name from masking them.
+.PHONY: preprocessor_float_test preprocessor_float_test_bin test_preprocessor_float
+preprocessor_float_test: $(PREPROCESSOR_FLOAT_TEST_BINARY)
+preprocessor_float_test_bin: $(PREPROCESSOR_FLOAT_TEST_BINARY).bin

-test_preprocessor: $(PREPROCESSOR_TEST_BINARY)
-	$(TEST_SCRIPT) $(PREPROCESSOR_TEST_BINARY) '~~~ALL TESTS PASSED~~~'
+# Hands the float test binary and the expected pass string to $(TEST_SCRIPT).
+test_preprocessor_float: $(PREPROCESSOR_FLOAT_TEST_BINARY)
+	$(TEST_SCRIPT) $(PREPROCESSOR_FLOAT_TEST_BINARY) '~~~ALL TESTS PASSED~~~'

+# Links the fixed point preprocessor test from its objects and the micro
+# library.
+$(PREPROCESSOR_FIXED_TEST_BINARY): $(PREPROCESSOR_FIXED_TEST_OBJS) $(MICROLITE_LIB_PATH)
+	@mkdir -p $(dir $@)
+	$(CXX) $(CXXFLAGS) $(INCLUDES) \
+	-o $(PREPROCESSOR_FIXED_TEST_BINARY) $(PREPROCESSOR_FIXED_TEST_OBJS) \
+	$(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS)
+
+# These targets name commands, not files; declaring them phony keeps a stray
+# file of the same name from masking them.
+.PHONY: preprocessor_fixed_test preprocessor_fixed_test_bin test_preprocessor_fixed
+preprocessor_fixed_test: $(PREPROCESSOR_FIXED_TEST_BINARY)
+preprocessor_fixed_test_bin: $(PREPROCESSOR_FIXED_TEST_BINARY).bin
+
+# Hands the fixed test binary and the expected pass string to $(TEST_SCRIPT).
+test_preprocessor_fixed: $(PREPROCESSOR_FIXED_TEST_BINARY)
+	$(TEST_SCRIPT) $(PREPROCESSOR_FIXED_TEST_BINARY) '~~~ALL TESTS PASSED~~~'

 $(BINDIR)%_test : $(OBJDIR)%_test.o $(MICROLITE_LIB_PATH)
 	@mkdir -p $(dir $@)