Fix xtensa_hifimini build.

The bottom line is that we need a clear separation between the different
targets, which is what this change does.

PiperOrigin-RevId: 319834782
Change-Id: I33a08ac9b45a61b3b3e019fe854577a6561573e4
Advait Jain 2020-07-06 12:42:07 -07:00 committed by TensorFlower Gardener
parent e03404596a
commit ee95c09271
16 changed files with 192 additions and 165 deletions


@@ -15,6 +15,11 @@ config_setting(
     define_values = {"tflm_build": "xtensa_hifimini"},
 )
 
+config_setting(
+    name = "xtensa_hifimini_staging",
+    define_values = {"tflm_build": "xtensa_hifimini_staging"},
+)
+
 package_group(
     name = "micro_top_level",
     packages = ["//tensorflow/lite/micro"],
@@ -72,6 +77,20 @@ cc_library(
             "xtensa_hifimini/softmax.cc",
             "xtensa_hifimini/svdf.cc",
         ],
+        ":xtensa_hifimini_staging": [
+            # TODO(b/144176795): finer granularity would help reduce the
+            # duplication of srcs in the BUILD rules (in this case conv.cc and
+            # depthwise_conv.cc). We are falling back to reference kernels in
+            # case the optimized kernels are not implemented to match the
+            # behavior that we get with the Makefiles.
+            "conv.cc",
+            "depthwise_conv.cc",
+            "xtensa_hifimini/fixedpoint_utils.h",
+            "xtensa_hifimini_staging/fully_connected.cc",
+            "xtensa_hifimini_staging/quantize.cc",
+            "xtensa_hifimini_staging/softmax.cc",
+            "xtensa_hifimini_staging/svdf.cc",
+        ],
     }),
     hdrs = ["micro_ops.h"],
     # TODO(b/153609488): enable embedded build once we can properly support it.
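
These config_settings are matched by Bazel's --define flag; a sketch of selecting each variant (the package path is inferred from the srcs above, and the :all wildcard is illustrative rather than a specific target from this BUILD file):

# Select the optimized hifimini kernels (the define matches the config_setting above):
bazel build --define tflm_build=xtensa_hifimini //tensorflow/lite/micro/kernels:all

# Select the staging variant instead:
bazel build --define tflm_build=xtensa_hifimini_staging //tensorflow/lite/micro/kernels:all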


@@ -24,7 +24,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/padding.h"
-#include "tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h"
+#include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h"
 
 namespace tflite {
 namespace ops {


@@ -24,7 +24,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/padding.h"
-#include "tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h"
+#include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h"
 
 namespace tflite {
 namespace ops {


@@ -24,7 +24,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
-#include "tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h"
+#include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h"
 
 namespace tflite {
 namespace ops {


@@ -21,7 +21,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/quantization_util.h"
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
-#include "tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h"
+#include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h"
 
 namespace tflite {
 namespace ops {


@@ -24,7 +24,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/op_macros.h"
 #include "tensorflow/lite/micro/kernels/activation_utils.h"
-#include "tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h"
+#include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h"
 
 namespace tflite {
 namespace ops {


@@ -1,153 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_HIFIMINI_FIXEDPOINT_UTILS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_HIFIMINI_FIXEDPOINT_UTILS_H_
#include <xtensa/tie/xt_hifi2.h>
#include <algorithm>
#include <cmath>
#include <cstdint>
#include "tensorflow/lite/kernels/internal/compatibility.h"
namespace tflite {
namespace ops {
namespace micro {
namespace xtensa {
namespace hifimini {
// INT24 MIN/MAX
#define INT24_MIN -8388608
#define INT24_MAX 8388607
//
// Multiply 24bit value by a quantized multiplier (w/ shift) and returns a 48bit
// aligned value in the QR register.
//
inline ae_q56s MultiplyByQuantizedMultiplier(ae_p24x2s x_24x2,
int32_t quantized_multiplier,
int shift) {
// A value with 1 sign bit, N integer bits and M fractional bits is
// represented as QN+1.M since the sign bit is included in the integer bits.
//
// The Q notation in this method explains the values represented in each
// variable, along with an implicit division since the quantized_multiplier
// represents a value between 0.5 and 1.0 (Q1.X-1 where X is the bit precision
// of the type).
//
// Load the quantized multiplier into the PR register.
// NOTE: This method assumes that this param has been calculated for 24bit
// space - not 32bits.
// Q32.0 / 2^23 -> Q24.0 / 2^23 representing a Q1.23 multiplier.
ae_p24x2s quantized_multiplier_24x2 = AE_MOVPA24(quantized_multiplier);
// Shift right by 23 - 16 bits minus the specified shift. This is because we
// keep 16 fractional bits until the end to perform rounding. Subtract shift
// since shift is a left shift, and the 23-16 is a right shift.
int shift_amount = 7 - shift;
// Find the product of x and the quantized_multiplier.
// Q24.0 / 2^23 * Q24.0 = Q48.0 / 2^23
// Q48.0 / 2^23 >> 7 = Q48.0 / 2^16
ae_q56s result_56 = AE_MULP24S_HH(x_24x2, quantized_multiplier_24x2);
// Shift right if shift amount is positive, left if shift amount is negative.
if (shift_amount >= 0) {
result_56 = AE_Q56S_SRA(result_56, shift_amount);
} else {
result_56 = AE_Q56S_SLA(result_56, -shift_amount);
}
// Round off the bottom 16 bits.
// Q48.0 / 2^16 -> Q32.0 aligned to 48 bits.
result_56 = AE_ROUNDSQ32SYM(result_56);
return result_56;
}
//
// Multiply 32bit value by a quantized multiplier (w/ shift) and returns a 48bit
// aligned value in the QR register.
//
inline ae_q56s MultiplyByQuantizedMultiplier(int32_t x,
int32_t quantized_multiplier,
int shift) {
// Convert x into a 2x24bit PR register file. If x is outside the numerical
// limits of a 24bit integer, the "fractional" or lower 8bits are discarded.
// If x is within the range of a 24 bit integer, the "signed" or upper 8bits
// are discarded.
ae_p24x2s x_24x2;
if (x > INT24_MIN && x < INT24_MAX) {
x_24x2 = AE_MOVPA24(x);
} else {
x_24x2 = static_cast<ae_p24s>(*reinterpret_cast<ae_p24f*>(&x));
shift += 8;
}
return MultiplyByQuantizedMultiplier(x_24x2, quantized_multiplier, shift);
}
//
// Calculate quantization params for 24bit runtimes.
//
inline void QuantizeMultiplier(float multiplier, int32_t* quantized_multiplier,
int* shift) {
if (multiplier == 0.0f) {
*quantized_multiplier = 0;
*shift = 0;
return;
}
// Special cased to 24bit:
const float q = std::frexp(multiplier, shift);
auto q_fixed = static_cast<int64_t>(std::round(q * (1 << 23)));
TFLITE_CHECK(q_fixed <= (1 << 23));
if (q_fixed == (1 << 23)) {
q_fixed /= 2;
++*shift;
}
TFLITE_CHECK_LE(q_fixed, INT24_MAX);
// Ensure shift does not exceed 24-bit range.
TFLITE_CHECK_LE(*shift, 23);
if (*shift < -23) {
*shift = 0;
q_fixed = 0;
}
*quantized_multiplier = static_cast<int32_t>(q_fixed);
}
//
// Convert a floating point number to a Q representation for 24 bit integers.
//
inline int CreateQConstantForInt24(int integer_bits, float f) {
const float min_bounds = static_cast<float>(INT24_MIN);
const float max_bounds = static_cast<float>(INT24_MAX);
int fractional_bits = 23 - integer_bits;
float raw = std::round(f * static_cast<float>(1 << fractional_bits));
raw = std::max(raw, min_bounds);
raw = std::min(raw, max_bounds);
return static_cast<int>(raw);
}
} // namespace hifimini
} // namespace xtensa
} // namespace micro
} // namespace ops
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_HIFIMINI_FIXEDPOINT_UTILS_H_
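
Since the header above is being deleted, a compact portable restatement of the arithmetic its comments describe may help. This is a sketch in plain C++, not the Xtensa implementation: there are no intrinsics, rounding is plain round-half-up rather than the symmetric rounding of AE_ROUNDSQ32SYM, and the edge cases handled by QuantizeMultiplier above (q_fixed saturation, very negative shifts) are omitted.

#include <cmath>
#include <cstdint>
#include <cstdio>

// Sketch: quantize a float multiplier into a Q1.23 fixed-point value plus a
// power-of-two shift, mirroring QuantizeMultiplier above (edge cases omitted).
void QuantizeMultiplier24(float multiplier, int32_t* quantized_multiplier, int* shift) {
  const float q = std::frexp(multiplier, shift);  // multiplier = q * 2^shift, q in [0.5, 1)
  *quantized_multiplier = static_cast<int32_t>(std::round(q * (1 << 23)));  // Q1.23
}

// Sketch: x * (quantized_multiplier / 2^23) * 2^shift, computed in 64 bits.
// Assumes the net shift (23 - shift) is a right shift, which holds for the
// sub-unity multipliers this scheme targets.
int32_t MultiplyByQuantizedMultiplier24(int32_t x, int32_t quantized_multiplier, int shift) {
  const int64_t product = static_cast<int64_t>(x) * quantized_multiplier;
  const int right_shift = 23 - shift;
  // Round half up before discarding the fractional bits.
  return static_cast<int32_t>((product + (int64_t{1} << (right_shift - 1))) >> right_shift);
}

int main() {
  int32_t qm = 0;
  int shift = 0;
  QuantizeMultiplier24(0.75f, &qm, &shift);  // q = 0.75, shift = 0 -> qm = 6291456 (0.75 * 2^23)
  std::printf("qm=%d shift=%d\n", static_cast<int>(qm), shift);
  std::printf("100 * 0.75 ~= %d\n",
              static_cast<int>(MultiplyByQuantizedMultiplier24(100, qm, shift)));  // prints 75
  return 0;
}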


@@ -21,7 +21,7 @@
 # logs for the test to pass.
 
 declare -r ROOT_DIR=`pwd`
-declare -r TEST_TMPDIR=/tmp/test_bluepill_binary/
+declare -r TEST_TMPDIR=/tmp/test_xtensa_hifimini_binary/
 declare -r MICRO_LOG_PATH=${TEST_TMPDIR}/$1
 declare -r MICRO_LOG_FILENAME=${MICRO_LOG_PATH}/logs.txt
 mkdir -p ${MICRO_LOG_PATH}


@@ -0,0 +1,38 @@
#!/bin/bash -e
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
# Tests an Xtensa XPG binary by parsing the log output.
#
# First argument is the binary location.
# Second argument is a regular expression that's required to be in the output
# logs for the test to pass.
declare -r ROOT_DIR=`pwd`
declare -r TEST_TMPDIR=/tmp/test_xtensa_hifimini_staging_binary/
declare -r MICRO_LOG_PATH=${TEST_TMPDIR}/$1
declare -r MICRO_LOG_FILENAME=${MICRO_LOG_PATH}/logs.txt
mkdir -p ${MICRO_LOG_PATH}
xt-run --xtensa-core=${XTENSA_CORE} $1 2>&1 | tee ${MICRO_LOG_FILENAME}
if grep -q "$2" ${MICRO_LOG_FILENAME}
then
echo "$1: PASS"
exit 0
else
echo "$1: FAIL - '$2' not found in logs."
exit 1
fi
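
Expected usage, per the comments in the script: the binary path is illustrative, the second argument is the conventional TFLM pass string, and XTENSA_CORE must be exported for xt-run.

export XTENSA_CORE=<your_core_name>
tensorflow/lite/micro/testing/test_xtensa_hifimini_staging_binary.sh \
  path/to/some_test_binary '~~~ALL TESTS PASSED~~~'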


@@ -1,6 +1,6 @@
-ifneq ($(filter xtensa-xpg, $(ALL_TAGS)),)
+ifneq ($(filter xtensa_hifimini_staging, $(ALL_TAGS)),)
-XTENSA_PATH = $(MAKEFILE_DIR)/../../kernels/xtensa_hifimini
+XTENSA_PATH = $(MAKEFILE_DIR)/../../kernels/xtensa_hifimini_staging
 ifneq (,$(filter xtensa_hifimini%, $(ALL_TAGS)))


@@ -1,11 +1,11 @@
-# Settings for Xtensa XPG toolchain.
+# Settings for Xtensa toolchain for the hifimini kernels.
 # REQUIRED:
 # - RI2019.2 Toolkit (for xt-clang/xt-clang++).
 # - XTENSA_CORE: The name of the core to use, will cause a compiler exception
 # without providing a core.
-ifeq ($(TARGET), xtensa-xpg)
-TARGET_ARCH := xtensa-xpg
+ifeq ($(TARGET), xtensa_hifimini)
+TARGET_ARCH := xtensa_hifimini
 
 PLATFORM_ARGS = \
 -DTF_LITE_MCU_DEBUG_LOG \
@@ -29,7 +29,7 @@ ifeq ($(TARGET), xtensa-xpg)
 LDFLAGS += -Wl,-gc-sections
 
-TEST_SCRIPT := tensorflow/lite/micro/testing/test_xtensa_xpg_binary.sh
+TEST_SCRIPT := tensorflow/lite/micro/testing/test_xtensa_hifimini_binary.sh
 
 # TODO(b/156962140): This manually maintained list of excluded examples is
 # quite error prone.


@@ -0,0 +1,45 @@
# Settings for Xtensa toolchain for hifimini_staging kernels.
# REQUIRED:
# - RI2019.2 Toolkit (for xt-clang/xt-clang++).
# - XTENSA_CORE: The name of the core to use, will cause a compiler exception
# without providing a core.
ifeq ($(TARGET), xtensa_hifimini_staging)
TARGET_ARCH := xtensa_hifimini_staging
PLATFORM_ARGS = \
-DTF_LITE_MCU_DEBUG_LOG \
--xtensa-core=$(XTENSA_CORE) \
-mcoproc \
-DXTENSA -DMAX_RFFT_PWR=9 -DMIN_RFFT_PWR=MAX_RFFT_PWR \
-fdata-sections \
-ffunction-sections \
-fno-exceptions \
-fno-unwind-tables \
-fno-use-cxa-atexit \
-fmessage-length=0 \
-fno-threadsafe-statics
TARGET_TOOLCHAIN_PREFIX := xt-
CXX_TOOL := clang++
CC_TOOL := clang
CXXFLAGS += $(PLATFORM_ARGS)
CCFLAGS += $(PLATFORM_ARGS)
LDFLAGS += -Wl,-gc-sections
TEST_SCRIPT := tensorflow/lite/micro/testing/test_xtensa_hifimini_staging_binary.sh
# TODO(b/156962140): This manually maintained list of excluded examples is
# quite error prone.
EXCLUDED_EXAMPLE_TESTS := \
tensorflow/lite/micro/examples/image_recognition_experimental/Makefile.inc \
tensorflow/lite/micro/examples/magic_wand/Makefile.inc \
tensorflow/lite/micro/examples/micro_speech/Makefile.inc \
tensorflow/lite/micro/examples/network_tester/Makefile.inc \
tensorflow/lite/micro/examples/person_detection/Makefile.inc \
tensorflow/lite/micro/examples/person_detection_experimental/Makefile.inc
MICRO_LITE_EXAMPLE_TESTS := $(filter-out $(EXCLUDED_EXAMPLE_TESTS), $(MICRO_LITE_EXAMPLE_TESTS))
endif
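
A sketch of driving this target, assuming TFLM's standard Makefile entry point and test goal; <core_name> is a placeholder for a licensed Xtensa core:

make -f tensorflow/lite/micro/tools/make/Makefile \
  TARGET=xtensa_hifimini_staging XTENSA_CORE=<core_name> test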


@@ -0,0 +1,50 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Reference implementation of the DebugLog() function that's required for a
// platform to support the TensorFlow Lite for Microcontrollers library. This is
// the only function that's absolutely required to be available on a target
// device, since it's used for communicating test results back to the host so
// that we can verify the implementation is working correctly.
// It's designed to be as easy as possible to supply an implementation though.
// On platforms that have a POSIX stack or C library, it can be written as a
// single call to `fprintf(stderr, "%s", s)` to output a string to the error
// stream of the console, but if there's no OS or C library available, there's
// almost always an equivalent way to write out a string to some serial
// interface that can be used instead. For example on Arm M-series MCUs, calling
// the `bkpt #0xAB` assembler instruction will output the string in r1 to
// whatever debug serial connection is available. If you're running mbed, you
// can do the same by creating `Serial pc(USBTX, USBRX)` and then calling
// `pc.printf("%s", s)`.
// To add an equivalent function for your own platform, create your own
// implementation file, and place it in a subfolder named after the OS
// you're targeting. For example, see the Cortex M bare metal version in
// tensorflow/lite/micro/bluepill/debug_log.cc or the mbed one in
// tensorflow/lite/micro/mbed/debug_log.cc.
#include "tensorflow/lite/micro/debug_log.h"
#ifndef TF_LITE_STRIP_ERROR_STRINGS
#include <cstdio>
#endif
extern "C" void DebugLog(const char* s) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
// Reusing TF_LITE_STRIP_ERROR_STRINGS to disable DebugLog completely to get
// maximum reduction in binary size. This is because we have DebugLog calls
// via TF_LITE_CHECK that are not stubbed out by TF_LITE_REPORT_ERROR.
fprintf(stderr, "%s", s);
#endif
}
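
As the comment block above suggests, a bare-metal port only needs to push the string out over some serial interface. A hypothetical sketch, where uart_putc() is a made-up stand-in for whatever output routine the target's BSP actually provides:

// Hypothetical bare-metal DebugLog variant; uart_putc() is assumed to be
// supplied by the board support package and is not a real TFLM symbol.
extern "C" void uart_putc(char c);

extern "C" void DebugLog(const char* s) {
  // Emit the string one character at a time over the serial interface.
  while (*s) {
    uart_putc(*s++);
  }
}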


@@ -0,0 +1,28 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Xtensa implementation of micro_time.
// To include this with make, build with TARGET=xtensa_hifimini_staging.
#include "tensorflow/lite/micro/micro_time.h"
#include <time.h>
namespace tflite {
int32_t ticks_per_second() { return CLOCKS_PER_SEC; }
int32_t GetCurrentTimeTicks() { return clock(); }
} // namespace tflite
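
For reference, a typical way to consume this pair; a sketch in which the benchmark scaffolding is illustrative, while GetCurrentTimeTicks() and ticks_per_second() are exactly the functions defined above:

#include "tensorflow/lite/micro/micro_time.h"

// Sketch: measure elapsed time around a unit of work.
void TimedWork() {
  const int32_t start = tflite::GetCurrentTimeTicks();
  // ... run an inference or other work here ...
  const int32_t elapsed_ticks = tflite::GetCurrentTimeTicks() - start;
  const float seconds =
      static_cast<float>(elapsed_ticks) / tflite::ticks_per_second();
  (void)seconds;  // e.g., report via DebugLog
}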