Fix xtensa_hifimini build.

The bottom line is that we need a clear separation between the different
targets, which is what this change does.

PiperOrigin-RevId: 319834782
Change-Id: I33a08ac9b45a61b3b3e019fe854577a6561573e4
Advait Jain 2020-07-06 12:42:07 -07:00 committed by TensorFlower Gardener
parent e03404596a
commit ee95c09271
16 changed files with 192 additions and 165 deletions


@@ -15,6 +15,11 @@ config_setting(
     define_values = {"tflm_build": "xtensa_hifimini"},
 )
 
+config_setting(
+    name = "xtensa_hifimini_staging",
+    define_values = {"tflm_build": "xtensa_hifimini_staging"},
+)
+
 package_group(
     name = "micro_top_level",
     packages = ["//tensorflow/lite/micro"],
@@ -72,6 +77,20 @@ cc_library(
             "xtensa_hifimini/softmax.cc",
             "xtensa_hifimini/svdf.cc",
         ],
+        ":xtensa_hifimini_staging": [
+            # TODO(b/144176795): finer granularity would help reduce the
+            # duplication of srcs in the BUILD rules (in this case conv.cc and
+            # depthwise_conv.cc). We are falling back to reference kernels in
+            # case the optimized kernels are not implemented to match the
+            # behavior that we get with the Makefiles.
+            "conv.cc",
+            "depthwise_conv.cc",
+            "xtensa_hifimini/fixedpoint_utils.h",
+            "xtensa_hifimini_staging/fully_connected.cc",
+            "xtensa_hifimini_staging/quantize.cc",
+            "xtensa_hifimini_staging/softmax.cc",
+            "xtensa_hifimini_staging/svdf.cc",
+        ],
     }),
     hdrs = ["micro_ops.h"],
     # TODO(b/153609488): enable embedded build once we can properly support it.
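
These config_settings are matched by Bazel's --define flag; a sketch of selecting each variant (the package path is inferred from the srcs above, and the :all wildcard is illustrative rather than a specific target from this BUILD file):

# Select the optimized hifimini kernels (the define matches the config_setting above):
bazel build --define tflm_build=xtensa_hifimini //tensorflow/lite/micro/kernels:all

# Select the staging variant instead:
bazel build --define tflm_build=xtensa_hifimini_staging //tensorflow/lite/micro/kernels:all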


@@ -24,7 +24,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/padding.h"
-#include "tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h"
+#include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h"
 
 namespace tflite {
 namespace ops {


@@ -24,7 +24,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/padding.h"
-#include "tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h"
+#include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h"
 
 namespace tflite {
 namespace ops {


@@ -24,7 +24,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
-#include "tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h"
+#include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h"
 
 namespace tflite {
 namespace ops {


@@ -21,7 +21,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/quantization_util.h"
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
-#include "tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h"
+#include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h"
 
 namespace tflite {
 namespace ops {


@@ -24,7 +24,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/op_macros.h"
 #include "tensorflow/lite/micro/kernels/activation_utils.h"
-#include "tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h"
+#include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h"
 
 namespace tflite {
 namespace ops {


@@ -1,153 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_HIFIMINI_FIXEDPOINT_UTILS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_HIFIMINI_FIXEDPOINT_UTILS_H_
#include <xtensa/tie/xt_hifi2.h>
#include <algorithm>
#include <cmath>
#include <cstdint>
#include "tensorflow/lite/kernels/internal/compatibility.h"
namespace tflite {
namespace ops {
namespace micro {
namespace xtensa {
namespace hifimini {
// INT24 MIN/MAX
#define INT24_MIN -8388608
#define INT24_MAX 8388607
//
// Multiply 24bit value by a quantized multiplier (w/ shift) and returns a 48bit
// aligned value in the QR register.
//
inline ae_q56s MultiplyByQuantizedMultiplier(ae_p24x2s x_24x2,
int32_t quantized_multiplier,
int shift) {
// A value with 1 sign bit, N integer bits and M fractional bits is
// represented as QN+1.M since the sign bit is included in the integer bits.
//
// The Q notation in this method explains the values represented in each
// variable, along with an implicit division since the quantized_multiplier
// represents a value between 0.5 and 1.0 (Q1.X-1 where X is the bit precision
// of the type).
//
// Load the quantized multiplier into the PR register.
// NOTE: This method assumes that this param has been calculated for 24bit
// space - not 32bits.
// Q32.0 / 2^23 -> Q24.0 / 2^23 representing a Q1.23 multiplier.
ae_p24x2s quantized_multiplier_24x2 = AE_MOVPA24(quantized_multiplier);
// Shift right by 23 - 16 bits minus the specified shift. This is because we
// keep 16 fractional bits until the end to perform rounding. Subtract shift
// since shift is a left shift, and the 23-16 is a right shift.
int shift_amount = 7 - shift;
// Find the product of x and the quantized_multiplier.
// Q24.0 / 2^23 * Q24.0 = Q48.0 / 2^23
// Q48.0 / 2^23 >> 7 = Q48.0 / 2^16
ae_q56s result_56 = AE_MULP24S_HH(x_24x2, quantized_multiplier_24x2);
// Shift right if shift amount is positive, left if shift amount is negative.
if (shift_amount >= 0) {
result_56 = AE_Q56S_SRA(result_56, shift_amount);
} else {
result_56 = AE_Q56S_SLA(result_56, -shift_amount);
}
// Round off the bottom 16 bits.
// Q48.0 / 2^16 -> Q32.0 aligned to 48 bits.
result_56 = AE_ROUNDSQ32SYM(result_56);
return result_56;
}
//
// Multiply 32bit value by a quantized multiplier (w/ shift) and returns a 48bit
// aligned value in the QR register.
//
inline ae_q56s MultiplyByQuantizedMultiplier(int32_t x,
int32_t quantized_multiplier,
int shift) {
// Convert x into a 2x24bit PR register file. If x is outside the numerical
// limits of a 24bit integer, the "fractional" or lower 8bits are discarded.
// If x is within the range of a 24 bit integer, the "signed" or upper 8bits
// are discarded.
ae_p24x2s x_24x2;
if (x > INT24_MIN && x < INT24_MAX) {
x_24x2 = AE_MOVPA24(x);
} else {
x_24x2 = static_cast<ae_p24s>(*reinterpret_cast<ae_p24f*>(&x));
shift += 8;
}
return MultiplyByQuantizedMultiplier(x_24x2, quantized_multiplier, shift);
}
//
// Calculate quantization params for 24bit runtimes.
//
inline void QuantizeMultiplier(float multiplier, int32_t* quantized_multiplier,
int* shift) {
if (multiplier == 0.0f) {
*quantized_multiplier = 0;
*shift = 0;
return;
}
// Special cased to 24bit:
const float q = std::frexp(multiplier, shift);
auto q_fixed = static_cast<int64_t>(std::round(q * (1 << 23)));
TFLITE_CHECK(q_fixed <= (1 << 23));
if (q_fixed == (1 << 23)) {
q_fixed /= 2;
++*shift;
}
TFLITE_CHECK_LE(q_fixed, INT24_MAX);
// Ensure shift does not exceed 24-bit range.
TFLITE_CHECK_LE(*shift, 23);
if (*shift < -23) {
*shift = 0;
q_fixed = 0;
}
*quantized_multiplier = static_cast<int32_t>(q_fixed);
}
//
// Convert a floating point number to a Q representation for 24 bit integers.
//
inline int CreateQConstantForInt24(int integer_bits, float f) {
const float min_bounds = static_cast<float>(INT24_MIN);
const float max_bounds = static_cast<float>(INT24_MAX);
int fractional_bits = 23 - integer_bits;
float raw = std::round(f * static_cast<float>(1 << fractional_bits));
raw = std::max(raw, min_bounds);
raw = std::min(raw, max_bounds);
return static_cast<int>(raw);
}
} // namespace hifimini
} // namespace xtensa
} // namespace micro
} // namespace ops
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_HIFIMINI_FIXEDPOINT_UTILS_H_
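
Since the header above is being deleted, a compact portable restatement of the arithmetic its comments describe may help. This is a sketch in plain C++, not the Xtensa implementation: there are no intrinsics, rounding is plain round-half-up rather than the symmetric rounding of AE_ROUNDSQ32SYM, and the edge cases handled by QuantizeMultiplier above (q_fixed saturation, very negative shifts) are omitted.

#include <cmath>
#include <cstdint>
#include <cstdio>

// Sketch: quantize a float multiplier into a Q1.23 fixed-point value plus a
// power-of-two shift, mirroring QuantizeMultiplier above (edge cases omitted).
void QuantizeMultiplier24(float multiplier, int32_t* quantized_multiplier, int* shift) {
  const float q = std::frexp(multiplier, shift);  // multiplier = q * 2^shift, q in [0.5, 1)
  *quantized_multiplier = static_cast<int32_t>(std::round(q * (1 << 23)));  // Q1.23
}

// Sketch: x * (quantized_multiplier / 2^23) * 2^shift, computed in 64 bits.
// Assumes the net shift (23 - shift) is a right shift, which holds for the
// sub-unity multipliers this scheme targets.
int32_t MultiplyByQuantizedMultiplier24(int32_t x, int32_t quantized_multiplier, int shift) {
  const int64_t product = static_cast<int64_t>(x) * quantized_multiplier;
  const int right_shift = 23 - shift;
  // Round half up before discarding the fractional bits.
  return static_cast<int32_t>((product + (int64_t{1} << (right_shift - 1))) >> right_shift);
}

int main() {
  int32_t qm = 0;
  int shift = 0;
  QuantizeMultiplier24(0.75f, &qm, &shift);  // q = 0.75, shift = 0 -> qm = 6291456 (0.75 * 2^23)
  std::printf("qm=%d shift=%d\n", static_cast<int>(qm), shift);
  std::printf("100 * 0.75 ~= %d\n",
              static_cast<int>(MultiplyByQuantizedMultiplier24(100, qm, shift)));  // prints 75
  return 0;
}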


@@ -21,7 +21,7 @@
 # logs for the test to pass.
 
 declare -r ROOT_DIR=`pwd`
-declare -r TEST_TMPDIR=/tmp/test_bluepill_binary/
+declare -r TEST_TMPDIR=/tmp/test_xtensa_hifimini_binary/
 declare -r MICRO_LOG_PATH=${TEST_TMPDIR}/$1
 declare -r MICRO_LOG_FILENAME=${MICRO_LOG_PATH}/logs.txt
 mkdir -p ${MICRO_LOG_PATH}


@@ -0,0 +1,38 @@
#!/bin/bash -e
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
# Tests an Xtensa XPG binary by parsing the log output.
#
# First argument is the binary location.
# Second argument is a regular expression that's required to be in the output
# logs for the test to pass.
declare -r ROOT_DIR=`pwd`
declare -r TEST_TMPDIR=/tmp/test_xtensa_hifimini_staging_binary/
declare -r MICRO_LOG_PATH=${TEST_TMPDIR}/$1
declare -r MICRO_LOG_FILENAME=${MICRO_LOG_PATH}/logs.txt
mkdir -p ${MICRO_LOG_PATH}
xt-run --xtensa-core=${XTENSA_CORE} $1 2>&1 | tee ${MICRO_LOG_FILENAME}
if grep -q "$2" ${MICRO_LOG_FILENAME}
then
echo "$1: PASS"
exit 0
else
echo "$1: FAIL - '$2' not found in logs."
exit 1
fi
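
Expected usage, per the comments in the script: the binary path is illustrative, the second argument is the conventional TFLM pass string, and XTENSA_CORE must be exported for xt-run.

export XTENSA_CORE=<your_core_name>
tensorflow/lite/micro/testing/test_xtensa_hifimini_staging_binary.sh \
  path/to/some_test_binary '~~~ALL TESTS PASSED~~~'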


@@ -1,6 +1,6 @@
-ifneq ($(filter xtensa-xpg, $(ALL_TAGS)),)
+ifneq ($(filter xtensa_hifimini_staging, $(ALL_TAGS)),)
-XTENSA_PATH = $(MAKEFILE_DIR)/../../kernels/xtensa_hifimini
+XTENSA_PATH = $(MAKEFILE_DIR)/../../kernels/xtensa_hifimini_staging
 ifneq (,$(filter xtensa_hifimini%, $(ALL_TAGS)))


@@ -1,11 +1,11 @@
-# Settings for Xtensa XPG toolchain.
+# Settings for Xtensa toolchain for the hifimini kernels.
 # REQUIRED:
 # - RI2019.2 Toolkit (for xt-clang/xt-clang++).
 # - XTENSA_CORE: The name of the core to use, will cause a compiler exception
 # without providing a core.
-ifeq ($(TARGET), xtensa-xpg)
-TARGET_ARCH := xtensa-xpg
+ifeq ($(TARGET), xtensa_hifimini)
+TARGET_ARCH := xtensa_hifimini
 
 PLATFORM_ARGS = \
 -DTF_LITE_MCU_DEBUG_LOG \
@@ -29,7 +29,7 @@ ifeq ($(TARGET), xtensa-xpg)
 LDFLAGS += -Wl,-gc-sections
 
-TEST_SCRIPT := tensorflow/lite/micro/testing/test_xtensa_xpg_binary.sh
+TEST_SCRIPT := tensorflow/lite/micro/testing/test_xtensa_hifimini_binary.sh
 
 # TODO(b/156962140): This manually maintained list of excluded examples is
 # quite error prone.


@@ -0,0 +1,45 @@
# Settings for Xtensa toolchain for hifimini_staging kernels.
# REQUIRED:
# - RI2019.2 Toolkit (for xt-clang/xt-clang++).
# - XTENSA_CORE: The name of the core to use, will cause a compiler exception
# without providing a core.
ifeq ($(TARGET), xtensa_hifimini_staging)
TARGET_ARCH := xtensa_hifimini_staging
PLATFORM_ARGS = \
-DTF_LITE_MCU_DEBUG_LOG \
--xtensa-core=$(XTENSA_CORE) \
-mcoproc \
-DXTENSA -DMAX_RFFT_PWR=9 -DMIN_RFFT_PWR=MAX_RFFT_PWR \
-fdata-sections \
-ffunction-sections \
-fno-exceptions \
-fno-unwind-tables \
-fno-use-cxa-atexit \
-fmessage-length=0 \
-fno-threadsafe-statics
TARGET_TOOLCHAIN_PREFIX := xt-
CXX_TOOL := clang++
CC_TOOL := clang
CXXFLAGS += $(PLATFORM_ARGS)
CCFLAGS += $(PLATFORM_ARGS)
LDFLAGS += -Wl,-gc-sections
TEST_SCRIPT := tensorflow/lite/micro/testing/test_xtensa_hifimini_staging_binary.sh
# TODO(b/156962140): This manually maintained list of excluded examples is
# quite error prone.
EXCLUDED_EXAMPLE_TESTS := \
tensorflow/lite/micro/examples/image_recognition_experimental/Makefile.inc \
tensorflow/lite/micro/examples/magic_wand/Makefile.inc \
tensorflow/lite/micro/examples/micro_speech/Makefile.inc \
tensorflow/lite/micro/examples/network_tester/Makefile.inc \
tensorflow/lite/micro/examples/person_detection/Makefile.inc \
tensorflow/lite/micro/examples/person_detection_experimental/Makefile.inc
MICRO_LITE_EXAMPLE_TESTS := $(filter-out $(EXCLUDED_EXAMPLE_TESTS), $(MICRO_LITE_EXAMPLE_TESTS))
endif
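
A sketch of driving this target, assuming TFLM's standard Makefile entry point and test goal; <core_name> is a placeholder for a licensed Xtensa core:

make -f tensorflow/lite/micro/tools/make/Makefile \
  TARGET=xtensa_hifimini_staging XTENSA_CORE=<core_name> test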


@@ -0,0 +1,50 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Reference implementation of the DebugLog() function that's required for a
// platform to support the TensorFlow Lite for Microcontrollers library. This is
// the only function that's absolutely required to be available on a target
// device, since it's used for communicating test results back to the host so
// that we can verify the implementation is working correctly.
// It's designed to be as easy as possible to supply an implementation though.
// On platforms that have a POSIX stack or C library, it can be written as a
// single call to `fprintf(stderr, "%s", s)` to output a string to the error
// stream of the console, but if there's no OS or C library available, there's
// almost always an equivalent way to write out a string to some serial
// interface that can be used instead. For example on Arm M-series MCUs, calling
// the `bkpt #0xAB` assembler instruction will output the string in r1 to
// whatever debug serial connection is available. If you're running mbed, you
// can do the same by creating `Serial pc(USBTX, USBRX)` and then calling
// `pc.printf("%s", s)`.
// To add an equivalent function for your own platform, create your own
// implementation file, and place it in a subfolder named after the OS
// you're targeting. For example, see the Cortex M bare metal version in
// tensorflow/lite/micro/bluepill/debug_log.cc or the mbed one in
// tensorflow/lite/micro/mbed/debug_log.cc.
#include "tensorflow/lite/micro/debug_log.h"
#ifndef TF_LITE_STRIP_ERROR_STRINGS
#include <cstdio>
#endif
extern "C" void DebugLog(const char* s) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
// Reusing TF_LITE_STRIP_ERROR_STRINGS to disable DebugLog completely to get
// maximum reduction in binary size. This is because we have DebugLog calls
// via TF_LITE_CHECK that are not stubbed out by TF_LITE_REPORT_ERROR.
fprintf(stderr, "%s", s);
#endif
}
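
As the comment block above suggests, a bare-metal port only needs to push the string out over some serial interface. A hypothetical sketch, where uart_putc() is a made-up stand-in for whatever output routine the target's BSP actually provides:

// Hypothetical bare-metal DebugLog variant; uart_putc() is assumed to be
// supplied by the board support package and is not a real TFLM symbol.
extern "C" void uart_putc(char c);

extern "C" void DebugLog(const char* s) {
  // Emit the string one character at a time over the serial interface.
  while (*s) {
    uart_putc(*s++);
  }
}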


@@ -0,0 +1,28 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Xtensa implementation of micro_time.
// To include this with make, build with TARGET=xtensa_hifimini_staging.
#include "tensorflow/lite/micro/micro_time.h"
#include <time.h>
namespace tflite {
int32_t ticks_per_second() { return CLOCKS_PER_SEC; }
int32_t GetCurrentTimeTicks() { return clock(); }
} // namespace tflite
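
For reference, a typical way to consume this pair; a sketch in which the benchmark scaffolding is illustrative, while GetCurrentTimeTicks() and ticks_per_second() are exactly the functions defined above:

#include "tensorflow/lite/micro/micro_time.h"

// Sketch: measure elapsed time around a unit of work.
void TimedWork() {
  const int32_t start = tflite::GetCurrentTimeTicks();
  // ... run an inference or other work here ...
  const int32_t elapsed_ticks = tflite::GetCurrentTimeTicks() - start;
  const float seconds =
      static_cast<float>(elapsed_ticks) / tflite::ticks_per_second();
  (void)seconds;  // e.g., report via DebugLog
}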