Fix xtensa_hifimini build.
The bottom line is that we need to have clear separation between the different targets which is what this change does. PiperOrigin-RevId: 319834782 Change-Id: I33a08ac9b45a61b3b3e019fe854577a6561573e4
This commit is contained in:
parent
e03404596a
commit
ee95c09271
@ -15,6 +15,11 @@ config_setting(
|
||||
define_values = {"tflm_build": "xtensa_hifimini"},
|
||||
)
|
||||
|
||||
config_setting(
|
||||
name = "xtensa_hifimini_staging",
|
||||
define_values = {"tflm_build": "xtensa_hifimini_staging"},
|
||||
)
|
||||
|
||||
package_group(
|
||||
name = "micro_top_level",
|
||||
packages = ["//tensorflow/lite/micro"],
|
||||
@ -72,6 +77,20 @@ cc_library(
|
||||
"xtensa_hifimini/softmax.cc",
|
||||
"xtensa_hifimini/svdf.cc",
|
||||
],
|
||||
":xtensa_hifimini_staging": [
|
||||
# TODO(b/144176795): finer granularity would help reduce the
|
||||
# duplication of srcs in the BUILD rules (in this case conv.cc and
|
||||
# depthwise_conv.cc). We are falling back to reference kernels in
|
||||
# case the optimized kernels are not implemented to match the
|
||||
# behavior that we get with the Makefiles.
|
||||
"conv.cc",
|
||||
"depthwise_conv.cc",
|
||||
"xtensa_hifimini/fixedpoint_utils.h",
|
||||
"xtensa_hifimini_staging/fully_connected.cc",
|
||||
"xtensa_hifimini_staging/quantize.cc",
|
||||
"xtensa_hifimini_staging/softmax.cc",
|
||||
"xtensa_hifimini_staging/svdf.cc",
|
||||
],
|
||||
}),
|
||||
hdrs = ["micro_ops.h"],
|
||||
# TODO(b/153609488): enable embedded build once we can properly support it.
|
||||
|
@ -24,7 +24,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
#include "tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h"
|
||||
#include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
|
@ -24,7 +24,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/padding.h"
|
||||
#include "tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h"
|
||||
#include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
|
@ -24,7 +24,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h"
|
||||
#include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
|
@ -21,7 +21,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/internal/quantization_util.h"
|
||||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h"
|
||||
#include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
|
@ -24,7 +24,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/kernels/kernel_util.h"
|
||||
#include "tensorflow/lite/kernels/op_macros.h"
|
||||
#include "tensorflow/lite/micro/kernels/activation_utils.h"
|
||||
#include "tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h"
|
||||
#include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
|
@ -1,153 +0,0 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_HIFIMINI_FIXEDPOINT_UTILS_H_
|
||||
#define TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_HIFIMINI_FIXEDPOINT_UTILS_H_
|
||||
|
||||
#include <xtensa/tie/xt_hifi2.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace ops {
|
||||
namespace micro {
|
||||
namespace xtensa {
|
||||
namespace hifimini {
|
||||
|
||||
// INT24 MIN/MAX
|
||||
#define INT24_MIN -8388608
|
||||
#define INT24_MAX 8388607
|
||||
|
||||
//
|
||||
// Multiply 24bit value by a quantized multiplier (w/ shift) and returns a 48bit
|
||||
// aligned value in the QR register.
|
||||
//
|
||||
inline ae_q56s MultiplyByQuantizedMultiplier(ae_p24x2s x_24x2,
|
||||
int32_t quantized_multiplier,
|
||||
int shift) {
|
||||
// A value with 1 sign bit, N integer bits and M fractional bits is
|
||||
// represented as QN+1.M since the sign bit is included in the integer bits.
|
||||
//
|
||||
// The Q notation in this method explains the values represented in each
|
||||
// variable, along with an implicit division since the quantized_multiplier
|
||||
// represents a value between 0.5 and 1.0 (Q1.X-1 where X is the bit precision
|
||||
// of the type).
|
||||
//
|
||||
// Load the quantized multiplier into the PR register.
|
||||
// NOTE: This method assumes that this param has been calculated for 24bit
|
||||
// space - not 32bits.
|
||||
// Q32.0 / 2^23 -> Q24.0 / 2^23 representing a Q1.23 multiplier.
|
||||
ae_p24x2s quantized_multiplier_24x2 = AE_MOVPA24(quantized_multiplier);
|
||||
// Shift right by 23 - 16 bits minus the specified shift. This is because we
|
||||
// keep 16 fractional bits until the end to perform rounding. Subtract shift
|
||||
// since shift is a left shift, and the 23-16 is a right shift.
|
||||
int shift_amount = 7 - shift;
|
||||
|
||||
// Find the product of x and the quantized_multiplier.
|
||||
// Q24.0 / 2^23 * Q24.0 = Q48.0 / 2^23
|
||||
// Q48.0 / 2^23 >> 7 = Q48.0 / 2^16
|
||||
ae_q56s result_56 = AE_MULP24S_HH(x_24x2, quantized_multiplier_24x2);
|
||||
|
||||
// Shift right if shift amount is positive, left if shift amount is negative.
|
||||
if (shift_amount >= 0) {
|
||||
result_56 = AE_Q56S_SRA(result_56, shift_amount);
|
||||
} else {
|
||||
result_56 = AE_Q56S_SLA(result_56, -shift_amount);
|
||||
}
|
||||
|
||||
// Round off the bottom 16 bits.
|
||||
// Q48.0 / 2^16 -> Q32.0 aligned to 48 bits.
|
||||
result_56 = AE_ROUNDSQ32SYM(result_56);
|
||||
return result_56;
|
||||
}
|
||||
|
||||
//
|
||||
// Multiply 32bit value by a quantized multiplier (w/ shift) and returns a 48bit
|
||||
// aligned value in the QR register.
|
||||
//
|
||||
inline ae_q56s MultiplyByQuantizedMultiplier(int32_t x,
|
||||
int32_t quantized_multiplier,
|
||||
int shift) {
|
||||
// Convert x into a 2x24bit PR register file. If x is outside the numerical
|
||||
// limits of a 24bit integer, the "fractional" or lower 8bits are discarded.
|
||||
// If x is within the range of a 24 bit integer, the "signed" or upper 8bits
|
||||
// are discarded.
|
||||
ae_p24x2s x_24x2;
|
||||
if (x > INT24_MIN && x < INT24_MAX) {
|
||||
x_24x2 = AE_MOVPA24(x);
|
||||
} else {
|
||||
x_24x2 = static_cast<ae_p24s>(*reinterpret_cast<ae_p24f*>(&x));
|
||||
shift += 8;
|
||||
}
|
||||
|
||||
return MultiplyByQuantizedMultiplier(x_24x2, quantized_multiplier, shift);
|
||||
}
|
||||
|
||||
//
|
||||
// Calculate quantization params for 24bit runtimes.
|
||||
//
|
||||
inline void QuantizeMultiplier(float multiplier, int32_t* quantized_multiplier,
|
||||
int* shift) {
|
||||
if (multiplier == 0.0f) {
|
||||
*quantized_multiplier = 0;
|
||||
*shift = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
// Special cased to 24bit:
|
||||
const float q = std::frexp(multiplier, shift);
|
||||
auto q_fixed = static_cast<int64_t>(std::round(q * (1 << 23)));
|
||||
|
||||
TFLITE_CHECK(q_fixed <= (1 << 23));
|
||||
if (q_fixed == (1 << 23)) {
|
||||
q_fixed /= 2;
|
||||
++*shift;
|
||||
}
|
||||
TFLITE_CHECK_LE(q_fixed, INT24_MAX);
|
||||
|
||||
// Ensure shift does not exceed 24-bit range.
|
||||
TFLITE_CHECK_LE(*shift, 23);
|
||||
if (*shift < -23) {
|
||||
*shift = 0;
|
||||
q_fixed = 0;
|
||||
}
|
||||
*quantized_multiplier = static_cast<int32_t>(q_fixed);
|
||||
}
|
||||
|
||||
//
|
||||
// Convert a floating point number to a Q representation for 24 bit integers.
|
||||
//
|
||||
inline int CreateQConstantForInt24(int integer_bits, float f) {
|
||||
const float min_bounds = static_cast<float>(INT24_MIN);
|
||||
const float max_bounds = static_cast<float>(INT24_MAX);
|
||||
|
||||
int fractional_bits = 23 - integer_bits;
|
||||
float raw = std::round(f * static_cast<float>(1 << fractional_bits));
|
||||
raw = std::max(raw, min_bounds);
|
||||
raw = std::min(raw, max_bounds);
|
||||
return static_cast<int>(raw);
|
||||
}
|
||||
|
||||
} // namespace hifimini
|
||||
} // namespace xtensa
|
||||
} // namespace micro
|
||||
} // namespace ops
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_HIFIMINI_FIXEDPOINT_UTILS_H_
|
@ -21,7 +21,7 @@
|
||||
# logs for the test to pass.
|
||||
|
||||
declare -r ROOT_DIR=`pwd`
|
||||
declare -r TEST_TMPDIR=/tmp/test_bluepill_binary/
|
||||
declare -r TEST_TMPDIR=/tmp/test_xtensa_hifimini_binary/
|
||||
declare -r MICRO_LOG_PATH=${TEST_TMPDIR}/$1
|
||||
declare -r MICRO_LOG_FILENAME=${MICRO_LOG_PATH}/logs.txt
|
||||
mkdir -p ${MICRO_LOG_PATH}
|
38
tensorflow/lite/micro/testing/test_xtensa_hifimini_staging_binary.sh
Executable file
38
tensorflow/lite/micro/testing/test_xtensa_hifimini_staging_binary.sh
Executable file
@ -0,0 +1,38 @@
|
||||
#!/bin/bash -e
|
||||
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
#
|
||||
# Tests an Xtensa HiFi Mini staging binary by parsing the log output.
|
||||
#
|
||||
# First argument is the binary location.
|
||||
# Second argument is a regular expression that's required to be in the output
|
||||
# logs for the test to pass.
|
||||
|
||||
# Directory the script was invoked from.
declare -r ROOT_DIR="$(pwd)"
# Logs are stored under a per-binary directory below TEST_TMPDIR.
declare -r TEST_TMPDIR=/tmp/test_xtensa_hifimini_staging_binary/
declare -r MICRO_LOG_PATH="${TEST_TMPDIR}/$1"
declare -r MICRO_LOG_FILENAME="${MICRO_LOG_PATH}/logs.txt"
# All expansions are quoted so that binary paths containing spaces or glob
# characters are passed through as a single argument.
mkdir -p "${MICRO_LOG_PATH}"

# Run the binary with xt-run and capture stdout and stderr in the log file
# while still echoing them to the console.
xt-run --xtensa-core="${XTENSA_CORE}" "$1" 2>&1 | tee "${MICRO_LOG_FILENAME}"

# The test passes only if the required regular expression appears in the logs.
# -e keeps a pattern that starts with '-' from being parsed as an option.
if grep -q -e "$2" "${MICRO_LOG_FILENAME}"
then
  echo "$1: PASS"
  exit 0
else
  echo "$1: FAIL - '$2' not found in logs."
  exit 1
fi
|
@ -1,6 +1,6 @@
|
||||
ifneq ($(filter xtensa-xpg, $(ALL_TAGS)),)
|
||||
ifneq ($(filter xtensa_hifimini_staging, $(ALL_TAGS)),)
|
||||
|
||||
XTENSA_PATH = $(MAKEFILE_DIR)/../../kernels/xtensa_hifimini
|
||||
XTENSA_PATH = $(MAKEFILE_DIR)/../../kernels/xtensa_hifimini_staging
|
||||
|
||||
ifneq (,$(filter xtensa_hifimini%, $(ALL_TAGS)))
|
||||
|
@ -1,11 +1,11 @@
|
||||
# Settings for Xtensa XPG toolchain.
|
||||
# Settings for Xtensa toolchain for the hifimini kernels.
|
||||
# REQUIRED:
|
||||
# - RI2019.2 Toolkit (for xt-clang/xt-clang++).
|
||||
# - XTENSA_CORE: The name of the core to use, will cause a compiler exception
|
||||
# without providing a core.
|
||||
|
||||
ifeq ($(TARGET), xtensa-xpg)
|
||||
TARGET_ARCH := xtensa-xpg
|
||||
ifeq ($(TARGET), xtensa_hifimini)
|
||||
TARGET_ARCH := xtensa_hifimini
|
||||
|
||||
PLATFORM_ARGS = \
|
||||
-DTF_LITE_MCU_DEBUG_LOG \
|
||||
@ -29,7 +29,7 @@ ifeq ($(TARGET), xtensa-xpg)
|
||||
|
||||
LDFLAGS += -Wl,-gc-sections
|
||||
|
||||
TEST_SCRIPT := tensorflow/lite/micro/testing/test_xtensa_xpg_binary.sh
|
||||
TEST_SCRIPT := tensorflow/lite/micro/testing/test_xtensa_hifimini_binary.sh
|
||||
|
||||
# TODO(b/156962140): This manually maintained list of excluded examples is
|
||||
# quite error prone.
|
@ -0,0 +1,45 @@
|
||||
# Settings for Xtensa toolchain for hifimini_staging kernels.
|
||||
# REQUIRED:
|
||||
# - RI2019.2 Toolkit (for xt-clang/xt-clang++).
|
||||
# - XTENSA_CORE: The name of the core to use, will cause a compiler exception
|
||||
# without providing a core.
|
||||
|
||||
ifeq ($(TARGET), xtensa_hifimini_staging)
|
||||
TARGET_ARCH := xtensa_hifimini_staging
|
||||
|
||||
PLATFORM_ARGS = \
|
||||
-DTF_LITE_MCU_DEBUG_LOG \
|
||||
--xtensa-core=$(XTENSA_CORE) \
|
||||
-mcoproc \
|
||||
-DXTENSA -DMAX_RFFT_PWR=9 -DMIN_RFFT_PWR=MAX_RFFT_PWR \
|
||||
-fdata-sections \
|
||||
-ffunction-sections \
|
||||
-fno-exceptions \
|
||||
-fno-unwind-tables \
|
||||
-fno-use-cxa-atexit \
|
||||
-fmessage-length=0 \
|
||||
-fno-threadsafe-statics
|
||||
|
||||
TARGET_TOOLCHAIN_PREFIX := xt-
|
||||
CXX_TOOL := clang++
|
||||
CC_TOOL := clang
|
||||
|
||||
CXXFLAGS += $(PLATFORM_ARGS)
|
||||
CCFLAGS += $(PLATFORM_ARGS)
|
||||
|
||||
LDFLAGS += -Wl,-gc-sections
|
||||
|
||||
TEST_SCRIPT := tensorflow/lite/micro/testing/test_xtensa_hifimini_staging_binary.sh
|
||||
|
||||
# TODO(b/156962140): This manually maintained list of excluded examples is
|
||||
# quite error prone.
|
||||
EXCLUDED_EXAMPLE_TESTS := \
|
||||
tensorflow/lite/micro/examples/image_recognition_experimental/Makefile.inc \
|
||||
tensorflow/lite/micro/examples/magic_wand/Makefile.inc \
|
||||
tensorflow/lite/micro/examples/micro_speech/Makefile.inc \
|
||||
tensorflow/lite/micro/examples/network_tester/Makefile.inc \
|
||||
tensorflow/lite/micro/examples/person_detection/Makefile.inc \
|
||||
tensorflow/lite/micro/examples/person_detection_experimental/Makefile.inc
|
||||
MICRO_LITE_EXAMPLE_TESTS := $(filter-out $(EXCLUDED_EXAMPLE_TESTS), $(MICRO_LITE_EXAMPLE_TESTS))
|
||||
|
||||
endif
|
50
tensorflow/lite/micro/xtensa_hifimini_staging/debug_log.cc
Normal file
50
tensorflow/lite/micro/xtensa_hifimini_staging/debug_log.cc
Normal file
@ -0,0 +1,50 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// Reference implementation of the DebugLog() function that's required for a
|
||||
// platform to support the TensorFlow Lite for Microcontrollers library. This is
|
||||
// the only function that's absolutely required to be available on a target
|
||||
// device, since it's used for communicating test results back to the host so
|
||||
// that we can verify the implementation is working correctly.
|
||||
// It's designed to be as easy as possible to supply an implementation though.
|
||||
// On platforms that have a POSIX stack or C library, it can be written as a
|
||||
// single call to `fprintf(stderr, "%s", s)` to output a string to the error
|
||||
// stream of the console, but if there's no OS or C library available, there's
|
||||
// almost always an equivalent way to write out a string to some serial
|
||||
// interface that can be used instead. For example on Arm M-series MCUs, calling
|
||||
// the `bkpt #0xAB` assembler instruction will output the string in r1 to
|
||||
// whatever debug serial connection is available. If you're running mbed, you
|
||||
// can do the same by creating `Serial pc(USBTX, USBRX)` and then calling
|
||||
// `pc.printf("%s", s)`.
|
||||
// To add an equivalent function for your own platform, create your own
|
||||
// implementation file, and place it in a subfolder named after the OS
|
||||
// you're targeting. For example, see the Cortex M bare metal version in
|
||||
// tensorflow/lite/micro/bluepill/debug_log.cc or the mbed one on
|
||||
// tensorflow/lite/micro/mbed/debug_log.cc.
|
||||
|
||||
#include "tensorflow/lite/micro/debug_log.h"
|
||||
|
||||
#ifndef TF_LITE_STRIP_ERROR_STRINGS
|
||||
#include <cstdio>
|
||||
#endif
|
||||
|
||||
extern "C" void DebugLog(const char* s) {
#ifdef TF_LITE_STRIP_ERROR_STRINGS
  // TF_LITE_STRIP_ERROR_STRINGS is reused to disable DebugLog completely, to
  // get the maximum reduction in binary size. This is needed because there are
  // DebugLog calls via TF_LITE_CHECK that are not stubbed out by
  // TF_LITE_REPORT_ERROR.
  (void)s;
#else
  // Write the string unmodified to the error stream.
  fprintf(stderr, "%s", s);
#endif
}
|
28
tensorflow/lite/micro/xtensa_hifimini_staging/micro_time.cc
Normal file
28
tensorflow/lite/micro/xtensa_hifimini_staging/micro_time.cc
Normal file
@ -0,0 +1,28 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// Xtensa implementation of micro_timer.
|
||||
// To include this with make, build with TARGET=xtensa_hifimini_staging.
|
||||
#include "tensorflow/lite/micro/micro_time.h"
|
||||
|
||||
#include <time.h>
|
||||
|
||||
namespace tflite {

// Returns the number of clock() ticks per second (CLOCKS_PER_SEC).
int32_t ticks_per_second() {
  return static_cast<int32_t>(CLOCKS_PER_SEC);
}

// Returns the current value of the C library clock(), in ticks.
int32_t GetCurrentTimeTicks() {
  return static_cast<int32_t>(clock());
}

}  // namespace tflite
|
Loading…
Reference in New Issue
Block a user