Merge pull request #40943 from pnikam-cad:hifi4_nnlib_v2_2_0_update

PiperOrigin-RevId: 322616499
Change-Id: I6a08cb4f11abe33c38c91a72cc45c635d0f78797
Commit: bf3b14ffcb
tensorflow/lite/micro/kernels/xtensa_hifi/activations.cc
@@ -1,24 +1,24 @@
-/******************************************************************************
- * Copyright (C) 2019 Cadence Design Systems, Inc.
+/*******************************************************************************
+ * Copyright (c) 2019-2020 Cadence Design Systems, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining
  * a copy of this software and associated documentation files (the
  * "Software"), to use this Software with Cadence processor cores only and
  * not with any other processors and platforms, subject to
  * the following conditions:
  *
  * The above copyright notice and this permission notice shall be included
  * in all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-******************************************************************************/

+******************************************************************************/
 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
@@ -41,8 +41,8 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/op_macros.h"
+#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h"
 #include "tensorflow/lite/micro/micro_utils.h"
-#include "xtensa_tf_micro_common.h"

 namespace tflite {
 namespace ops {
@@ -109,6 +109,7 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {

   switch (input->type) {
     case kTfLiteFloat32: {
+#if HIFI_VFPU
       int err;
       const float* inp_data_ptr;
       float* out_data_ptr;
@@ -119,11 +120,13 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
       inp_data_ptr = GetTensorData<float>(input);
       out_data_ptr = GetTensorData<float>(output);

-      const float f32_pos_inf = 0x7F800000;
-      err = xa_nn_vec_relu_f32_f32(out_data_ptr, inp_data_ptr, f32_pos_inf,
-                                   flat_size);
+      err = xa_nn_vec_relu_std_f32_f32(out_data_ptr, inp_data_ptr, flat_size);

-      CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_relu1_f32_f32 failed");
+      CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_relu_std_f32_f32 failed");
+#else
+      ReluFloat(GetTensorShape(input), GetTensorData<float>(input),
+                GetTensorShape(output), GetTensorData<float>(output));
+#endif /* HIFI_VFPU */
       return kTfLiteOk;
     }
     case kTfLiteInt8: {
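An aside on the lines removed above: `const float f32_pos_inf = 0x7F800000;` never actually held +infinity. The integer literal is value-converted, so the variable held 2139095040.0f; 0x7F800000 is the bit pattern of +inf, not its value. Switching to the `_std` ReLU variant, which takes no upper bound, sidesteps the problem. A minimal standalone illustration of the distinction (standard C++ only, nothing here comes from the patch):

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <limits>

int main() {
  // What the removed line computed: an integer value-converted to float.
  const float converted = 0x7F800000;  // 2139095040.0f, large but finite

  // Reinterpreting the same bits as a float does give +inf.
  const uint32_t bits = 0x7F800000u;
  float reinterpreted;
  std::memcpy(&reinterpreted, &bits, sizeof(reinterpreted));

  const float inf = std::numeric_limits<float>::infinity();

  std::printf("converted:     %f\n", converted);      // 2139095040.000000
  std::printf("reinterpreted: %f\n", reinterpreted);  // inf
  std::printf("limits:        %f\n", inf);            // inf
  return 0;
}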
@@ -140,14 +143,17 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
       const RuntimeShape& input_shape = GetTensorShape(input);
       const RuntimeShape& output_shape = GetTensorShape(output);
       const int flat_size = MatchingFlatSize(input_shape, output_shape);
+      const uint8_t zero = input->params.zero_point;

       inp_data_ptr = GetTensorData<uint8_t>(input);
       out_data_ptr = GetTensorData<uint8_t>(output);

       err = xa_nn_vec_activation_min_max_asym8_asym8(
-          out_data_ptr, inp_data_ptr, 0, 255, flat_size);  // Is 255 right?
+          out_data_ptr, inp_data_ptr, zero, std::numeric_limits<uint8_t>::max(),
+          flat_size);

-      CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_activation_min_max_8_8 failed");
+      CHECK_ERR_HIFI_NNLIB_KER(
+          err, "xa_nn_vec_activation_min_max_asym8_asym8 failed");
       return kTfLiteOk;
     }
     default: {
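Why `zero` and `std::numeric_limits<uint8_t>::max()` are the right bounds here: with asymmetric quantization, real 0.0 maps to the zero point, so clamping quantized values to [zero_point, 255] computes max(x, 0.0) in the real domain. That also answers the removed `// Is 255 right?` comment: 255 was right, but the lower bound 0 was not, unless the zero point happened to be 0. A scalar model of what the vectorized call is being asked to do here (this sketch mirrors the semantics, not the NNLib implementation):

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Quantized ReLU: clamp to [zero_point, 255]. The upper bound is implicit
// because uint8_t cannot exceed 255.
void ReluAsym8(uint8_t* out, const uint8_t* in, uint8_t zero_point, int n) {
  for (int i = 0; i < n; ++i) {
    out[i] = std::max(in[i], zero_point);
  }
}

int main() {
  // Example: scale 0.1 and zero_point 128, so 118 encodes -1.0 and 138
  // encodes +1.0.
  const uint8_t in[4] = {118, 128, 138, 255};
  uint8_t out[4];
  ReluAsym8(out, in, /*zero_point=*/128, 4);
  for (uint8_t v : out) std::printf("%u ", v);  // 128 128 138 255
  std::printf("\n");
  return 0;
}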
@@ -168,6 +174,7 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {

   switch (input->type) {
     case kTfLiteFloat32: {
+#if HIFI_VFPU
       int err;
       const float* inp_data_ptr;
       float* out_data_ptr;
@@ -180,7 +187,11 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {

       err = xa_nn_vec_relu6_f32_f32(out_data_ptr, inp_data_ptr, flat_size);

-      CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_relu1_f32_f32 failed");
+      CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_relu6_f32_f32 failed");
+#else
+      Relu6Float(GetTensorShape(input), GetTensorData<float>(input),
+                 GetTensorShape(output), GetTensorData<float>(output));
+#endif /* HIFI_VFPU */
       return kTfLiteOk;
     }
     case kTfLiteInt8: {
@@ -209,7 +220,8 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
       err = xa_nn_vec_activation_min_max_asym8_asym8(out_data_ptr, inp_data_ptr,
                                                      zero, six, flat_size);

-      CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_activation_min_max_8_8 failed");
+      CHECK_ERR_HIFI_NNLIB_KER(
+          err, "xa_nn_vec_activation_min_max_asym8_asym8 failed");
       return kTfLiteOk;
     }
     default: {
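The `zero` and `six` bounds in the Relu6 hunk are the quantized images of the real interval [0, 6]; their computation presumably sits just above the context shown, so it does not appear in this diff. A hedged sketch of the usual formula, q = zero_point + round(real / scale), saturated to the uint8 range:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Quantize a real value with the given scale/zero_point, saturating to uint8.
uint8_t QuantizeUint8(float real, float scale, int32_t zero_point) {
  const int32_t q =
      zero_point + static_cast<int32_t>(std::round(real / scale));
  return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}

int main() {
  // Example parameters: scale 0.05, zero_point 3.
  const float scale = 0.05f;
  const int32_t zero_point = 3;
  const uint8_t zero = QuantizeUint8(0.0f, scale, zero_point);  // 3
  const uint8_t six = QuantizeUint8(6.0f, scale, zero_point);   // 123
  std::printf("ReLU6 clamps quantized values to [%u, %u]\n", zero, six);
  return 0;
}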
tensorflow/lite/micro/kernels/xtensa_hifi/add.cc (new file, 273 lines)
@@ -0,0 +1,273 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/kernels/internal/reference/add.h"
+
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
+#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/kernels/op_macros.h"
+#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h"
+#include "tensorflow/lite/micro/memory_helpers.h"
+
+namespace tflite {
+namespace ops {
+namespace micro {
+namespace add {
+
+constexpr int kInputTensor1 = 0;
+constexpr int kInputTensor2 = 1;
+constexpr int kOutputTensor = 0;
+
+struct OpData {
+  bool requires_broadcast;
+
+  // These fields are used in both the general 8-bit -> 8bit quantized path,
+  // and the special 16-bit -> 16bit quantized path
+  int input1_shift;
+  int input2_shift;
+  int32 output_activation_min;
+  int32 output_activation_max;
+
+  // These fields are used only in the general 8-bit -> 8bit quantized path
+  int32 input1_multiplier;
+  int32 input2_multiplier;
+  int32 output_multiplier;
+  int output_shift;
+  int left_shift;
+  int32 input1_offset;
+  int32 input2_offset;
+  int32 output_offset;
+};
+
+TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
+                             const TfLiteTensor* input1,
+                             const TfLiteTensor* input2, TfLiteTensor* output,
+                             OpData* data) {
+  data->requires_broadcast = !HaveSameShapes(input1, input2);
+
+  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
+    // 8bit -> 8bit general quantized path, with general rescalings
+    data->input1_offset = -input1->params.zero_point;
+    data->input2_offset = -input2->params.zero_point;
+    data->output_offset = output->params.zero_point;
+    data->left_shift = 20;
+    const double twice_max_input_scale =
+        2 * static_cast<double>(
+                std::max(input1->params.scale, input2->params.scale));
+    const double real_input1_multiplier =
+        static_cast<double>(input1->params.scale) / twice_max_input_scale;
+    const double real_input2_multiplier =
+        static_cast<double>(input2->params.scale) / twice_max_input_scale;
+    const double real_output_multiplier =
+        twice_max_input_scale /
+        ((1 << data->left_shift) * static_cast<double>(output->params.scale));
+
+    QuantizeMultiplierSmallerThanOneExp(
+        real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);
+
+    QuantizeMultiplierSmallerThanOneExp(
+        real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);
+
+    QuantizeMultiplierSmallerThanOneExp(
+        real_output_multiplier, &data->output_multiplier, &data->output_shift);
+
+    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
+        context, params->activation, output, &data->output_activation_min,
+        &data->output_activation_max));
+  }
+
+  return kTfLiteOk;
+}
+
TfLiteStatus EvalAdd(TfLiteContext* context, TfLiteNode* node,
|
||||||
|
TfLiteAddParams* params, const OpData* data,
|
||||||
|
const TfLiteTensor* input1, const TfLiteTensor* input2,
|
||||||
|
TfLiteTensor* output) {
|
||||||
|
float output_activation_min, output_activation_max;
|
||||||
|
CalculateActivationRange(params->activation, &output_activation_min,
|
||||||
|
&output_activation_max);
|
||||||
|
tflite::ArithmeticParams op_params;
|
||||||
|
SetActivationParams(output_activation_min, output_activation_max, &op_params);
|
||||||
|
#define TF_LITE_ADD(opname) \
|
||||||
|
reference_ops::opname(op_params, GetTensorShape(input1), \
|
||||||
|
GetTensorData<float>(input1), GetTensorShape(input2), \
|
||||||
|
GetTensorData<float>(input2), GetTensorShape(output), \
|
||||||
|
GetTensorData<float>(output))
|
||||||
|
if (data->requires_broadcast) {
|
||||||
|
TF_LITE_ADD(BroadcastAdd4DSlow);
|
||||||
|
} else {
|
||||||
|
#if HIFI_VFPU
|
||||||
|
int err;
|
||||||
|
const RuntimeShape& input1_shape = GetTensorShape(input1);
|
||||||
|
const RuntimeShape& input2_shape = GetTensorShape(input2);
|
||||||
|
const RuntimeShape& output_shape = GetTensorShape(output);
|
||||||
|
const int flat_size =
|
||||||
|
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
||||||
|
|
||||||
|
err = xa_nn_elm_add_f32xf32_f32(GetTensorData<float>(output),
|
||||||
|
GetTensorData<float>(input1),
|
||||||
|
GetTensorData<float>(input2), flat_size);
|
||||||
|
|
||||||
|
CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_elm_add_f32xf32_f32 failed");
|
||||||
|
|
||||||
|
err = xa_nn_vec_activation_min_max_f32_f32(
|
||||||
|
GetTensorData<float>(output), GetTensorData<float>(output),
|
||||||
|
output_activation_min, output_activation_max, flat_size);
|
||||||
|
|
||||||
|
CHECK_ERR_HIFI_NNLIB_KER(err,
|
||||||
|
"xa_nn_vec_activation_min_max_f32_f32 failed");
|
||||||
|
#else
|
||||||
|
TF_LITE_ADD(Add);
|
||||||
|
#endif /* HIFI_VFPU */
|
||||||
|
}
|
||||||
|
#undef TF_LITE_ADD
|
||||||
|
return kTfLiteOk;
|
||||||
|
}
|
||||||
|
|
||||||
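Note the shape of the HiFi float path in EvalAdd: one NNLib call adds the flattened buffers, and a second clamps the result in place to the fused-activation range. A scalar model of that two-pass structure (the xa_nn_* names above are the NNLib API; this sketch only mirrors their semantics):

#include <algorithm>
#include <cstdio>

void ElementwiseAdd(float* out, const float* a, const float* b, int n) {
  for (int i = 0; i < n; ++i) out[i] = a[i] + b[i];
}

// In-place clamp, standing in for the activation_min_max second pass.
void ActivationMinMax(float* buf, float lo, float hi, int n) {
  for (int i = 0; i < n; ++i) buf[i] = std::min(hi, std::max(lo, buf[i]));
}

int main() {
  const float a[4] = {-4.f, -1.f, 2.f, 9.f};
  const float b[4] = {1.f, 1.f, 1.f, 1.f};
  float out[4];
  ElementwiseAdd(out, a, b, 4);
  ActivationMinMax(out, 0.f, 6.f, 4);  // e.g. a fused ReLU6
  for (float v : out) std::printf("%g ", v);  // 0 0 3 6
  std::printf("\n");
  return 0;
}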
+TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
+                              TfLiteAddParams* params, const OpData* data,
+                              const TfLiteTensor* input1,
+                              const TfLiteTensor* input2,
+                              TfLiteTensor* output) {
+  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
+    tflite::ArithmeticParams op_params;
+    op_params.left_shift = data->left_shift;
+    op_params.input1_offset = data->input1_offset;
+    op_params.input1_multiplier = data->input1_multiplier;
+    op_params.input1_shift = data->input1_shift;
+    op_params.input2_offset = data->input2_offset;
+    op_params.input2_multiplier = data->input2_multiplier;
+    op_params.input2_shift = data->input2_shift;
+    op_params.output_offset = data->output_offset;
+    op_params.output_multiplier = data->output_multiplier;
+    op_params.output_shift = data->output_shift;
+    SetActivationParams(data->output_activation_min,
+                        data->output_activation_max, &op_params);
+    bool need_broadcast = reference_ops::ProcessBroadcastShapes(
+        GetTensorShape(input1), GetTensorShape(input2), &op_params);
+#define TF_LITE_ADD(type, opname, dtype)                             \
+  type::opname(op_params, GetTensorShape(input1),                    \
+               GetTensorData<dtype>(input1), GetTensorShape(input2), \
+               GetTensorData<dtype>(input2), GetTensorShape(output), \
+               GetTensorData<dtype>(output));
+    if (output->type == kTfLiteInt8) {
+      if (need_broadcast) {
+        TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t);
+      } else {
+        TF_LITE_ADD(reference_integer_ops, Add, int8_t);
+      }
+    } else {
+      if (need_broadcast) {
+        TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, uint8_t);
+      } else {
+        int err;
+        const RuntimeShape& input1_shape = GetTensorShape(input1);
+        const RuntimeShape& input2_shape = GetTensorShape(input2);
+        const RuntimeShape& output_shape = GetTensorShape(output);
+        const int flat_size =
+            MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+        err = xa_nn_elm_add_asym8xasym8_asym8(
+            GetTensorData<uint8_t>(output), op_params.output_offset,
+            op_params.output_shift, op_params.output_multiplier,
+            op_params.quantized_activation_min,
+            op_params.quantized_activation_max, GetTensorData<uint8_t>(input1),
+            op_params.input1_offset, op_params.input1_shift,
+            op_params.input1_multiplier, GetTensorData<uint8_t>(input2),
+            op_params.input2_offset, op_params.input2_shift,
+            op_params.input2_multiplier, op_params.left_shift, flat_size);
+
+        CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_elm_add_asym8xasym8_asym8 failed");
+      }
+    }
+#undef TF_LITE_ADD
+  }
+
+  return kTfLiteOk;
+}
+
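For reference, the arithmetic that both the reference kernels and xa_nn_elm_add_asym8xasym8_asym8 perform with the parameters marshalled above: offset each input, pre-shift left, scale by the per-input multiplier, add, then rescale and offset into the output grid. A hedged scalar model, with plain doubles standing in for the fixed-point multiplies:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Scalar model of TFLite's quantized add. Real kernels use Q31 fixed-point
// multipliers; doubles keep the sketch short.
uint8_t QuantizedAdd(uint8_t x1, uint8_t x2, int32_t input1_offset,
                     int32_t input2_offset, double input1_multiplier,
                     double input2_multiplier, double output_multiplier,
                     int left_shift, int32_t output_offset) {
  const int32_t shifted1 = (x1 + input1_offset) * (1 << left_shift);
  const int32_t shifted2 = (x2 + input2_offset) * (1 << left_shift);
  const int32_t sum =
      static_cast<int32_t>(std::round(shifted1 * input1_multiplier)) +
      static_cast<int32_t>(std::round(shifted2 * input2_multiplier));
  const int32_t out =
      static_cast<int32_t>(std::round(sum * output_multiplier)) +
      output_offset;
  return static_cast<uint8_t>(std::min(255, std::max(0, out)));
}

int main() {
  // Inputs and output all use scale 0.01 and zero_point 128, so the input
  // multipliers are 0.5 each and the output multiplier is 2 * 2^-left_shift.
  const int left_shift = 20;
  const uint8_t q =
      QuantizedAdd(/*x1=*/178, /*x2=*/148, -128, -128, 0.5, 0.5,
                   2.0 / (1 << left_shift), left_shift, 128);
  std::printf("%u\n", q);  // 198: real 0.5 + 0.2 = 0.7 -> 0.7/0.01 + 128
  return 0;
}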
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  void* data = nullptr;
+  if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
+      kTfLiteError) {
+    return nullptr;
+  }
+  return data;
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+
+  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
+  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+  OpData* data = static_cast<OpData*>(node->user_data);
+  auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
+
+  TF_LITE_ENSURE_STATUS(
+      CalculateOpData(context, params, input1, input2, output, data));
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData* data = static_cast<const OpData*>(node->user_data);
+
+  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
+  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+  if (output->type == kTfLiteFloat32) {
+    TF_LITE_ENSURE_OK(
+        context, EvalAdd(context, node, params, data, input1, input2, output));
+  } else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
+    TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
+                                                input1, input2, output));
+  } else {
+    TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
+                       TfLiteTypeGetName(output->type), output->type);
+    return kTfLiteError;
+  }
+
+  return kTfLiteOk;
+}
+
+}  // namespace add
+
+TfLiteRegistration Register_ADD() {
+  return {/*init=*/add::Init,
+          /*free=*/nullptr,
+          /*prepare=*/add::Prepare,
+          /*invoke=*/add::Eval,
+          /*profiling_string=*/nullptr,
+          /*builtin_code=*/0,
+          /*custom_name=*/nullptr,
+          /*version=*/0};
+}
+
+}  // namespace micro
+}  // namespace ops
+}  // namespace tflite
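To make the registration contract concrete, a hypothetical smoke check of the struct returned above; the interpreter, not user code, normally consumes these function pointers, calling init once per node, prepare once per allocation, and invoke once per inference:

#include <cassert>

#include "tensorflow/lite/c/common.h"

namespace tflite {
namespace ops {
namespace micro {
TfLiteRegistration Register_ADD();  // defined in add.cc above
}  // namespace micro
}  // namespace ops
}  // namespace tflite

int main() {
  const TfLiteRegistration reg = tflite::ops::micro::Register_ADD();
  // This kernel opted into the full lifecycle: persistent state in init,
  // one-time quantization math in prepare, arithmetic only in invoke.
  assert(reg.init != nullptr);
  assert(reg.prepare != nullptr);
  assert(reg.invoke != nullptr);
  assert(reg.free == nullptr);  // nothing to free; the arena owns the memory
  return 0;
}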
tensorflow/lite/micro/kernels/xtensa_hifi/conv.cc
@@ -1,24 +1,24 @@
-/******************************************************************************
- * Copyright (C) 2019 Cadence Design Systems, Inc.
+/*******************************************************************************
+ * Copyright (c) 2019-2020 Cadence Design Systems, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining
  * a copy of this software and associated documentation files (the
  * "Software"), to use this Software with Cadence processor cores only and
  * not with any other processors and platforms, subject to
  * the following conditions:
  *
  * The above copyright notice and this permission notice shall be included
  * in all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-******************************************************************************/

+******************************************************************************/
 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
@@ -44,7 +44,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/padding.h"
-#include "xtensa_tf_micro_common.h"
+#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h"

 namespace tflite {
 namespace ops {
@@ -55,7 +55,6 @@ constexpr int kInputTensor = 0;
 constexpr int kFilterTensor = 1;
 constexpr int kBiasTensor = 2;
 constexpr int kOutputTensor = 0;
-constexpr int kMaxChannels = 256;

 // Conv is quantized along dimension 0:
 // https://www.tensorflow.org/lite/performance/quantization_spec
@@ -71,9 +70,8 @@ struct OpData {
   int output_shift;

   // Per channel output multiplier and shift.
-  // (b/141139247): Allocate these dynamically when possible.
-  int32_t per_channel_output_multiplier[kMaxChannels];
-  int32_t per_channel_output_shift[kMaxChannels];
+  int32_t* per_channel_output_multiplier;
+  int32_t* per_channel_output_shift;

   // The range of the fused activation layer. For example for kNone and
   // uint8_t these would be 0 and 255.
@@ -94,10 +92,10 @@ inline PaddingType RuntimePaddingType(TfLitePadding padding) {
 }

 TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
-                             TfLiteConvParams* params, int width, int height,
-                             int filter_width, int filter_height, int out_width,
-                             int out_height, const TfLiteType data_type,
-                             OpData* data) {
+                             const TfLiteConvParams* params, int width,
+                             int height, int filter_width, int filter_height,
+                             int out_width, int out_height,
+                             const TfLiteType data_type, OpData* data) {
   bool has_bias = node->inputs->size == 3;
   // Check number of inputs/outputs
   TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
@@ -131,8 +129,69 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
   return kTfLiteOk;
 }

+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  void* data = nullptr;
+  if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
+      kTfLiteError) {
+    return nullptr;
+  }
+  return data;
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+
+  OpData* data = static_cast<OpData*>(node->user_data);
+  const auto params = static_cast<const TfLiteConvParams*>(node->builtin_data);
+
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
+
+  int input_width = input->dims->data[2];
+  int input_height = input->dims->data[1];
+  int filter_width = filter->dims->data[2];
+  int filter_height = filter->dims->data[1];
+  int output_width = output->dims->data[2];
+  int output_height = output->dims->data[1];
+
+  // Dynamically allocate per-channel quantization parameters.
+  const int num_channels = filter->dims->data[kConvQuantizedDimension];
+  TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer(
+      context, num_channels * sizeof(int32_t),
+      reinterpret_cast<void**>(&data->per_channel_output_multiplier)));
+  TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer(
+      context, num_channels * sizeof(int32_t),
+      reinterpret_cast<void**>(&data->per_channel_output_shift)));
+
+  // All per-channel quantized tensors need valid zero point and scale arrays.
+  if (input->type == kTfLiteInt8) {
+    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
+                      kTfLiteAffineQuantization);
+
+    const auto* affine_quantization =
+        static_cast<TfLiteAffineQuantization*>(filter->quantization.params);
+    TF_LITE_ENSURE(context, affine_quantization);
+    TF_LITE_ENSURE(context, affine_quantization->scale);
+    TF_LITE_ENSURE(context, affine_quantization->zero_point);
+
+    TF_LITE_ENSURE(context,
+                   affine_quantization->scale->size == 1 ||
+                       affine_quantization->scale->size ==
+                           filter->dims->data[kConvQuantizedDimension]);
+    TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
+                      affine_quantization->zero_point->size);
+  }
+
+  return CalculateOpData(context, node, params, input_width, input_height,
+                         filter_width, filter_height, output_width,
+                         output_height, input->type, data);
+}  // namespace conv
+
 TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
-                           TfLiteConvParams* params, OpData* data,
+                           TfLiteConvParams* params, const OpData& data,
                            const TfLiteTensor* input,
                            const TfLiteTensor* filter, const TfLiteTensor* bias,
                            TfLiteTensor* im2col, TfLiteTensor* hwcn_weights,
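The new Init/Prepare pair is the TFLite Micro memory idiom this patch moves conv onto: every size is known by Prepare time, so the per-node OpData and the per-channel arrays come out of the interpreter's persistent arena instead of fixed kMaxChannels members, and Eval allocates nothing. A stripped-down sketch of the same pattern for an invented kernel, using the AllocatePersistentBuffer call shapes exactly as they appear in the diff:

#include <cstdint>

#include "tensorflow/lite/c/common.h"

namespace {

struct MyOpData {
  int32_t* per_channel_multiplier;  // filled in Prepare, read in Eval
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  // One OpData per node instance, alive as long as the interpreter.
  void* data = nullptr;
  if (context->AllocatePersistentBuffer(context, sizeof(MyOpData), &data) ==
      kTfLiteError) {
    return nullptr;
  }
  return data;
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  auto* data = static_cast<MyOpData*>(node->user_data);
  const int num_channels = 8;  // normally read from the filter tensor dims
  // Arena-backed array replaces the old fixed-size kMaxChannels arrays.
  TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer(
      context, num_channels * sizeof(int32_t),
      reinterpret_cast<void**>(&data->per_channel_multiplier)));
  return kTfLiteOk;
}

}  // namespace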
@@ -143,9 +202,9 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,

   if ((params->dilation_width_factor == 1) &&
       (params->dilation_height_factor == 1)) {
-    const uint8 *input_data, *filter_data;
+    const uint8_t *input_data, *filter_data;
     const int32_t* bias_data;
-    uint8* output_data;
+    uint8_t* output_data;
     const RuntimeShape& input_shape = GetTensorShape(input);
     const RuntimeShape& filter_shape = GetTensorShape(filter);
     const RuntimeShape& output_shape = GetTensorShape(output);
@@ -158,14 +217,12 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,

     const int stride_width = params->stride_width;
     const int stride_height = params->stride_height;
-    const int dilation_width_factor = 1;
-    const int dilation_height_factor = 1;
-    const int pad_width = data->padding.width;
-    const int pad_height = data->padding.height;
-    const int32 output_activation_min = data->output_activation_min;
-    const int32 output_activation_max = data->output_activation_max;
-    const int32 output_multiplier = data->output_multiplier;
-    const int output_shift = -data->output_shift;
+    const int pad_width = data.padding.width;
+    const int pad_height = data.padding.height;
+    const int32 output_activation_min = data.output_activation_min;
+    const int32 output_activation_max = data.output_activation_max;
+    const int32 output_multiplier = data.output_multiplier;
+    const int output_shift = -data.output_shift;
     TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
     TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
     TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
@@ -186,13 +243,14 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
     const int filter_depth = filter_shape.Dims(3);

     int err, output_data_format = 0;
-    void* p_scratch;
-    uint8 *p_filter, *p_out_scratch;
+    uint8_t* p_scratch;
+    uint8_t* p_filter;
     // Calculate filter_depth_padded as next near multiple of 4
     int filter_depth_padded = (filter_depth + 3) & (~3);
     int out_length = output_height * output_width * output_depth;
+    int filter_size_padded = filter_height * filter_width * filter_depth_padded;
     int required_scratch, input_precision = PREC_ASYM8;
-    int h, w, c;
+    int h, c;

     required_scratch = xa_nn_conv2d_std_getsize(
         input_height, input_depth, filter_height, filter_width, stride_height,
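Two small things in the hunk above: `(filter_depth + 3) & (~3)` rounds up to the next multiple of 4 (the float path later uses `(filter_depth + 1) & (~1)` for 2), and the new filter_size_padded simply hoists the repeated filter_height * filter_width * filter_depth_padded product into one variable. The bit trick, generalized to any power-of-two alignment:

#include <cstdio>

// Round x up to the next multiple of a power-of-two alignment.
constexpr int RoundUp(int x, int alignment) {
  return (x + alignment - 1) & ~(alignment - 1);
}

int main() {
  std::printf("%d %d %d\n", RoundUp(5, 4), RoundUp(8, 4), RoundUp(3, 2));
  // Prints "8 8 4": already-aligned values pass through unchanged.
  return 0;
}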
@@ -207,19 +265,11 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
     ALLOCATE_XTENSA_NNLIB_SCRATCH_MEM;
     p_scratch = xtensa_nnlib_scratch_buf;

-    p_filter = (uint8*)p_scratch;
-    p_out_scratch =
-        (p_filter +
-         ALIGNED_SIZE((sizeof(uint8_t) * filter_height * filter_width *
-                       filter_depth_padded * output_depth),
-                      8));
+    p_filter = p_scratch;
     required_scratch +=
-        ALIGNED_SIZE((sizeof(uint8_t) * filter_height * filter_width *
-                      filter_depth_padded * output_depth),
-                     8);
-    p_scratch =
-        (uint8*)(p_out_scratch + ALIGNED_SIZE(sizeof(uint8_t) * out_length, 8));
-    required_scratch += ALIGNED_SIZE(sizeof(uint8_t) * out_length, 8);
+        ALIGNED_SIZE((sizeof(uint8_t) * filter_size_padded * output_depth), 8);
+    p_scratch +=
+        ALIGNED_SIZE(sizeof(uint8_t) * filter_size_padded * output_depth, 8);

     if (required_scratch > (int)XTENSA_NNLIB_MAX_SCRATCH_SIZE) {
       TF_LITE_KERNEL_LOG(context,
@@ -240,9 +290,8 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
     }

     for (int batch = 0; batch < batches; ++batch) {
-      uint8* p_out_temp;
-      p_out_temp = (uint8*)&p_out_scratch[0];
-      p_out_temp = (uint8*)ALIGN_PTR(p_out_temp, 8);
+      uint8_t* p_out_temp;
+      p_out_temp = &output_data[batch * out_length];

       err = xa_nn_conv2d_std_asym8xasym8(
           p_out_temp,
@@ -252,24 +301,24 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
           filter_width, output_depth, stride_width, stride_height, pad_width,
           pad_height, output_height, output_width, input_offset, filter_offset,
           output_multiplier, output_shift, output_offset, output_data_format,
-          p_scratch);
+          static_cast<void*>(p_scratch));

       CHECK_ERR_HIFI_NNLIB_KER(
           err, "conv2d_std_asym8: xa_nn_conv2d_std_asym8xasym8 failed");

-      for (int i = 0; i < out_length; i++) {
-        uint8* p_temp;
-        p_temp = &output_data[batch * out_length];
-        ACTIVATION_MIN_MAX_ASYM8(p_temp[i], p_out_temp[i],
-                                 output_activation_min, output_activation_max)
-      }
+      err = xa_nn_vec_activation_min_max_asym8_asym8(
+          p_out_temp, p_out_temp, output_activation_min, output_activation_max,
+          out_length);
+
+      CHECK_ERR_HIFI_NNLIB_KER(
+          err, "xa_nn_vec_activation_min_max_asym8_asym8 failed");
     }
   } else {
+    // TODO(b/154032858): Investigate removing extra copies.
     ConvParams op_params;
     op_params.padding_type = RuntimePaddingType(params->padding);
-    op_params.padding_values.width = data->padding.width;
-    op_params.padding_values.height = data->padding.height;
+    op_params.padding_values.width = data.padding.width;
+    op_params.padding_values.height = data.padding.height;
     op_params.stride_width = params->stride_width;
     op_params.stride_height = params->stride_height;
     op_params.dilation_width_factor = params->dilation_width_factor;
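The replacement above removes an extra copy: the old code convolved into an aligned scratch area and clamped element by element (via the ACTIVATION_MIN_MAX_ASYM8 macro) while copying into the output tensor, whereas the new code convolves straight into the output and clamps the whole batch in place with one vectorized NNLib call. What both variants compute, as a scalar model:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Clamp every element of a batch's output to the activation range.
void ActivationMinMaxAsym8(uint8_t* out, const uint8_t* in,
                           int activation_min, int activation_max, int n) {
  for (int i = 0; i < n; ++i) {
    const int v = in[i];
    out[i] = static_cast<uint8_t>(
        std::min(activation_max, std::max(activation_min, v)));
  }
}

int main() {
  const uint8_t in[4] = {0, 100, 200, 255};
  uint8_t out[4];
  ActivationMinMaxAsym8(out, in, /*activation_min=*/50,
                        /*activation_max=*/210, 4);
  for (uint8_t v : out) std::printf("%u ", v);  // 50 100 200 210
  std::printf("\n");
  return 0;
}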
@@ -277,10 +326,10 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
     op_params.input_offset = input_offset;
     op_params.weights_offset = filter_offset;
     op_params.output_offset = output_offset;
-    op_params.output_multiplier = data->output_multiplier;
-    op_params.output_shift = -data->output_shift;
-    op_params.quantized_activation_min = data->output_activation_min;
-    op_params.quantized_activation_max = data->output_activation_max;
+    op_params.output_multiplier = data.output_multiplier;
+    op_params.output_shift = -data.output_shift;
+    op_params.quantized_activation_min = data.output_activation_min;
+    op_params.quantized_activation_max = data.output_activation_max;
     reference_ops::Conv(op_params, GetTensorShape(input),
                         GetTensorData<uint8_t>(input), GetTensorShape(filter),
                         GetTensorData<uint8_t>(filter), GetTensorShape(bias),
@@ -292,11 +341,12 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
 }

 void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
-                             TfLiteConvParams* params, OpData* data,
+                             TfLiteConvParams* params, const OpData& data,
                              const TfLiteTensor* input,
                              const TfLiteTensor* filter,
                              const TfLiteTensor* bias, TfLiteTensor* output,
                              TfLiteTensor* im2col) {
+  // TODO(b/154032858): Investigate removing extra copies.
   ConvParams op_params;
   op_params.input_offset = -input->params.zero_point;
   op_params.output_offset = output->params.zero_point;
@@ -304,14 +354,14 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
   op_params.stride_width = params->stride_width;
   op_params.dilation_height_factor = params->dilation_height_factor;
   op_params.dilation_width_factor = params->dilation_width_factor;
-  op_params.padding_values.height = data->padding.height;
-  op_params.padding_values.width = data->padding.width;
-  op_params.quantized_activation_min = data->output_activation_min;
-  op_params.quantized_activation_max = data->output_activation_max;
+  op_params.padding_values.height = data.padding.height;
+  op_params.padding_values.width = data.padding.width;
+  op_params.quantized_activation_min = data.output_activation_min;
+  op_params.quantized_activation_max = data.output_activation_max;

   reference_integer_ops::ConvPerChannel(
-      op_params, data->per_channel_output_multiplier,
-      data->per_channel_output_shift, GetTensorShape(input),
+      op_params, data.per_channel_output_multiplier,
+      data.per_channel_output_shift, GetTensorShape(input),
       GetTensorData<int8>(input), GetTensorShape(filter),
       GetTensorData<int8>(filter), GetTensorShape(bias),
       GetTensorData<int32>(bias), GetTensorShape(output),
@@ -319,7 +369,7 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
 }

 TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
-                       TfLiteConvParams* params, OpData* data,
+                       TfLiteConvParams* params, const OpData& data,
                        const TfLiteTensor* input, const TfLiteTensor* filter,
                        const TfLiteTensor* bias, TfLiteTensor* im2col,
                        TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
@@ -327,6 +377,7 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
   CalculateActivationRange(params->activation, &output_activation_min,
                            &output_activation_max);

+#if HIFI_VFPU
   if ((params->dilation_width_factor == 1) &&
       (params->dilation_height_factor == 1)) {
     const float *input_data, *filter_data;
@@ -344,10 +395,8 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,

     const int stride_width = params->stride_width;
     const int stride_height = params->stride_height;
-    const int dilation_width_factor = 1;
-    const int dilation_height_factor = 1;
-    const int pad_width = data->padding.width;
-    const int pad_height = data->padding.height;
+    const int pad_width = data.padding.width;
+    const int pad_height = data.padding.height;

     TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
     TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
     TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
@@ -366,13 +415,14 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
     const int output_width = output_shape.Dims(2);
     const int filter_depth = filter_shape.Dims(3);
     int err, output_data_format = 0;
-    void* p_scratch;
-    float *p_filter, *p_out_scratch;
+    uint8_t* p_scratch;
+    float* p_filter;
     // Calculate filter_depth_padded as next near multiple of 2
     int filter_depth_padded = (filter_depth + 1) & (~1);
     int out_length = output_height * output_width * output_depth;
+    int filter_size_padded = filter_height * filter_width * filter_depth_padded;
     int required_scratch, input_precision = PREC_F32;
-    int h, w, c;
+    int h, c;

     required_scratch = xa_nn_conv2d_std_getsize(
         input_height, input_depth, filter_height, filter_width, stride_height,
@@ -387,19 +437,11 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
     ALLOCATE_XTENSA_NNLIB_SCRATCH_MEM;
     p_scratch = xtensa_nnlib_scratch_buf;

-    p_filter = (float*)p_scratch;
-    p_out_scratch =
-        (float*)((uint8_t*)p_filter +
-                 ALIGNED_SIZE((sizeof(float) * filter_height * filter_width *
-                               filter_depth_padded * output_depth),
-                              8));
+    p_filter = reinterpret_cast<float*>(p_scratch);
+    p_scratch +=
+        ALIGNED_SIZE((sizeof(float) * filter_size_padded * output_depth), 8);
     required_scratch +=
-        ALIGNED_SIZE((sizeof(float) * filter_height * filter_width *
-                      filter_depth_padded * output_depth),
-                     8);
-    p_scratch = (float*)((uint8_t*)p_out_scratch +
-                         ALIGNED_SIZE(sizeof(float) * out_length, 8));
-    required_scratch += ALIGNED_SIZE(sizeof(float) * out_length, 8);
+        ALIGNED_SIZE((sizeof(float) * filter_size_padded * output_depth), 8);

     if (required_scratch > (int)XTENSA_NNLIB_MAX_SCRATCH_SIZE) {
       TF_LITE_KERNEL_LOG(context,
@@ -420,8 +462,7 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,

     for (int batch = 0; batch < batches; ++batch) {
       float* p_out_temp;
-      p_out_temp = (float*)&p_out_scratch[0];
-      p_out_temp = (float*)ALIGN_PTR(p_out_temp, 8);
+      p_out_temp = &output_data[batch * out_length];

       err = xa_nn_conv2d_std_f32(
           p_out_temp,
@@ -429,23 +470,26 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
           p_filter, bias_data, input_height, input_width, input_depth,
           filter_height, filter_width, output_depth, stride_width,
           stride_height, pad_width, pad_height, output_height, output_width,
-          output_data_format, p_scratch);
+          output_data_format, static_cast<void*>(p_scratch));

       CHECK_ERR_HIFI_NNLIB_KER(
           err, "conv2d_std_f32: xa_nn_conv2d_std_f32xf32 failed");

-      for (int i = 0; i < out_length; i++) {
-        float* p_temp;
-        p_temp = &output_data[batch * out_length];
-        ACTIVATION_MIN_MAX(float, p_temp[i], p_out_temp[i],
-                           output_activation_min, output_activation_max)
-      }
+      err = xa_nn_vec_activation_min_max_f32_f32(
+          p_out_temp, p_out_temp, output_activation_min, output_activation_max,
+          out_length);
+
+      CHECK_ERR_HIFI_NNLIB_KER(err,
+                               "xa_nn_vec_activation_min_max_f32_f32 failed");
     }
-  } else {
+  } else
+#endif /* HIFI_VFPU */
+  {
+    // TODO(b/154032858): Investigate removing extra copies.
     ConvParams op_params;
     op_params.padding_type = RuntimePaddingType(params->padding);
-    op_params.padding_values.width = data->padding.width;
-    op_params.padding_values.height = data->padding.height;
+    op_params.padding_values.width = data.padding.width;
+    op_params.padding_values.height = data.padding.height;
     op_params.stride_width = params->stride_width;
     op_params.stride_height = params->stride_height;
     op_params.dilation_width_factor = params->dilation_width_factor;
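The brace arrangement in the hunk above is deliberate: `} else` sits inside the #if HIFI_VFPU region while the opening `{` of the reference path sits outside it, so with the VFPU the reference code compiles as the else-branch of the dilation check, and without it the same block compiles as a plain unconditional scope. Reduced to a skeleton:

#include <cstdio>

#define HIFI_VFPU 1  // flip to 0 to see the other expansion

void Eval(bool fast_path_ok) {
#if HIFI_VFPU
  if (fast_path_ok) {
    std::printf("optimized NNLib path\n");
  } else
#endif /* HIFI_VFPU */
  {
    // With HIFI_VFPU this is the else-branch; without it, it always runs.
    std::printf("reference path\n");
  }
}

int main() {
  Eval(true);   // optimized path (when HIFI_VFPU is 1)
  Eval(false);  // reference path either way
  return 0;
}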
@@ -471,50 +515,20 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
   const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);

-  int input_width = input->dims->data[2];
-  int input_height = input->dims->data[1];
-  int filter_width = filter->dims->data[2];
-  int filter_height = filter->dims->data[1];
-  int output_width = output->dims->data[2];
-  int output_height = output->dims->data[1];
-
-  OpData data;
-
-  // All per-channel quantized tensors need valid zero point and scale arrays.
-  if (input->type == kTfLiteInt8) {
-    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
-                      kTfLiteAffineQuantization);
-
-    const auto* affine_quantization =
-        reinterpret_cast<TfLiteAffineQuantization*>(
-            filter->quantization.params);
-    TF_LITE_ENSURE(context, affine_quantization);
-    TF_LITE_ENSURE(context, affine_quantization->scale);
-    TF_LITE_ENSURE(context, affine_quantization->zero_point);
-
-    TF_LITE_ENSURE(context,
-                   affine_quantization->scale->size == 1 ||
-                       affine_quantization->scale->size ==
-                           filter->dims->data[kConvQuantizedDimension]);
-    TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
-                      affine_quantization->zero_point->size);
-  }
-
-  TF_LITE_ENSURE_STATUS(CalculateOpData(
-      context, node, params, input_width, input_height, filter_width,
-      filter_height, output_width, output_height, input->type, &data));
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData& data = *(static_cast<const OpData*>(node->user_data));

   switch (input->type) {  // Already know in/out types are same.
     case kTfLiteFloat32:
-      EvalFloat(context, node, params, &data, input, filter, bias, nullptr,
+      EvalFloat(context, node, params, data, input, filter, bias, nullptr,
                 nullptr, output);
       break;
     case kTfLiteInt8:
-      EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias,
+      EvalQuantizedPerChannel(context, node, params, data, input, filter, bias,
                               output, nullptr);
       break;
     case kTfLiteUInt8:
-      EvalQuantized(context, node, params, &data, input, filter, bias, nullptr,
+      EvalQuantized(context, node, params, data, input, filter, bias, nullptr,
                     nullptr, output);
       break;
     default:
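The net effect of the Eval hunk: everything that used to be recomputed on every invocation (shape reads, quantization validation, CalculateOpData into a stack-local OpData) now happens once in Prepare, and Eval just pulls the cached struct back out of node->user_data. The calling contract, reduced to a standalone model with the interpreter's role shrunk to three calls:

#include <cstdio>

struct Node {
  void* user_data;  // the interpreter stores Init's return value here
};

struct OpData {
  int output_multiplier;
};

void* Init() {
  static OpData slot;  // stands in for the persistent-arena allocation
  return &slot;
}

void Prepare(Node* node) {
  auto* data = static_cast<OpData*>(node->user_data);
  data->output_multiplier = 42;  // expensive setup, done once
}

void Eval(const Node* node) {
  const auto& data = *static_cast<const OpData*>(node->user_data);
  std::printf("eval with multiplier %d\n", data.output_multiplier);
}

int main() {
  Node node{};
  node.user_data = Init();                  // once per node
  Prepare(&node);                           // once per allocation
  for (int i = 0; i < 3; ++i) Eval(&node);  // many times
  return 0;
}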
@@ -528,9 +542,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace conv

 TfLiteRegistration Register_CONV_2D() {
-  return {/*init=*/nullptr,
+  return {/*init=*/conv::Init,
           /*free=*/nullptr,
-          /*prepare=*/nullptr,
+          /*prepare=*/conv::Prepare,
           /*invoke=*/conv::Eval,
           /*profiling_string=*/nullptr,
           /*builtin_code=*/0,
@ -1,24 +1,24 @@
|
|||||||
/******************************************************************************
|
/*******************************************************************************
|
||||||
* Copyright (C) 2019 Cadence Design Systems, Inc.
|
* Copyright (c) 2019-2020 Cadence Design Systems, Inc.
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
* a copy of this software and associated documentation files (the
|
* a copy of this software and associated documentation files (the
|
||||||
* "Software"), to use this Software with Cadence processor cores only and
|
* "Software"), to use this Software with Cadence processor cores only and
|
||||||
* not with any other processors and platforms, subject to
|
* not with any other processors and platforms, subject to
|
||||||
* the following conditions:
|
* the following conditions:
|
||||||
*
|
*
|
||||||
* The above copyright notice and this permission notice shall be included
|
* The above copyright notice and this permission notice shall be included
|
||||||
* in all copies or substantial portions of the Software.
|
* in all copies or substantial portions of the Software.
|
||||||
*
|
*
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
******************************************************************************/
|
|
||||||
|
|
||||||
|
******************************************************************************/
|
||||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
@@ -45,7 +45,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/padding.h"
-#include "xtensa_tf_micro_common.h"
+#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h"

 namespace tflite {
 namespace ops {
@@ -57,8 +57,6 @@ constexpr int kInputTensor = 0;
 constexpr int kFilterTensor = 1;
 constexpr int kBiasTensor = 2;
 constexpr int kOutputTensor = 0;
-// Per channel quantization is not needed for any model on xtensa.
-constexpr int kMaxChannels = 256;

 // Depthwise conv is quantized along dimension 3:
 // https://www.tensorflow.org/lite/performance/quantization_spec
@ -72,10 +70,8 @@ struct OpData {
|
|||||||
int output_shift;
|
int output_shift;
|
||||||
|
|
||||||
// Per channel output multiplier and shift.
|
// Per channel output multiplier and shift.
|
||||||
// (b/141139247): Allocate these dynamically when possible.
|
int32_t* per_channel_output_multiplier;
|
||||||
int32_t per_channel_output_multiplier[kMaxChannels];
|
int32_t* per_channel_output_shift;
|
||||||
int32_t per_channel_output_shift[kMaxChannels];
|
|
||||||
|
|
||||||
// The range of the fused activation layer. For example for kNone and
|
// The range of the fused activation layer. For example for kNone and
|
||||||
// uint8_t these would be 0 and 255.
|
// uint8_t these would be 0 and 255.
|
||||||
int32_t output_activation_min;
|
int32_t output_activation_min;
|
||||||
@ -107,26 +103,88 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
|
|||||||
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||||
int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
|
int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
|
||||||
|
|
||||||
TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
|
return tflite::PopulateConvolutionQuantizationParams(
|
||||||
context, input, filter, bias, output, params->activation,
|
context, input, filter, bias, output, params->activation,
|
||||||
&data->output_multiplier, &data->output_shift,
|
&data->output_multiplier, &data->output_shift,
|
||||||
&data->output_activation_min, &data->output_activation_max,
|
&data->output_activation_min, &data->output_activation_max,
|
||||||
data->per_channel_output_multiplier,
|
data->per_channel_output_multiplier,
|
||||||
reinterpret_cast<int*>(data->per_channel_output_shift), num_channels));
|
reinterpret_cast<int*>(data->per_channel_output_shift), num_channels);
|
||||||
}
|
}
|
||||||
return kTfLiteOk;
|
return kTfLiteOk;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  void* data = nullptr;
+  if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
+      kTfLiteError) {
+    return nullptr;
+  }
+  return data;
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+
+  auto* params =
+      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
+  OpData* data = static_cast<OpData*>(node->user_data);
+
+  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
+
+  const TfLiteType data_type = input->type;
+  int width = SizeOfDimension(input, 2);
+  int height = SizeOfDimension(input, 1);
+  int filter_width = SizeOfDimension(filter, 2);
+  int filter_height = SizeOfDimension(filter, 1);
+
+  // Per channel quantization is only needed for int8 inference. For other
+  // quantized types, only a single scale and zero point is needed.
+  const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
+  // Dynamically allocate per-channel quantization parameters.
+  TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer(
+      context, num_channels * sizeof(int32_t),
+      reinterpret_cast<void**>(&data->per_channel_output_multiplier)));
+  TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer(
+      context, num_channels * sizeof(int32_t),
+      reinterpret_cast<void**>(&data->per_channel_output_shift)));
+
+  // All per-channel quantized tensors need valid zero point and scale arrays.
+  if (input->type == kTfLiteInt8) {
+    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
+                      kTfLiteAffineQuantization);
+
+    const auto* affine_quantization =
+        reinterpret_cast<TfLiteAffineQuantization*>(
+            filter->quantization.params);
+    TF_LITE_ENSURE(context, affine_quantization);
+    TF_LITE_ENSURE(context, affine_quantization->scale);
+    TF_LITE_ENSURE(context, affine_quantization->zero_point);
+    TF_LITE_ENSURE(
+        context, affine_quantization->scale->size == 1 ||
+                     affine_quantization->scale->size ==
+                         filter->dims->data[kDepthwiseConvQuantizedDimension]);
+    TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
+                      affine_quantization->zero_point->size);
+  }
+
+  return CalculateOpData(context, node, params, width, height, filter_width,
+                         filter_height, data_type, data);
+}
+
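The allocations in Init and Prepare above rely on the arena semantics of this TFLM snapshot: persistent buffers are carved out once and never freed, so the OpData and the per-channel arrays stay valid for every later Eval call. A minimal sketch of the same pattern, assuming only the AllocatePersistentBuffer signature used above (the struct and helper names here are invented for illustration):

// Sketch only: per-kernel persistent state in TFLM (this snapshot's API).
// AllocatePersistentBuffer carves bytes from the arena and never frees them,
// so pointers stored here stay valid across all Eval() invocations.
struct PerChannelQuant {
  int32_t* multiplier;  // one Q31 multiplier per output channel
  int32_t* shift;       // one power-of-two shift per output channel
};

static TfLiteStatus AllocPerChannel(TfLiteContext* ctx, int num_channels,
                                    PerChannelQuant* q) {
  TF_LITE_ENSURE_STATUS(ctx->AllocatePersistentBuffer(
      ctx, num_channels * sizeof(int32_t),
      reinterpret_cast<void**>(&q->multiplier)));
  return ctx->AllocatePersistentBuffer(
      ctx, num_channels * sizeof(int32_t),
      reinterpret_cast<void**>(&q->shift));
}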
 TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
-                       TfLiteDepthwiseConvParams* params, OpData* data,
+                       TfLiteDepthwiseConvParams* params, const OpData* data,
                        const TfLiteTensor* input, const TfLiteTensor* filter,
                        const TfLiteTensor* bias, TfLiteTensor* output) {
   float output_activation_min, output_activation_max;
   CalculateActivationRange(params->activation, &output_activation_min,
                            &output_activation_max);
 
+#if HIFI_VFPU
   if ((params->dilation_width_factor == 1) &&
       (params->dilation_height_factor == 1)) {
     const float *input_data, *filter_data, *bias_data;

@@ -143,10 +201,6 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
 
     const int stride_width = params->stride_width;
     const int stride_height = params->stride_height;
-    const int dilation_width_factor = 1;
-    const int dilation_height_factor = 1;
-    // const int dilation_width_factor = params->dilation_width_factor;;
-    // const int dilation_height_factor = params->dilation_height_factor;
     const int pad_width = data->padding.width;
     const int pad_height = data->padding.height;
     const int depth_multiplier = params->depth_multiplier;

@@ -168,7 +222,7 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
     TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
 
     int32_t err, input_data_format = 0, output_data_format = 0;
-    void* p_scratch;
+    uint8_t* p_scratch;
     float* p_filter;
     int filter_depth_padded, filter_size_padded, required_scratch;
     int input_precision = PREC_F32;

@@ -198,9 +252,8 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
       return kTfLiteError;
     }
 
-    p_filter = (float*)p_scratch;
-    p_scratch = (void*)((uint8_t*)p_filter +
-                        ALIGNED_SIZE(sizeof(float) * filter_size_padded, 8));
+    p_filter = reinterpret_cast<float*>(p_scratch);
+    p_scratch += ALIGNED_SIZE(sizeof(float) * filter_size_padded, 8);
 
     for (h = 0; h < filter_height * filter_width; h++) {
       for (c = 0; c < filter_depth; c++) {

@@ -220,37 +273,22 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
           input_height, input_width, input_depth, filter_height, filter_width,
           depth_multiplier, stride_width, stride_height, pad_width, pad_height,
           output_height, output_width, input_data_format, output_data_format,
-          p_scratch);
+          static_cast<void*>(p_scratch));
 
       CHECK_ERR_HIFI_NNLIB_KER(
           err, "DepthwiseConvFloat: xa_nn_conv2d_depthwise_f32 failed");
     }
 
-    // pre loop for activation_min_max to handle alignment
     int out_length = batches * output_height * output_width * output_depth;
-    uint32 p_unalign_val = (uint32)output_data, p_align_val;
-    p_align_val = (p_unalign_val + 7) & (~7);
-
-    int pre_loop_count = p_align_val - p_unalign_val;
-    pre_loop_count = MIN(pre_loop_count, out_length);
-
-    for (i = 0; i < pre_loop_count; i++) {
-      ACTIVATION_MIN_MAX(float, output_data[i], output_data[i],
-                         output_activation_min, output_activation_max)
-    }
-
-    out_length = out_length - pre_loop_count;
-
-    if (out_length) {
-      err = xa_nn_vec_activation_min_max_f32_f32(
-          &output_data[i], &output_data[i], output_activation_min,
-          output_activation_max, out_length);
-
-      CHECK_ERR_HIFI_NNLIB_KER(
-          err,
-          "DepthwiseConvFloat: xa_nn_vec_activation_min_max_f32_f32 failed");
-    }
-  } else {
+    err = xa_nn_vec_activation_min_max_f32_f32(
+        output_data, output_data, output_activation_min, output_activation_max,
+        out_length);
+
+    CHECK_ERR_HIFI_NNLIB_KER(
+        err, "DepthwiseConvFloat: xa_nn_vec_activation_min_max_f32_f32 failed");
+  } else
+#endif /* HIFI_VFPU */
+  {
     tflite::DepthwiseParams op_params;
     // Padding type is ignored, but still set.
     op_params.padding_type = PaddingType::kSame;
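The deleted pre-loop existed only to reach an 8-byte-aligned output pointer before calling the vector primitive; the new code applies xa_nn_vec_activation_min_max_f32_f32 to the whole buffer in one call. Functionally the call is an element-wise clamp. A reference-semantics sketch in plain C++ (not the NNLib implementation, which is vectorized):

// What xa_nn_vec_activation_min_max_f32_f32(out, in, min, max, n) computes,
// expressed as scalar code: clamp every element into [act_min, act_max].
static void ActivationMinMaxF32(float* out, const float* in, float act_min,
                                float act_max, int n) {
  for (int i = 0; i < n; ++i) {
    float v = in[i];
    out[i] = v < act_min ? act_min : (v > act_max ? act_max : v);
  }
}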
@@ -274,8 +312,8 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
 }
 
 void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
-                             TfLiteDepthwiseConvParams* params, OpData* data,
-                             const TfLiteTensor* input,
+                             TfLiteDepthwiseConvParams* params,
+                             const OpData* data, const TfLiteTensor* input,
                              const TfLiteTensor* filter,
                              const TfLiteTensor* bias, TfLiteTensor* output) {
   DepthwiseParams op_params;

@@ -290,7 +328,7 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
   op_params.input_offset = -input->params.zero_point;
   op_params.weights_offset = 0;
   op_params.output_offset = output->params.zero_point;
-  // (b/130439627): Use calculated value for clamping.
+  // TODO(b/130439627): Use calculated value for clamping.
   op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
   op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
 

@@ -304,8 +342,8 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
 }
 
 TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
-                           TfLiteDepthwiseConvParams* params, OpData* data,
-                           const TfLiteTensor* input,
+                           TfLiteDepthwiseConvParams* params,
+                           const OpData* data, const TfLiteTensor* input,
                            const TfLiteTensor* filter, const TfLiteTensor* bias,
                            TfLiteTensor* output) {
   const int32_t input_offset = -input->params.zero_point;
@@ -314,9 +352,9 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
 
   if ((params->dilation_width_factor == 1) &&
       (params->dilation_height_factor == 1)) {
-    const uint8 *input_data, *filter_data;
+    const uint8_t *input_data, *filter_data;
     const int32_t* bias_data;
-    uint8* output_data;
+    uint8_t* output_data;
     const RuntimeShape& input_shape = GetTensorShape(input);
     const RuntimeShape& filter_shape = GetTensorShape(filter);
     const RuntimeShape& output_shape = GetTensorShape(output);

@@ -329,10 +367,6 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
 
     const int stride_width = params->stride_width;
     const int stride_height = params->stride_height;
-    const int dilation_width_factor = 1;
-    const int dilation_height_factor = 1;
-    // const int dilation_width_factor = params->dilation_width_factor;
-    // const int dilation_height_factor = params->dilation_height_factor;
     const int pad_width = data->padding.width;
     const int pad_height = data->padding.height;
     const int depth_multiplier = params->depth_multiplier;

@@ -360,11 +394,11 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
     TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
 
     int32_t err, i, input_data_format = 0, output_data_format = 0;
-    void* p_scratch;
-    uint8* p_filter;
+    uint8_t* p_scratch;
+    uint8_t* p_filter;
     int filter_depth_padded, filter_size_padded, required_scratch;
     int input_precision = PREC_ASYM8;
-    int h, c;
+    int h;
 
     ALLOCATE_XTENSA_NNLIB_SCRATCH_MEM;
     p_scratch = xtensa_nnlib_scratch_buf;

@@ -390,18 +424,15 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
       return kTfLiteError;
     }
 
-    p_filter = (uint8*)p_scratch;
-    p_scratch = (void*)(p_filter +
-                        ALIGNED_SIZE(sizeof(uint8_t) * filter_size_padded, 8));
+    p_filter = p_scratch;
+    p_scratch += ALIGNED_SIZE(sizeof(uint8_t) * filter_size_padded, 8);
+    int pad_value = filter_depth_padded - filter_depth;
 
     for (h = 0; h < filter_height * filter_width; h++) {
-      for (c = 0; c < filter_depth; c++) {
-        p_filter[h * filter_depth_padded + c] =
-            filter_data[h * filter_depth + c];
-      }
-      for (c = filter_depth; c < filter_depth_padded; c++) {
-        p_filter[h * filter_depth_padded + c] = -filter_offset;
-      }
+      memcpy(&p_filter[h * filter_depth_padded], &filter_data[h * filter_depth],
+             filter_depth);
+      memset(&p_filter[h * filter_depth_padded + filter_depth], -filter_offset,
+             pad_value);
     }
 
     for (i = 0; i < batches; i++) {

@@ -413,37 +444,22 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
           depth_multiplier, stride_width, stride_height, pad_width, pad_height,
           output_height, output_width, input_offset, filter_offset,
           output_multiplier, output_shift, output_offset, input_data_format,
-          output_data_format, p_scratch);
+          output_data_format, static_cast<void*>(p_scratch));
 
       CHECK_ERR_HIFI_NNLIB_KER(
           err, "DepthwiseConvAsym8: xa_nn_conv2d_depthwise_asym8xasym8 failed");
     }
 
-    // pre loop for activation_min_max to handle alignment
     int out_length = batches * output_height * output_width * output_depth;
-    uint32 p_unalign_val = (uint32)output_data, p_align_val;
-    p_align_val = (p_unalign_val + 7) & (~7);
-
-    int pre_loop_count = p_align_val - p_unalign_val;
-    pre_loop_count = MIN(pre_loop_count, out_length);
-
-    for (i = 0; i < pre_loop_count; i++) {
-      ACTIVATION_MIN_MAX_ASYM8(output_data[i], output_data[i],
-                               output_activation_min, output_activation_max)
-    }
-
-    out_length = out_length - pre_loop_count;
-
-    if (out_length > 0) {
-      err = xa_nn_vec_activation_min_max_asym8_asym8(
-          &output_data[i], &output_data[i], output_activation_min,
-          output_activation_max, out_length);
-
-      CHECK_ERR_HIFI_NNLIB_KER(
-          err,
-          "DepthwiseConvAsym8: xa_nn_vec_activation_min_max_asym8_asym8 "
-          "failed");
-    }
+    err = xa_nn_vec_activation_min_max_asym8_asym8(
+        output_data, output_data, output_activation_min, output_activation_max,
+        out_length);
+
+    CHECK_ERR_HIFI_NNLIB_KER(
+        err,
+        "DepthwiseConvAsym8: xa_nn_vec_activation_min_max_asym8_asym8 "
+        "failed");
   } else {
     tflite::DepthwiseParams op_params;
     // Padding type is ignored, but still set.
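The rewritten copy loop pads each filter tap's channel dimension out to filter_depth_padded, and fills the tail with -filter_offset so that once the conv kernel adds the filter zero point back, the padded weights contribute exactly zero. Note that memset stores only the low byte of its int argument, which matches the uint8 weight layout here. A condensed sketch of the same step, assuming <cstring> is available:

// Sketch: pad each (h,w) tap of a uint8 depthwise filter from `depth` to
// `depth_padded` channels. Padded lanes hold -zero_point (mod 256) so that
// (w + weights_offset) == 0 for them inside the quantized conv kernel.
static void PadFilterChannels(uint8_t* dst, const uint8_t* src, int taps,
                              int depth, int depth_padded,
                              int32_t zero_point) {
  for (int t = 0; t < taps; ++t) {
    memcpy(&dst[t * depth_padded], &src[t * depth], depth);
    memset(&dst[t * depth_padded + depth], static_cast<int>(-zero_point),
           depth_padded - depth);
  }
}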
@@ -474,8 +490,12 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+
   auto* params =
       reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
+  const OpData& data = *(static_cast<const OpData*>(node->user_data));
 
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);

@@ -483,38 +503,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* bias =
       (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr;
 
-  const TfLiteType data_type = input->type;
-  int width = SizeOfDimension(input, 2);
-  int height = SizeOfDimension(input, 1);
-  int filter_width = SizeOfDimension(filter, 2);
-  int filter_height = SizeOfDimension(filter, 1);
-
-  OpData data;
-
-  // All per-channel quantized tensors need valid zero point and scale arrays.
-  if (input->type == kTfLiteInt8) {
-    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
-                      kTfLiteAffineQuantization);
-
-    const auto* affine_quantization =
-        reinterpret_cast<TfLiteAffineQuantization*>(
-            filter->quantization.params);
-    TF_LITE_ENSURE(context, affine_quantization);
-    TF_LITE_ENSURE(context, affine_quantization->scale);
-    TF_LITE_ENSURE(context, affine_quantization->zero_point);
-    TF_LITE_ENSURE(
-        context, affine_quantization->scale->size == 1 ||
-                     affine_quantization->scale->size ==
-                         filter->dims->data[kDepthwiseConvQuantizedDimension]);
-    TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
-                      affine_quantization->zero_point->size);
-  }
-
-  TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height,
-                                        filter_width, filter_height, data_type,
-                                        &data));
-
-  // (aselle): Consider whether float conv and quantized conv should be
+  // TODO(aselle): Consider whether float conv and quantized conv should be
   // separate ops to avoid dispatch overhead here.
   switch (input->type) {  // Already know in/out types are same.
     case kTfLiteFloat32:

@@ -538,9 +527,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace depthwise_conv
 
 TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
-  return {/*init=*/nullptr,
+  return {/*init=*/depthwise_conv::Init,
           /*free=*/nullptr,
-          /*prepare=*/nullptr,
+          /*prepare=*/depthwise_conv::Prepare,
           /*invoke=*/depthwise_conv::Eval,
           /*profiling_string=*/nullptr,
           /*builtin_code=*/0,
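With Init and Prepare wired into the registration, the interpreter now drives the kernel as Init, then Prepare (both once, during allocation), then Eval on every invocation, so the per-tensor validation and quantization math move out of the hot path. A minimal sketch of a registration with this shape; MyInit/MyPrepare/MyEval are placeholder names, and the assignment style is used to avoid depending on field order:

// Sketch: the TfLiteRegistration lifecycle this change enables.
// MyInit allocates OpData, MyPrepare fills it, MyEval reads it via
// node->user_data on every Invoke().
TfLiteRegistration MakeRegistration() {
  TfLiteRegistration r = {};  // null out all callbacks first
  r.init = MyInit;            // once per node: allocate persistent OpData
  r.prepare = MyPrepare;      // at AllocateTensors(): validate, fill OpData
  r.invoke = MyEval;          // per inference: read-only use of OpData
  return r;
}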
@@ -1,24 +1,24 @@
-/******************************************************************************
+/*******************************************************************************
- * Copyright (C) 2019 Cadence Design Systems, Inc.
+ * Copyright (c) 2019-2020 Cadence Design Systems, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining
  * a copy of this software and associated documentation files (the
  * "Software"), to use this Software with Cadence processor cores only and
  * not with any other processors and platforms, subject to
  * the following conditions:
  *
  * The above copyright notice and this permission notice shall be included
  * in all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- ******************************************************************************/
 
 
+ ******************************************************************************/
 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");

@@ -39,7 +39,7 @@ limitations under the License.
 #include "tensorflow/lite/c/common.h"
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
-#include "xtensa_tf_micro_common.h"
+#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h"
 
 namespace tflite {
 namespace ops {

@@ -53,6 +53,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+#if HIFI_VFPU
   int err;
   const float* inp_data_ptr;
   float* out_data_ptr;

@@ -66,6 +67,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   err = xa_nn_elm_floor_f32_f32(out_data_ptr, inp_data_ptr, flat_size);
 
   CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_elm_floor_f32_f32 failed");
+#else
+  reference_ops::Floor(GetTensorShape(input), GetTensorData<float>(input),
+                       GetTensorShape(output), GetTensorData<float>(output));
+#endif /* HIFI_VFPU */
   return kTfLiteOk;
 }
 }  // namespace floor
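The #if HIFI_VFPU guard added here is the recurring pattern across these kernels: when the core is built with the HiFi vector FPU, the float path calls the NNLib primitive; otherwise it falls back to the portable reference op at compile time. Schematically, assuming the surrounding kernel's includes (<cmath> for the fallback) and the macros from xtensa_tf_micro_common.h:

// Sketch of the compile-time dispatch used throughout these kernels.
// HIFI_VFPU is assumed to be set (0/1) by the platform build flags.
TfLiteStatus FloorLike(const float* in, float* out, int n) {
#if HIFI_VFPU
  int err = xa_nn_elm_floor_f32_f32(out, in, n);  // NNLib fast path
  CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_elm_floor_f32_f32 failed");
#else
  for (int i = 0; i < n; ++i) out[i] = std::floor(in[i]);  // portable path
#endif
  return kTfLiteOk;
}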
@@ -1,24 +1,24 @@
-/******************************************************************************
+/*******************************************************************************
- * Copyright (C) 2019 Cadence Design Systems, Inc.
+ * Copyright (c) 2019-2020 Cadence Design Systems, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining
  * a copy of this software and associated documentation files (the
  * "Software"), to use this Software with Cadence processor cores only and
  * not with any other processors and platforms, subject to
  * the following conditions:
  *
  * The above copyright notice and this permission notice shall be included
  * in all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- ******************************************************************************/
 
 
+ ******************************************************************************/
 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");

@@ -43,7 +43,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
-#include "xtensa_tf_micro_common.h"
+#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h"
 
 namespace tflite {
 namespace ops {

@@ -70,7 +70,7 @@ constexpr int kBiasTensor = 2;
 constexpr int kOutputTensor = 0;
 
 TfLiteStatus CalculateOpData(TfLiteContext* context,
-                             TfLiteFullyConnectedParams* params,
+                             TfLiteFusedActivation activation,
                              TfLiteType data_type, const TfLiteTensor* input,
                              const TfLiteTensor* filter,
                              const TfLiteTensor* bias, TfLiteTensor* output,

@@ -84,7 +84,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context,
     QuantizeMultiplier(real_multiplier, &data->output_multiplier, &exponent);
     data->output_shift = -exponent;
     TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
-        context, params->activation, output, &data->output_activation_min,
+        context, activation, output, &data->output_activation_min,
         &data->output_activation_max));
   }
   return status;
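For the quantized paths, CalculateOpData folds the three tensor scales into a single fixed-point rescale factor, real_multiplier = input_scale * filter_scale / output_scale, which QuantizeMultiplier splits into a Q31 mantissa and a power-of-two exponent (stored negated as output_shift). A worked sketch; the scales are invented for illustration:

// Sketch: how a floating-point rescale factor becomes (multiplier, exponent).
double input_scale = 0.5, filter_scale = 0.25, output_scale = 1.0;
double real_multiplier = input_scale * filter_scale / output_scale;  // 0.125
int32_t output_multiplier;  // Q31 mantissa in [2^30, 2^31)
int exponent;               // power-of-two part; here -2, since
                            // 0.125 == 0.5 * 2^-2
QuantizeMultiplier(real_multiplier, &output_multiplier, &exponent);
// The kernel then rescales an int32 accumulator as approximately
//   acc * real_multiplier == (acc * output_multiplier) * 2^(exponent - 31),
// with output_shift stored as -exponent in this file.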
@@ -92,20 +92,50 @@ TfLiteStatus CalculateOpData(TfLiteContext* context,
 
 }  // namespace
 
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  void* data = nullptr;
+  if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
+      kTfLiteError) {
+    return nullptr;
+  }
+  return data;
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+
+  OpData* data = static_cast<OpData*>(node->user_data);
+  const auto params =
+      static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
+
+  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
+  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+  TF_LITE_ENSURE_EQ(context, input->type, output->type);
+  TF_LITE_ENSURE_MSG(context, input->type == filter->type,
+                     "Hybrid models are not supported on TFLite Micro.");
+
+  return CalculateOpData(context, params->activation, input->type, input,
+                         filter, bias, output, data);
+}
+
 TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
-                               TfLiteFullyConnectedParams* params, OpData* data,
-                               const TfLiteTensor* input,
+                               const OpData& data, const TfLiteTensor* input,
                                const TfLiteTensor* filter,
                                const TfLiteTensor* bias, TfLiteTensor* output) {
-  FullyConnectedParams op_params;
+  tflite::FullyConnectedParams op_params;
   op_params.input_offset = -input->params.zero_point;
   op_params.weights_offset = -filter->params.zero_point;
   op_params.output_offset = output->params.zero_point;
-  op_params.output_multiplier = data->output_multiplier;
-  // (b/138810107): Figure out whether output shift should be inverted
-  op_params.output_shift = -data->output_shift;
-  op_params.quantized_activation_min = data->output_activation_min;
-  op_params.quantized_activation_max = data->output_activation_max;
+  op_params.output_multiplier = data.output_multiplier;
+  // TODO(b/138810107): Figure out whether output shift should be inverted
+  op_params.output_shift = -data.output_shift;
+  op_params.quantized_activation_min = data.output_activation_min;
+  op_params.quantized_activation_max = data.output_activation_max;
 
   reference_integer_ops::FullyConnected(
       op_params, GetTensorShape(input), GetTensorData<int8_t>(input),

@@ -116,8 +146,7 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
 }
 
 TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
-                           TfLiteFullyConnectedParams* params, OpData* data,
-                           const TfLiteTensor* input,
+                           const OpData& data, const TfLiteTensor* input,
                            const TfLiteTensor* filter, const TfLiteTensor* bias,
                            TfLiteTensor* output) {
   const int32_t input_offset = -input->params.zero_point;

@@ -128,11 +157,11 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
   op_params.input_offset = input_offset;
   op_params.weights_offset = filter_offset;
   op_params.output_offset = output_offset;
-  op_params.output_multiplier = data->output_multiplier;
+  op_params.output_multiplier = data.output_multiplier;
   // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
-  op_params.output_shift = -data->output_shift;
-  op_params.quantized_activation_min = data->output_activation_min;
-  op_params.quantized_activation_max = data->output_activation_max;
+  op_params.output_shift = -data.output_shift;
+  op_params.quantized_activation_min = data.output_activation_min;
+  op_params.quantized_activation_max = data.output_activation_max;
 
 #define TF_LITE_FULLY_CONNECTED(output_data_type) \
   reference_ops::FullyConnected( \

@@ -162,11 +191,12 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
       CHECK_ERR_HIFI_NNLIB_KER(
           ret, "xa_nn_fully_connected_asym8xasym8_asym8 failed");
     }
-      for (int i = 0; i < batches * out_depth; i++) {
-        ACTIVATION_MIN_MAX_ASYM8(p_out[i], p_out[i],
-                                 data->output_activation_min,
-                                 data->output_activation_max)
-      }
+      ret = xa_nn_vec_activation_min_max_asym8_asym8(
+          p_out, p_out, data.output_activation_min, data.output_activation_max,
+          batches * out_depth);
+
+      CHECK_ERR_HIFI_NNLIB_KER(
+          ret, "xa_nn_vec_activation_min_max_asym8_asym8 failed");
       break;
     }
     case kTfLiteInt16:

@@ -182,15 +212,16 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
 }
 
 TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
-                       TfLiteFullyConnectedParams* params, OpData* data,
+                       TfLiteFusedActivation activation,
                        const TfLiteTensor* input, const TfLiteTensor* filter,
                        const TfLiteTensor* bias, TfLiteTensor* output) {
   float output_activation_min, output_activation_max;
-  CalculateActivationRange(params->activation, &output_activation_min,
+  CalculateActivationRange(activation, &output_activation_min,
                            &output_activation_max);
   tflite::FullyConnectedParams op_params;
   op_params.float_activation_min = output_activation_min;
   op_params.float_activation_max = output_activation_max;
+#if HIFI_VFPU
   int ret, b, weight_depth, out_depth, batches;
   weight_depth =
       GetTensorShape(filter).Dims(GetTensorShape(filter).DimensionsCount() - 1);

@@ -208,43 +239,48 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
     CHECK_ERR_HIFI_NNLIB_KER(ret, "xa_nn_fully_connected_f32 failed.");
   }
   float* p_out = GetTensorData<float>(output);
-  for (int i = 0; i < batches * out_depth; i++) {
-    ACTIVATION_MIN_MAX(float, p_out[i], p_out[i], output_activation_min,
-                       output_activation_max)
-  }
+  ret = xa_nn_vec_activation_min_max_f32_f32(
+      p_out, p_out, output_activation_min, output_activation_max,
+      batches * out_depth);
+  CHECK_ERR_HIFI_NNLIB_KER(ret, "xa_nn_vec_activation_min_max_f32_f32 failed");
+#else
+  tflite::reference_ops::FullyConnected(
+      op_params, GetTensorShape(input), GetTensorData<float>(input),
+      GetTensorShape(filter), GetTensorData<float>(filter),
+      GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
+      GetTensorData<float>(output));
+#endif /* HIFI_VFPU */
   return kTfLiteOk;
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
-  auto* params =
-      reinterpret_cast<TfLiteFullyConnectedParams*>(node->builtin_data);
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+  const auto* params =
+      static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
 
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
   const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
   const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
 
-  TfLiteType data_type = input->type;
-  OpData local_data_object;
-  OpData* data = &local_data_object;
-  TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, data_type, input,
-                                        filter, bias, output, data));
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData& data = *(static_cast<const OpData*>(node->user_data));
 
-  switch (filter->type) {  // Already know in/out types are same.
+  // Checks in Prepare ensure input, output and filter types are all the same.
+  switch (input->type) {
     case kTfLiteFloat32:
-      return EvalFloat(context, node, params, data, input, filter, bias,
+      return EvalFloat(context, node, params->activation, input, filter, bias,
                        output);
     case kTfLiteInt8:
-      return EvalQuantizedInt8(context, node, params, data, input, filter, bias,
+      return EvalQuantizedInt8(context, node, data, input, filter, bias,
                                output);
 
     case kTfLiteUInt8:
-      return EvalQuantized(context, node, params, data, input, filter, bias,
-                           output);
+      return EvalQuantized(context, node, data, input, filter, bias, output);
 
     default:
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(filter->type), filter->type);
+                         TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
   }
   return kTfLiteOk;

@@ -253,9 +289,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace fully_connected
 
 TfLiteRegistration Register_FULLY_CONNECTED() {
-  return {/*init=*/nullptr,
+  return {/*init=*/fully_connected::Init,
          /*free=*/nullptr,
-          /*prepare=*/nullptr,
+          /*prepare=*/fully_connected::Prepare,
          /*invoke=*/fully_connected::Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
@@ -1,24 +1,24 @@
-/******************************************************************************
+/*******************************************************************************
- * Copyright (C) 2019 Cadence Design Systems, Inc.
+ * Copyright (c) 2019-2020 Cadence Design Systems, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining
  * a copy of this software and associated documentation files (the
  * "Software"), to use this Software with Cadence processor cores only and
  * not with any other processors and platforms, subject to
  * the following conditions:
  *
  * The above copyright notice and this permission notice shall be included
  * in all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- ******************************************************************************/
 
 
+ ******************************************************************************/
 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");

@@ -34,32 +34,68 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/lite/kernels/internal/reference/logistic.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
 
 #include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/common.h"
 #include "tensorflow/lite/kernels/internal/common.h"
 #include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/logistic.h"
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/op_macros.h"
-#include "xtensa_tf_micro_common.h"
+#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h"
 
 namespace tflite {
 namespace ops {
 namespace micro {
 namespace activations {
+namespace {
 constexpr int kInputTensor = 0;
 constexpr int kOutputTensor = 0;
 
-TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+struct OpData {
+  int32_t input_zero_point;
+  int32_t input_range_radius;
+  int32_t input_multiplier;
+  int input_left_shift;
+};
+
+TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context,
+                                       TfLiteNode* node, OpData* data) {
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
 
+  TF_LITE_ENSURE_EQ(context, input->type, output->type);
+  if (input->type == kTfLiteInt8) {
+    TF_LITE_ENSURE_EQ(context, output->params.zero_point,
+                      std::numeric_limits<int8_t>::min());
+
+    static constexpr int kInputIntegerBits = 4;
+    const double input_real_multiplier =
+        static_cast<double>(input->params.scale) *
+        static_cast<double>(1 << (31 - kInputIntegerBits));
+
+    const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
+    data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
+
+    data->input_range_radius =
+        CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
+  }
+  return kTfLiteOk;
+}
+}  // namespace
+
+TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  OpData data;
+  CalculateArithmeticOpData(context, node, &data);
+
   if (input->type == kTfLiteFloat32) {
     switch (output->type) {
       case kTfLiteFloat32: {
+#if HIFI_VFPU
         int err;
         const float* inp_data_ptr;
         float* out_data_ptr;
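CalculateArithmeticOpData prepares the int8 sigmoid by mapping the real input scale into fixed point: input_real_multiplier = input_scale * 2^(31 - kInputIntegerBits) is split by std::frexp into a mantissa q in [0.5, 1) and input_left_shift, and q is stored as a Q31 multiplier. A worked numeric example, assuming <cmath> and an illustrative input scale of 1/16:

// Sketch: deriving the fixed-point input parameters for int8 logistic.
constexpr int kInputIntegerBits = 4;  // matches the code above
double input_scale = 0.0625;          // illustrative only
double input_real_multiplier =
    input_scale * static_cast<double>(1 << (31 - kInputIntegerBits));  // 2^23
int input_left_shift;
double q = std::frexp(input_real_multiplier, &input_left_shift);
// q == 0.5 and input_left_shift == 24, since 2^23 == 0.5 * 2^24.
int32_t input_multiplier = static_cast<int32_t>(std::round(q * (1ll << 31)));
// input_multiplier == 1 << 30, the Q31 representation of 0.5.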
@@ -73,6 +109,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
         err = xa_nn_vec_sigmoid_f32_f32(out_data_ptr, inp_data_ptr, flat_size);
 
         CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_sigmoid_f32_f32 failed");
+#else
+        reference_ops::Logistic(
+            GetTensorShape(input), GetTensorData<float>(input),
+            GetTensorShape(output), GetTensorData<float>(output));
+#endif /* HIFI_VFPU */
         return kTfLiteOk;
       }
       default:

@@ -84,11 +125,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   } else if (input->type == kTfLiteInt8) {
     switch (output->type) {
       case kTfLiteInt8: {
-        reference_ops::Logistic(
-            GetTensorShape(input), GetTensorData<int8_t>(input),
-            input->params.scale, input->params.zero_point,
-            GetTensorShape(output), GetTensorData<int8_t>(output),
-            output->params.scale, output->params.zero_point);
+        reference_integer_ops::Logistic(
+            input->params.zero_point, data.input_range_radius,
+            data.input_multiplier, data.input_left_shift,
+            NumElements(input->dims), GetTensorData<int8_t>(input),
+            GetTensorData<int8_t>(output));
         return kTfLiteOk;
       }
       default:

@@ -98,7 +139,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       return kTfLiteError;
     }
   } else {
-    // (b/141211002): Also support other data types once we have supported
+    // TODO(b/141211002): Also support other data types once we have supported
     // temporary tensors in TFLM.
     TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
                        TfLiteTypeGetName(input->type),

@@ -114,7 +155,7 @@ TfLiteRegistration Register_LOGISTIC() {
   return {/*init=*/nullptr,
          /*free=*/nullptr,
          /*prepare=*/nullptr,
-          /*invoke=*/activations::Eval,
+          /*invoke=*/activations::LogisticEval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
tensorflow/lite/micro/kernels/xtensa_hifi/mul.cc (new file, 229 lines)
@@ -0,0 +1,229 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/kernels/internal/reference/mul.h"
+
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
+#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h"
+#include "tensorflow/lite/micro/memory_helpers.h"
+
+namespace tflite {
+namespace ops {
+namespace micro {
+namespace mul {
+
+constexpr int kInput1Tensor = 0;
+constexpr int kInput2Tensor = 1;
+constexpr int kOutputTensor = 0;
+
+struct OpData {
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+
+  int32_t output_multiplier;
+  int output_shift;
+};
+
+TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
+                             TfLiteMulParams* params, OpData* data) {
+  const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
+  const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
+
+  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
+    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
+        context, params->activation, output, &data->output_activation_min,
+        &data->output_activation_max));
+
+    double real_multiplier = static_cast<double>(input1->params.scale) *
+                             static_cast<double>(input2->params.scale) /
+                             static_cast<double>(output->params.scale);
+    QuantizeMultiplier(real_multiplier, &data->output_multiplier,
+                       &data->output_shift);
+  }
+
+  return kTfLiteOk;
+}
+
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||||
|
const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
|
||||||
|
const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
|
||||||
|
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||||
|
|
||||||
|
if (output->dims->size == 0) {
|
||||||
|
return AllocateOutputDimensionsFromInput(context, input1, input2, output);
|
||||||
|
}
|
||||||
|
|
||||||
|
return kTfLiteOk;
|
||||||
|
}
|
||||||
|
|
||||||
|
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||||
|
TfLiteMulParams* params, OpData* data,
|
||||||
|
const TfLiteTensor* input1,
|
||||||
|
const TfLiteTensor* input2, TfLiteTensor* output) {
|
||||||
|
if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8) {
|
||||||
|
tflite::ArithmeticParams op_params;
|
||||||
|
SetActivationParams(data->output_activation_min,
|
||||||
|
data->output_activation_max, &op_params);
|
||||||
|
op_params.input1_offset = -input1->params.zero_point;
|
||||||
|
op_params.input2_offset = -input2->params.zero_point;
|
||||||
|
op_params.output_offset = output->params.zero_point;
|
||||||
|
op_params.output_multiplier = data->output_multiplier;
|
||||||
|
op_params.output_shift = data->output_shift;
|
||||||
|
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||||
|
GetTensorShape(input1), GetTensorShape(input2), &op_params);
|
||||||
|
|
||||||
|
#define TF_LITE_MUL(type, opname, dtype) \
|
||||||
|
type::opname(op_params, GetTensorShape(input1), \
|
||||||
|
GetTensorData<dtype>(input1), GetTensorShape(input2), \
|
||||||
|
GetTensorData<dtype>(input2), GetTensorShape(output), \
|
||||||
|
GetTensorData<dtype>(output));
|
||||||
|
|
||||||
|
if (output->type == kTfLiteInt8) {
|
||||||
|
if (need_broadcast) {
|
||||||
|
TF_LITE_MUL(reference_integer_ops, BroadcastMul4DSlow, int8_t);
|
||||||
|
} else {
|
||||||
|
TF_LITE_MUL(reference_integer_ops, Mul, int8_t);
|
||||||
|
}
|
||||||
|
} else if (output->type == kTfLiteUInt8) {
|
||||||
|
if (need_broadcast) {
|
||||||
|
TF_LITE_MUL(reference_ops, BroadcastMul4DSlow, uint8_t);
|
||||||
|
} else {
|
||||||
|
int err;
|
||||||
|
const RuntimeShape& input1_shape = GetTensorShape(input1);
|
||||||
|
const RuntimeShape& input2_shape = GetTensorShape(input2);
|
||||||
|
const RuntimeShape& output_shape = GetTensorShape(output);
|
||||||
|
const int flat_size =
|
||||||
|
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
||||||
|
|
||||||
|
err = xa_nn_elm_mul_asym8xasym8_asym8(
|
||||||
|
GetTensorData<uint8_t>(output), op_params.output_offset,
|
||||||
|
op_params.output_shift, op_params.output_multiplier,
|
||||||
|
op_params.quantized_activation_min,
|
||||||
|
op_params.quantized_activation_max, GetTensorData<uint8_t>(input1),
|
||||||
|
op_params.input1_offset, GetTensorData<uint8_t>(input2),
|
||||||
|
op_params.input2_offset, flat_size);
|
||||||
|
|
||||||
|
CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_elm_mul_asym8xasym8_asym8 failed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#undef TF_LITE_MUL
|
||||||
|
}
|
||||||
|
return kTfLiteOk;
|
||||||
|
}
|
||||||
|
|
||||||
|
TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
|
||||||
|
TfLiteMulParams* params, OpData* data,
|
||||||
|
const TfLiteTensor* input1, const TfLiteTensor* input2,
|
||||||
|
TfLiteTensor* output) {
|
||||||
|
float output_activation_min, output_activation_max;
|
||||||
|
CalculateActivationRange(params->activation, &output_activation_min,
|
||||||
|
&output_activation_max);
|
||||||
|
tflite::ArithmeticParams op_params;
|
||||||
|
SetActivationParams(output_activation_min, output_activation_max, &op_params);
|
||||||
|
|
||||||
|
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
|
||||||
|
GetTensorShape(input1), GetTensorShape(input2), &op_params);
|
||||||
|
#define TF_LITE_MUL(opname) \
|
||||||
|
reference_ops::opname(op_params, GetTensorShape(input1), \
|
||||||
|
GetTensorData<float>(input1), GetTensorShape(input2), \
|
||||||
|
GetTensorData<float>(input2), GetTensorShape(output), \
|
||||||
|
GetTensorData<float>(output));
|
||||||
|
|
||||||
|
if (need_broadcast) {
|
||||||
|
TF_LITE_MUL(BroadcastMul4DSlow);
|
||||||
|
} else {
|
||||||
|
#if HIFI_VFPU
|
||||||
|
int err;
|
||||||
|
const RuntimeShape& input1_shape = GetTensorShape(input1);
|
||||||
|
const RuntimeShape& input2_shape = GetTensorShape(input2);
|
||||||
|
const RuntimeShape& output_shape = GetTensorShape(output);
|
||||||
|
const int flat_size =
|
||||||
|
MatchingElementsSize(input1_shape, input2_shape, output_shape);
|
||||||
|
|
||||||
|
err = xa_nn_elm_mul_f32xf32_f32(GetTensorData<float>(output),
|
||||||
|
GetTensorData<float>(input1),
|
||||||
|
GetTensorData<float>(input2), flat_size);
|
||||||
|
|
||||||
|
CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_elm_mul_f32xf32_f32 failed");
|
||||||
|
|
||||||
|
err = xa_nn_vec_activation_min_max_f32_f32(
|
||||||
|
GetTensorData<float>(output), GetTensorData<float>(output),
|
||||||
|
output_activation_min, output_activation_max, flat_size);
|
||||||
|
|
||||||
|
CHECK_ERR_HIFI_NNLIB_KER(err,
|
||||||
|
"xa_nn_vec_activation_min_max_f32_f32 failed");
|
||||||
|
#else
|
||||||
|
TF_LITE_MUL(Mul);
|
||||||
|
#endif /* HIFI_VFPU */
|
||||||
|
}
|
||||||
|
#undef TF_LITE_MUL
|
||||||
|
return kTfLiteOk;
|
||||||
|
}
|
||||||
|
|
||||||
|
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||||
|
auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
|
||||||
|
OpData data;
|
||||||
|
|
||||||
|
const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
|
||||||
|
const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
|
||||||
|
TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
|
||||||
|
|
||||||
|
TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, &data));
|
||||||
|
|
||||||
|
switch (input1->type) {
|
||||||
|
case kTfLiteUInt8:
|
||||||
|
case kTfLiteInt8:
|
||||||
|
TF_LITE_ENSURE_OK(context, EvalQuantized(context, node, params, &data,
|
||||||
|
input1, input2, output));
|
||||||
|
break;
|
||||||
|
case kTfLiteFloat32:
|
||||||
|
TF_LITE_ENSURE_OK(context, EvalFloat(context, node, params, &data, input1,
|
||||||
|
input2, output));
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
|
||||||
|
TfLiteTypeGetName(input1->type), input1->type);
|
||||||
|
return kTfLiteError;
|
||||||
|
}
|
||||||
|
|
||||||
|
return kTfLiteOk;
|
||||||
|
}
|
||||||
|
} // namespace mul
|
||||||
|
|
||||||
|
TfLiteRegistration Register_MUL() {
|
||||||
|
return {/*init=*/nullptr,
|
||||||
|
/*free=*/nullptr,
|
||||||
|
/*prepare=*/nullptr,
|
||||||
|
/*invoke=*/mul::Eval,
|
||||||
|
/*profiling_string=*/nullptr,
|
||||||
|
/*builtin_code=*/0,
|
||||||
|
/*custom_name=*/nullptr,
|
||||||
|
/*version=*/0};
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace micro
|
||||||
|
} // namespace ops
|
||||||
|
} // namespace tflite
|
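
CalculateOpData() above folds the three tensor scales into one real multiplier (s1 * s2 / s_out) and hands it to QuantizeMultiplier(), after which Eval needs only integer arithmetic. A self-contained sketch of that decomposition and its integer application; this is my own illustration of the idea, not the library code, and it skips edge cases such as zero or very large multipliers (it assumes the exponent stays at or below 30):

#include <cmath>
#include <cstdint>
#include <cstdio>

// Decompose a positive real multiplier into a Q31 significand and a
// power-of-two exponent, mirroring what QuantizeMultiplier() stores in OpData.
void DecomposeMultiplier(double real, int32_t* quantized, int* shift) {
  const double q = std::frexp(real, shift);  // real = q * 2^shift, q in [0.5, 1)
  int64_t q31 = static_cast<int64_t>(std::round(q * (1LL << 31)));
  if (q31 == (1LL << 31)) {  // rounding pushed q to 1.0: renormalize
    q31 /= 2;
    ++(*shift);
  }
  *quantized = static_cast<int32_t>(q31);
}

// Apply it: y ~= x * real, computed with one 64-bit product and a
// rounding right shift.
int32_t ApplyMultiplier(int32_t x, int32_t quantized, int shift) {
  int64_t prod = static_cast<int64_t>(x) * quantized;  // Q31 product
  const int total_shift = 31 - shift;                  // back to integer units
  const int64_t round = int64_t{1} << (total_shift - 1);
  return static_cast<int32_t>((prod + round) >> total_shift);
}

int main() {
  // Example: input scales 0.5 and 0.25, output scale 0.1 -> multiplier 1.25.
  int32_t m;
  int s;
  DecomposeMultiplier(1.25, &m, &s);
  std::printf("x=100 -> %d (expect ~125)\n", ApplyMultiplier(100, m, s));
}
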
tensorflow/lite/micro/kernels/xtensa_hifi/pooling.cc
@@ -1,24 +1,24 @@
-/******************************************************************************
- * Copyright (C) 2019 Cadence Design Systems, Inc.
+/*******************************************************************************
+ * Copyright (c) 2019-2020 Cadence Design Systems, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining
  * a copy of this software and associated documentation files (the
  * "Software"), to use this Software with Cadence processor cores only and
  * not with any other processors and platforms, subject to
  * the following conditions:
  *
  * The above copyright notice and this permission notice shall be included
  * in all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-******************************************************************************/
+
+******************************************************************************/
 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
@@ -40,7 +40,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/padding.h"
-#include "xtensa_tf_micro_common.h"
+#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h"

 namespace tflite {
 namespace ops {
@@ -83,6 +83,7 @@ TfLiteStatus AverageEvalFloat(TfLiteContext* context, const TfLiteNode* node,
   CalculateActivationRange(params->activation, &activation_min,
                            &activation_max);

+#if HIFI_VFPU
   const int stride_height = params->stride_height;
   const int stride_width = params->stride_width;
   const int pad_width = data->padding.width;
@@ -168,6 +169,20 @@ TfLiteStatus AverageEvalFloat(TfLiteContext* context, const TfLiteNode* node,
     CHECK_ERR_HIFI_NNLIB_KER(
         err, "AveragepoolFloat: xa_nn_vec_activation_min_max_f32_f32 failed");
   }
+#else
+  PoolParams op_params;
+  op_params.stride_height = params->stride_height;
+  op_params.stride_width = params->stride_width;
+  op_params.filter_height = params->filter_height;
+  op_params.filter_width = params->filter_width;
+  op_params.padding_values.height = data->padding.height;
+  op_params.padding_values.width = data->padding.width;
+  op_params.float_activation_min = activation_min;
+  op_params.float_activation_max = activation_max;
+  reference_ops::AveragePool(
+      op_params, GetTensorShape(input), GetTensorData<float>(input),
+      GetTensorShape(output), GetTensorData<float>(output));
+#endif /* HIFI_VFPU */
   return kTfLiteOk;
 }
@@ -177,7 +192,6 @@ TfLiteStatus AverageEvalQuantized(TfLiteContext* context,
                                   const OpData* data, const TfLiteTensor* input,
                                   TfLiteTensor* output) {
   TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
-
   int32_t activation_min, activation_max;
   (void)CalculateActivationRangeQuantized(context, params->activation, output,
                                           &activation_min, &activation_max);
@@ -295,6 +309,7 @@ TfLiteStatus MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
   CalculateActivationRange(params->activation, &activation_min,
                            &activation_max);

+#if HIFI_VFPU
   const int stride_height = params->stride_height;
   const int stride_width = params->stride_width;
   const int pad_width = data->padding.width;
@@ -378,6 +393,20 @@ TfLiteStatus MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
     CHECK_ERR_HIFI_NNLIB_KER(
         err, "MaxpoolFloat: xa_nn_vec_activation_min_max_f32_f32 failed");
   }
+#else
+  tflite::PoolParams op_params;
+  op_params.stride_height = params->stride_height;
+  op_params.stride_width = params->stride_width;
+  op_params.filter_height = params->filter_height;
+  op_params.filter_width = params->filter_width;
+  op_params.padding_values.height = data->padding.height;
+  op_params.padding_values.width = data->padding.width;
+  op_params.float_activation_min = activation_min;
+  op_params.float_activation_max = activation_max;
+  reference_ops::MaxPool(op_params, GetTensorShape(input),
+                         GetTensorData<float>(input), GetTensorShape(output),
+                         GetTensorData<float>(output));
+#endif /* HIFI_VFPU */
   return kTfLiteOk;
 }
@@ -491,7 +520,6 @@ TfLiteStatus MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
   }
   return kTfLiteOk;
 }

 }  // namespace

-
@@ -504,7 +532,7 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {

   TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data));

-  // Inputs and outputs share the same type, guarenteed by the converter.
+  // Inputs and outputs share the same type, guaranteed by the converter.
   switch (input->type) {
     case kTfLiteFloat32:
       AverageEvalFloat(context, node, params, &data, input, output);
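
The pooling hunks above follow the same structure as the other kernels in this change: the NNLib fast path compiles only when HIFI_VFPU is set, and the portable reference kernel sits in the #else branch. A minimal sketch of that idiom, with a hypothetical accelerated primitive fast_add() standing in for the xa_nn_* calls:

#include <cstddef>

#if HIFI_VFPU
// Hypothetical DSP-library routine; only the structure mirrors the kernels.
extern "C" int fast_add(float* out, const float* a, const float* b, int n);
#endif

int AddVectors(float* out, const float* a, const float* b, int n) {
#if HIFI_VFPU
  // Accelerated path: delegate to the DSP library and propagate its status,
  // the way CHECK_ERR_HIFI_NNLIB_KER guards the xa_nn_* calls above.
  return fast_add(out, a, b, n);
#else
  // Portable reference path, used when no vector FPU is available.
  for (int i = 0; i < n; ++i) out[i] = a[i] + b[i];
  return 0;
#endif
}
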
tensorflow/lite/micro/kernels/xtensa_hifi/softmax.cc
@@ -1,24 +1,24 @@
-/******************************************************************************
- * Copyright (C) 2019 Cadence Design Systems, Inc.
+/*******************************************************************************
+ * Copyright (c) 2019-2020 Cadence Design Systems, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining
  * a copy of this software and associated documentation files (the
  * "Software"), to use this Software with Cadence processor cores only and
  * not with any other processors and platforms, subject to
  * the following conditions:
  *
  * The above copyright notice and this permission notice shall be included
  * in all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-******************************************************************************/
+
+******************************************************************************/
 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
@@ -43,7 +43,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/op_macros.h"
-#include "xtensa_tf_micro_common.h"
+#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h"
 namespace tflite {
 namespace ops {
 namespace micro {
@@ -105,6 +105,7 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
 // Takes a tensor and performs softmax along the last dimension.
 TfLiteStatus SoftmaxFloat(TfLiteContext* context, const TfLiteTensor* input,
                           TfLiteTensor* output, const SoftmaxParams& op_data) {
+#if HIFI_VFPU
   const RuntimeShape& input_shape = GetTensorShape(input);
   const float* input_data = GetTensorData<float>(input);
   const RuntimeShape& output_shape = GetTensorShape(output);
@@ -133,6 +134,11 @@ TfLiteStatus SoftmaxFloat(TfLiteContext* context, const TfLiteTensor* input,
         xa_nn_vec_softmax_f32_f32(&output_data[i * depth], p_scratch, depth);
     CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_softmax_f32_f32 failed");
   }
+#else
+  tflite::reference_ops::Softmax(
+      op_data, GetTensorShape(input), GetTensorData<float>(input),
+      GetTensorShape(output), GetTensorData<float>(output));
+#endif /* HIFI_VFPU */
   return kTfLiteOk;
 }

tensorflow/lite/micro/kernels/xtensa_hifi/svdf.cc
@@ -1,5 +1,5 @@
-/******************************************************************************
- * Copyright (C) 2019 Cadence Design Systems, Inc.
+/*******************************************************************************
+ * Copyright (c) 2019-2020 Cadence Design Systems, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining
  * a copy of this software and associated documentation files (the
@@ -18,7 +18,6 @@
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  ******************************************************************************/
-
 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
@@ -44,8 +43,8 @@ limitations under the License.
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/op_macros.h"
 #include "tensorflow/lite/micro/kernels/activation_utils.h"
+#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h"
 #include "tensorflow/lite/micro/micro_utils.h"
-#include "xtensa_tf_micro_common.h"

 namespace tflite {
 namespace ops {
@@ -53,10 +52,6 @@ namespace micro {
 namespace svdf {
 namespace {

-// These constants represent constants specific to the hotword "OK G" model.
-// They exist until (b/132070898) is fixed.
-constexpr int kScratchTensorMaxSize = 64;
-
 struct OpData {
   int32 effective_scale_1_a;
   int32 effective_scale_2_a;
@@ -64,6 +59,8 @@ struct OpData {
   // shift value - typically between [-32, 32].
   int effective_scale_1_b;
   int effective_scale_2_b;
+  int scratch_tensor_index;
+  int scratch_output_tensor_index;
 };

 /**
@@ -84,6 +81,7 @@ static inline TfLiteStatus ApplyTimeWeightsBiasAndActivation(
     float* const __restrict__ state_ptr, float* const __restrict__ scratch_ptr,
     float* const __restrict__ output_ptr) {
   // Compute matmul(activation_state, weights_time).
+#if HIFI_VFPU
   float* scratch_bias = scratch_ptr;
   if (bias_ptr) {
     const float* bias_data = bias_ptr;
@@ -111,6 +109,51 @@ static inline TfLiteStatus ApplyTimeWeightsBiasAndActivation(
       weights_time_vec += memory_size * rank;
     }
   }
+#else
+  for (int b = 0; b < batch_size; ++b) {
+    // Perform batched vector dot product:
+    float* scratch_ptr_batch = scratch_ptr + b * num_filters;
+    const float* vector1_ptr = weights_time_ptr;
+    const float* vector2_ptr = state_ptr + b * memory_size * num_filters;
+    for (int i = 0; i < num_filters; ++i) {
+      *scratch_ptr_batch = 0.f;
+      for (int j = 0; j < memory_size; ++j) {
+        *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
+      }
+      scratch_ptr_batch++;
+    }
+  }
+
+  // Initialize output with bias if provided.
+  if (bias_ptr) {
+    // VectorBatchVectorAssign
+    for (int i = 0; i < batch_size; ++i) {
+      float* output_data = output_ptr + i * num_units;
+      const float* bias_data = bias_ptr;
+      for (int j = 0; j < num_units; ++j) {
+        *output_data++ = *bias_data++;
+      }
+    }
+  } else {
+    float* output_data = output_ptr;
+    for (int i = 0; i < batch_size * num_units; ++i) {
+      *output_data++ = 0.0f;
+    }
+  }
+
+  // Reduction sum.
+  for (int b = 0; b < batch_size; ++b) {
+    float* output_ptr_batch = output_ptr + b * num_units;
+    float* scratch_ptr_batch = scratch_ptr + b * num_filters;

+    // Reduction sum vector
+    for (int i = 0; i < num_units; ++i) {
+      for (int j = 0; j < rank; j++) {
+        output_ptr_batch[i] += *scratch_ptr_batch++;
+      }
+    }
+  }
+#endif /* HIFI_VFPU */

   // Apply activation.
   for (int b = 0; b < batch_size; ++b) {
@@ -127,7 +170,8 @@ inline TfLiteStatus EvalFloatSVDF(
     TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* input,
     const TfLiteTensor* weights_feature, const TfLiteTensor* weights_time,
     const TfLiteTensor* bias, const TfLiteSVDFParams* params,
-    TfLiteTensor* activation_state, TfLiteTensor* output) {
+    int scratch_tensor_index, TfLiteTensor* activation_state,
+    TfLiteTensor* output) {
   const int rank = params->rank;
   const int batch_size = input->dims->data[0];
   const int input_size = input->dims->data[1];
@@ -142,10 +186,11 @@ inline TfLiteStatus EvalFloatSVDF(

   float* state_ptr = GetTensorData<float>(activation_state);

-  // TODO(b/132070898): Move this temp variable to the new scratch buffer API
-  // when ready.
-  float scratch_tensor[kScratchTensorMaxSize];
-  float* scratch_ptr = scratch_tensor;
+  TFLITE_DCHECK(context != nullptr);
+  TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
+
+  float* scratch_ptr = static_cast<float*>(
+      context->GetScratchBuffer(context, scratch_tensor_index));

   float* output_ptr = GetTensorData<float>(output);

@@ -174,6 +219,7 @@ inline TfLiteStatus EvalFloatSVDF(
     float* result = &state_ptr[memory_size - 1];
     float* result_in_batch = result;

+#if HIFI_VFPU
     float* out_scratch = scratch_ptr;
     float* bias_scratch = output_ptr;
     for (int i = 0; i < num_units; i++) bias_scratch[i] = 0.0f;
@@ -195,6 +241,20 @@ inline TfLiteStatus EvalFloatSVDF(
         result_in_batch += memory_size;
       }
     }
+#else
+    for (int i = 0; i < batch_size; ++i) {
+      const float* matrix_ptr = matrix;
+      for (int j = 0; j < num_filters; ++j) {
+        float dot_prod = 0.0f;
+        const float* vector_in_batch = vector + i * input_size;
+        for (int k = 0; k < input_size; ++k) {
+          dot_prod += *matrix_ptr++ * *vector_in_batch++;
+        }
+        *result_in_batch = dot_prod;
+        result_in_batch += memory_size;
+      }
+    }
+#endif /* HIFI_VFPU */
   }

   return ApplyTimeWeightsBiasAndActivation(
@@ -203,13 +263,15 @@ inline TfLiteStatus EvalFloatSVDF(
       output_ptr);
 }

-void EvalIntegerSVDF(
-    TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* input_tensor,
-    const TfLiteTensor* weights_feature_tensor,
-    const TfLiteTensor* weights_time_tensor, const TfLiteTensor* bias_tensor,
-    const TfLiteSVDFParams* params, TfLiteTensor* activation_state_tensor,
-    TfLiteTensor* output_tensor, int32_t scale_1_a, int scale_1_b,
-    int32_t scale_2_a, int scale_2_b, int32_t input_zp, int32_t output_zp) {
+void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
+                     const TfLiteTensor* input_tensor,
+                     const TfLiteTensor* weights_feature_tensor,
+                     const TfLiteTensor* weights_time_tensor,
+                     const TfLiteTensor* bias_tensor,
+                     const TfLiteSVDFParams* params,
+                     TfLiteTensor* activation_state_tensor,
+                     TfLiteTensor* output_tensor, const OpData& data,
+                     int32_t input_zp, int32_t output_zp) {
   const int n_rank = params->rank;
   const int n_batch = input_tensor->dims->data[0];
   const int n_input = input_tensor->dims->data[1];
@@ -217,10 +279,13 @@ void EvalIntegerSVDF(
   const int n_unit = n_filter / n_rank;
   const int n_memory = weights_time_tensor->dims->data[1];

-  // TODO(b/132070898): Move these temp variables to the new scratch buffer API
-  // when ready.
-  int32_t scratch_tensor[kScratchTensorMaxSize];
-  int32_t scratch_output_tensor[kScratchTensorMaxSize];
+  TFLITE_DCHECK(context != nullptr);
+  TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
+
+  int32_t* scratch_tensor = static_cast<int32_t*>(
+      context->GetScratchBuffer(context, data.scratch_tensor_index));
+  int32_t* scratch_output_tensor = static_cast<int32_t*>(
+      context->GetScratchBuffer(context, data.scratch_output_tensor_index));

   // Shift states.
   int16_t* const state_ptr = GetTensorData<int16_t>(activation_state_tensor);
@@ -254,8 +319,8 @@ void EvalIntegerSVDF(
       for (int c = 0; c < n_input; c++) {
         dot_prod += *matrix_ptr++ * (*vector_in_batch++ - input_zp);
       }
-      dot_prod =
-          MultiplyByQuantizedMultiplier(dot_prod, scale_1_a, scale_1_b);
+      dot_prod = MultiplyByQuantizedMultiplier(
+          dot_prod, data.effective_scale_1_a, data.effective_scale_1_b);
       dot_prod = std::min(std::max(output_min, dot_prod), output_max);
       // This assumes state is symmetrically quantized. Otherwise last bit of
       // state should be initialized to its zero point and accumulate the
@@ -328,7 +393,8 @@ void EvalIntegerSVDF(
     const int32_t output_min = std::numeric_limits<int8_t>::min();
     for (int i = 0; i < n_batch * n_unit; ++i) {
       int32_t x1 = scratch_output_tensor[i];
-      int32_t x2 = MultiplyByQuantizedMultiplier(x1, scale_2_a, scale_2_b);
+      int32_t x2 = MultiplyByQuantizedMultiplier(x1, data.effective_scale_2_a,
+                                                 data.effective_scale_2_b);
       int32_t x3 = x2 + output_zp;
       int32_t x4 = std::min(std::max(output_min, x3), output_max);
       GetTensorData<int8_t>(output_tensor)[i] = static_cast<int8_t>(x4);
@@ -349,8 +415,20 @@ constexpr int kInputActivationStateTensor = 4;
 // Output tensor.
 constexpr int kOutputTensor = 0;

+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  void* data = nullptr;
+  if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
+      kTfLiteError) {
+    return nullptr;
+  }
+  return data;
+}
+
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
-  const auto* params = reinterpret_cast<TfLiteSVDFParams*>(node->builtin_data);
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+
+  const auto* params = static_cast<const TfLiteSVDFParams*>(node->builtin_data);

   // Validate Tensor Inputs (dtype depends on quantization):
   // [0] = Input, {2, batch_size, input_size}
@@ -359,7 +437,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   // [3] = Bias (optional), {1, num_units}
   // [4] = Activation State (variable),
   //       {2, batch_size, memory_size * num_filters}
-
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
   const TfLiteTensor* weights_feature =
       GetInput(context, node, kWeightsFeatureTensor);
@@ -378,8 +455,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const int num_units = num_filters / rank;
   const int memory_size = weights_time->dims->data[1];

-  const bool is_full_integer = input->type == kTfLiteInt8;
-
   // Validate Input Tensor:
   TF_LITE_ENSURE(context,
                  input->type == kTfLiteFloat32 || input->type == kTfLiteInt8);
@@ -403,7 +478,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, weights_time->dims->data[1], memory_size);

   // Validate Optional Bias Input Tensor:
-  if (bias) {
+  if (bias != nullptr) {
     TF_LITE_ENSURE_EQ(context, bias->dims->data[0], num_units);
   }

@@ -413,53 +488,77 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, activation_state->dims->data[1],
                     memory_size * num_filters);

-  if (is_full_integer) {
-    TF_LITE_ENSURE_EQ(context, node->inputs->size, 5);
-
-    TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteInt8);
-    TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteInt16);
-
-    if (bias) {
-      TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32);
-    }
-
-    TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteInt16);
-
-    // Validate Scratch Tensors:
-    // [0] = (shared - see float block below for usage)
-    // [1] = Output Temp, int8_t, {2, num_units, batch_size}
-    // TODO(b/132070898): Scratch values are used as stack variables in
-    // EvalIntegerSVDF().
-
-    // Validate output tensor:
-    TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
-  } else {
-    TF_LITE_ENSURE_EQ(context, node->inputs->size, 5);
-
-    // Validate Input Tensor dtypes:
-    TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteFloat32);
-    TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteFloat32);
-    TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteFloat32);
-
-    if (bias) {
-      TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteFloat32);
-    }
-
-    // Validate shared Scratch Tensor:
-    // [0] = Holds dot-product of time-forward calculations in
-    //       ApplyTimeWeightsBiasAndActivation():
-    //         float/int32, {2, batch_size, num_filters}
-    // TODO(b/132070898): Scratch values are used as stack variables in
-    // EvalIntegerSVDF().
-
-    // Full-float SVDF only uses the one shared scratch tensor (see above for
-    // usage).
-    // TODO(b/132070898): Use input tensor as variable until scratch tensor
-    // allocation has been implemented.
-    // TF_LITE_ENSURE_EQ(context, node->temporaries->size, 1);
-    TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
-  }
+  TF_LITE_ENSURE_EQ(context, node->inputs->size, 5);
+
+  if (input->type == kTfLiteInt8) {
+    TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteInt8);
+    TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteInt16);
+    TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteInt16);
+    if (bias != nullptr) {
+      TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32);
+    }
+
+    TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
+
+    const auto* input_params = reinterpret_cast<TfLiteAffineQuantization*>(
+        input->quantization.params);
+    const auto* weights_feature_params =
+        static_cast<const TfLiteAffineQuantization*>(
+            weights_feature->quantization.params);
+    const auto* state_params = static_cast<const TfLiteAffineQuantization*>(
+        activation_state->quantization.params);
+    const auto* weight_time_params =
+        static_cast<const TfLiteAffineQuantization*>(
+            weights_time->quantization.params);
+    const auto* output_params = static_cast<const TfLiteAffineQuantization*>(
+        output->quantization.params);
+    const double effective_scale_1 =
+        static_cast<double>(input_params->scale->data[0] *
+                            weights_feature_params->scale->data[0] /
+                            state_params->scale->data[0]);
+    const double effective_scale_2 = static_cast<double>(
+        state_params->scale->data[0] * weight_time_params->scale->data[0] /
+        output_params->scale->data[0]);
+
+    TFLITE_DCHECK(node->user_data != nullptr);
+    OpData* data = static_cast<OpData*>(node->user_data);
+
+    QuantizeMultiplier(effective_scale_1, &(data->effective_scale_1_a),
+                       &(data->effective_scale_1_b));
+    QuantizeMultiplier(effective_scale_2, &(data->effective_scale_2_a),
+                       &(data->effective_scale_2_b));
+
+    TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
+
+    const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
+        context, batch_size * num_filters * sizeof(int32_t),
+        &(data->scratch_tensor_index));
+    TF_LITE_ENSURE_OK(context, scratch_status);
+
+    const TfLiteStatus scratch_output_status =
+        context->RequestScratchBufferInArena(
+            context, batch_size * num_units * sizeof(int32_t),
+            &(data->scratch_output_tensor_index));
+    TF_LITE_ENSURE_OK(context, scratch_output_status);
+  } else {
+    TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteFloat32);
+    TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteFloat32);
+    TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteFloat32);
+    if (bias != nullptr) {
+      TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteFloat32);
+    }
+    TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
+
+    TFLITE_DCHECK(node->user_data != nullptr);
+    OpData* data = static_cast<OpData*>(node->user_data);
+
+    TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
+    const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
+        context, batch_size * num_filters * sizeof(float),
+        &(data->scratch_tensor_index));
+    TF_LITE_ENSURE_OK(context, scratch_status);
+  }

   return kTfLiteOk;
 }

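
The Prepare() rewrite above replaces the fixed kScratchTensorMaxSize stack arrays with arena-managed scratch buffers. A minimal sketch of that request/resolve lifecycle, using the same TfLiteContext hooks the diff uses; the OpData layout and byte count here are illustrative only:

#include "tensorflow/lite/c/common.h"

struct MyOpData {
  int scratch_index;  // filled in by Prepare, consumed by Eval
};

TfLiteStatus MyPrepare(TfLiteContext* context, TfLiteNode* node) {
  MyOpData* data = static_cast<MyOpData*>(node->user_data);
  // Prepare only reserves bytes in the arena and receives an index back;
  // no stable pointer exists yet at this point.
  return context->RequestScratchBufferInArena(context, /*bytes=*/1024,
                                              &data->scratch_index);
}

TfLiteStatus MyEval(TfLiteContext* context, TfLiteNode* node) {
  MyOpData* data = static_cast<MyOpData*>(node->user_data);
  // Eval resolves the index to a pointer that is valid for this call only.
  float* scratch = static_cast<float*>(
      context->GetScratchBuffer(context, data->scratch_index));
  scratch[0] = 0.0f;  // use as temporary workspace
  return kTfLiteOk;
}
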
@@ -476,56 +575,24 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       GetVariableInput(context, node, kInputActivationStateTensor);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

-  const bool is_full_integer = input->type == kTfLiteInt8;
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData& data = *(static_cast<const OpData*>(node->user_data));

   switch (weights_feature->type) {
     case kTfLiteFloat32: {
-      // TODO(b/132070898): Use input tensor as variable until scratch tensor
-      // allocation has been implemented.
-      // TfLiteTensor* scratch = GetTemporary(context, node, /*index=*/0);
       return EvalFloatSVDF(context, node, input, weights_feature, weights_time,
-                           bias, params, activation_state, output);
+                           bias, params, data.scratch_tensor_index,
+                           activation_state, output);
       break;
     }

    case kTfLiteInt8: {
-      if (is_full_integer) {
-        // TODO(b/132070898): Store these values in ::Prepare() instead of
-        // ::Eval():
-        // Calculate effective scales.
-        OpData op_data;
-        auto* input_params = reinterpret_cast<TfLiteAffineQuantization*>(
-            input->quantization.params);
-        auto* weights_feature_params =
-            reinterpret_cast<TfLiteAffineQuantization*>(
-                weights_feature->quantization.params);
-        auto* state_params = reinterpret_cast<TfLiteAffineQuantization*>(
-            activation_state->quantization.params);
-        auto* weight_time_params = reinterpret_cast<TfLiteAffineQuantization*>(
-            weights_time->quantization.params);
-        auto* output_params = reinterpret_cast<TfLiteAffineQuantization*>(
-            output->quantization.params);
-        const double effective_scale_1 =
-            static_cast<double>(input_params->scale->data[0] *
-                                weights_feature_params->scale->data[0] /
-                                state_params->scale->data[0]);
-        const double effective_scale_2 = static_cast<double>(
-            state_params->scale->data[0] * weight_time_params->scale->data[0] /
-            output_params->scale->data[0]);
-        QuantizeMultiplier(effective_scale_1, &op_data.effective_scale_1_a,
-                           &op_data.effective_scale_1_b);
-        QuantizeMultiplier(effective_scale_2, &op_data.effective_scale_2_a,
-                           &op_data.effective_scale_2_b);
-
-        TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActRelu);
-        EvalIntegerSVDF(
-            context, node, input, weights_feature, weights_time, bias, params,
-            activation_state, output, op_data.effective_scale_1_a,
-            op_data.effective_scale_1_b, op_data.effective_scale_2_a,
-            op_data.effective_scale_2_b, input->params.zero_point,
-            output->params.zero_point);
-        return kTfLiteOk;
-      }
+      TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActRelu);
+      EvalIntegerSVDF(context, node, input, weights_feature, weights_time, bias,
+                      params, activation_state, output, data,
+                      input->params.zero_point, output->params.zero_point);
+      return kTfLiteOk;
+
       break;
     }

@@ -540,7 +607,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace svdf

 TfLiteRegistration Register_SVDF() {
-  return {/*init=*/nullptr,
+  return {/*init=*/svdf::Init,
          /*free=*/nullptr,
          /*prepare=*/svdf::Prepare,
          /*invoke=*/svdf::Eval,
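
Register_SVDF() now wires in svdf::Init, so the per-node OpData lives in the arena and the effective scales are computed once in Prepare instead of on every Eval. A short sketch of the init hook as TFLM invokes it; the names here are illustrative, and the AllocatePersistentBuffer shape matches the call in the diff above:

#include "tensorflow/lite/c/common.h"

struct MyOpData { int scratch_index; };

// init runs once per node; the returned pointer becomes node->user_data,
// which Prepare and Eval then read back.
void* MyInit(TfLiteContext* context, const char* buffer, size_t length) {
  (void)buffer;  // flatbuffer custom-op data, unused here
  (void)length;
  void* data = nullptr;
  if (context->AllocatePersistentBuffer(context, sizeof(MyOpData), &data) ==
      kTfLiteError) {
    return nullptr;  // signals allocation failure to the interpreter
  }
  return data;
}
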
@@ -4,6 +4,8 @@ ifneq ($(filter xtensa_hifi, $(ALL_TAGS)),)

   ifneq (,$(filter hifi4%, $(TARGET_ARCH)))

+    NNLIB = xa_nnlib_hifi4
+
     CCFLAGS += -DNNLIB_V2 \
                -DXTENSA_NNLIB_MAX_SCRATCH_SIZE=70*1024

@@ -11,56 +13,60 @@ ifneq ($(filter xtensa_hifi, $(ALL_TAGS)),)
                -DXTENSA_NNLIB_MAX_SCRATCH_SIZE=70*1024

     MICROLITE_CC_SRCS += \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/activations/hifi4/xa_nn_activations_f32_f32.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/activations/hifi4/xa_nn_activations_asym8_asym8.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/activations/hifi4/xa_nn_activations_32_16.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/activations/hifi4/xa_nn_activations_32_8.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/activations/hifi4/xa_nn_softmax_asym8_asym8.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/basic/hifi4/xa_nn_floor_f32.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/cnn/hifi4/xa_nn_conv2d_std_circ_buf.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/cnn/hifi4/xa_nn_conv2d_std_asym8xasym8.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/cnn/hifi4/xa_nn_conv2d_std_f32.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/cnn/hifi4/xa_nn_matXvec_asym8xasym8_asym8_circ.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/cnn/hifi4/xa_nn_matXvec_f32_circ.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/cnn/hifi4/xa_nn_conv2d_depthwise.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/cnn/hifi4/xa_nn_conv2d_depthwise_f32.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/cnn/hifi4/xa_nn_conv2d_depthwise_asym8xasym8.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/cnn/hifi4/xa_nn_circ_buf.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/fc/hifi4/xa_nn_fully_connected.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/matXvec/hifi4/xa_nn_matXvec_f32.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/matXvec/hifi4/xa_nn_matXvec_16x16.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/matXvec/hifi4/xa_nn_matXvec_8x16.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/matXvec/hifi4/xa_nn_matXvec_8x8.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/matXvec/hifi4/xa_nn_matXvec_asym8xasym8.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_avgpool.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_avgpool_f32.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_avgpool_asym8.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_maxpool.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_maxpool_f32.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_maxpool_asym8.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_avgpool_f32_nhwc.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_avgpool_asym8_nhwc.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_maxpool_f32_nhwc.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_maxpool_asym8_nhwc.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_inv_256_tbl.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/vec_sigmoidf_hifi4.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/vec_tanhf_hifi4.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/vec_reluf_hifi4.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/vec_softmaxf_hifi4.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/vec_alognf_hifi4.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/scl_sigmoidf_hifi4.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/scl_tanhf_hifi4.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/expf_tbl.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/pow2f_tbl.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/inff_tbl.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/tanhf_tbl.c \
-      $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/nanf_tbl.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/activations/hifi4/xa_nn_activations_f32_f32.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/activations/hifi4/xa_nn_activations_asym8_asym8.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/activations/hifi4/xa_nn_activations_32_16.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/activations/hifi4/xa_nn_activations_32_8.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/activations/hifi4/xa_nn_softmax_asym8_asym8.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/basic/hifi4/xa_nn_floor_f32.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/basic/hifi4/xa_nn_elm_add_f32.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/basic/hifi4/xa_nn_elm_add_quant8.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/basic/hifi4/xa_nn_elm_mul_f32.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/basic/hifi4/xa_nn_elm_mul_quant8.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/cnn/hifi4/xa_nn_conv2d_std_circ_buf.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/cnn/hifi4/xa_nn_conv2d_std_asym8xasym8.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/cnn/hifi4/xa_nn_conv2d_std_f32.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/cnn/hifi4/xa_nn_matXvec_asym8xasym8_asym8_circ.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/cnn/hifi4/xa_nn_matXvec_f32_circ.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/cnn/hifi4/xa_nn_conv2d_depthwise.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/cnn/hifi4/xa_nn_conv2d_depthwise_f32.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/cnn/hifi4/xa_nn_conv2d_depthwise_asym8xasym8.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/cnn/hifi4/xa_nn_circ_buf.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/fc/hifi4/xa_nn_fully_connected.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/matXvec/hifi4/xa_nn_matXvec_f32.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/matXvec/hifi4/xa_nn_matXvec_16x16.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/matXvec/hifi4/xa_nn_matXvec_8x16.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/matXvec/hifi4/xa_nn_matXvec_8x8.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/matXvec/hifi4/xa_nn_matXvec_asym8xasym8.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_avgpool.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_avgpool_f32.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_avgpool_asym8.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_maxpool.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_maxpool_f32.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_maxpool_asym8.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_avgpool_f32_nhwc.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_avgpool_asym8_nhwc.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_maxpool_f32_nhwc.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_maxpool_asym8_nhwc.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_inv_256_tbl.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/vec_sigmoidf_hifi4.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/vec_tanhf_hifi4.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/vec_reluf_hifi4.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/vec_softmaxf_hifi4.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/vec_alognf_hifi4.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/scl_sigmoidf_hifi4.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/scl_tanhf_hifi4.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/expf_tbl.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/pow2f_tbl.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/inff_tbl.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/tanhf_tbl.c \
+      $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/nanf_tbl.c \

-    INCLUDES += -I$(XTENSA_PATH)/xa_nnlib/algo/kernels/ \
-                -I$(XTENSA_PATH)/xa_nnlib/include/nnlib/ \
-                -I$(XTENSA_PATH)/xa_nnlib/include/ \
-                -I$(XTENSA_PATH)/xa_nnlib/algo/common/include/ \
-                -I$(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/include/ \
+    INCLUDES += -I$(XTENSA_PATH)/$(NNLIB)/algo/kernels/ \
+                -I$(XTENSA_PATH)/$(NNLIB)/include/nnlib/ \
+                -I$(XTENSA_PATH)/$(NNLIB)/include/ \
+                -I$(XTENSA_PATH)/$(NNLIB)/algo/common/include/ \
+                -I$(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/include/ \

   endif

@@ -5,7 +5,7 @@
 ifeq ($(TARGET), xtensa_hifi)
   TARGET_ARCH := hifi3_bd5

-  $(eval $(call add_third_party_download,$(XTENSA_HIFI4_URL),$(XTENSA_HIFI4_MD5),xa_nnlib,))
+  $(eval $(call add_third_party_download,$(XTENSA_HIFI4_URL),$(XTENSA_HIFI4_MD5),xa_nnlib_hifi4,))

   PLATFORM_ARGS = \
     -mno-mul16 \
@@ -80,8 +80,8 @@ EMBARC_MLI_PRE_COMPILED_MD5 := "a95ff9e0370434484f14e7e4114327f6"
 ZEPHYR_URL := "https://github.com/antmicro/zephyr/archive/55e36b9.zip"
 ZEPHYR_MD5 := "755622eb4812fde918a6382b65d50c3b"

-XTENSA_HIFI4_URL :="https://github.com/foss-xtensa/nnlib-hifi4/raw/master/archive/xa_nnlib_04_07.zip"
-XTENSA_HIFI4_MD5 :="f234764928f9a42901df33a27e118c8b"
+XTENSA_HIFI4_URL :="https://github.com/foss-xtensa/nnlib-hifi4/raw/master/archive/xa_nnlib_06_27.zip"
+XTENSA_HIFI4_MD5 :="45fdc1209a8da62ab568aa6040f7eabf"

 ETHOSU_URL := "https://git.mlplatform.org/ml/ethos-u/ethos-u-core-driver.git/snapshot/ethos-u-core-driver-bcb5aaa99756f1b5c1295b079ebdd60996bc75a5.tar.gz"
 ETHOSU_MD5 := "d2073c8d88fc167fd5c46b5dcda58ea1"