Merge pull request #44183 from foss-for-synopsys-dwc-arc-processors:arc_mli_build_fix
PiperOrigin-RevId: 338287124
Change-Id: I9d856ed6271df1f4eefd1dd83abcbdf2bf7cc7e2
commit 2fef5cc015
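
All of the hunks below apply one pattern: these ARC MLI kernels appear to no longer live inside namespace ops { namespace micro { ... } (the namespace removals are visible in the fully-connected hunks further down), while shared helpers such as ConvertToMliTensor, TensorSlicer and the arc_scratch_buffer_* functions still do, so every call site now spells out the ops::micro:: qualification. A minimal sketch of the lookup problem, with invented names rather than the real TFLM sources:

    // Hypothetical illustration only; names are made up for this note.
    namespace tflite {
    namespace ops {
    namespace micro {
    inline int SharedHelper(int x) { return x + 1; }  // helper stays in ops::micro
    }  // namespace micro
    }  // namespace ops

    // Kernel code now defined directly in tflite::, so unqualified lookup no
    // longer finds SharedHelper; explicit qualification restores the build.
    inline int KernelUsingHelper(int x) { return ops::micro::SharedHelper(x); }
    }  // namespace tflite

    int main() { return tflite::KernelUsingHelper(41) == 42 ? 0 : 1; }
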
@@ -66,6 +66,7 @@ struct OpData {
   int32_t output_activation_max;
 };

+#if !defined(TF_LITE_STRIP_REFERENCE_IMPL)
 inline PaddingType RuntimePaddingType(TfLitePadding padding) {
   switch (padding) {
     case TfLitePadding::kTfLitePaddingSame:
@@ -77,6 +78,7 @@ inline PaddingType RuntimePaddingType(TfLitePadding padding) {
       return PaddingType::kNone;
   }
 }
+#endif

 bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input,
                      const TfLiteTensor* filter, const TfLiteTensor* bias,
@@ -194,7 +196,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   data->output_zero_point = output->params.zero_point;

   return kTfLiteOk;
-}  // namespace conv
+}

 void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                    TfLiteConvParams* params, const OpData& data,
@@ -259,10 +261,10 @@ TfLiteStatus EvalMliQuantizedPerChannel(
   mli_weights.el_params.asym.zero_point.pi16 = &filter_zero_point;
   mli_bias.el_params.asym.zero_point.pi16 = &bias_zero_point;

-  ConvertToMliTensor<int8_t>(input, &mli_in);
-  ConvertToMliTensorPerChannel<int8_t>(filter, &mli_weights);
-  ConvertToMliTensorPerChannel<int32_t>(bias, &mli_bias);
-  ConvertToMliTensor<int8_t>(output, &mli_out);
+  ops::micro::ConvertToMliTensor<int8_t>(input, &mli_in);
+  ops::micro::ConvertToMliTensorPerChannel<int8_t>(filter, &mli_weights);
+  ops::micro::ConvertToMliTensorPerChannel<int32_t>(bias, &mli_bias);
+  ops::micro::ConvertToMliTensor<int8_t>(output, &mli_out);

   if (params->activation == kTfLiteActRelu) {
     cfg.relu.type = MLI_RELU_GEN;
@@ -313,14 +315,16 @@ TfLiteStatus EvalMliQuantizedPerChannel(
   mli_tensor out_local = mli_out;
   mli_mov_cfg_t copy_config;
   mli_mov_cfg_for_copy(&copy_config);
-  TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_conv_tensors(
+  TF_LITE_ENSURE_STATUS(ops::micro::get_arc_scratch_buffer_for_conv_tensors(
       context, &in_local, &weights_local, &bias_local, &out_local));
-  TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io(
+  TF_LITE_ENSURE_STATUS(ops::micro::arc_scratch_buffer_calc_slice_size_io(
       &in_local, &out_local, kernel_height, cfg.stride_height,
       cfg.padding_top, cfg.padding_bottom, &in_slice_height,
       &out_slice_height));
-  TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_weights(
-      &weights_local, &bias_local, weight_out_ch_dimension, &slice_channels));
+  TF_LITE_ENSURE_STATUS(
+      ops::micro::arc_scratch_buffer_calc_slice_size_weights(
+          &weights_local, &bias_local, weight_out_ch_dimension,
+          &slice_channels));

   /* is_local indicates that the tensor is already in local memory,
      so in that case the original tensor can be used,
@@ -330,10 +334,12 @@ TfLiteStatus EvalMliQuantizedPerChannel(
   const bool w_is_local = weights_local.data == mli_weights.data;
   const bool b_is_local = bias_local.data == mli_bias.data;

-  TensorSlicer w_slice(&mli_weights, weight_out_ch_dimension, slice_channels);
-  TensorSlicer b_slice(&mli_bias, weight_out_ch_dimension, slice_channels);
-  TensorSlicer out_ch_slice(&mli_out, out_tensor_ch_dimension, slice_channels,
-                            0, 0, 0, true);
+  ops::micro::TensorSlicer w_slice(&mli_weights, weight_out_ch_dimension,
+                                   slice_channels);
+  ops::micro::TensorSlicer b_slice(&mli_bias, weight_out_ch_dimension,
+                                   slice_channels);
+  ops::micro::TensorSlicer out_ch_slice(&mli_out, out_tensor_ch_dimension,
+                                        slice_channels, 0, 0, 0, true);

   mli_tensor* w_ptr = w_is_local ? w_slice.Sub() : &weights_local;
   mli_tensor* b_ptr = b_is_local ? b_slice.Sub() : &bias_local;
@@ -352,15 +358,16 @@ TfLiteStatus EvalMliQuantizedPerChannel(
      dimension. for that the sliceHeight has been calculated. The tensor slicer
      is configured that it will completely slice the nBatch dimension (0) and
      slice the height dimension (1) in chunks of 'sliceHeight' */
-  TensorSlicer in_slice(&mli_in, height_dimension, in_slice_height,
-                        cfg.padding_top, cfg.padding_bottom, overlap);
+  ops::micro::TensorSlicer in_slice(&mli_in, height_dimension,
+                                    in_slice_height, cfg.padding_top,
+                                    cfg.padding_bottom, overlap);

   /* output tensor is alreade sliced in the output channel dimension.
      out_ch_slice.Sub() is the tensor for the amount of output channels of this
      itteration of the weight slice loop. This tensor needs to be further
      sliced over the batch and height dimension. */
-  TensorSlicer out_slice(out_ch_slice.Sub(), height_dimension,
-                         out_slice_height);
+  ops::micro::TensorSlicer out_slice(out_ch_slice.Sub(), height_dimension,
+                                     out_slice_height);

   /* setup the pointers to the local or remote tensor to make the code
    * inside the loop easier. */
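
The comments in the hunk above describe how the input is sliced over the batch and height dimensions in chunks of 'sliceHeight', with padding and an overlap between neighbouring chunks so a convolution window always sees complete rows. The toy program below (self-contained, not the TensorSlicer API) sketches the chunking arithmetic those comments refer to:

    #include <algorithm>
    #include <cstdio>

    // Walk a dimension of size `total` in chunks of `slice_size`, where each
    // chunk re-reads `overlap` rows of its predecessor (as a kernel taller
    // than its stride requires). Assumes slice_size > overlap.
    void SliceDimension(int total, int slice_size, int overlap) {
      for (int pos = 0; pos < total; pos += slice_size - overlap) {
        const int end = std::min(pos + slice_size, total);
        std::printf("chunk [%d, %d)\n", pos, end);
        if (end == total) break;
      }
    }

    int main() {
      SliceDimension(/*total=*/16, /*slice_size=*/6, /*overlap=*/2);
      return 0;
    }
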
@@ -242,10 +242,10 @@ TfLiteStatus EvalMliQuantizedPerChannel(
   mli_weights.el_params.asym.zero_point.pi16 = &filter_zero_point;
   mli_bias.el_params.asym.zero_point.pi16 = &bias_zero_point;

-  ConvertToMliTensor<int8_t>(input, &mli_in);
-  ConvertToMliTensorPerChannel<int8_t>(filter, &mli_weights);
-  ConvertToMliTensorPerChannel<int32_t>(bias, &mli_bias);
-  ConvertToMliTensor<int8_t>(output, &mli_out);
+  ops::micro::ConvertToMliTensor<int8_t>(input, &mli_in);
+  ops::micro::ConvertToMliTensorPerChannel<int8_t>(filter, &mli_weights);
+  ops::micro::ConvertToMliTensorPerChannel<int32_t>(bias, &mli_bias);
+  ops::micro::ConvertToMliTensor<int8_t>(output, &mli_out);

   if (params->activation == kTfLiteActRelu) {
     cfg.relu.type = MLI_RELU_GEN;
@@ -301,7 +301,7 @@ TfLiteStatus EvalMliQuantizedPerChannel(
   mli_mov_cfg_t copy_config;
   mli_mov_cfg_for_copy(&copy_config);

-  TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_conv_tensors(
+  TF_LITE_ENSURE_STATUS(ops::micro::get_arc_scratch_buffer_for_conv_tensors(
       context, &in_local, &weights_local, &bias_local, &out_local));
   /* is_local indicates that the tensor is already in local memory,
      so in that case the original tensor can be used,
@@ -311,10 +311,10 @@ TfLiteStatus EvalMliQuantizedPerChannel(
   const bool w_is_local = weights_local.data == mli_weights.data;
   const bool b_is_local = bias_local.data == mli_bias.data;

-  TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io(
+  TF_LITE_ENSURE_STATUS(ops::micro::arc_scratch_buffer_calc_slice_size_io(
       &in_local, &out_local, kernelHeight, cfg.stride_height, cfg.padding_top,
       cfg.padding_bottom, &inSliceHeight, &outSliceHeight));
-  TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_weights(
+  TF_LITE_ENSURE_STATUS(ops::micro::arc_scratch_buffer_calc_slice_size_weights(
       &weights_local, &bias_local, weight_out_ch_dimension, &slice_channels));

   /* if input channels is not equal to output channels, a channel multiplier
@@ -324,13 +324,14 @@ TfLiteStatus EvalMliQuantizedPerChannel(
     slice_channels = (slice_channels / in_channels) * in_channels;
   }

-  TensorSlicer b_slice(&mli_bias, bias_out_ch_dimension, slice_channels);
-  TensorSlicer w_slice(&mli_weights, weight_out_ch_dimension, slice_channels, 0,
-                       0, 0, true);
-  TensorSlicer out_ch_slice(&mli_out, out_tensor_ch_dimension, slice_channels,
-                            0, 0, 0, true);
-  TensorSlicer in_ch_slice(&mli_in, out_tensor_ch_dimension, slice_channels, 0,
-                           0, 0, true);
+  ops::micro::TensorSlicer b_slice(&mli_bias, bias_out_ch_dimension,
+                                   slice_channels);
+  ops::micro::TensorSlicer w_slice(&mli_weights, weight_out_ch_dimension,
+                                   slice_channels, 0, 0, 0, true);
+  ops::micro::TensorSlicer out_ch_slice(&mli_out, out_tensor_ch_dimension,
+                                        slice_channels, 0, 0, 0, true);
+  ops::micro::TensorSlicer in_ch_slice(&mli_in, out_tensor_ch_dimension,
+                                       slice_channels, 0, 0, 0, true);

   mli_tensor* w_ptr = w_is_local ? w_slice.Sub() : &weights_local;
   mli_tensor* b_ptr = b_is_local ? b_slice.Sub() : &bias_local;
@@ -355,14 +356,16 @@ TfLiteStatus EvalMliQuantizedPerChannel(
      the sliceHeight has been calculated. The tensor slicer is configured that
      it will completely slice the nBatch dimension (0) and slice the height
      dimension (1) in chunks of 'sliceHeight' */
-  TensorSlicer in_slice(in_ch_slice.Sub(), heightDimension, inSliceHeight,
-                        padding_top, padding_bottom, overlap);
+  ops::micro::TensorSlicer in_slice(in_ch_slice.Sub(), heightDimension,
+                                    inSliceHeight, padding_top,
+                                    padding_bottom, overlap);

   /* output tensor is alreade sliced in the output channel dimension.
      out_ch_slice.Sub() is the tensor for the amount of output channels of this
      itteration of the weight slice loop. This tensor needs to be further
      sliced over the batch and height dimension. */
-  TensorSlicer out_slice(out_ch_slice.Sub(), heightDimension, outSliceHeight);
+  ops::micro::TensorSlicer out_slice(out_ch_slice.Sub(), heightDimension,
+                                     outSliceHeight);

   /* setup the pointers to the local or remote tensor to make the code
    * inside the loop easier. */
@@ -29,9 +29,6 @@ limitations under the License.
 #include "tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h"

 namespace tflite {
-namespace ops {
-namespace micro {
-namespace fully_connected {
 namespace {

 struct OpData {
@@ -127,10 +124,10 @@ TfLiteStatus EvalMliQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
   mli_tensor mli_bias = {};
   mli_tensor mli_out = {};

-  ConvertToMliTensor<int8_t>(input, &mli_in);
-  ConvertToMliTensor<int8_t>(filter, &mli_weights);
-  ConvertToMliTensor<int32_t>(bias, &mli_bias);
-  ConvertToMliTensor<int8_t>(output, &mli_out);
+  ops::micro::ConvertToMliTensor<int8_t>(input, &mli_in);
+  ops::micro::ConvertToMliTensor<int8_t>(filter, &mli_weights);
+  ops::micro::ConvertToMliTensor<int32_t>(bias, &mli_bias);
+  ops::micro::ConvertToMliTensor<int8_t>(output, &mli_out);

   /* The input tensor can have more than 2 dimensions. for the compute this
      doesn't make any difference because all the inputs or a batch entry will
@@ -156,9 +153,10 @@ TfLiteStatus EvalMliQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
   int slice_size = mli_weights.shape[weight_out_dimension];

   /* allocate the local buffers, and compute the slice size */
-  TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_fully_connect_tensors(
-      context, &in_local, &weights_local, &bias_local, &out_local));
-  TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_weights(
+  TF_LITE_ENSURE_STATUS(
+      ops::micro::get_arc_scratch_buffer_for_fully_connect_tensors(
+          context, &in_local, &weights_local, &bias_local, &out_local));
+  TF_LITE_ENSURE_STATUS(ops::micro::arc_scratch_buffer_calc_slice_size_weights(
       &weights_local, &bias_local, weight_out_dimension, &slice_size));
   int max_out_slice_size =
       out_local.capacity / mli_hlp_tensor_element_size(&out_local);
@@ -172,10 +170,11 @@ TfLiteStatus EvalMliQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
   const bool w_is_local = weights_local.data == mli_weights.data;
   const bool b_is_local = bias_local.data == mli_bias.data;

-  TensorSlicer w_slice(&mli_weights, weight_out_dimension, slice_size);
-  TensorSlicer b_slice(&mli_bias, weight_out_dimension, slice_size);
-  TensorSlicer out_ch_slice(&mli_out, out_tensor_dimension, slice_size, 0, 0, 0,
-                            true);
+  ops::micro::TensorSlicer w_slice(&mli_weights, weight_out_dimension,
+                                   slice_size);
+  ops::micro::TensorSlicer b_slice(&mli_bias, weight_out_dimension, slice_size);
+  ops::micro::TensorSlicer out_ch_slice(&mli_out, out_tensor_dimension,
+                                        slice_size, 0, 0, 0, true);

   mli_tensor* w_ptr = w_is_local ? w_slice.Sub() : &weights_local;
   mli_tensor* b_ptr = b_is_local ? b_slice.Sub() : &bias_local;
@@ -188,15 +187,15 @@ TfLiteStatus EvalMliQuantizedInt8(TfLiteContext* context, TfLiteNode* node,

   // Slice the input over the batches (one at a time with the size of a
   // complete input)
-  TensorSlicer in_slice(&mli_in, input_size_dimension,
-                        mli_in.shape[input_size_dimension]);
+  ops::micro::TensorSlicer in_slice(&mli_in, input_size_dimension,
+                                    mli_in.shape[input_size_dimension]);

   /* output tensor is alreade sliced in the output size dimension.
      out_ch_slice.Sub() is the tensor for the amount of output size of this
      itteration of the weight slice loop. This tensor needs to be further
      sliced over the batch */
-  TensorSlicer out_slice(out_ch_slice.Sub(), out_tensor_dimension,
-                         slice_size);
+  ops::micro::TensorSlicer out_slice(out_ch_slice.Sub(), out_tensor_dimension,
+                                     slice_size);

   /* setup the pointers to the local or remote tensor to make the code
    * inside the loop easier. */
@@ -359,19 +358,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   return kTfLiteOk;
 }

-}  // namespace fully_connected
-
 TfLiteRegistration Register_FULLY_CONNECTED() {
-  return {/*init=*/fully_connected::Init,
+  return {/*init=*/Init,
           /*free=*/nullptr,
-          /*prepare=*/fully_connected::Prepare,
-          /*invoke=*/fully_connected::Eval,
+          /*prepare=*/Prepare,
+          /*invoke=*/Eval,
           /*profiling_string=*/nullptr,
           /*builtin_code=*/0,
           /*custom_name=*/nullptr,
           /*version=*/0};
 }

-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
@@ -25,13 +25,13 @@ TensorSlicer::TensorSlicer(const mli_tensor* full_tensor, int slice_dim,
                            int slice_size, int padding_pre, int padding_post,
                            int overlap, bool interleave_mode)
     : full_tensor_(full_tensor),
+      sub_tensor_{},
+      sub_cfg_{},
+      done_(false),
       sliceDim_(slice_dim),
       pad_pre_(padding_pre),
       pad_post_(padding_post),
-      overlap_(overlap),
-      sub_cfg_{},
-      sub_tensor_{},
-      done_(false) {
+      overlap_(overlap) {
   /* In the interleave mode, the slicing happens from the deepest dimension up
      to the slice_dim for example in an HWC layout this can mode can be used to
      slice in the C dimenstion. in this mode the data is not contiguous in memory
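
The constructor hunk above only reorders the member initializer list. In C++ members are always initialized in their declaration order, not in the order they are written in the initializer list, and GCC/Clang warn about the mismatch (-Wreorder), which becomes a hard error in builds that use -Werror; matching the declaration order is the usual fix, and that appears to be what this hunk does. A minimal reproduction with invented names:

    // Compile with: g++ -Wall -Werror reorder.cc
    struct Slicer {
      int dim_;    // declared first
      bool done_;  // declared second

      // Writing ": done_(false), dim_(dim)" would still initialize dim_ first,
      // but the mismatch triggers -Wreorder and fails a -Werror build.
      explicit Slicer(int dim) : dim_(dim), done_(false) {}
    };

    int main() {
      Slicer s(1);
      return s.done_ ? 1 : 0;
    }
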
@@ -123,6 +123,10 @@ endif

 CXXFLAGS := $(filter-out -std=c++11,$(CXXFLAGS))
 CCFLAGS := $(filter-out -std=c11,$(CCFLAGS))
+
+ldflags_to_remove = -Wl,--fatal-warnings -Wl,--gc-sections
+LDFLAGS := $(filter-out $(ldflags_to_remove),$(LDFLAGS))
+
 MICROLITE_LIBS := $(filter-out -lm,$(MICROLITE_LIBS))

 CXXFLAGS += $(PLATFORM_FLAGS)
@@ -87,6 +87,10 @@ ifeq ($(TARGET), himax_we1_evb)

 CXXFLAGS := $(filter-out -std=c++11,$(CXXFLAGS))
 CCFLAGS := $(filter-out -std=c11,$(CCFLAGS))
+
+ldflags_to_remove = -Wl,--fatal-warnings -Wl,--gc-sections
+LDFLAGS := $(filter-out $(ldflags_to_remove),$(LDFLAGS))
+
 MICROLITE_LIBS := $(filter-out -lm,$(MICROLITE_LIBS))

 endif