From 441d6983812af97104aa3453b09f3f411117d6c3 Mon Sep 17 00:00:00 2001 From: jacco Date: Tue, 14 Jan 2020 09:52:26 +0100 Subject: [PATCH 01/45] Use datamove in conv wrapper --- tensorflow/lite/micro/kernels/arc/conv.cc | 57 +++++-- .../lite/micro/kernels/arc/scratch_buffers.cc | 146 ++++++++++++++++++ .../lite/micro/kernels/arc/scratch_buffers.h | 42 +++++ .../micro/tools/make/targets/arc_makefile.inc | 5 + .../tools/make/third_party_downloads.inc | 4 +- 5 files changed, 235 insertions(+), 19 deletions(-) create mode 100644 tensorflow/lite/micro/kernels/arc/scratch_buffers.cc create mode 100644 tensorflow/lite/micro/kernels/arc/scratch_buffers.h diff --git a/tensorflow/lite/micro/kernels/arc/conv.cc b/tensorflow/lite/micro/kernels/arc/conv.cc index 69542e12e90..46be76a407b 100644 --- a/tensorflow/lite/micro/kernels/arc/conv.cc +++ b/tensorflow/lite/micro/kernels/arc/conv.cc @@ -25,6 +25,9 @@ limitations under the License. #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" #include "tensorflow/lite/micro/kernels/arc/mli_tf_utils.h" +#include "tensorflow/lite/micro/kernels/arc/scratch_buffers.h" + +#include "mli_api.h" namespace tflite { namespace ops { @@ -139,7 +142,7 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, GetTensorData(im2col), nullptr); } -void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, +TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, TfLiteConvParams* params, OpData* data, const TfLiteTensor* input, const TfLiteTensor* filter, @@ -195,24 +198,43 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, cfg.padding_bottom = data->padding.height + data->padding.height_offset; } - mli_point_to_subtsr_cfg substr_cfg_in = { - {0, 0}, 2, static_cast(mli_in.shape[1])}; - mli_point_to_subtsr_cfg substr_cfg_out = { - {0, 0}, 2, static_cast(mli_out.shape[1])}; - mli_tensor sub_mli_in = {0}; - mli_tensor sub_mli_out = {0}; + // Get first 
input from batch + mli_point_to_subtsr_cfg subtsr_cfg_in = { {0, 0}, 2, static_cast(mli_in.shape[1]) }; + mli_point_to_subtsr_cfg subtsr_cfg_out = { {0, 0}, 2, static_cast(mli_out.shape[1]) }; + mli_tensor sub_mli_in = { 0 }; + mli_tensor sub_mli_out = { 0 }; + mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); + mli_hlp_point_to_subtensor(&mli_out, &subtsr_cfg_out, &sub_mli_out); - const int batches = - MatchingDim(GetTensorShape(input), 0, GetTensorShape(output), 0); + // Tensors for data in fast (local) memory and config to copy data from external to local memory + mli_tensor weights_local = mli_weights; + mli_tensor bias_local = mli_bias; + mli_tensor in_local = sub_mli_in; + mli_tensor out_local = sub_mli_out; + mli_mov_cfg_t copy_config; + mli_mov_cfg_for_copy(©_config); + TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_conv_tensors(context, &in_local, &weights_local, &bias_local, &out_local)); + bool in_is_local = in_local.data == sub_mli_in.data; + bool out_is_local = out_local.data == sub_mli_out.data; + + mli_mov_tensor_sync(&mli_weights, ©_config, &weights_local); + mli_mov_tensor_sync(&mli_bias, ©_config, &bias_local); + const int batches = MatchingDim(GetTensorShape(input), 0, GetTensorShape(output), 0); for (int i = 0; i < batches; i++) { - substr_cfg_in.start_coord[0] = i; - substr_cfg_out.start_coord[0] = i; - mli_hlp_point_to_subtensor(&mli_in, &substr_cfg_in, &sub_mli_in); - mli_hlp_point_to_subtensor(&mli_out, &substr_cfg_out, &sub_mli_out); - - mli_krn_conv2d_hwc_sa8_sa8_sa32(&sub_mli_in, &mli_weights, &mli_bias, - &cfg, &sub_mli_out); + mli_mov_tensor_sync(&sub_mli_in, ©_config, &in_local); + mli_krn_conv2d_hwc_sa8_sa8_sa32(&in_local, &weights_local, &bias_local, &cfg, &out_local); + mli_mov_tensor_sync(&out_local, ©_config, &sub_mli_out); + subtsr_cfg_in.start_coord[0]++; + subtsr_cfg_out.start_coord[0]++; + mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); + mli_hlp_point_to_subtensor(&mli_out, &subtsr_cfg_out, 
&sub_mli_out); + if (in_is_local) { + in_local.data = sub_mli_in.data; + } + if (out_is_local) { + out_local.data = sub_mli_out.data; + } } } else { ConvParams op_params; @@ -233,6 +255,7 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, GetTensorData(bias), GetTensorShape(output), GetTensorData(output)); } + return kTfLiteOk; } void EvalFloat(TfLiteContext* context, TfLiteNode* node, @@ -309,7 +332,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { nullptr, output); break; case kTfLiteInt8: - EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias, + return EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias, output, nullptr); break; case kTfLiteUInt8: diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc b/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc new file mode 100644 index 00000000000..2ac60dd0f25 --- /dev/null +++ b/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc @@ -0,0 +1,146 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/micro/kernels/arc/scratch_buffers.h" +#include + +/* by default use all the XY memory, and half of the DCCM because DCCM is also used + * for the data section and the stack. 
+ * the values can be overruled by adding a -D option to the makefile of the application + */ +#ifndef SCRATCH_MEM_X_SIZE +#ifdef core_config_xy_size +#define SCRATCH_MEM_X_SIZE (core_config_xy_size) +#else +#define SCRATCH_MEM_X_SIZE (0) +#endif +#endif + +#ifndef SCRATCH_MEM_Y_SIZE +#ifdef core_config_xy_size +#define SCRATCH_MEM_Y_SIZE (core_config_xy_size) +#else +#define SCRATCH_MEM_Y_SIZE (0) +#endif +#endif + +#ifndef SCRATCH_MEM_Z_SIZE +#ifdef core_config_dccm_size +#define SCRATCH_MEM_Z_SIZE ((core_config_dccm_size) / 2) +#else +#define SCRATCH_MEM_Z_SIZE (0) +#endif +#endif + +namespace { +#pragma Data(".Xdata") + static int8_t scratch_mem_x[SCRATCH_MEM_X_SIZE]; +#pragma Data() + +#pragma Data(".Ydata") + static int8_t scratch_mem_y[SCRATCH_MEM_Y_SIZE]; +#pragma Data() + +#pragma Data(".Zdata") + static int8_t scratch_mem_z[SCRATCH_MEM_Z_SIZE]; +#pragma Data() +} + +static inline +bool inside_arc_dccm(void* p) { +#if core_config_dccm_present + return ((unsigned)p >= core_config_dccm_base) && ((unsigned)p < core_config_dccm_base + core_config_dccm_size); +#else + return false; +#endif +} +static inline +bool inside_arc_xccm(void* p) { +#if core_config_xy + return ((unsigned)p >= core_config_xy_x_base) && ((unsigned)p < core_config_xy_x_base + core_config_xy_size); +#else + return false; +#endif +} +static inline +bool inside_arc_yccm(void* p) { +#if core_config_xy + return ((unsigned)p >= core_config_xy_y_base) && ((unsigned)p < core_config_xy_y_base + core_config_xy_size); +#else + return false; +#endif +} + +static inline +bool inside_arc_ccm(void* p) { + return inside_arc_dccm(p) || inside_arc_xccm(p) || inside_arc_yccm(p); +} + +TfLiteStatus get_arc_scratch_buffer_for_conv_tensors(TfLiteContext* context, + mli_tensor* in, + mli_tensor* weights, + mli_tensor* bias, + mli_tensor* out) { +#ifdef __Xxy + // Function to assign fast memory from one of 3 scratch buffers. 
+ // Best Fit strategy - memory is asigned to those tensor which leave less memory of bank unused + mli_tensor* tensors[3] = { weights, in, out }; + uint32_t tensor_sizes[3] = { + mli_hlp_count_elem_num(tensors[0], 0), mli_hlp_count_elem_num(tensors[1], 0), mli_hlp_count_elem_num(tensors[2], 0) }; + bool mem_is_free[3] = { true, true, true }; + int8_t* scratch_mem[] = {scratch_mem_x, scratch_mem_y, scratch_mem_z}; + uint32_t scratch_sizes[] = {SCRATCH_MEM_X_SIZE, SCRATCH_MEM_Y_SIZE, SCRATCH_MEM_Z_SIZE}; + + for (int i = 0; i < 3; ++i) { + int best_mem_idx = -1; + int best_mem_delta = INT_MAX; + // only for tensors that are not already located in one of the ccm memories, find a local memory that fits the data size. + if (inside_arc_ccm(tensors[i]->data)) continue; + for (int j = 0; j < 3; ++j) { + // Best Fit + if (mem_is_free[j] && tensor_sizes[i] <= scratch_sizes[j] && scratch_sizes[j] - tensor_sizes[i] < best_mem_delta) { + best_mem_idx = j; + best_mem_delta = scratch_sizes[j] - tensor_sizes[i]; + } + } + if (best_mem_idx >= 0) { + tensors[i]->data = static_cast(scratch_mem[best_mem_idx]); + tensors[i]->capacity = scratch_sizes[best_mem_idx]; + mem_is_free[best_mem_idx] = false; + } else { + return kTfLiteError; + } + } + + // Bias is expected to be much smaller than other operands, not affect performance and can be placed + // in the end of some of already used memory bank (to occupy free space of it) + bool is_bias_allocated = inside_arc_ccm(bias->data); + if (!is_bias_allocated) { + uint32_t bias_mem_requirements = mli_hlp_count_elem_num(bias, 0) * mli_hlp_tensor_element_size(bias); + for (int i = 0; i < 3; ++i) { + if (tensors[i]->capacity - tensor_sizes[i] > bias_mem_requirements) { + bias->data = &((char*)tensors[i]->data)[tensor_sizes[i]]; + bias->capacity = bias_mem_requirements; + tensors[i]->capacity = tensor_sizes[i]; + is_bias_allocated = true; + break; + } + } + } + return (is_bias_allocated) ? 
kTfLiteOk : kTfLiteError; +#else + return kTfLiteOk; +#endif +} diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buffers.h b/tensorflow/lite/micro/kernels/arc/scratch_buffers.h new file mode 100644 index 00000000000..198cc5b83cf --- /dev/null +++ b/tensorflow/lite/micro/kernels/arc/scratch_buffers.h @@ -0,0 +1,42 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_ARC_SCRATCH_BUFFERS_H_ +#define TENSORFLOW_LITE_MICRO_ARC_SCRATCH_BUFFERS_H_ + +#include "tensorflow/lite/c/common.h" +#include "mli_api.h" + +/** + * @brief Function to allocate scratch buffers for the convolution tensors + * + * @detail This function will update the data pointers in the 4 tensors with pointers + * to scratch buffers in fast local memory. 
+ * + * @param context [I] pointer to TfLite context (needed for error handling) + * @param in [IO] pointer to the input tensor + * @param weights [IO] pointer to the weights tensor + * @param bias [IO] pointer to the bias tensor + * @param output [IO] pointer to the output tensor + * + * @return Tf Lite status code + */ +TfLiteStatus get_arc_scratch_buffer_for_conv_tensors(TfLiteContext* context, + mli_tensor* in, + mli_tensor* weights, + mli_tensor* bias, + mli_tensor* out); + +#endif // TENSORFLOW_LITE_MICRO_ARC_SCRATCH_BUFFERS_H_ diff --git a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc index 0f56e5f4641..16e89266614 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc @@ -22,6 +22,7 @@ else endif PLATFORM_FLAGS = -tcf=$(TCF_FILE_NAME) -Hnocopyr -O3 -Hpurge -Hcl -fslp-vectorize-aggressive -ffunction-sections -fdata-sections + PLATFORM_FLAGS += -tcf_core_config PLATFORM_LDFLAGS = -tcf=$(TCF_FILE_NAME) -Hnocopyr -m -Hldopt=-Coutput=memory.map CXXFLAGS += $(PLATFORM_FLAGS) @@ -80,6 +81,10 @@ endif third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_conv2d_spec_api.h \ third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_depthwise_conv2d_spec_api.h \ third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_maxpool_spec_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_mov_api.h + + MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/scratch_buffers.h + MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/scratch_buffers.cc endif # USE_EMBARC_MLI diff --git a/tensorflow/lite/micro/tools/make/third_party_downloads.inc b/tensorflow/lite/micro/tools/make/third_party_downloads.inc index b331db2c80e..69e7910f6c2 100644 --- a/tensorflow/lite/micro/tools/make/third_party_downloads.inc +++ b/tensorflow/lite/micro/tools/make/third_party_downloads.inc @@ -74,8 +74,8 @@ PERSON_MODEL_INT8_MD5 := "8a7d2c70325f53136faea6dde517b8cc" 
EMBARC_OSP_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_osp/archive/embarc_mli.zip" EMBARC_OSP_MD5 := "9eaf7b3a1ed05872a03da9796672a776" -EMBARC_MLI_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/archive/6316034d421cbbb59756239908d7c9a99075a3bb.zip" -EMBARC_MLI_MD5 := "db0910cf0e07e43f74ae7a31de485d56" +EMBARC_MLI_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/archive/428cfd6a89f848e403a8b8ca02eab2a897ae8cd3.zip" +EMBARC_MLI_MD5 := "9c6c8f8877fa6dd738d7ab62665b3a6e" XTENSA_HIFI4_URL :="https://github.com/foss-xtensa/nnlib-hifi4/raw/master/archive/xa_nnlib_04_07.zip" XTENSA_HIFI4_MD5 :="f234764928f9a42901df33a27e118c8b" From 9d6f2440471312a44914db75e77dbe91ab532e7e Mon Sep 17 00:00:00 2001 From: jacco Date: Thu, 16 Jan 2020 15:39:33 +0100 Subject: [PATCH 02/45] add data move functionality to depthwise, fc, pooling --- .../person_detection_test.cc | 2 + .../lite/micro/kernels/arc/depthwise_conv.cc | 57 +++++++++++++------ .../lite/micro/kernels/arc/fully_connected.cc | 46 +++++++++++---- tensorflow/lite/micro/kernels/arc/pooling.cc | 44 ++++++++++---- .../lite/micro/kernels/arc/scratch_buffers.cc | 44 +++++++++++++- .../lite/micro/kernels/arc/scratch_buffers.h | 16 ++++++ 6 files changed, 168 insertions(+), 41 deletions(-) diff --git a/tensorflow/lite/micro/examples/person_detection_experimental/person_detection_test.cc b/tensorflow/lite/micro/examples/person_detection_experimental/person_detection_test.cc index b0979735d4f..cac5596cd83 100644 --- a/tensorflow/lite/micro/examples/person_detection_experimental/person_detection_test.cc +++ b/tensorflow/lite/micro/examples/person_detection_experimental/person_detection_test.cc @@ -28,7 +28,9 @@ limitations under the License. // Create an area of memory to use for input, output, and intermediate arrays. 
constexpr int tensor_arena_size = 125 * 1024; +#pragma Data(".System") uint8_t tensor_arena[tensor_arena_size]; +#pragma Data() TF_LITE_MICRO_TESTS_BEGIN diff --git a/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc b/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc index 6322414f5c6..4cf7b08bda8 100644 --- a/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc @@ -26,6 +26,9 @@ limitations under the License. #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" #include "tensorflow/lite/micro/kernels/arc/mli_tf_utils.h" +#include "tensorflow/lite/micro/kernels/arc/scratch_buffers.h" + +#include "mli_api.h" namespace tflite { namespace ops { @@ -131,7 +134,7 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, GetTensorData(output)); } -void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, +TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, TfLiteDepthwiseConvParams* params, OpData* data, const TfLiteTensor* input, const TfLiteTensor* filter, @@ -186,24 +189,43 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, cfg.padding_bottom = data->padding.height + data->padding.height_offset; } - mli_point_to_subtsr_cfg substr_cfg_in = { - {0, 0}, 2, static_cast(mli_in.shape[1])}; - mli_point_to_subtsr_cfg substr_cfg_out = { - {0, 0}, 2, static_cast(mli_out.shape[1])}; - mli_tensor sub_mli_in = {0}; - mli_tensor sub_mli_out = {0}; + // Get first input from batch + mli_point_to_subtsr_cfg subtsr_cfg_in = { {0, 0}, 2, static_cast(mli_in.shape[1]) }; + mli_point_to_subtsr_cfg subtsr_cfg_out = { {0, 0}, 2, static_cast(mli_out.shape[1]) }; + mli_tensor sub_mli_in = { 0 }; + mli_tensor sub_mli_out = { 0 }; + mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); + mli_hlp_point_to_subtensor(&mli_out, &subtsr_cfg_out, &sub_mli_out); - const int batches = - MatchingDim(GetTensorShape(input), 0, 
GetTensorShape(output), 0); + // Tensors for data in fast (local) memory and config to copy data from external to local memory + mli_tensor weights_local = mli_weights; + mli_tensor bias_local = mli_bias; + mli_tensor in_local = sub_mli_in; + mli_tensor out_local = sub_mli_out; + mli_mov_cfg_t copy_config; + mli_mov_cfg_for_copy(©_config); + TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_conv_tensors(context, &in_local, &weights_local, &bias_local, &out_local)); + bool in_is_local = in_local.data == sub_mli_in.data; + bool out_is_local = out_local.data == sub_mli_out.data; + + mli_mov_tensor_sync(&mli_weights, ©_config, &weights_local); + mli_mov_tensor_sync(&mli_bias, ©_config, &bias_local); + const int batches = MatchingDim(GetTensorShape(input), 0, GetTensorShape(output), 0); for (int i = 0; i < batches; i++) { - substr_cfg_in.start_coord[0] = i; - substr_cfg_out.start_coord[0] = i; - mli_hlp_point_to_subtensor(&mli_in, &substr_cfg_in, &sub_mli_in); - mli_hlp_point_to_subtensor(&mli_out, &substr_cfg_out, &sub_mli_out); - - mli_krn_depthwise_conv2d_hwc_sa8_sa8_sa32(&sub_mli_in, &mli_weights, - &mli_bias, &cfg, &sub_mli_out); + mli_mov_tensor_sync(&sub_mli_in, ©_config, &in_local); + mli_krn_depthwise_conv2d_hwc_sa8_sa8_sa32(&in_local, &weights_local, &bias_local, &cfg, &out_local); + mli_mov_tensor_sync(&out_local, ©_config, &sub_mli_out); + subtsr_cfg_in.start_coord[0]++; + subtsr_cfg_out.start_coord[0]++; + mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); + mli_hlp_point_to_subtensor(&mli_out, &subtsr_cfg_out, &sub_mli_out); + if (in_is_local) { + in_local.data = sub_mli_in.data; + } + if (out_is_local) { + out_local.data = sub_mli_out.data; + } } } else { DepthwiseParams op_params; @@ -230,6 +252,7 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, GetTensorData(bias), GetTensorShape(output), GetTensorData(output)); } + return kTfLiteOk; } void EvalQuantized(TfLiteContext* context, TfLiteNode* node, @@ -311,7 +334,7 @@ 
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { EvalFloat(context, node, params, &data, input, filter, bias, output); break; case kTfLiteInt8: - EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias, + return EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias, output); break; case kTfLiteUInt8: diff --git a/tensorflow/lite/micro/kernels/arc/fully_connected.cc b/tensorflow/lite/micro/kernels/arc/fully_connected.cc index 57203f10487..9c484718b25 100644 --- a/tensorflow/lite/micro/kernels/arc/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/arc/fully_connected.cc @@ -24,6 +24,10 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/micro/kernels/arc/mli_tf_utils.h" +#include "tensorflow/lite/micro/kernels/arc/scratch_buffers.h" + +#include "mli_api.h" + namespace tflite { namespace ops { @@ -95,24 +99,44 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, ConvertToMliTensor(bias, &mli_bias); ConvertToMliTensor(output, &mli_out); - mli_point_to_subtsr_cfg substr_cfg_in = { - {0, 0}, 2, static_cast(mli_in.shape[1])}; - mli_point_to_subtsr_cfg substr_cfg_out = { - {0, 0}, 2, static_cast(mli_out.shape[1])}; + mli_point_to_subtsr_cfg subtsr_cfg_in = {{0, 0}, 2, static_cast(mli_in.shape[1])}; + mli_point_to_subtsr_cfg subtsr_cfg_out = {{0, 0}, 2, static_cast(mli_out.shape[1])}; mli_tensor sub_mli_in = {0}; mli_tensor sub_mli_out = {0}; + mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); + mli_hlp_point_to_subtensor(&mli_out, &subtsr_cfg_out, &sub_mli_out); + + // Tensors for data in fast (local) memory and config to copy data from external to local memory + mli_tensor weights_local = mli_weights; + mli_tensor bias_local = mli_bias; + mli_tensor in_local = sub_mli_in; + mli_tensor out_local = sub_mli_out; + mli_mov_cfg_t copy_config; + 
mli_mov_cfg_for_copy(©_config); + TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_conv_tensors(context, &in_local, &weights_local, &bias_local, &out_local)); + bool in_is_local = in_local.data == sub_mli_in.data; + bool out_is_local = out_local.data == sub_mli_out.data; + + mli_mov_tensor_sync(&mli_weights, ©_config, &weights_local); + mli_mov_tensor_sync(&mli_bias, ©_config, &bias_local); const int batches = MatchingDim(GetTensorShape(input), 0, GetTensorShape(output), 0); for (int i = 0; i < batches; i++) { - substr_cfg_in.start_coord[0] = i; - substr_cfg_out.start_coord[0] = i; - mli_hlp_point_to_subtensor(&mli_in, &substr_cfg_in, &sub_mli_in); - mli_hlp_point_to_subtensor(&mli_out, &substr_cfg_out, &sub_mli_out); - - mli_krn_fully_connected_sa8_sa8_sa32(&sub_mli_in, &mli_weights, &mli_bias, - &sub_mli_out); + mli_mov_tensor_sync(&sub_mli_in, ©_config, &in_local); + mli_krn_fully_connected_sa8_sa8_sa32(&in_local, &weights_local, &bias_local, &out_local); + mli_mov_tensor_sync(&out_local, ©_config, &sub_mli_out); + subtsr_cfg_in.start_coord[0]++; + subtsr_cfg_out.start_coord[0]++; + mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); + mli_hlp_point_to_subtensor(&mli_out, &subtsr_cfg_out, &sub_mli_out); + if (in_is_local) { + in_local.data = sub_mli_in.data; + } + if (out_is_local) { + out_local.data = sub_mli_out.data; + } } } else { FullyConnectedParams op_params; diff --git a/tensorflow/lite/micro/kernels/arc/pooling.cc b/tensorflow/lite/micro/kernels/arc/pooling.cc index 55452013028..ef72a6c0649 100644 --- a/tensorflow/lite/micro/kernels/arc/pooling.cc +++ b/tensorflow/lite/micro/kernels/arc/pooling.cc @@ -21,6 +21,9 @@ limitations under the License. 
#include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" #include "tensorflow/lite/micro/kernels/arc/mli_tf_utils.h" +#include "tensorflow/lite/micro/kernels/arc/scratch_buffers.h" + +#include "mli_api.h" namespace tflite { namespace ops { @@ -97,7 +100,7 @@ void AverageEvalUint8(TfLiteContext* context, const TfLiteNode* node, GetTensorShape(output), GetTensorData(output)); } -void AverageEvalInt8(TfLiteContext* context, const TfLiteNode* node, +TfLiteStatus AverageEvalInt8(TfLiteContext* context, const TfLiteNode* node, const TfLitePoolParams* params, const OpData* data, const TfLiteTensor* input, TfLiteTensor* output) { // Run Average Pooling MLI kernel @@ -128,23 +131,39 @@ void AverageEvalInt8(TfLiteContext* context, const TfLiteNode* node, cfg.padding_bottom = data->padding.height + data->padding.height_offset; } - mli_point_to_subtsr_cfg substr_cfg_in = { - {0, 0}, 2, static_cast(mli_in.shape[1])}; - mli_point_to_subtsr_cfg substr_cfg_out = { - {0, 0}, 2, static_cast(mli_out.shape[1])}; + mli_point_to_subtsr_cfg subtsr_cfg_in = {{0,0}, 2, static_cast(mli_in.shape[1])}; + mli_point_to_subtsr_cfg subtsr_cfg_out = {{0,0}, 2, static_cast(mli_out.shape[1])}; mli_tensor sub_mli_in = {0}; mli_tensor sub_mli_out = {0}; + mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); + mli_hlp_point_to_subtensor(&mli_out, &subtsr_cfg_out, &sub_mli_out); + + // Tensors for data in fast (local) memory and config to copy data from external to local memory + mli_tensor in_local = sub_mli_in; + mli_tensor out_local = sub_mli_out; + mli_mov_cfg_t copy_config; + mli_mov_cfg_for_copy(©_config); + TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_io_tensors(context, &in_local, &out_local)); + bool in_is_local = in_local.data == sub_mli_in.data; + bool out_is_local = out_local.data == sub_mli_out.data; const int batches = MatchingDim(GetTensorShape(input), 0, GetTensorShape(output), 0); for (int i = 0; i < batches; i++) { - 
substr_cfg_in.start_coord[0] = i; - substr_cfg_out.start_coord[0] = i; - mli_hlp_point_to_subtensor(&mli_in, &substr_cfg_in, &sub_mli_in); - mli_hlp_point_to_subtensor(&mli_out, &substr_cfg_out, &sub_mli_out); - - mli_krn_avepool_hwc_sa8(&sub_mli_in, &cfg, &sub_mli_out); + mli_mov_tensor_sync(&sub_mli_in, ©_config, &in_local); + mli_krn_avepool_hwc_sa8(&in_local, &cfg, &out_local); + mli_mov_tensor_sync(&out_local, ©_config, &sub_mli_out); + subtsr_cfg_in.start_coord[0]++; + subtsr_cfg_out.start_coord[0]++; + mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); + mli_hlp_point_to_subtensor(&mli_out, &subtsr_cfg_out, &sub_mli_out); + if (in_is_local) { + in_local.data = sub_mli_in.data; + } + if (out_is_local) { + out_local.data = sub_mli_out.data; + } } } else { int32_t activation_min, activation_max; @@ -163,6 +182,7 @@ void AverageEvalInt8(TfLiteContext* context, const TfLiteNode* node, op_params, GetTensorShape(input), GetTensorData(input), GetTensorShape(output), GetTensorData(output)); } + return kTfLiteOk; } void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node, @@ -227,7 +247,7 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) { AverageEvalUint8(context, node, params, &data, input, output); break; case kTfLiteInt8: - AverageEvalInt8(context, node, params, &data, input, output); + return AverageEvalInt8(context, node, params, &data, input, output); break; default: TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported", diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc b/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc index 2ac60dd0f25..5bcc4752260 100644 --- a/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc +++ b/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc @@ -38,7 +38,9 @@ limitations under the License. 
#ifndef SCRATCH_MEM_Z_SIZE #ifdef core_config_dccm_size -#define SCRATCH_MEM_Z_SIZE ((core_config_dccm_size) / 2) +// temporary disable the use of dccm scratch mem +//#define SCRATCH_MEM_Z_SIZE ((core_config_dccm_size) / 2) +#define SCRATCH_MEM_Z_SIZE (0) #else #define SCRATCH_MEM_Z_SIZE (0) #endif @@ -144,3 +146,43 @@ TfLiteStatus get_arc_scratch_buffer_for_conv_tensors(TfLiteContext* context, return kTfLiteOk; #endif } + +TfLiteStatus get_arc_scratch_buffer_for_io_tensors(TfLiteContext* context, + mli_tensor* in, + mli_tensor* out) { +#ifdef __Xxy + // Function to assign fast memory from one of 3 scratch buffers. + // Best Fit strategy - memory is asigned to those tensor which leave less memory of bank unused + mli_tensor* tensors[2] = { in, out }; + uint32_t tensor_sizes[2] = { + mli_hlp_count_elem_num(tensors[0], 0), mli_hlp_count_elem_num(tensors[1], 0)}; + bool mem_is_free[3] = { true, true, true }; + int8_t* scratch_mem[] = {scratch_mem_x, scratch_mem_y, scratch_mem_z}; + uint32_t scratch_sizes[] = {SCRATCH_MEM_X_SIZE, SCRATCH_MEM_Y_SIZE, SCRATCH_MEM_Z_SIZE}; + int num_tensors = 2; + int num_memories = 3; + + + for (int i = 0; i < num_tensors; ++i) { + int best_mem_idx = -1; + int best_mem_delta = INT_MAX; + // only for tensors that are not already located in one of the ccm memories, find a local memory that fits the data size. 
+ if (inside_arc_ccm(tensors[i]->data)) continue; + for (int j = 0; j < num_memories; ++j) { + // Best Fit + if (mem_is_free[j] && tensor_sizes[i] <= scratch_sizes[j] && scratch_sizes[j] - tensor_sizes[i] < best_mem_delta) { + best_mem_idx = j; + best_mem_delta = scratch_sizes[j] - tensor_sizes[i]; + } + } + if (best_mem_idx >= 0) { + tensors[i]->data = static_cast(scratch_mem[best_mem_idx]); + tensors[i]->capacity = scratch_sizes[best_mem_idx]; + mem_is_free[best_mem_idx] = false; + } else { + return kTfLiteError; + } + } +#endif + return kTfLiteOk; +} \ No newline at end of file diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buffers.h b/tensorflow/lite/micro/kernels/arc/scratch_buffers.h index 198cc5b83cf..d92ecc02d3a 100644 --- a/tensorflow/lite/micro/kernels/arc/scratch_buffers.h +++ b/tensorflow/lite/micro/kernels/arc/scratch_buffers.h @@ -39,4 +39,20 @@ TfLiteStatus get_arc_scratch_buffer_for_conv_tensors(TfLiteContext* context, mli_tensor* bias, mli_tensor* out); +/** + * @brief Function to allocate scratch buffers for kernels with only input and output buffers + * + * @detail This function will update the data pointers in the 2 tensors with pointers + * to scratch buffers in fast local memory. 
+ * + * @param context [I] pointer to TfLite context (needed for error handling) + * @param in [IO] pointer to the input tensor + * @param output [IO] pointer to the output tensor + * + * @return Tf Lite status code + */ +TfLiteStatus get_arc_scratch_buffer_for_io_tensors(TfLiteContext* context, + mli_tensor* in, + mli_tensor* out); + #endif // TENSORFLOW_LITE_MICRO_ARC_SCRATCH_BUFFERS_H_ From bf8b8ac71ca40917a9ba09933179343f03879edb Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Mon, 20 Jan 2020 18:41:26 +0300 Subject: [PATCH 03/45] person_detection example: wrap data with named bss section --- .../person_detection_experimental/main_functions.cc | 2 ++ .../person_detection_test.cc | 4 ++-- tensorflow/lite/micro/kernels/arc/scratch_buffers.cc | 12 ++++++------ 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/tensorflow/lite/micro/examples/person_detection_experimental/main_functions.cc b/tensorflow/lite/micro/examples/person_detection_experimental/main_functions.cc index 719f16b2d36..552b52c9c51 100644 --- a/tensorflow/lite/micro/examples/person_detection_experimental/main_functions.cc +++ b/tensorflow/lite/micro/examples/person_detection_experimental/main_functions.cc @@ -42,7 +42,9 @@ TfLiteTensor* input = nullptr; // An area of memory to use for input, output, and intermediate arrays. constexpr int kTensorArenaSize = 125 * 1024; +#pragma Bss(".tensor_arena") static uint8_t tensor_arena[kTensorArenaSize]; +#pragma Bss() } // namespace // The name of this function is important for Arduino compatibility. 
diff --git a/tensorflow/lite/micro/examples/person_detection_experimental/person_detection_test.cc b/tensorflow/lite/micro/examples/person_detection_experimental/person_detection_test.cc index cac5596cd83..9c7212648cc 100644 --- a/tensorflow/lite/micro/examples/person_detection_experimental/person_detection_test.cc +++ b/tensorflow/lite/micro/examples/person_detection_experimental/person_detection_test.cc @@ -28,9 +28,9 @@ limitations under the License. // Create an area of memory to use for input, output, and intermediate arrays. constexpr int tensor_arena_size = 125 * 1024; -#pragma Data(".System") +#pragma Bss(".tensor_arena") uint8_t tensor_arena[tensor_arena_size]; -#pragma Data() +#pragma Bss() TF_LITE_MICRO_TESTS_BEGIN diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc b/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc index 5bcc4752260..477f4f37b2b 100644 --- a/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc +++ b/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc @@ -47,17 +47,17 @@ limitations under the License. 
#endif namespace { -#pragma Data(".Xdata") +#pragma Bss(".Xdata") static int8_t scratch_mem_x[SCRATCH_MEM_X_SIZE]; -#pragma Data() +#pragma Bss() -#pragma Data(".Ydata") +#pragma Bss(".Ydata") static int8_t scratch_mem_y[SCRATCH_MEM_Y_SIZE]; -#pragma Data() +#pragma Bss() -#pragma Data(".Zdata") +#pragma Bss(".Zdata") static int8_t scratch_mem_z[SCRATCH_MEM_Z_SIZE]; -#pragma Data() +#pragma Bss() } static inline From d6917614dd5d5d3d58e699ab113b08ff07a1b2d6 Mon Sep 17 00:00:00 2001 From: jacco Date: Mon, 20 Jan 2020 16:56:53 +0100 Subject: [PATCH 04/45] add LCF file for ARC target --- .../micro/tools/make/targets/arc/memory.lcf | 49 +++++++++++++++++++ .../micro/tools/make/targets/arc_makefile.inc | 4 +- 2 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 tensorflow/lite/micro/tools/make/targets/arc/memory.lcf diff --git a/tensorflow/lite/micro/tools/make/targets/arc/memory.lcf b/tensorflow/lite/micro/tools/make/targets/arc/memory.lcf new file mode 100644 index 00000000000..1d967bde0fa --- /dev/null +++ b/tensorflow/lite/micro/tools/make/targets/arc/memory.lcf @@ -0,0 +1,49 @@ + # SYSTEM memory regions indicate where external memory might be located. + # The TCF has no specific knowledge of whether SYSTEM regions contain + # external memory or not. 
+ # CCMWRAP memory regions indicate unusable portions of the address space + # due to CCM memory wrapping into upper addresses beyond its size + + MEMORY { + ICCM0 : ORIGIN = 0x00000000, LENGTH = 0x00080000 + # CCMWRAP0: ORIGIN = 0x00080000, LENGTH = 0x00080000 + # SYSTEM0 : ORIGIN = 0x00100000, LENGTH = 0x00700000 + DCCM : ORIGIN = 0x00800000, LENGTH = 0x00080000 + # CCMWRAP1: ORIGIN = 0x00880000, LENGTH = 0x00080000 + # SYSTEM1 : ORIGIN = 0x00900000, LENGTH = 0x00300000 + XCCM : ORIGIN = 0x00c00000, LENGTH = 0x00010000 + # CCMWRAP2: ORIGIN = 0x00c10000, LENGTH = 0x000f0000 + # SYSTEM2 : ORIGIN = 0x00d00000, LENGTH = 0x00100000 + YCCM : ORIGIN = 0x00e00000, LENGTH = 0x00010000 + # CCMWRAP3: ORIGIN = 0x00e10000, LENGTH = 0x000f0000 + # SYSTEM3 : ORIGIN = 0x00f00000, LENGTH = 0x00100000 + } + SECTIONS { + GROUP BLOCK(4): { + .vectors (TEXT) SIZE(DEFINED _IVTSIZE?_IVTSIZE:132): {} + .text? : { *('.text$crt*') } + * (TEXT): {} + * (LIT): {} + .tensor_arena?: {} + } > ICCM0 + + GROUP BLOCK(4): { + /* _SDA_BASE_ computed implicitly */ + .sdata?: {} + .sbss?: {} + .protobuf?: {} + * (DATA): {} + * (BSS): {} + .stack ALIGN(4) SIZE(DEFINED _STACKSIZE?_STACKSIZE:65536): {} + .heap? ALIGN(4) SIZE(DEFINED _HEAPSIZE?_HEAPSIZE:0): {} + } > DCCM + GROUP BLOCK(4): { + .Xdata? : {} + } > XCCM + GROUP BLOCK(4): { + .Ydata? 
: {} + } > YCCM + } + + + diff --git a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc index 16e89266614..09fabd5e2cf 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc @@ -23,7 +23,7 @@ endif PLATFORM_FLAGS = -tcf=$(TCF_FILE_NAME) -Hnocopyr -O3 -Hpurge -Hcl -fslp-vectorize-aggressive -ffunction-sections -fdata-sections PLATFORM_FLAGS += -tcf_core_config - PLATFORM_LDFLAGS = -tcf=$(TCF_FILE_NAME) -Hnocopyr -m -Hldopt=-Coutput=memory.map + PLATFORM_LDFLAGS = -tcf=$(TCF_FILE_NAME) -Hnocopyr -m -Hldopt=-Coutput=memory.map -default_lcf=$(MAKEFILE_DIR)/targets/arc/memory.lcf CXXFLAGS += $(PLATFORM_FLAGS) CXXFLAGS:=$(filter-out -std=c++11,$(CXXFLAGS)) @@ -86,6 +86,8 @@ endif MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/scratch_buffers.h MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/scratch_buffers.cc + MICROLITE_CC_HDRS += tensorflow/lite/micro/tools/make/targets/arc/memory.lcf + endif # USE_EMBARC_MLI endif From bab1f34a3cb829a900f30178cda321b418909ff1 Mon Sep 17 00:00:00 2001 From: jacco Date: Mon, 20 Jan 2020 17:05:42 +0100 Subject: [PATCH 05/45] Update URL to latest MLI lib with optimizations for person detect example --- tensorflow/lite/micro/tools/make/third_party_downloads.inc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/micro/tools/make/third_party_downloads.inc b/tensorflow/lite/micro/tools/make/third_party_downloads.inc index 69e7910f6c2..8c8684ebec6 100644 --- a/tensorflow/lite/micro/tools/make/third_party_downloads.inc +++ b/tensorflow/lite/micro/tools/make/third_party_downloads.inc @@ -74,8 +74,8 @@ PERSON_MODEL_INT8_MD5 := "8a7d2c70325f53136faea6dde517b8cc" EMBARC_OSP_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_osp/archive/embarc_mli.zip" EMBARC_OSP_MD5 := "9eaf7b3a1ed05872a03da9796672a776" -EMBARC_MLI_URL := 
"https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/archive/428cfd6a89f848e403a8b8ca02eab2a897ae8cd3.zip" -EMBARC_MLI_MD5 := "9c6c8f8877fa6dd738d7ab62665b3a6e" +EMBARC_MLI_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/archive/d8702db473472764dcc8d2dff1f68c690d368be3.zip" +EMBARC_MLI_MD5 := "7a798dfe1424971b9ae50cd019e03616" XTENSA_HIFI4_URL :="https://github.com/foss-xtensa/nnlib-hifi4/raw/master/archive/xa_nnlib_04_07.zip" XTENSA_HIFI4_MD5 :="f234764928f9a42901df33a27e118c8b" From 279e034660d296ca3dc3eed1ea604ce61e96a58b Mon Sep 17 00:00:00 2001 From: jacco Date: Wed, 22 Jan 2020 14:46:58 +0100 Subject: [PATCH 06/45] fix memory allocation issue for person detect example --- .../lite/micro/kernels/arc/scratch_buffers.cc | 15 ++++++-- .../micro/tools/make/targets/arc/memory.lcf | 35 ++++++++++--------- 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc b/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc index 477f4f37b2b..4c75a0a0fd4 100644 --- a/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc +++ b/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc @@ -38,9 +38,7 @@ limitations under the License. #ifndef SCRATCH_MEM_Z_SIZE #ifdef core_config_dccm_size -// temporary disable the use of dccm scratch mem -//#define SCRATCH_MEM_Z_SIZE ((core_config_dccm_size) / 2) -#define SCRATCH_MEM_Z_SIZE (0) +#define SCRATCH_MEM_Z_SIZE ((core_config_dccm_size) / 2) #else #define SCRATCH_MEM_Z_SIZE (0) #endif @@ -141,6 +139,17 @@ TfLiteStatus get_arc_scratch_buffer_for_conv_tensors(TfLiteContext* context, } } } + if (!is_bias_allocated) { + uint32_t bias_mem_requirements = mli_hlp_count_elem_num(bias, 0) * mli_hlp_tensor_element_size(bias); + for (int i = 0; i < 3; ++i) { + if (mem_is_free[i]) { + bias->data = static_cast(scratch_mem[i]); + bias->capacity = bias_mem_requirements; + is_bias_allocated = true; + break; + } + } + } return (is_bias_allocated) ? 
kTfLiteOk : kTfLiteError; #else return kTfLiteOk; diff --git a/tensorflow/lite/micro/tools/make/targets/arc/memory.lcf b/tensorflow/lite/micro/tools/make/targets/arc/memory.lcf index 1d967bde0fa..00cf0a3050b 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc/memory.lcf +++ b/tensorflow/lite/micro/tools/make/targets/arc/memory.lcf @@ -5,30 +5,30 @@ # due to CCM memory wrapping into upper addresses beyond its size MEMORY { - ICCM0 : ORIGIN = 0x00000000, LENGTH = 0x00080000 - # CCMWRAP0: ORIGIN = 0x00080000, LENGTH = 0x00080000 - # SYSTEM0 : ORIGIN = 0x00100000, LENGTH = 0x00700000 - DCCM : ORIGIN = 0x00800000, LENGTH = 0x00080000 - # CCMWRAP1: ORIGIN = 0x00880000, LENGTH = 0x00080000 - # SYSTEM1 : ORIGIN = 0x00900000, LENGTH = 0x00300000 - XCCM : ORIGIN = 0x00c00000, LENGTH = 0x00010000 - # CCMWRAP2: ORIGIN = 0x00c10000, LENGTH = 0x000f0000 - # SYSTEM2 : ORIGIN = 0x00d00000, LENGTH = 0x00100000 - YCCM : ORIGIN = 0x00e00000, LENGTH = 0x00010000 - # CCMWRAP3: ORIGIN = 0x00e10000, LENGTH = 0x000f0000 - # SYSTEM3 : ORIGIN = 0x00f00000, LENGTH = 0x00100000 + ICCM0 : ORIGIN = 0x00000000, LENGTH = 0x00010000 + # CCMWRAP0: ORIGIN = 0x00010000, LENGTH = 0x0fff0000 + ICCM1 : ORIGIN = 0x10000000, LENGTH = 0x00080000 + # CCMWRAP1: ORIGIN = 0x10080000, LENGTH = 0x0ff80000 + # SYSTEM0 : ORIGIN = 0x20000000, LENGTH = 0x60000000 + DCCM : ORIGIN = 0x80000000, LENGTH = 0x00080000 + # CCMWRAP2: ORIGIN = 0x80080000, LENGTH = 0x0ff80000 + XCCM : ORIGIN = 0x90000000, LENGTH = 0x00008000 + # CCMWRAP3: ORIGIN = 0x90008000, LENGTH = 0x0fff8000 + YCCM : ORIGIN = 0xa0000000, LENGTH = 0x00008000 + # CCMWRAP4: ORIGIN = 0xa0008000, LENGTH = 0x0fff8000 + # SYSTEM1 : ORIGIN = 0xb0000000, LENGTH = 0x50000000 } SECTIONS { GROUP BLOCK(4): { .vectors (TEXT) SIZE(DEFINED _IVTSIZE?_IVTSIZE:132): {} - .text? : { *('.text$crt*') } + .text? 
: { *('.text$crt*') } * (TEXT): {} * (LIT): {} - .tensor_arena?: {} - } > ICCM0 + .rodata_in_data?:{} + } > ICCM1 GROUP BLOCK(4): { - /* _SDA_BASE_ computed implicitly */ + /* _SDA_BASE_ computed implicitly */ .sdata?: {} .sbss?: {} .protobuf?: {} @@ -36,7 +36,8 @@ * (BSS): {} .stack ALIGN(4) SIZE(DEFINED _STACKSIZE?_STACKSIZE:65536): {} .heap? ALIGN(4) SIZE(DEFINED _HEAPSIZE?_HEAPSIZE:0): {} - } > DCCM + .tensor_arena?: {} + } > DCCM GROUP BLOCK(4): { .Xdata? : {} } > XCCM From b045244f289aacf22c51c9202b68e9ea311e9554 Mon Sep 17 00:00:00 2001 From: jacco Date: Mon, 10 Feb 2020 10:37:30 +0100 Subject: [PATCH 07/45] update MLI lib to performance optimized MLI1.1 pre-release --- tensorflow/lite/micro/tools/make/third_party_downloads.inc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/micro/tools/make/third_party_downloads.inc b/tensorflow/lite/micro/tools/make/third_party_downloads.inc index 8c8684ebec6..6141efedbee 100644 --- a/tensorflow/lite/micro/tools/make/third_party_downloads.inc +++ b/tensorflow/lite/micro/tools/make/third_party_downloads.inc @@ -74,8 +74,8 @@ PERSON_MODEL_INT8_MD5 := "8a7d2c70325f53136faea6dde517b8cc" EMBARC_OSP_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_osp/archive/embarc_mli.zip" EMBARC_OSP_MD5 := "9eaf7b3a1ed05872a03da9796672a776" -EMBARC_MLI_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/archive/d8702db473472764dcc8d2dff1f68c690d368be3.zip" -EMBARC_MLI_MD5 := "7a798dfe1424971b9ae50cd019e03616" +EMBARC_MLI_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/archive/97c09b81bd1c4d0455de298626c271d75faedba2.zip" +EMBARC_MLI_MD5 := "f7c5555a15e7837806cfaeb22d3c7b50" XTENSA_HIFI4_URL :="https://github.com/foss-xtensa/nnlib-hifi4/raw/master/archive/xa_nnlib_04_07.zip" XTENSA_HIFI4_MD5 :="f234764928f9a42901df33a27e118c8b" From f110cdd8303a2365fafa7c9ffab984d27f7538e5 Mon Sep 17 00:00:00 2001 From: jacco Date: Fri, 6 Mar 2020 
15:00:54 +0100 Subject: [PATCH 08/45] Add slicing logic for convolution layers in case the tensors don't fit completely in local memory, slicing is used to split the tensors. --- tensorflow/lite/micro/kernels/arc/conv.cc | 61 +++--- .../lite/micro/kernels/arc/depthwise_conv.cc | 63 +++--- .../lite/micro/kernels/arc/fully_connected.cc | 1 + .../lite/micro/kernels/arc/mli_slicers.cc | 93 +++++++++ .../lite/micro/kernels/arc/mli_slicers.h | 56 +++++ tensorflow/lite/micro/kernels/arc/pooling.cc | 3 + .../lite/micro/kernels/arc/scratch_buf_mgr.cc | 192 ++++++++++++++++++ .../lite/micro/kernels/arc/scratch_buf_mgr.h | 75 +++++++ .../lite/micro/kernels/arc/scratch_buffers.cc | 179 +++++----------- .../lite/micro/kernels/arc/scratch_buffers.h | 75 +++---- .../micro/tools/make/targets/arc_makefile.inc | 4 + 11 files changed, 588 insertions(+), 214 deletions(-) create mode 100644 tensorflow/lite/micro/kernels/arc/mli_slicers.cc create mode 100644 tensorflow/lite/micro/kernels/arc/mli_slicers.h create mode 100644 tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc create mode 100644 tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h diff --git a/tensorflow/lite/micro/kernels/arc/conv.cc b/tensorflow/lite/micro/kernels/arc/conv.cc index 46be76a407b..8141154147b 100644 --- a/tensorflow/lite/micro/kernels/arc/conv.cc +++ b/tensorflow/lite/micro/kernels/arc/conv.cc @@ -26,6 +26,8 @@ limitations under the License. 
#include "tensorflow/lite/kernels/padding.h" #include "tensorflow/lite/micro/kernels/arc/mli_tf_utils.h" #include "tensorflow/lite/micro/kernels/arc/scratch_buffers.h" +#include "tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h" +#include "tensorflow/lite/micro/kernels/arc/mli_slicers.h" #include "mli_api.h" @@ -198,44 +200,51 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, cfg.padding_bottom = data->padding.height + data->padding.height_offset; } - // Get first input from batch - mli_point_to_subtsr_cfg subtsr_cfg_in = { {0, 0}, 2, static_cast(mli_in.shape[1]) }; - mli_point_to_subtsr_cfg subtsr_cfg_out = { {0, 0}, 2, static_cast(mli_out.shape[1]) }; - mli_tensor sub_mli_in = { 0 }; - mli_tensor sub_mli_out = { 0 }; - mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); - mli_hlp_point_to_subtensor(&mli_out, &subtsr_cfg_out, &sub_mli_out); + const int heightDimension = 1; + int inSliceHeight = 0; + int outSliceHeight = 0; + const int kernelHeight = static_cast(mli_weights.shape[KRNL_H_DIM_HWC]); + const int overlap = kernelHeight - cfg.stride_height; // Tensors for data in fast (local) memory and config to copy data from external to local memory mli_tensor weights_local = mli_weights; mli_tensor bias_local = mli_bias; - mli_tensor in_local = sub_mli_in; - mli_tensor out_local = sub_mli_out; + mli_tensor in_local = mli_in; + mli_tensor out_local = mli_out; mli_mov_cfg_t copy_config; mli_mov_cfg_for_copy(©_config); TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_conv_tensors(context, &in_local, &weights_local, &bias_local, &out_local)); - bool in_is_local = in_local.data == sub_mli_in.data; - bool out_is_local = out_local.data == sub_mli_out.data; + TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io(&in_local, &out_local, kernelHeight, cfg.stride_height, &inSliceHeight, &outSliceHeight)); + + const bool in_is_local = in_local.data == mli_in.data; + const bool out_is_local = out_local.data == mli_out.data; + 
+ /* mli_in tensor contains batches of HWC tensors. so it is a 4 dimensional tensor. + because the mli kernel will process one HWC tensor at a time, the 4 dimensional tensor needs to be sliced into nBatch 3 dimensional tensors. + on top of that there could be a need to also slice in the Height dimension. for that the sliceHeight has been calculated. + The tensor slicer is configured that it will completely slice the nBatch dimension (0) and slice the height dimension (1) + in chunks of 'sliceHeight' */ + TensorSlicer in_slice(&mli_in, heightDimension, inSliceHeight, cfg.padding_top, cfg.padding_bottom, overlap); + TensorSlicer out_slice(&mli_out, heightDimension, outSliceHeight); + + mli_tensor *in_ptr = in_is_local ? in_slice.Sub() : &in_local; + mli_tensor *out_ptr = out_is_local ? out_slice.Sub() : &out_local; mli_mov_tensor_sync(&mli_weights, ©_config, &weights_local); mli_mov_tensor_sync(&mli_bias, ©_config, &bias_local); - const int batches = MatchingDim(GetTensorShape(input), 0, GetTensorShape(output), 0); - for (int i = 0; i < batches; i++) { - mli_mov_tensor_sync(&sub_mli_in, ©_config, &in_local); - mli_krn_conv2d_hwc_sa8_sa8_sa32(&in_local, &weights_local, &bias_local, &cfg, &out_local); - mli_mov_tensor_sync(&out_local, ©_config, &sub_mli_out); - subtsr_cfg_in.start_coord[0]++; - subtsr_cfg_out.start_coord[0]++; - mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); - mli_hlp_point_to_subtensor(&mli_out, &subtsr_cfg_out, &sub_mli_out); - if (in_is_local) { - in_local.data = sub_mli_in.data; - } - if (out_is_local) { - out_local.data = sub_mli_out.data; - } + while (!out_slice.Done()) { + cfg.padding_top = in_slice.GetPaddingPre(); + cfg.padding_bottom = in_slice.GetPaddingPost(); + + mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); + mli_krn_conv2d_hwc_sa8_sa8_sa32(in_ptr, &weights_local, &bias_local, &cfg, out_ptr); + mli_mov_tensor_sync(out_ptr, ©_config, out_slice.Sub()); + + in_slice.Next(); + out_slice.Next(); } + 
free_arc_scratch_buffers(); } else { ConvParams op_params; op_params.input_offset = -input->params.zero_point; diff --git a/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc b/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc index 4cf7b08bda8..5921c4e4dff 100644 --- a/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc @@ -27,6 +27,8 @@ limitations under the License. #include "tensorflow/lite/kernels/padding.h" #include "tensorflow/lite/micro/kernels/arc/mli_tf_utils.h" #include "tensorflow/lite/micro/kernels/arc/scratch_buffers.h" +#include "tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h" +#include "tensorflow/lite/micro/kernels/arc/mli_slicers.h" #include "mli_api.h" @@ -189,44 +191,53 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, cfg.padding_bottom = data->padding.height + data->padding.height_offset; } - // Get first input from batch - mli_point_to_subtsr_cfg subtsr_cfg_in = { {0, 0}, 2, static_cast(mli_in.shape[1]) }; - mli_point_to_subtsr_cfg subtsr_cfg_out = { {0, 0}, 2, static_cast(mli_out.shape[1]) }; - mli_tensor sub_mli_in = { 0 }; - mli_tensor sub_mli_out = { 0 }; - mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); - mli_hlp_point_to_subtensor(&mli_out, &subtsr_cfg_out, &sub_mli_out); + const int heightDimension = 1; + int inSliceHeight = 0; + int outSliceHeight = 0; + const int kernelHeight = static_cast(mli_weights.shape[KRNL_DW_H_DIM_HWC]); + const int overlap = kernelHeight - cfg.stride_height; // Tensors for data in fast (local) memory and config to copy data from external to local memory mli_tensor weights_local = mli_weights; mli_tensor bias_local = mli_bias; - mli_tensor in_local = sub_mli_in; - mli_tensor out_local = sub_mli_out; + mli_tensor in_local = mli_in; + mli_tensor out_local = mli_out; // this assumes that output shape is already filled in the tensor struct. 
mli_mov_cfg_t copy_config; mli_mov_cfg_for_copy(©_config); + TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_conv_tensors(context, &in_local, &weights_local, &bias_local, &out_local)); - bool in_is_local = in_local.data == sub_mli_in.data; - bool out_is_local = out_local.data == sub_mli_out.data; + /* if the tensor is already in local memory, is_local is true */ + const bool in_is_local = in_local.data == mli_in.data; + const bool out_is_local = out_local.data == mli_out.data; + + TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io(&in_local, &out_local, kernelHeight, cfg.stride_height, &inSliceHeight, &outSliceHeight)); + + /* mli_in tensor contains batches of HWC tensors. so it is a 4 dimensional tensor. + because the mli kernel will process one HWC tensor at a time, the 4 dimensional tensor needs to be sliced into nBatch 3 dimensional tensors. + on top of that there could be a need to also slice in the Height dimension. for that the sliceHeight has been calculated. + The tensor slicer is configured that it will completely slice the nBatch dimension (0) and slice the height dimension (1) + in chunks of 'sliceHeight' */ + TensorSlicer in_slice(&mli_in, heightDimension, inSliceHeight, cfg.padding_top, cfg.padding_bottom, overlap); + TensorSlicer out_slice(&mli_out, heightDimension, outSliceHeight); + + mli_tensor *in_ptr = in_is_local ? in_slice.Sub() : &in_local; + mli_tensor *out_ptr = out_is_local ? 
out_slice.Sub() : &out_local; mli_mov_tensor_sync(&mli_weights, ©_config, &weights_local); mli_mov_tensor_sync(&mli_bias, ©_config, &bias_local); - const int batches = MatchingDim(GetTensorShape(input), 0, GetTensorShape(output), 0); - for (int i = 0; i < batches; i++) { - mli_mov_tensor_sync(&sub_mli_in, ©_config, &in_local); - mli_krn_depthwise_conv2d_hwc_sa8_sa8_sa32(&in_local, &weights_local, &bias_local, &cfg, &out_local); - mli_mov_tensor_sync(&out_local, ©_config, &sub_mli_out); - subtsr_cfg_in.start_coord[0]++; - subtsr_cfg_out.start_coord[0]++; - mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); - mli_hlp_point_to_subtensor(&mli_out, &subtsr_cfg_out, &sub_mli_out); - if (in_is_local) { - in_local.data = sub_mli_in.data; - } - if (out_is_local) { - out_local.data = sub_mli_out.data; - } + while (!out_slice.Done()) { + cfg.padding_top = in_slice.GetPaddingPre(); + cfg.padding_bottom = in_slice.GetPaddingPost(); + + mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); + mli_krn_depthwise_conv2d_hwc_sa8_sa8_sa32(in_ptr, &weights_local, &bias_local, &cfg, out_ptr); + mli_mov_tensor_sync(out_ptr, ©_config, out_slice.Sub()); + + in_slice.Next(); + out_slice.Next(); } + free_arc_scratch_buffers(); } else { DepthwiseParams op_params; op_params.padding_type = PaddingType::kSame; diff --git a/tensorflow/lite/micro/kernels/arc/fully_connected.cc b/tensorflow/lite/micro/kernels/arc/fully_connected.cc index 9c484718b25..42921037481 100644 --- a/tensorflow/lite/micro/kernels/arc/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/arc/fully_connected.cc @@ -25,6 +25,7 @@ limitations under the License. 
#include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/micro/kernels/arc/mli_tf_utils.h" #include "tensorflow/lite/micro/kernels/arc/scratch_buffers.h" +#include "tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h" #include "mli_api.h" diff --git a/tensorflow/lite/micro/kernels/arc/mli_slicers.cc b/tensorflow/lite/micro/kernels/arc/mli_slicers.cc new file mode 100644 index 00000000000..0ae80d1afc3 --- /dev/null +++ b/tensorflow/lite/micro/kernels/arc/mli_slicers.cc @@ -0,0 +1,93 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mli_slicers.h" + +#define MAX(A,B) (((A) > (B))? (A): (B)) +#define MIN(A,B) (((A) > (B))? (B): (A)) + +namespace tflite { +namespace ops { +namespace micro { + +TensorSlicer::TensorSlicer(const mli_tensor* full_tensor, int slice_dim, int slice_size, int padding_pre, int padding_post, int overlap) + : full_tensor_(full_tensor) + , sliceDim_(slice_dim) + , pad_pre_(padding_pre) + , pad_post_(padding_post) + , overlap_(overlap) + , subtsr_cfg_{ {0, 0}, static_cast(slice_dim + 1), static_cast(slice_size) } + , sub_tensor_{0} + , done_(false){ + + ComputeSubTensor(); +} + +void TensorSlicer::ComputeSubTensor(void) { + // subtsr_cfg_ is used to keep track of the itteration. 
+ // A copy is created to update it with the correct clipping and padding for the current slice + mli_point_to_subtsr_cfg cfg_new = subtsr_cfg_; + // add clipping of first_out_dim_size to not exceed total size in that dimensions + // add padding logic + + // begin and end spans the complete input region including padding areas. + const int begin = (int)subtsr_cfg_.start_coord[1] - pad_pre_; + // end is clipped to the end of the full input region. this is needed for cases where the last slice is smaller than the rest. + const int end = MIN(begin + subtsr_cfg_.first_out_dim_size + overlap_, full_tensor_->shape[sliceDim_] + pad_post_); + // The start coordinate of the subtensor is clipped to zero + cfg_new.start_coord[sliceDim_] = MAX(begin, 0); + // and the stop coordinate is clipped to the size of the full tensor + const int stop_coord = MIN(end, full_tensor_->shape[sliceDim_]); + // compute the size of the subtensor + cfg_new.first_out_dim_size = stop_coord - cfg_new.start_coord[sliceDim_]; + + // compute the padding configuration for the current slice. + actual_padding_pre = cfg_new.start_coord[sliceDim_] - begin; + actual_padding_post = end - stop_coord; + + mli_hlp_point_to_subtensor(full_tensor_, &cfg_new, &sub_tensor_); +} +void TensorSlicer::Next(void){ + // TODO make generic for any number of dimensions. 
+ subtsr_cfg_.start_coord[1]+= subtsr_cfg_.first_out_dim_size; + if (subtsr_cfg_.start_coord[1] >= full_tensor_->shape[1]) { + subtsr_cfg_.start_coord[1] = 0; + subtsr_cfg_.start_coord[0]++; + if (subtsr_cfg_.start_coord[0] >= full_tensor_->shape[0]) { + done_ = true; + } + } + if (!done_) ComputeSubTensor(); +} + +bool TensorSlicer::Done(void) { + return done_; +} + +int TensorSlicer::GetPaddingPre(void) { + return actual_padding_pre; +} + +int TensorSlicer::GetPaddingPost(void) { + return actual_padding_post; +} + +mli_tensor* TensorSlicer::Sub(void) { + return &sub_tensor_; +} + +} // namespace micro +} // namespace ops +} // namespace tflite diff --git a/tensorflow/lite/micro/kernels/arc/mli_slicers.h b/tensorflow/lite/micro/kernels/arc/mli_slicers.h new file mode 100644 index 00000000000..40f948a07ef --- /dev/null +++ b/tensorflow/lite/micro/kernels/arc/mli_slicers.h @@ -0,0 +1,56 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ARC_MLI_SLICERS_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_ARC_MLI_SLICERS_H_ + +#include "mli_api.h" +namespace tflite { +namespace ops { +namespace micro { + +class TensorSlicer { +public: + + TensorSlicer(const mli_tensor* full_tensor, int slice_dim, int slice_size, int padding_pre = 0, int padding_post = 0, int overlap = 0); + ~TensorSlicer() = default; + + void Next(); + bool Done(); + int GetPaddingPre(); + int GetPaddingPost(); + + mli_tensor *Sub(); + + // Default constructor is deleted + TensorSlicer() = delete; + + +private: + const mli_tensor* full_tensor_; + mli_tensor sub_tensor_; + mli_point_to_subtsr_cfg subtsr_cfg_; + bool done_; + int sliceDim_; + int pad_pre_, pad_post_, overlap_; + int actual_padding_pre, actual_padding_post; + + void ComputeSubTensor(); +}; + +} // namespace micro +} // namespace ops +} // namespace tflite +#endif //TENSORFLOW_LITE_MICRO_KERNELS_ARC_MLI_SLICERS_H_ diff --git a/tensorflow/lite/micro/kernels/arc/pooling.cc b/tensorflow/lite/micro/kernels/arc/pooling.cc index ef72a6c0649..dab0ad7e314 100644 --- a/tensorflow/lite/micro/kernels/arc/pooling.cc +++ b/tensorflow/lite/micro/kernels/arc/pooling.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/lite/kernels/padding.h" #include "tensorflow/lite/micro/kernels/arc/mli_tf_utils.h" #include "tensorflow/lite/micro/kernels/arc/scratch_buffers.h" +#include "tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h" #include "mli_api.h" @@ -154,6 +155,7 @@ TfLiteStatus AverageEvalInt8(TfLiteContext* context, const TfLiteNode* node, mli_mov_tensor_sync(&sub_mli_in, ©_config, &in_local); mli_krn_avepool_hwc_sa8(&in_local, &cfg, &out_local); mli_mov_tensor_sync(&out_local, ©_config, &sub_mli_out); + if (i == batches -1) break; subtsr_cfg_in.start_coord[0]++; subtsr_cfg_out.start_coord[0]++; mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); @@ -165,6 +167,7 @@ TfLiteStatus AverageEvalInt8(TfLiteContext* context, const TfLiteNode* node, out_local.data = sub_mli_out.data; } } + free_arc_scratch_buffers(); } else { int32_t activation_min, activation_max; (void)CalculateActivationRangeQuantized(context, params->activation, output, diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc b/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc new file mode 100644 index 00000000000..26f4f45f17f --- /dev/null +++ b/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc @@ -0,0 +1,192 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h" +#include "tensorflow/lite/micro/kernels/arc/scratch_buffers.h" +#include +#define MAX(A,B) (((A) > (B))? (A): (B)) +#define MIN(A,B) (((A) > (B))? (B): (A)) + +namespace tflite { +namespace ops { +namespace micro { + + + +void get_arc_two_buffer_sizes(int requestsize1, int requestsize2, int *grantsize1, int *grantsize2) { + int maxrequest = 0; + int secondrequest = 0; + int maxavailable = 0; + int secondavail = 0; + + // determine the largest requested buffer. + if (requestsize1 > requestsize2) { + maxrequest = requestsize1; + secondrequest = requestsize2; + } else { + maxrequest = requestsize2; + secondrequest = requestsize1; + } + + // find the two largest available buffers. + get_arc_scratch_buffer_two_max_sizes(&maxavailable, &secondavail); + + // in case two buffers are available, the largest buffer can go to the largest request. + if (secondavail > 0) { // this condition can be enhanced to prevent cases where the second buffer is so small that it is better to use one buffer and split it. + if (requestsize1 > requestsize2) { + *grantsize1 = maxavailable; + *grantsize2 = secondavail; + } else { + *grantsize1 = secondavail; + *grantsize2 = maxavailable; + } + } else { + // In case only one buffer is available, + // use only the max buffer, and split it. + // TODO compute optimal split ratio based on request ratio. 
+ *grantsize1 = maxavailable / 2; + *grantsize2 = maxavailable / 2; + } +} + +TfLiteStatus get_arc_scratch_buffer_for_conv_tensors(TfLiteContext* context, + mli_tensor* in, + mli_tensor* weights, + mli_tensor* bias, + mli_tensor* out) { +#ifdef __Xxy + + if (!inside_arc_ccm(weights->data)) { + int weights_size = mli_hlp_count_elem_num(weights, 0) * mli_hlp_tensor_element_size(weights); + weights->data = get_arc_scratch_buffer(weights_size); + weights->capacity = weights_size; + if (weights->data == NULL) return kTfLiteError; + } + + if (!inside_arc_ccm(bias->data)) { + uint32_t bias_mem_requirements = mli_hlp_count_elem_num(bias, 0) * mli_hlp_tensor_element_size(bias); + bias->data = get_arc_scratch_buffer(bias_mem_requirements); + bias->capacity = bias_mem_requirements; + if (bias->data == NULL) return kTfLiteError; + } + + int requestSizeIn = 0; + int requestSizeOut = 0; + int grantsizeIn = 0; + int grantsizeOut = 0; + if (!inside_arc_ccm(in->data)) { + // In case the input tensor contains multiple batches, it has rank 4 + // because the mli kernel cannot operate on batches, we need to have the size + // of a single batch. that is why the startRank is 1 in case of input rank 4 + int startRank = in->rank - 3; // tOdo explain + requestSizeIn = mli_hlp_count_elem_num(in, startRank) * mli_hlp_tensor_element_size(in); + } + if (!inside_arc_ccm(out->data)) { + // In case the input tensor contains multiple batches, it has rank 4 + // because the mli kernel cannot operate on batches, we need to have the size + // of a single batch. 
that is why the startRank is 1 in case of input rank 4 + int startRank = out->rank - 3; + requestSizeOut = mli_hlp_count_elem_num(out, startRank) * mli_hlp_tensor_element_size(out); + } + + get_arc_two_buffer_sizes(requestSizeIn, requestSizeOut, &grantsizeIn, &grantsizeOut); + + if (!inside_arc_ccm(in->data)) { + in->data = get_arc_scratch_buffer(grantsizeIn); + in->capacity = grantsizeIn; + if (in->data == NULL) return kTfLiteError; + } + if (!inside_arc_ccm(out->data)) { + out->data = get_arc_scratch_buffer(grantsizeOut); + out->capacity = grantsizeOut; + if (out->data == NULL) return kTfLiteError; + } + + return kTfLiteOk; +#else + return kTfLiteOk; +#endif +} + +TfLiteStatus arc_scratch_buffer_calc_slice_size_io( + const mli_tensor *in, + const mli_tensor *out, + const int kernelHeight, + const int strideHeight, + int *inSliceHeight, + int *outSliceHeight) { + const int heightDimension = 1; // todo: compute from rank + const int inHeight = in->shape[heightDimension]; + const int outHeight = out->shape[heightDimension]; + const int lineSizeIn = mli_hlp_count_elem_num(in, heightDimension + 1) * mli_hlp_tensor_element_size(in); + const int lineSizeOut = mli_hlp_count_elem_num(out, heightDimension + 1) * mli_hlp_tensor_element_size(out); + int maxLinesIn = 0; + int maxLinesOut = 0; + int maxOutLinesForInput = 0; + bool fit = (in->capacity >= inHeight * lineSizeIn) && (out->capacity >= outHeight * lineSizeOut); + if (fit) { + // in case both tensors completely fit in the capacity, there is no need for slicing + *inSliceHeight = inHeight; + *outSliceHeight = outHeight; + } else { + // First compute how many lines fit into the input tensor, and compute how many output lines can be computed with that. 
+ maxLinesIn = MIN(inHeight, in->capacity / lineSizeIn);
+ if (maxLinesIn >= inHeight) {
+ maxOutLinesForInput = outHeight;
+ } else {
+ maxOutLinesForInput = (maxLinesIn - kernelHeight + 1) / strideHeight; // TODO add padding exceptions and test by making fit=false;
+ }
+ // Then compute how many output lines fit into the output tensor.
+ maxLinesOut = MIN(outHeight, out->capacity / lineSizeOut);
+ // the smallest of the two determines the slice height for the output, and the derived slice height for the input.
+ *outSliceHeight = MIN(maxOutLinesForInput, maxLinesOut);
+ *inSliceHeight = *outSliceHeight * strideHeight;
+ }
+ if ((*inSliceHeight > 0) && (*outSliceHeight > 0)) {
+ return kTfLiteOk;
+ } else {
+ return kTfLiteError;
+ }
+}
+
+TfLiteStatus get_arc_scratch_buffer_for_io_tensors(TfLiteContext* context,
+ mli_tensor* in,
+ mli_tensor* out) {
+#ifdef __Xxy
+ // Function to assign fast memory from one of 3 scratch buffers.
+ // Best Fit strategy - memory is assigned to the tensors that leave the least memory of the bank unused
+ mli_tensor* tensors[2] = { in, out };
+ uint32_t tensor_sizes[2] = {
+ mli_hlp_count_elem_num(tensors[0], 0), mli_hlp_count_elem_num(tensors[1], 0)};
+ int num_tensors = 2;
+
+
+ for (int i = 0; i < num_tensors; ++i) {
+ // only for tensors that are not already located in one of the ccm memories, find a local memory that fits the data size.
+ if (inside_arc_ccm(tensors[i]->data)) continue; + tensors[i]->data = get_arc_scratch_buffer(tensor_sizes[i]); + tensors[i]->capacity = tensor_sizes[i]; + + if (tensors[i]->data == NULL) { + return kTfLiteError; + } + } +#endif + return kTfLiteOk; +} + +} // namespace micro +} // namespace ops +} // namespace tflite \ No newline at end of file diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h b/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h new file mode 100644 index 00000000000..a27df8a5358 --- /dev/null +++ b/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h @@ -0,0 +1,75 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_ARC_SCRATCH_BUF_MGR_H_ +#define TENSORFLOW_LITE_MICRO_ARC_SCRATCH_BUF_MGR_H_ + +#include "tensorflow/lite/c/common.h" +#include "mli_api.h" + +namespace tflite { +namespace ops { +namespace micro { + +/** + * @brief Function to allocate scratch buffers for the convolution tensors + * + * @detail This function will update the data pointers in the 4 tensors with pointers + * to scratch buffers in fast local memory. 
+ * + * @param context [I] pointer to TfLite context (needed for error handling) + * @param in [IO] pointer to the input tensor + * @param weights [IO] pointer to the weights tensor + * @param bias [IO] pointer to the bias tensor + * @param output [IO] pointer to the output tensor + * + * @return Tf Lite status code + */ +TfLiteStatus get_arc_scratch_buffer_for_conv_tensors(TfLiteContext* context, + mli_tensor* in, + mli_tensor* weights, + mli_tensor* bias, + mli_tensor* out); + +/** + * @brief Function to allocate scratch buffers for kernels with only input and output buffers + * + * @detail This function will update the data pointers in the 2 tensors with pointers + * to scratch buffers in fast local memory. + * + * @param context [I] pointer to TfLite context (needed for error handling) + * @param in [IO] pointer to the input tensor + * @param output [IO] pointer to the output tensor + * + * @return Tf Lite status code + */ +TfLiteStatus get_arc_scratch_buffer_for_io_tensors(TfLiteContext* context, + mli_tensor* in, + mli_tensor* out); + +TfLiteStatus arc_scratch_buffer_calc_slice_size_io( + const mli_tensor *in, + const mli_tensor *out, + const int kernelHeight, + const int strideHeight, + int *inSliceHeight, + int *outSliceHeight); + + +} // namespace micro +} // namespace ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_ARC_SCRATCH_BUF_MGR_H_ diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc b/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc index 4c75a0a0fd4..5ef1b445a22 100644 --- a/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc +++ b/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc @@ -15,6 +15,12 @@ limitations under the License. #include "tensorflow/lite/micro/kernels/arc/scratch_buffers.h" #include +#define MAX(A,B) (((A) > (B))? (A): (B)) +#define MIN(A,B) (((A) > (B))? 
(B): (A)) + +namespace tflite { +namespace ops { +namespace micro { /* by default use all the XY memory, and half of the DCCM because DCCM is also used * for the data section and the stack. @@ -58,140 +64,57 @@ namespace { #pragma Bss() } -static inline -bool inside_arc_dccm(void* p) { -#if core_config_dccm_present - return ((unsigned)p >= core_config_dccm_base) && ((unsigned)p < core_config_dccm_base + core_config_dccm_size); -#else - return false; -#endif -} -static inline -bool inside_arc_xccm(void* p) { -#if core_config_xy - return ((unsigned)p >= core_config_xy_x_base) && ((unsigned)p < core_config_xy_x_base + core_config_xy_size); -#else - return false; -#endif -} -static inline -bool inside_arc_yccm(void* p) { -#if core_config_xy - return ((unsigned)p >= core_config_xy_y_base) && ((unsigned)p < core_config_xy_y_base + core_config_xy_size); -#else - return false; -#endif -} +static int8_t* scratch_mem[] = {scratch_mem_x, scratch_mem_y, scratch_mem_z}; +static uint32_t scratch_sizes[] = {SCRATCH_MEM_X_SIZE, SCRATCH_MEM_Y_SIZE, SCRATCH_MEM_Z_SIZE}; -static inline -bool inside_arc_ccm(void* p) { - return inside_arc_dccm(p) || inside_arc_xccm(p) || inside_arc_yccm(p); -} -TfLiteStatus get_arc_scratch_buffer_for_conv_tensors(TfLiteContext* context, - mli_tensor* in, - mli_tensor* weights, - mli_tensor* bias, - mli_tensor* out) { -#ifdef __Xxy - // Function to assign fast memory from one of 3 scratch buffers. 
- // Best Fit strategy - memory is asigned to those tensor which leave less memory of bank unused - mli_tensor* tensors[3] = { weights, in, out }; - uint32_t tensor_sizes[3] = { - mli_hlp_count_elem_num(tensors[0], 0), mli_hlp_count_elem_num(tensors[1], 0), mli_hlp_count_elem_num(tensors[2], 0) }; - bool mem_is_free[3] = { true, true, true }; - int8_t* scratch_mem[] = {scratch_mem_x, scratch_mem_y, scratch_mem_z}; - uint32_t scratch_sizes[] = {SCRATCH_MEM_X_SIZE, SCRATCH_MEM_Y_SIZE, SCRATCH_MEM_Z_SIZE}; - - for (int i = 0; i < 3; ++i) { - int best_mem_idx = -1; - int best_mem_delta = INT_MAX; - // only for tensors that are not already located in one of the ccm memories, find a local memory that fits the data size. - if (inside_arc_ccm(tensors[i]->data)) continue; - for (int j = 0; j < 3; ++j) { - // Best Fit - if (mem_is_free[j] && tensor_sizes[i] <= scratch_sizes[j] && scratch_sizes[j] - tensor_sizes[i] < best_mem_delta) { - best_mem_idx = j; - best_mem_delta = scratch_sizes[j] - tensor_sizes[i]; - } - } - if (best_mem_idx >= 0) { - tensors[i]->data = static_cast(scratch_mem[best_mem_idx]); - tensors[i]->capacity = scratch_sizes[best_mem_idx]; - mem_is_free[best_mem_idx] = false; - } else { - return kTfLiteError; +void *get_arc_scratch_buffer(int size) { + // Function to asign fast memory from one of 3 scratch buffers. + // Best Fit strategy - memory is allocated from that memory bank that leaves the least unused memory. + void *buf = NULL; + int best_mem_idx = -1; + int best_mem_delta = INT_MAX; + // find a local memory that fits the data size. 
+ for (int mem_idx = 0; mem_idx < sizeof(scratch_mem)/sizeof(scratch_mem[0]); ++mem_idx) { + // Best Fit + if ((size <= scratch_sizes[mem_idx]) && (scratch_sizes[mem_idx] - size < best_mem_delta)) { + best_mem_idx = mem_idx; + best_mem_delta = scratch_sizes[mem_idx] - size; } } - - // Bias is expected to be much smaller than other operands, not affect performance and can be placed - // in the end of some of already used memory bank (to occupy free space of it) - bool is_bias_allocated = inside_arc_ccm(bias->data); - if (!is_bias_allocated) { - uint32_t bias_mem_requirements = mli_hlp_count_elem_num(bias, 0) * mli_hlp_tensor_element_size(bias); - for (int i = 0; i < 3; ++i) { - if (tensors[i]->capacity - tensor_sizes[i] > bias_mem_requirements) { - bias->data = &((char*)tensors[i]->data)[tensor_sizes[i]]; - bias->capacity = bias_mem_requirements; - tensors[i]->capacity = tensor_sizes[i]; - is_bias_allocated = true; - break; - } - } + if (best_mem_idx >= 0) { + buf = static_cast(scratch_mem[best_mem_idx]); + scratch_mem[best_mem_idx] += size; + scratch_sizes[best_mem_idx] -= size; } - if (!is_bias_allocated) { - uint32_t bias_mem_requirements = mli_hlp_count_elem_num(bias, 0) * mli_hlp_tensor_element_size(bias); - for (int i = 0; i < 3; ++i) { - if (mem_is_free[i]) { - bias->data = static_cast(scratch_mem[i]); - bias->capacity = bias_mem_requirements; - is_bias_allocated = true; - break; - } - } - } - return (is_bias_allocated) ? kTfLiteOk : kTfLiteError; -#else - return kTfLiteOk; -#endif + return buf; } -TfLiteStatus get_arc_scratch_buffer_for_io_tensors(TfLiteContext* context, - mli_tensor* in, - mli_tensor* out) { -#ifdef __Xxy - // Function to assign fast memory from one of 3 scratch buffers. 
- // Best Fit strategy - memory is asigned to those tensor which leave less memory of bank unused - mli_tensor* tensors[2] = { in, out }; - uint32_t tensor_sizes[2] = { - mli_hlp_count_elem_num(tensors[0], 0), mli_hlp_count_elem_num(tensors[1], 0)}; - bool mem_is_free[3] = { true, true, true }; - int8_t* scratch_mem[] = {scratch_mem_x, scratch_mem_y, scratch_mem_z}; - uint32_t scratch_sizes[] = {SCRATCH_MEM_X_SIZE, SCRATCH_MEM_Y_SIZE, SCRATCH_MEM_Z_SIZE}; - int num_tensors = 2; - int num_memories = 3; - - - for (int i = 0; i < num_tensors; ++i) { - int best_mem_idx = -1; - int best_mem_delta = INT_MAX; - // only for tensors that are not already located in one of the ccm memories, find a local memory that fits the data size. - if (inside_arc_ccm(tensors[i]->data)) continue; - for (int j = 0; j < num_memories; ++j) { - // Best Fit - if (mem_is_free[j] && tensor_sizes[i] <= scratch_sizes[j] && scratch_sizes[j] - tensor_sizes[i] < best_mem_delta) { - best_mem_idx = j; - best_mem_delta = scratch_sizes[j] - tensor_sizes[i]; - } - } - if (best_mem_idx >= 0) { - tensors[i]->data = static_cast(scratch_mem[best_mem_idx]); - tensors[i]->capacity = scratch_sizes[best_mem_idx]; - mem_is_free[best_mem_idx] = false; - } else { - return kTfLiteError; +void get_arc_scratch_buffer_two_max_sizes(int *size1, int *size2) { + int maxavailable = 0; + int secondavail = 0; + // find the two largest available buffers. 
+ for (int i = 0; i < 3; i++) { + if (scratch_sizes[i] > maxavailable) { + secondavail = maxavailable; + maxavailable = scratch_sizes[i]; + } else if (scratch_sizes[i] > secondavail) { + secondavail = scratch_sizes[i]; } } -#endif - return kTfLiteOk; -} \ No newline at end of file + *size1 = maxavailable; + *size2 = secondavail; +} + +void free_arc_scratch_buffers(void) { + scratch_mem[0] = scratch_mem_x; + scratch_mem[1] = scratch_mem_y; + scratch_mem[2] = scratch_mem_z; + scratch_sizes[0] = SCRATCH_MEM_X_SIZE; + scratch_sizes[1] = SCRATCH_MEM_Y_SIZE; + scratch_sizes[2] = SCRATCH_MEM_Z_SIZE; +} + +} // namespace micro +} // namespace ops +} // namespace tflite \ No newline at end of file diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buffers.h b/tensorflow/lite/micro/kernels/arc/scratch_buffers.h index d92ecc02d3a..52a12c7899d 100644 --- a/tensorflow/lite/micro/kernels/arc/scratch_buffers.h +++ b/tensorflow/lite/micro/kernels/arc/scratch_buffers.h @@ -19,40 +19,47 @@ limitations under the License. #include "tensorflow/lite/c/common.h" #include "mli_api.h" -/** - * @brief Function to allocate scratch buffers for the convolution tensors - * - * @detail This function will update the data pointers in the 4 tensors with pointers - * to scratch buffers in fast local memory. 
- * - * @param context [I] pointer to TfLite context (needed for error handling) - * @param in [IO] pointer to the input tensor - * @param weights [IO] pointer to the weights tensor - * @param bias [IO] pointer to the bias tensor - * @param output [IO] pointer to the output tensor - * - * @return Tf Lite status code - */ -TfLiteStatus get_arc_scratch_buffer_for_conv_tensors(TfLiteContext* context, - mli_tensor* in, - mli_tensor* weights, - mli_tensor* bias, - mli_tensor* out); +namespace tflite { +namespace ops { +namespace micro { -/** - * @brief Function to allocate scratch buffers for kernels with only input and output buffers - * - * @detail This function will update the data pointers in the 2 tensors with pointers - * to scratch buffers in fast local memory. - * - * @param context [I] pointer to TfLite context (needed for error handling) - * @param in [IO] pointer to the input tensor - * @param output [IO] pointer to the output tensor - * - * @return Tf Lite status code - */ -TfLiteStatus get_arc_scratch_buffer_for_io_tensors(TfLiteContext* context, - mli_tensor* in, - mli_tensor* out); + +void free_arc_scratch_buffers(void); +void *get_arc_scratch_buffer(int size);// Function to assign fast memory from one of 3 scratch buffers. 
+ +void get_arc_scratch_buffer_two_max_sizes(int *size1, int *size2); + +static inline bool inside_arc_dccm(void* p) { +#if core_config_dccm_present + return ((unsigned)p >= core_config_dccm_base) && ((unsigned)p < core_config_dccm_base + core_config_dccm_size); +#else + return false; +#endif +} + +static inline bool inside_arc_xccm(void* p) { +#if core_config_xy + return ((unsigned)p >= core_config_xy_x_base) && ((unsigned)p < core_config_xy_x_base + core_config_xy_size); +#else + return false; +#endif +} + +static inline bool inside_arc_yccm(void* p) { +#if core_config_xy + return ((unsigned)p >= core_config_xy_y_base) && ((unsigned)p < core_config_xy_y_base + core_config_xy_size); +#else + return false; +#endif +} + +static inline +bool inside_arc_ccm(void* p) { + return inside_arc_dccm(p) || inside_arc_xccm(p) || inside_arc_yccm(p); +} + +} // namespace micro +} // namespace ops +} // namespace tflite #endif // TENSORFLOW_LITE_MICRO_ARC_SCRATCH_BUFFERS_H_ diff --git a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc index 09fabd5e2cf..a1f5546b8f5 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc @@ -85,6 +85,10 @@ endif MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/scratch_buffers.h MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/scratch_buffers.cc + MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h + MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc + MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/mli_slicers.h + MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/mli_slicers.cc MICROLITE_CC_HDRS += tensorflow/lite/micro/tools/make/targets/arc/memory.lcf From c2e501e017b31b94c30bc5903bc613a8b0d7e109 Mon Sep 17 00:00:00 2001 From: jacco Date: Wed, 4 Mar 2020 09:58:48 +0100 Subject: [PATCH 09/45] Fix for upstream merge conflict the 
location of the header file was changed in the upstream archive. but the makefile was not updated. --- tensorflow/lite/micro/tools/make/targets/arc_makefile.inc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc index a1f5546b8f5..5ce2e03bfc3 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc @@ -89,6 +89,7 @@ endif MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/mli_slicers.h MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/mli_slicers.cc + MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/mli_tf_utils.h MICROLITE_CC_HDRS += tensorflow/lite/micro/tools/make/targets/arc/memory.lcf From 210253668472888264a9c8f6eef9f58e3d7f3e34 Mon Sep 17 00:00:00 2001 From: jacco Date: Thu, 26 Mar 2020 17:26:19 +0100 Subject: [PATCH 10/45] update to new version of MLI needed for slicing --- tensorflow/lite/micro/kernels/arc/conv.cc | 2 +- tensorflow/lite/micro/kernels/arc/depthwise_conv.cc | 2 +- tensorflow/lite/micro/tools/make/third_party_downloads.inc | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/lite/micro/kernels/arc/conv.cc b/tensorflow/lite/micro/kernels/arc/conv.cc index 8141154147b..06be9384125 100644 --- a/tensorflow/lite/micro/kernels/arc/conv.cc +++ b/tensorflow/lite/micro/kernels/arc/conv.cc @@ -238,7 +238,7 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, cfg.padding_bottom = in_slice.GetPaddingPost(); mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); - mli_krn_conv2d_hwc_sa8_sa8_sa32(in_ptr, &weights_local, &bias_local, &cfg, out_ptr); + mli_krn_conv2d_nhwc_sa8_sa8_sa32(in_ptr, &weights_local, &bias_local, &cfg, out_ptr); mli_mov_tensor_sync(out_ptr, ©_config, out_slice.Sub()); in_slice.Next(); diff --git 
a/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc b/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc index 5921c4e4dff..fe47c7f25e0 100644 --- a/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc @@ -231,7 +231,7 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, cfg.padding_bottom = in_slice.GetPaddingPost(); mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); - mli_krn_depthwise_conv2d_hwc_sa8_sa8_sa32(in_ptr, &weights_local, &bias_local, &cfg, out_ptr); + mli_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32(in_ptr, &weights_local, &bias_local, &cfg, out_ptr); mli_mov_tensor_sync(out_ptr, ©_config, out_slice.Sub()); in_slice.Next(); diff --git a/tensorflow/lite/micro/tools/make/third_party_downloads.inc b/tensorflow/lite/micro/tools/make/third_party_downloads.inc index 6141efedbee..ce24ba29542 100644 --- a/tensorflow/lite/micro/tools/make/third_party_downloads.inc +++ b/tensorflow/lite/micro/tools/make/third_party_downloads.inc @@ -74,8 +74,8 @@ PERSON_MODEL_INT8_MD5 := "8a7d2c70325f53136faea6dde517b8cc" EMBARC_OSP_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_osp/archive/embarc_mli.zip" EMBARC_OSP_MD5 := "9eaf7b3a1ed05872a03da9796672a776" -EMBARC_MLI_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/archive/97c09b81bd1c4d0455de298626c271d75faedba2.zip" -EMBARC_MLI_MD5 := "f7c5555a15e7837806cfaeb22d3c7b50" +EMBARC_MLI_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/archive/4b6c6eed65395dced1564006be8188781af16035.zip" +EMBARC_MLI_MD5 := "47167553c17ff8c7cd59fb1afb90c304" XTENSA_HIFI4_URL :="https://github.com/foss-xtensa/nnlib-hifi4/raw/master/archive/xa_nnlib_04_07.zip" XTENSA_HIFI4_MD5 :="f234764928f9a42901df33a27e118c8b" From b4bcc4e5743fbe031406745f2474bb27bc49ba2e Mon Sep 17 00:00:00 2001 From: jacco Date: Fri, 20 Mar 2020 16:32:14 +0100 Subject: [PATCH 11/45] add slicing logic for weight 
slicing in conv kernel for ARC backend --- tensorflow/lite/micro/kernels/arc/conv.cc | 78 ++++++-- .../lite/micro/kernels/arc/depthwise_conv.cc | 2 +- .../lite/micro/kernels/arc/mli_slicers.cc | 74 +++++-- .../lite/micro/kernels/arc/mli_slicers.h | 4 +- tensorflow/lite/micro/kernels/arc/pooling.cc | 48 +++-- .../lite/micro/kernels/arc/scratch_buf_mgr.cc | 184 +++++++++++------- .../lite/micro/kernels/arc/scratch_buf_mgr.h | 6 + .../lite/micro/kernels/arc/scratch_buffers.cc | 18 +- .../lite/micro/kernels/arc/scratch_buffers.h | 1 + 9 files changed, 278 insertions(+), 137 deletions(-) diff --git a/tensorflow/lite/micro/kernels/arc/conv.cc b/tensorflow/lite/micro/kernels/arc/conv.cc index 06be9384125..9e9a37821e8 100644 --- a/tensorflow/lite/micro/kernels/arc/conv.cc +++ b/tensorflow/lite/micro/kernels/arc/conv.cc @@ -200,12 +200,18 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, cfg.padding_bottom = data->padding.height + data->padding.height_offset; } + // for height slicing const int heightDimension = 1; int inSliceHeight = 0; int outSliceHeight = 0; const int kernelHeight = static_cast(mli_weights.shape[KRNL_H_DIM_HWC]); const int overlap = kernelHeight - cfg.stride_height; + // for weight slicing (on output channels) + const int weightOutChDimension = 0; // NHWC layout for weigths, output channel dimension is the first dimension. + int sliceChannels = static_cast(mli_weights.shape[weightOutChDimension]); + const int outTensorChDimension = 3; // Batch-Height-Width-Channel layout means last dimension is output channels. 
+ // Tensors for data in fast (local) memory and config to copy data from external to local memory mli_tensor weights_local = mli_weights; mli_tensor bias_local = mli_bias; @@ -214,36 +220,68 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, mli_mov_cfg_t copy_config; mli_mov_cfg_for_copy(©_config); TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_conv_tensors(context, &in_local, &weights_local, &bias_local, &out_local)); - TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io(&in_local, &out_local, kernelHeight, cfg.stride_height, &inSliceHeight, &outSliceHeight)); + TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io(&in_local, &out_local, kernelHeight, cfg.stride_height, cfg.padding_top, cfg.padding_bottom, &inSliceHeight, &outSliceHeight)); + TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_weights(&weights_local, &bias_local, &sliceChannels)); + /* is_local indicates that the tensor is already in local memory, + so in that case the original tensor can be used, + and there is no need to copy it to the local tensor*/ const bool in_is_local = in_local.data == mli_in.data; const bool out_is_local = out_local.data == mli_out.data; + const bool w_is_local = weights_local.data == mli_weights.data; + const bool b_is_local = bias_local.data == mli_bias.data; - /* mli_in tensor contains batches of HWC tensors. so it is a 4 dimensional tensor. - because the mli kernel will process one HWC tensor at a time, the 4 dimensional tensor needs to be sliced into nBatch 3 dimensional tensors. - on top of that there could be a need to also slice in the Height dimension. for that the sliceHeight has been calculated. 
- The tensor slicer is configured that it will completely slice the nBatch dimension (0) and slice the height dimension (1) - in chunks of 'sliceHeight' */ - TensorSlicer in_slice(&mli_in, heightDimension, inSliceHeight, cfg.padding_top, cfg.padding_bottom, overlap); - TensorSlicer out_slice(&mli_out, heightDimension, outSliceHeight); + TensorSlicer w_slice(&mli_weights, weightOutChDimension, sliceChannels); + TensorSlicer b_slice(&mli_bias, weightOutChDimension, sliceChannels); + TensorSlicer out_ch_slice(&mli_out, outTensorChDimension, sliceChannels, 0, 0, 0, true); - mli_tensor *in_ptr = in_is_local ? in_slice.Sub() : &in_local; - mli_tensor *out_ptr = out_is_local ? out_slice.Sub() : &out_local; + mli_tensor *w_ptr = w_is_local ? w_slice.Sub() : &weights_local; + mli_tensor *b_ptr = b_is_local ? b_slice.Sub() : &bias_local; - mli_mov_tensor_sync(&mli_weights, ©_config, &weights_local); - mli_mov_tensor_sync(&mli_bias, ©_config, &bias_local); + void *inputBufferPtr = NULL; - while (!out_slice.Done()) { - cfg.padding_top = in_slice.GetPaddingPre(); - cfg.padding_bottom = in_slice.GetPaddingPost(); + while (!w_slice.Done()){ + mli_mov_tensor_sync(w_slice.Sub(), ©_config, w_ptr); + mli_mov_tensor_sync(b_slice.Sub(), ©_config, b_ptr); - mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); - mli_krn_conv2d_nhwc_sa8_sa8_sa32(in_ptr, &weights_local, &bias_local, &cfg, out_ptr); - mli_mov_tensor_sync(out_ptr, ©_config, out_slice.Sub()); + /* mli_in tensor contains batches of HWC tensors. so it is a 4 dimensional tensor. + because the mli kernel will process one HWC tensor at a time, the 4 dimensional tensor needs to be sliced into nBatch 3 dimensional tensors. + on top of that there could be a need to also slice in the Height dimension. for that the sliceHeight has been calculated. 
+ The tensor slicer is configured that it will completely slice the nBatch dimension (0) and slice the height dimension (1) + in chunks of 'sliceHeight' */ + TensorSlicer in_slice(&mli_in, heightDimension, inSliceHeight, cfg.padding_top, cfg.padding_bottom, overlap); - in_slice.Next(); - out_slice.Next(); + /* output tensor is alreade sliced in the output channel dimension. out_ch_slice.Sub() is the tensor for the amount of + output channels of this itteration of the weight slice loop. This tensor needs to be further sliced over the batch and + height dimension. */ + TensorSlicer out_slice(out_ch_slice.Sub(), heightDimension, outSliceHeight); + + /* setup the pointers to the local or remote tensor to make the code inside the loop easier. */ + mli_tensor *in_ptr = in_is_local ? in_slice.Sub() : &in_local; + mli_tensor *out_ptr = out_is_local ? out_slice.Sub() : &out_local; + + while (!out_slice.Done()) { + TF_LITE_ENSURE(context, !in_slice.Done()); + cfg.padding_top = in_slice.GetPaddingPre(); + cfg.padding_bottom = in_slice.GetPaddingPost(); + + // if same input copy as previous iteration, skip the copy of input + if (in_slice.Sub()->data != inputBufferPtr) { + mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); + inputBufferPtr = in_slice.Sub()->data; + } + mli_krn_conv2d_nhwc_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr, &cfg, out_ptr); + mli_mov_tensor_sync(out_ptr, ©_config, out_slice.Sub()); + + in_slice.Next(); + out_slice.Next(); + } + w_slice.Next(); + b_slice.Next(); + out_ch_slice.Next(); + TF_LITE_ENSURE(context, in_slice.Done()); } + free_arc_scratch_buffers(); } else { ConvParams op_params; diff --git a/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc b/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc index fe47c7f25e0..00c46c442b7 100644 --- a/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc @@ -210,7 +210,7 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, 
const bool in_is_local = in_local.data == mli_in.data; const bool out_is_local = out_local.data == mli_out.data; - TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io(&in_local, &out_local, kernelHeight, cfg.stride_height, &inSliceHeight, &outSliceHeight)); + TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io(&in_local, &out_local, kernelHeight, cfg.stride_height, cfg.padding_top, cfg.padding_bottom, &inSliceHeight, &outSliceHeight)); /* mli_in tensor contains batches of HWC tensors. so it is a 4 dimensional tensor. because the mli kernel will process one HWC tensor at a time, the 4 dimensional tensor needs to be sliced into nBatch 3 dimensional tensors. diff --git a/tensorflow/lite/micro/kernels/arc/mli_slicers.cc b/tensorflow/lite/micro/kernels/arc/mli_slicers.cc index 0ae80d1afc3..6c6c89715f8 100644 --- a/tensorflow/lite/micro/kernels/arc/mli_slicers.cc +++ b/tensorflow/lite/micro/kernels/arc/mli_slicers.cc @@ -22,53 +22,89 @@ namespace tflite { namespace ops { namespace micro { -TensorSlicer::TensorSlicer(const mli_tensor* full_tensor, int slice_dim, int slice_size, int padding_pre, int padding_post, int overlap) +TensorSlicer::TensorSlicer(const mli_tensor* full_tensor, int slice_dim, int slice_size, int padding_pre, int padding_post, int overlap, bool interleave_mode) : full_tensor_(full_tensor) , sliceDim_(slice_dim) , pad_pre_(padding_pre) , pad_post_(padding_post) , overlap_(overlap) - , subtsr_cfg_{ {0, 0}, static_cast(slice_dim + 1), static_cast(slice_size) } + , sub_cfg_{0} , sub_tensor_{0} , done_(false){ + /* In the interleave mode, the slicing happens from the deepest dimension up to the slice_dim + for example in an HWC layout this can mode can be used to slice in the C dimenstion. 
+ in this mode the data is not contiguous in memory anymore */ + if (interleave_mode) { + for (int i = 0; i< full_tensor->rank; i++){ + if (i > slice_dim) { + sub_cfg_.size[i] = 1; + } else if (i == slice_dim) { + sub_cfg_.size[i] = slice_size; + } else { + sub_cfg_.size[i] = full_tensor->shape[i]; + } + } + sub_cfg_.sub_tensor_rank = full_tensor->rank; + + } else { + /* In the not interlevaed mode, the slicing happens from the outer most dimension up to the slice_dim + for example in an HWC layout this mode can be used to slice in the H dimension. + in this mode the data of the slice is still contiguous in memory (if that was the case in the input tensor */ + for (int i = 0; i< full_tensor->rank; i++){ + if (i < slice_dim) { + sub_cfg_.size[i] = 1; + } else if (i == slice_dim) { + sub_cfg_.size[i] = slice_size; + }else { + sub_cfg_.size[i] = full_tensor->shape[i]; + } + } + sub_cfg_.sub_tensor_rank = full_tensor->rank - slice_dim; + } + ComputeSubTensor(); } void TensorSlicer::ComputeSubTensor(void) { - // subtsr_cfg_ is used to keep track of the itteration. + + // subtsr_cfg_ is used to keep track of the iteration. // A copy is created to update it with the correct clipping and padding for the current slice - mli_point_to_subtsr_cfg cfg_new = subtsr_cfg_; - // add clipping of first_out_dim_size to not exceed total size in that dimensions - // add padding logic + mli_sub_tensor_cfg cfg_new = sub_cfg_; // begin and end spans the complete input region including padding areas. - const int begin = (int)subtsr_cfg_.start_coord[1] - pad_pre_; + const int begin = (int)sub_cfg_.offset[sliceDim_] - pad_pre_; // end is clipped to the end of the full input region. this is needed for cases where the last slice is smaller than the rest. 
- const int end = MIN(begin + subtsr_cfg_.first_out_dim_size + overlap_, full_tensor_->shape[sliceDim_] + pad_post_); + const int end = MIN(begin + sub_cfg_.size[sliceDim_] + overlap_, full_tensor_->shape[sliceDim_] + pad_post_); // The start coordinate of the subtensor is clipped to zero - cfg_new.start_coord[sliceDim_] = MAX(begin, 0); + cfg_new.offset[sliceDim_] = MAX(begin, 0); // and the stop coordinate is clipped to the size of the full tensor const int stop_coord = MIN(end, full_tensor_->shape[sliceDim_]); // compute the size of the subtensor - cfg_new.first_out_dim_size = stop_coord - cfg_new.start_coord[sliceDim_]; + cfg_new.size[sliceDim_] = stop_coord - cfg_new.offset[sliceDim_]; // compute the padding configuration for the current slice. - actual_padding_pre = cfg_new.start_coord[sliceDim_] - begin; + actual_padding_pre = cfg_new.offset[sliceDim_] - begin; actual_padding_post = end - stop_coord; - mli_hlp_point_to_subtensor(full_tensor_, &cfg_new, &sub_tensor_); + mli_hlp_create_subtensor(full_tensor_, &cfg_new, &sub_tensor_); } + void TensorSlicer::Next(void){ - // TODO make generic for any number of dimensions. - subtsr_cfg_.start_coord[1]+= subtsr_cfg_.first_out_dim_size; - if (subtsr_cfg_.start_coord[1] >= full_tensor_->shape[1]) { - subtsr_cfg_.start_coord[1] = 0; - subtsr_cfg_.start_coord[0]++; - if (subtsr_cfg_.start_coord[0] >= full_tensor_->shape[0]) { - done_ = true; + for (int i = full_tensor_->rank - 1; i >= 0; i--) { + sub_cfg_.offset[i] += sub_cfg_.size[i]; + if (sub_cfg_.offset[i] >= full_tensor_->shape[i]){ + // wrap + sub_cfg_.offset[i] = 0; + // and continue to the next dimension, if no next dimension we are done. 
+ if (i == 0) done_ = true; + continue; + } else { + // carry is false, so break from the loop + break; } } + if (!done_) ComputeSubTensor(); } diff --git a/tensorflow/lite/micro/kernels/arc/mli_slicers.h b/tensorflow/lite/micro/kernels/arc/mli_slicers.h index 40f948a07ef..3fc7d911fa5 100644 --- a/tensorflow/lite/micro/kernels/arc/mli_slicers.h +++ b/tensorflow/lite/micro/kernels/arc/mli_slicers.h @@ -24,7 +24,7 @@ namespace micro { class TensorSlicer { public: - TensorSlicer(const mli_tensor* full_tensor, int slice_dim, int slice_size, int padding_pre = 0, int padding_post = 0, int overlap = 0); + TensorSlicer(const mli_tensor* full_tensor, int slice_dim, int slice_size, int padding_pre = 0, int padding_post = 0, int overlap = 0, bool interleave_mode = false); ~TensorSlicer() = default; void Next(); @@ -41,7 +41,7 @@ public: private: const mli_tensor* full_tensor_; mli_tensor sub_tensor_; - mli_point_to_subtsr_cfg subtsr_cfg_; + mli_sub_tensor_cfg sub_cfg_; bool done_; int sliceDim_; int pad_pre_, pad_post_, overlap_; diff --git a/tensorflow/lite/micro/kernels/arc/pooling.cc b/tensorflow/lite/micro/kernels/arc/pooling.cc index dab0ad7e314..0cfa5363d69 100644 --- a/tensorflow/lite/micro/kernels/arc/pooling.cc +++ b/tensorflow/lite/micro/kernels/arc/pooling.cc @@ -23,6 +23,7 @@ limitations under the License. 
#include "tensorflow/lite/micro/kernels/arc/mli_tf_utils.h" #include "tensorflow/lite/micro/kernels/arc/scratch_buffers.h" #include "tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h" +#include "tensorflow/lite/micro/kernels/arc/mli_slicers.h" #include "mli_api.h" @@ -139,33 +140,42 @@ TfLiteStatus AverageEvalInt8(TfLiteContext* context, const TfLiteNode* node, mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); mli_hlp_point_to_subtensor(&mli_out, &subtsr_cfg_out, &sub_mli_out); + const int heightDimension = 1; + int inSliceHeight = 0; + int outSliceHeight = 0; + const int overlap = cfg.kernel_height - cfg.stride_height; + // Tensors for data in fast (local) memory and config to copy data from external to local memory mli_tensor in_local = sub_mli_in; mli_tensor out_local = sub_mli_out; mli_mov_cfg_t copy_config; mli_mov_cfg_for_copy(©_config); TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_io_tensors(context, &in_local, &out_local)); - bool in_is_local = in_local.data == sub_mli_in.data; - bool out_is_local = out_local.data == sub_mli_out.data; + bool in_is_local = in_local.data == sub_mli_in.data; + bool out_is_local = out_local.data == sub_mli_out.data; + TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io(&in_local, &out_local, cfg.kernel_height, cfg.stride_height, cfg.padding_top, cfg.padding_bottom, &inSliceHeight, &outSliceHeight)); - const int batches = - MatchingDim(GetTensorShape(input), 0, GetTensorShape(output), 0); + /* mli_in tensor contains batches of HWC tensors. so it is a 4 dimensional tensor. + because the mli kernel will process one HWC tensor at a time, the 4 dimensional tensor needs to be sliced into nBatch 3 dimensional tensors. + on top of that there could be a need to also slice in the Height dimension. for that the sliceHeight has been calculated. 
+ The tensor slicer is configured that it will completely slice the nBatch dimension (0) and slice the height dimension (1) + in chunks of 'sliceHeight' */ + TensorSlicer in_slice(&mli_in, heightDimension, inSliceHeight, cfg.padding_top, cfg.padding_bottom, overlap); + TensorSlicer out_slice(&mli_out, heightDimension, outSliceHeight); - for (int i = 0; i < batches; i++) { - mli_mov_tensor_sync(&sub_mli_in, ©_config, &in_local); - mli_krn_avepool_hwc_sa8(&in_local, &cfg, &out_local); - mli_mov_tensor_sync(&out_local, ©_config, &sub_mli_out); - if (i == batches -1) break; - subtsr_cfg_in.start_coord[0]++; - subtsr_cfg_out.start_coord[0]++; - mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); - mli_hlp_point_to_subtensor(&mli_out, &subtsr_cfg_out, &sub_mli_out); - if (in_is_local) { - in_local.data = sub_mli_in.data; - } - if (out_is_local) { - out_local.data = sub_mli_out.data; - } + mli_tensor *in_ptr = in_is_local ? in_slice.Sub() : &in_local; + mli_tensor *out_ptr = out_is_local ? 
out_slice.Sub() : &out_local; + + while (!out_slice.Done()) { + cfg.padding_top = in_slice.GetPaddingPre(); + cfg.padding_bottom = in_slice.GetPaddingPost(); + + mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); + mli_krn_avepool_hwc_sa8(in_ptr, &cfg, out_ptr); + mli_mov_tensor_sync(out_ptr, ©_config, out_slice.Sub()); + + in_slice.Next(); + out_slice.Next(); } free_arc_scratch_buffers(); } else { diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc b/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc index 26f4f45f17f..e9adbb37e9e 100644 --- a/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc +++ b/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc @@ -66,22 +66,128 @@ TfLiteStatus get_arc_scratch_buffer_for_conv_tensors(TfLiteContext* context, mli_tensor* weights, mli_tensor* bias, mli_tensor* out) { +TfLiteStatus ret_val = kTfLiteOk; #ifdef __Xxy if (!inside_arc_ccm(weights->data)) { int weights_size = mli_hlp_count_elem_num(weights, 0) * mli_hlp_tensor_element_size(weights); + int maxWeightsSize = 0; weights->data = get_arc_scratch_buffer(weights_size); weights->capacity = weights_size; - if (weights->data == NULL) return kTfLiteError; + if (weights->data == NULL) { + get_arc_scratch_buffer_max_size(&maxWeightsSize); + weights->data = get_arc_scratch_buffer(maxWeightsSize); + weights->capacity = maxWeightsSize; + if (maxWeightsSize == 0) ret_val = kTfLiteError; + } + if (weights->data == NULL) ret_val = kTfLiteError; } if (!inside_arc_ccm(bias->data)) { uint32_t bias_mem_requirements = mli_hlp_count_elem_num(bias, 0) * mli_hlp_tensor_element_size(bias); bias->data = get_arc_scratch_buffer(bias_mem_requirements); bias->capacity = bias_mem_requirements; - if (bias->data == NULL) return kTfLiteError; + } + if (ret_val == kTfLiteOk) { + ret_val = get_arc_scratch_buffer_for_io_tensors(context, in, out); + } + if (bias->data == NULL) { + int maxBiasSize = 0; + get_arc_scratch_buffer_max_size(&maxBiasSize); + bias->data = 
get_arc_scratch_buffer(maxBiasSize); + bias->capacity = maxBiasSize; + if (maxBiasSize == 0) ret_val = kTfLiteError; + } + if (bias->data == NULL) ret_val = kTfLiteError; + +#endif + return ret_val; +} + +TfLiteStatus arc_scratch_buffer_calc_slice_size_io( + const mli_tensor *in, + const mli_tensor *out, + const int kernelHeight, + const int strideHeight, + const int padding_top, + const int padding_bot, + int *inSliceHeight, + int *outSliceHeight) { + const int heightDimension = 1; // todo: compute from rank + const int inHeight = in->shape[heightDimension]; + const int outHeight = out->shape[heightDimension]; + const int lineSizeIn = mli_hlp_count_elem_num(in, heightDimension + 1) * mli_hlp_tensor_element_size(in); + const int lineSizeOut = mli_hlp_count_elem_num(out, heightDimension + 1) * mli_hlp_tensor_element_size(out); + int maxLinesIn = 0; + int maxLinesOut = 0; + int maxOutLinesForInput = 0; + bool fit = (in->capacity >= inHeight * lineSizeIn) && (out->capacity >= outHeight * lineSizeOut); + if (fit) { + // in case both tensors completely fit in the capacity, there is no need for slicing + *inSliceHeight = inHeight; + *outSliceHeight = outHeight; + } else { + // First compute how many lines fit into the input tensor, and compute how many output lines can be computed with that. + maxLinesIn = MIN(inHeight, in->capacity / lineSizeIn); + if (maxLinesIn >= inHeight) { + maxOutLinesForInput = outHeight; + } else if (2 * maxLinesIn >= inHeight) { + // in this case only two slices are needed, so both could benefit from padding. take the MIN to get the worst case. + maxOutLinesForInput = (maxLinesIn + MIN(padding_top, padding_bot) - kernelHeight + 1) / strideHeight; + } else { + maxOutLinesForInput = (maxLinesIn - kernelHeight + 1) / strideHeight; // TODO add padding exceptions and test by makin fit=false; + } + // Ten compute how many ouput lines fit into the output tensor. 
+ maxLinesOut = MIN(outHeight, out->capacity / lineSizeOut); + // the smallest of the two determines the slice height for the output, and the derived sliceheight for the input. + *outSliceHeight = MIN(maxOutLinesForInput, maxLinesOut); + *inSliceHeight = *outSliceHeight * strideHeight; } + if ((*inSliceHeight > 0) && (*outSliceHeight > 0)) { + return kTfLiteOk; + } else { + return kTfLiteError; + } +} + +TfLiteStatus arc_scratch_buffer_calc_slice_size_weights( + const mli_tensor *weights, + const mli_tensor *bias, + int *sliceChannels) { + const int weightOutChDimension = 0; // NHWC layout for weigths, output channel dimension is the first dimension. + const int channels = weights->shape[weightOutChDimension]; + + + const int chSizeW = mli_hlp_count_elem_num(weights, weightOutChDimension + 1) * mli_hlp_tensor_element_size(weights); + const int chSizeB = mli_hlp_count_elem_num(bias, weightOutChDimension + 1) * mli_hlp_tensor_element_size(bias); + int maxChWeights = 0; + int maxChBias = 0; + + bool fit = (weights->capacity >= channels * chSizeW) && (bias->capacity >= channels * chSizeB); + if (fit) { + // in case both tensors completely fit in the capacity, there is no need for slicing + *sliceChannels = channels; + } else { + // First compute how many channels fit into the weights tensor + maxChWeights = MIN(channels, weights->capacity / chSizeW); + // Ten compute how many channels fit into the bias tensor. 
+ maxChBias = MIN(channels, bias->capacity / chSizeB); + // the smallest of the two determines the slice size + *sliceChannels = MIN(maxChWeights, maxChBias); + } + + if (*sliceChannels > 0) { + return kTfLiteOk; + } else { + return kTfLiteError; + } +} + +TfLiteStatus get_arc_scratch_buffer_for_io_tensors(TfLiteContext* context, + mli_tensor* in, + mli_tensor* out) { +#ifdef __Xxy int requestSizeIn = 0; int requestSizeOut = 0; int grantsizeIn = 0; @@ -89,8 +195,8 @@ TfLiteStatus get_arc_scratch_buffer_for_conv_tensors(TfLiteContext* context, if (!inside_arc_ccm(in->data)) { // In case the input tensor contains multiple batches, it has rank 4 // because the mli kernel cannot operate on batches, we need to have the size - // of a single batch. that is why the startRank is 1 in case of input rank 4 - int startRank = in->rank - 3; // tOdo explain + // of a single HWC tensor. that is why the startRank is 1 in case of input rank 4 + int startRank = in->rank - 3; requestSizeIn = mli_hlp_count_elem_num(in, startRank) * mli_hlp_tensor_element_size(in); } if (!inside_arc_ccm(out->data)) { @@ -113,76 +219,6 @@ TfLiteStatus get_arc_scratch_buffer_for_conv_tensors(TfLiteContext* context, out->capacity = grantsizeOut; if (out->data == NULL) return kTfLiteError; } - - return kTfLiteOk; -#else - return kTfLiteOk; -#endif -} - -TfLiteStatus arc_scratch_buffer_calc_slice_size_io( - const mli_tensor *in, - const mli_tensor *out, - const int kernelHeight, - const int strideHeight, - int *inSliceHeight, - int *outSliceHeight) { - const int heightDimension = 1; // todo: compute from rank - const int inHeight = in->shape[heightDimension]; - const int outHeight = out->shape[heightDimension]; - const int lineSizeIn = mli_hlp_count_elem_num(in, heightDimension + 1) * mli_hlp_tensor_element_size(in); - const int lineSizeOut = mli_hlp_count_elem_num(out, heightDimension + 1) * mli_hlp_tensor_element_size(out); - int maxLinesIn = 0; - int maxLinesOut = 0; - int maxOutLinesForInput = 0; - bool 
fit = (in->capacity >= inHeight * lineSizeIn) && (out->capacity >= outHeight * lineSizeOut); - if (fit) { - // in case both tensors completely fit in the capacity, there is no need for slicing - *inSliceHeight = inHeight; - *outSliceHeight = outHeight; - } else { - // First compute how many lines fit into the input tensor, and compute how many output lines can be computed with that. - maxLinesIn = MIN(inHeight, in->capacity / lineSizeIn); - if (maxLinesIn >= inHeight) { - maxOutLinesForInput = outHeight; - } else { - maxOutLinesForInput = (maxLinesIn - kernelHeight + 1) / strideHeight; // TODO add padding exceptions and test by makin fit=false; - } - // Ten compute how many ouput lines fit into the output tensor. - maxLinesOut = MIN(outHeight, out->capacity / lineSizeOut); - // the smallest of the two determines the slice height for the output, and the derived sliceheight for the input. - *outSliceHeight = MIN(maxOutLinesForInput, maxLinesOut); - *inSliceHeight = *outSliceHeight * strideHeight; - } - if ((*inSliceHeight > 0) && (*outSliceHeight > 0)) { - return kTfLiteOk; - } else { - return kTfLiteError; - } -} - -TfLiteStatus get_arc_scratch_buffer_for_io_tensors(TfLiteContext* context, - mli_tensor* in, - mli_tensor* out) { -#ifdef __Xxy - // Function to assign fast memory from one of 3 scratch buffers. - // Best Fit strategy - memory is asigned to those tensor which leave less memory of bank unused - mli_tensor* tensors[2] = { in, out }; - uint32_t tensor_sizes[2] = { - mli_hlp_count_elem_num(tensors[0], 0), mli_hlp_count_elem_num(tensors[1], 0)}; - int num_tensors = 2; - - - for (int i = 0; i < num_tensors; ++i) { - // only for tensors that are not already located in one of the ccm memories, find a local memory that fits the data size. 
- if (inside_arc_ccm(tensors[i]->data)) continue; - tensors[i]->data = get_arc_scratch_buffer(tensor_sizes[i]); - tensors[i]->capacity = tensor_sizes[i]; - - if (tensors[i]->data == NULL) { - return kTfLiteError; - } - } #endif return kTfLiteOk; } diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h b/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h index a27df8a5358..fc348229235 100644 --- a/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h +++ b/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h @@ -64,9 +64,15 @@ TfLiteStatus arc_scratch_buffer_calc_slice_size_io( const mli_tensor *out, const int kernelHeight, const int strideHeight, + const int padding_top, + const int padding_bot, int *inSliceHeight, int *outSliceHeight); +TfLiteStatus arc_scratch_buffer_calc_slice_size_weights( + const mli_tensor *weights, + const mli_tensor *bias, + int *sliceChannels); } // namespace micro } // namespace ops diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc b/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc index 5ef1b445a22..106743cf471 100644 --- a/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc +++ b/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc @@ -74,8 +74,9 @@ void *get_arc_scratch_buffer(int size) { void *buf = NULL; int best_mem_idx = -1; int best_mem_delta = INT_MAX; + const int numMem = sizeof(scratch_mem)/sizeof(scratch_mem[0]); // find a local memory that fits the data size. - for (int mem_idx = 0; mem_idx < sizeof(scratch_mem)/sizeof(scratch_mem[0]); ++mem_idx) { + for (int mem_idx = 0; mem_idx < numMem; ++mem_idx) { // Best Fit if ((size <= scratch_sizes[mem_idx]) && (scratch_sizes[mem_idx] - size < best_mem_delta)) { best_mem_idx = mem_idx; @@ -90,11 +91,24 @@ void *get_arc_scratch_buffer(int size) { return buf; } +void get_arc_scratch_buffer_max_size(int *size) { + int maxavailable = 0; + const int numMem = sizeof(scratch_mem)/sizeof(scratch_mem[0]); + // find the largest available buffer. 
+ for (int i = 0; i < numMem; i++) { + if (scratch_sizes[i] > maxavailable) { + maxavailable = scratch_sizes[i]; + } + } + *size = maxavailable; +} + void get_arc_scratch_buffer_two_max_sizes(int *size1, int *size2) { int maxavailable = 0; int secondavail = 0; + const int numMem = sizeof(scratch_mem)/sizeof(scratch_mem[0]); // find the two largest available buffers. - for (int i = 0; i < 3; i++) { + for (int i = 0; i < numMem; i++) { if (scratch_sizes[i] > maxavailable) { secondavail = maxavailable; maxavailable = scratch_sizes[i]; diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buffers.h b/tensorflow/lite/micro/kernels/arc/scratch_buffers.h index 52a12c7899d..927e480da5a 100644 --- a/tensorflow/lite/micro/kernels/arc/scratch_buffers.h +++ b/tensorflow/lite/micro/kernels/arc/scratch_buffers.h @@ -27,6 +27,7 @@ namespace micro { void free_arc_scratch_buffers(void); void *get_arc_scratch_buffer(int size);// Function to assign fast memory from one of 3 scratch buffers. +void get_arc_scratch_buffer_max_size(int *size); void get_arc_scratch_buffer_two_max_sizes(int *size1, int *size2); static inline bool inside_arc_dccm(void* p) { From 330c649075978d1718c7b590da38dea640f67698 Mon Sep 17 00:00:00 2001 From: jacco Date: Thu, 26 Mar 2020 17:25:37 +0100 Subject: [PATCH 12/45] weight slicing for depthwise and fully connected in ARC backend --- tensorflow/lite/micro/kernels/arc/conv.cc | 39 +-- .../lite/micro/kernels/arc/depthwise_conv.cc | 106 +++++-- .../lite/micro/kernels/arc/fully_connected.cc | 93 ++++-- .../lite/micro/kernels/arc/mli_slicers.cc | 2 +- tensorflow/lite/micro/kernels/arc/pooling.cc | 19 +- .../lite/micro/kernels/arc/scratch_buf_mgr.cc | 288 +++++++++++------- .../lite/micro/kernels/arc/scratch_buf_mgr.h | 71 ++++- .../lite/micro/kernels/arc/scratch_buffers.cc | 14 +- .../lite/micro/kernels/arc/scratch_buffers.h | 2 +- 9 files changed, 434 insertions(+), 200 deletions(-) diff --git a/tensorflow/lite/micro/kernels/arc/conv.cc 
b/tensorflow/lite/micro/kernels/arc/conv.cc index 9e9a37821e8..6cf26c7d6d9 100644 --- a/tensorflow/lite/micro/kernels/arc/conv.cc +++ b/tensorflow/lite/micro/kernels/arc/conv.cc @@ -201,16 +201,16 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, } // for height slicing - const int heightDimension = 1; - int inSliceHeight = 0; - int outSliceHeight = 0; - const int kernelHeight = static_cast(mli_weights.shape[KRNL_H_DIM_HWC]); - const int overlap = kernelHeight - cfg.stride_height; + const int height_dimension = 1; + int in_slice_height = 0; + int out_slice_height = 0; + const int kernel_height = static_cast(mli_weights.shape[KRNL_H_DIM_HWC]); + const int overlap = kernel_height - cfg.stride_height; // for weight slicing (on output channels) - const int weightOutChDimension = 0; // NHWC layout for weigths, output channel dimension is the first dimension. - int sliceChannels = static_cast(mli_weights.shape[weightOutChDimension]); - const int outTensorChDimension = 3; // Batch-Height-Width-Channel layout means last dimension is output channels. + const int weight_out_ch_dimension = 0; // NHWC layout for weigths, output channel dimension is the first dimension. + int slice_channels = static_cast(mli_weights.shape[weight_out_ch_dimension]); + const int out_tensor_ch_dimension = 3; // Batch-Height-Width-Channel layout means last dimension is output channels. 
// Tensors for data in fast (local) memory and config to copy data from external to local memory mli_tensor weights_local = mli_weights; @@ -220,8 +220,8 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, mli_mov_cfg_t copy_config; mli_mov_cfg_for_copy(©_config); TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_conv_tensors(context, &in_local, &weights_local, &bias_local, &out_local)); - TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io(&in_local, &out_local, kernelHeight, cfg.stride_height, cfg.padding_top, cfg.padding_bottom, &inSliceHeight, &outSliceHeight)); - TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_weights(&weights_local, &bias_local, &sliceChannels)); + TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io(&in_local, &out_local, kernel_height, cfg.stride_height, cfg.padding_top, cfg.padding_bottom, &in_slice_height, &out_slice_height)); + TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_weights(&weights_local, &bias_local, weight_out_ch_dimension, &slice_channels)); /* is_local indicates that the tensor is already in local memory, so in that case the original tensor can be used, @@ -231,14 +231,15 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, const bool w_is_local = weights_local.data == mli_weights.data; const bool b_is_local = bias_local.data == mli_bias.data; - TensorSlicer w_slice(&mli_weights, weightOutChDimension, sliceChannels); - TensorSlicer b_slice(&mli_bias, weightOutChDimension, sliceChannels); - TensorSlicer out_ch_slice(&mli_out, outTensorChDimension, sliceChannels, 0, 0, 0, true); + TensorSlicer w_slice(&mli_weights, weight_out_ch_dimension, slice_channels); + TensorSlicer b_slice(&mli_bias, weight_out_ch_dimension, slice_channels); + TensorSlicer out_ch_slice(&mli_out, out_tensor_ch_dimension, slice_channels, 0, 0, 0, true); mli_tensor *w_ptr = w_is_local ? w_slice.Sub() : &weights_local; mli_tensor *b_ptr = b_is_local ? 
b_slice.Sub() : &bias_local; - void *inputBufferPtr = NULL; + void *input_buffer_ptr = NULL; + int input_buffer_size = 0; while (!w_slice.Done()){ mli_mov_tensor_sync(w_slice.Sub(), ©_config, w_ptr); @@ -249,12 +250,12 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, on top of that there could be a need to also slice in the Height dimension. for that the sliceHeight has been calculated. The tensor slicer is configured that it will completely slice the nBatch dimension (0) and slice the height dimension (1) in chunks of 'sliceHeight' */ - TensorSlicer in_slice(&mli_in, heightDimension, inSliceHeight, cfg.padding_top, cfg.padding_bottom, overlap); + TensorSlicer in_slice(&mli_in, height_dimension, in_slice_height, cfg.padding_top, cfg.padding_bottom, overlap); /* output tensor is alreade sliced in the output channel dimension. out_ch_slice.Sub() is the tensor for the amount of output channels of this itteration of the weight slice loop. This tensor needs to be further sliced over the batch and height dimension. */ - TensorSlicer out_slice(out_ch_slice.Sub(), heightDimension, outSliceHeight); + TensorSlicer out_slice(out_ch_slice.Sub(), height_dimension, out_slice_height); /* setup the pointers to the local or remote tensor to make the code inside the loop easier. */ mli_tensor *in_ptr = in_is_local ? 
in_slice.Sub() : &in_local; @@ -266,9 +267,10 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, cfg.padding_bottom = in_slice.GetPaddingPost(); // if same input copy as previous iteration, skip the copy of input - if (in_slice.Sub()->data != inputBufferPtr) { + if ((in_slice.Sub()->data != input_buffer_ptr) || (mli_hlp_count_elem_num(in_slice.Sub(), 0) != input_buffer_size)) { mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); - inputBufferPtr = in_slice.Sub()->data; + input_buffer_ptr = in_slice.Sub()->data; + input_buffer_size = mli_hlp_count_elem_num(in_slice.Sub(), 0); } mli_krn_conv2d_nhwc_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr, &cfg, out_ptr); mli_mov_tensor_sync(out_ptr, ©_config, out_slice.Sub()); @@ -282,7 +284,6 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, TF_LITE_ENSURE(context, in_slice.Done()); } - free_arc_scratch_buffers(); } else { ConvParams op_params; op_params.input_offset = -input->params.zero_point; diff --git a/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc b/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc index 00c46c442b7..74e48c8c064 100644 --- a/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc @@ -191,12 +191,21 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, cfg.padding_bottom = data->padding.height + data->padding.height_offset; } + // for height slicing const int heightDimension = 1; int inSliceHeight = 0; int outSliceHeight = 0; const int kernelHeight = static_cast(mli_weights.shape[KRNL_DW_H_DIM_HWC]); const int overlap = kernelHeight - cfg.stride_height; + // for weight slicing (on output channels) + const int weight_out_ch_dimension = 3; // HWCN layout for weigths, output channel dimension is the first dimension. 
+ const int bias_out_ch_dimension = 0; // bias has only 1 dimension + const int out_tensor_ch_dimension = 3; // Batch-Height-Width-Channel layout means last dimension is output channels. + const int32_t in_channels = mli_in.shape[out_tensor_ch_dimension]; + const int32_t out_channels = mli_out.shape[out_tensor_ch_dimension]; + int slice_channels = static_cast(mli_weights.shape[weight_out_ch_dimension]); + // Tensors for data in fast (local) memory and config to copy data from external to local memory mli_tensor weights_local = mli_weights; mli_tensor bias_local = mli_bias; @@ -206,38 +215,83 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, mli_mov_cfg_for_copy(©_config); TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_conv_tensors(context, &in_local, &weights_local, &bias_local, &out_local)); - /* if the tensor is already in local memory, is_local is true */ + /* is_local indicates that the tensor is already in local memory, + so in that case the original tensor can be used, + and there is no need to copy it to the local tensor*/ const bool in_is_local = in_local.data == mli_in.data; const bool out_is_local = out_local.data == mli_out.data; + const bool w_is_local = weights_local.data == mli_weights.data; + const bool b_is_local = bias_local.data == mli_bias.data; TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io(&in_local, &out_local, kernelHeight, cfg.stride_height, cfg.padding_top, cfg.padding_bottom, &inSliceHeight, &outSliceHeight)); + TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_weights(&weights_local, &bias_local, weight_out_ch_dimension, &slice_channels)); - /* mli_in tensor contains batches of HWC tensors. so it is a 4 dimensional tensor. - because the mli kernel will process one HWC tensor at a time, the 4 dimensional tensor needs to be sliced into nBatch 3 dimensional tensors. - on top of that there could be a need to also slice in the Height dimension. for that the sliceHeight has been calculated. 
- The tensor slicer is configured that it will completely slice the nBatch dimension (0) and slice the height dimension (1) - in chunks of 'sliceHeight' */ - TensorSlicer in_slice(&mli_in, heightDimension, inSliceHeight, cfg.padding_top, cfg.padding_bottom, overlap); - TensorSlicer out_slice(&mli_out, heightDimension, outSliceHeight); - - mli_tensor *in_ptr = in_is_local ? in_slice.Sub() : &in_local; - mli_tensor *out_ptr = out_is_local ? out_slice.Sub() : &out_local; - - mli_mov_tensor_sync(&mli_weights, ©_config, &weights_local); - mli_mov_tensor_sync(&mli_bias, ©_config, &bias_local); - - while (!out_slice.Done()) { - cfg.padding_top = in_slice.GetPaddingPre(); - cfg.padding_bottom = in_slice.GetPaddingPost(); - - mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); - mli_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32(in_ptr, &weights_local, &bias_local, &cfg, out_ptr); - mli_mov_tensor_sync(out_ptr, ©_config, out_slice.Sub()); - - in_slice.Next(); - out_slice.Next(); + /* if input channels is not equal to output channels, a channel multiplier is used. + in this case the slice channels needs to be rounded down to a multiple of the input channels */ + if (in_channels != out_channels) { + slice_channels = (slice_channels / in_channels) * in_channels; } - free_arc_scratch_buffers(); + + TensorSlicer w_slice(&mli_weights, weight_out_ch_dimension, slice_channels, 0, 0, 0, true); + TensorSlicer b_slice(&mli_bias, bias_out_ch_dimension, slice_channels); + TensorSlicer out_ch_slice(&mli_out, out_tensor_ch_dimension, slice_channels, 0, 0, 0, true); + TensorSlicer in_ch_slice(&mli_in, out_tensor_ch_dimension, slice_channels, 0, 0, 0, true); + + mli_tensor *w_ptr = w_is_local ? w_slice.Sub() : &weights_local; + mli_tensor *b_ptr = b_is_local ? 
b_slice.Sub() : &bias_local; + + void *input_buffer_ptr = NULL; + int input_buffer_size = 0; + int padding_top = cfg.padding_top; + int padding_bottom = cfg.padding_bottom; + + while (!w_slice.Done()){ + mli_mov_tensor_sync(w_slice.Sub(), ©_config, w_ptr); + mli_mov_tensor_sync(b_slice.Sub(), ©_config, b_ptr); + + /* input tensor is alreade sliced in the channel dimension. out_ch_slice.Sub() is the tensor for the amount of + channels of this itteration of the weight slice loop. This tensor needs to be further sliced over the batch and + height dimension. + in_ch_slice.Sub() tensor contains batches of HWC tensors. so it is a 4 dimensional tensor. + because the mli kernel will process one HWC tensor at a time, the 4 dimensional tensor needs to be sliced into nBatch 3 dimensional tensors. + on top of that there could be a need to also slice in the Height dimension. for that the sliceHeight has been calculated. + The tensor slicer is configured that it will completely slice the nBatch dimension (0) and slice the height dimension (1) + in chunks of 'sliceHeight' */ + TensorSlicer in_slice(in_ch_slice.Sub(), heightDimension, inSliceHeight, padding_top, padding_bottom, overlap); + + /* output tensor is alreade sliced in the output channel dimension. out_ch_slice.Sub() is the tensor for the amount of + output channels of this itteration of the weight slice loop. This tensor needs to be further sliced over the batch and + height dimension. */ + TensorSlicer out_slice(out_ch_slice.Sub(), heightDimension, outSliceHeight); + + /* setup the pointers to the local or remote tensor to make the code inside the loop easier. */ + mli_tensor *in_ptr = in_is_local ? in_slice.Sub() : &in_local; + mli_tensor *out_ptr = out_is_local ? 
out_slice.Sub() : &out_local; + + while (!out_slice.Done()) { + TF_LITE_ENSURE(context, !in_slice.Done()); + cfg.padding_top = in_slice.GetPaddingPre(); + cfg.padding_bottom = in_slice.GetPaddingPost(); + + // if same input copy as previous iteration, skip the copy of input + if ((in_slice.Sub()->data != input_buffer_ptr) || (mli_hlp_count_elem_num(in_slice.Sub(), 0) != input_buffer_size)) { + mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); + input_buffer_ptr = in_slice.Sub()->data; + input_buffer_size = mli_hlp_count_elem_num(in_slice.Sub(), 0); + } + mli_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr, &cfg, out_ptr); + mli_mov_tensor_sync(out_ptr, ©_config, out_slice.Sub()); + + in_slice.Next(); + out_slice.Next(); + } + w_slice.Next(); + b_slice.Next(); + out_ch_slice.Next(); + in_ch_slice.Next(); + TF_LITE_ENSURE(context, in_slice.Done()); + } + } else { DepthwiseParams op_params; op_params.padding_type = PaddingType::kSame; diff --git a/tensorflow/lite/micro/kernels/arc/fully_connected.cc b/tensorflow/lite/micro/kernels/arc/fully_connected.cc index 42921037481..cc9b95c570a 100644 --- a/tensorflow/lite/micro/kernels/arc/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/arc/fully_connected.cc @@ -26,6 +26,7 @@ limitations under the License. 
#include "tensorflow/lite/micro/kernels/arc/mli_tf_utils.h" #include "tensorflow/lite/micro/kernels/arc/scratch_buffers.h" #include "tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h" +#include "tensorflow/lite/micro/kernels/arc/mli_slicers.h" #include "mli_api.h" @@ -100,44 +101,80 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, ConvertToMliTensor(bias, &mli_bias); ConvertToMliTensor(output, &mli_out); - mli_point_to_subtsr_cfg subtsr_cfg_in = {{0, 0}, 2, static_cast(mli_in.shape[1])}; - mli_point_to_subtsr_cfg subtsr_cfg_out = {{0, 0}, 2, static_cast(mli_out.shape[1])}; - mli_tensor sub_mli_in = {0}; - mli_tensor sub_mli_out = {0}; - mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); - mli_hlp_point_to_subtensor(&mli_out, &subtsr_cfg_out, &sub_mli_out); + /* The input tensor can have more than 2 dimensions. for the compute this doesn't make any difference + because all the inputs or a batch entry will be used anyway. because the MLI kernel doesn't recognize + the multiple dimensions, the tensor shape is casted to a {batchnum, inputsize} shape. 
*/ + mli_in.shape[0] = mli_out.shape[0]; + mli_in.shape[1] = mli_weights.shape[1]; + mli_in.shape[2] = 0; + mli_in.shape[3] = 0; + mli_in.rank = 2; // Tensors for data in fast (local) memory and config to copy data from external to local memory mli_tensor weights_local = mli_weights; mli_tensor bias_local = mli_bias; - mli_tensor in_local = sub_mli_in; - mli_tensor out_local = sub_mli_out; + mli_tensor in_local = mli_in; + mli_tensor out_local = mli_out; mli_mov_cfg_t copy_config; mli_mov_cfg_for_copy(©_config); - TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_conv_tensors(context, &in_local, &weights_local, &bias_local, &out_local)); - bool in_is_local = in_local.data == sub_mli_in.data; - bool out_is_local = out_local.data == sub_mli_out.data; + const int weight_out_dimension = 0; + const int out_tensor_dimension = 1; + const int batch_dimension = 0; + int slice_size = mli_weights.shape[weight_out_dimension]; - mli_mov_tensor_sync(&mli_weights, ©_config, &weights_local); - mli_mov_tensor_sync(&mli_bias, ©_config, &bias_local); + /* allocate the local buffers, and compute the slice size */ + TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_fully_connect_tensors(context, &in_local, &weights_local, &bias_local, &out_local)); + TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_weights(&weights_local, &bias_local, weight_out_dimension, &slice_size)); + int max_out_slice_size = out_local.capacity / mli_hlp_tensor_element_size(&out_local); + if (slice_size > max_out_slice_size) slice_size = max_out_slice_size; - const int batches = - MatchingDim(GetTensorShape(input), 0, GetTensorShape(output), 0); + /* is_local indicates that the tensor is already in local memory, + so in that case the original tensor can be used, + and there is no need to copy it to the local tensor*/ + const bool in_is_local = in_local.data == mli_in.data; + const bool out_is_local = out_local.data == mli_out.data; + const bool w_is_local = weights_local.data == mli_weights.data; + const 
bool b_is_local = bias_local.data == mli_bias.data; - for (int i = 0; i < batches; i++) { - mli_mov_tensor_sync(&sub_mli_in, ©_config, &in_local); - mli_krn_fully_connected_sa8_sa8_sa32(&in_local, &weights_local, &bias_local, &out_local); - mli_mov_tensor_sync(&out_local, ©_config, &sub_mli_out); - subtsr_cfg_in.start_coord[0]++; - subtsr_cfg_out.start_coord[0]++; - mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); - mli_hlp_point_to_subtensor(&mli_out, &subtsr_cfg_out, &sub_mli_out); - if (in_is_local) { - in_local.data = sub_mli_in.data; - } - if (out_is_local) { - out_local.data = sub_mli_out.data; + TensorSlicer w_slice(&mli_weights, weight_out_dimension, slice_size); + TensorSlicer b_slice(&mli_bias, weight_out_dimension, slice_size); + TensorSlicer out_ch_slice(&mli_out, out_tensor_dimension, slice_size, 0, 0, 0, true); + + mli_tensor *w_ptr = w_is_local ? w_slice.Sub() : &weights_local; + mli_tensor *b_ptr = b_is_local ? b_slice.Sub() : &bias_local; + + void *input_buffer_ptr = NULL; + + while (!w_slice.Done()){ + mli_mov_tensor_sync(w_slice.Sub(), ©_config, w_ptr); + mli_mov_tensor_sync(b_slice.Sub(), ©_config, b_ptr); + + TensorSlicer in_slice(&mli_in, batch_dimension, 1); + + /* output tensor is alreade sliced in the output size dimension. out_ch_slice.Sub() is the tensor for the amount of + output size of this itteration of the weight slice loop. This tensor needs to be further sliced over the batch */ + TensorSlicer out_slice(out_ch_slice.Sub(), batch_dimension, 1); + + /* setup the pointers to the local or remote tensor to make the code inside the loop easier. */ + mli_tensor *in_ptr = in_is_local ? in_slice.Sub() : &in_local; + mli_tensor *out_ptr = out_is_local ? 
out_slice.Sub() : &out_local; + + while (!out_slice.Done()) { + + // if same input copy as previous iteration, skip the copy of input + if (in_slice.Sub()->data != input_buffer_ptr) { + mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); + input_buffer_ptr = in_slice.Sub()->data; + } + mli_krn_fully_connected_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr, out_ptr); + mli_mov_tensor_sync(out_ptr, ©_config, out_slice.Sub()); + + in_slice.Next(); + out_slice.Next(); } + w_slice.Next(); + b_slice.Next(); + out_ch_slice.Next(); } } else { FullyConnectedParams op_params; diff --git a/tensorflow/lite/micro/kernels/arc/mli_slicers.cc b/tensorflow/lite/micro/kernels/arc/mli_slicers.cc index 6c6c89715f8..91bae5caa38 100644 --- a/tensorflow/lite/micro/kernels/arc/mli_slicers.cc +++ b/tensorflow/lite/micro/kernels/arc/mli_slicers.cc @@ -48,7 +48,7 @@ TensorSlicer::TensorSlicer(const mli_tensor* full_tensor, int slice_dim, int sli sub_cfg_.sub_tensor_rank = full_tensor->rank; } else { - /* In the not interlevaed mode, the slicing happens from the outer most dimension up to the slice_dim + /* In the not interleaved mode, the slicing happens from the outer most dimension up to the slice_dim for example in an HWC layout this mode can be used to slice in the H dimension. 
in this mode the data of the slice is still contiguous in memory (if that was the case in the input tensor */ for (int i = 0; i< full_tensor->rank; i++){ diff --git a/tensorflow/lite/micro/kernels/arc/pooling.cc b/tensorflow/lite/micro/kernels/arc/pooling.cc index 0cfa5363d69..7a26a10e23b 100644 --- a/tensorflow/lite/micro/kernels/arc/pooling.cc +++ b/tensorflow/lite/micro/kernels/arc/pooling.cc @@ -140,9 +140,9 @@ TfLiteStatus AverageEvalInt8(TfLiteContext* context, const TfLiteNode* node, mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); mli_hlp_point_to_subtensor(&mli_out, &subtsr_cfg_out, &sub_mli_out); - const int heightDimension = 1; - int inSliceHeight = 0; - int outSliceHeight = 0; + const int height_dimension = 1; + int in_slice_height = 0; + int out_slice_height = 0; const int overlap = cfg.kernel_height - cfg.stride_height; // Tensors for data in fast (local) memory and config to copy data from external to local memory @@ -150,19 +150,22 @@ TfLiteStatus AverageEvalInt8(TfLiteContext* context, const TfLiteNode* node, mli_tensor out_local = sub_mli_out; mli_mov_cfg_t copy_config; mli_mov_cfg_for_copy(©_config); - TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_io_tensors(context, &in_local, &out_local)); + TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_pooling_tensors(context, &in_local, &out_local)); bool in_is_local = in_local.data == sub_mli_in.data; bool out_is_local = out_local.data == sub_mli_out.data; - TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io(&in_local, &out_local, cfg.kernel_height, cfg.stride_height, cfg.padding_top, cfg.padding_bottom, &inSliceHeight, &outSliceHeight)); + TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io(&in_local, &out_local, cfg.kernel_height, cfg.stride_height, cfg.padding_top, cfg.padding_bottom, &in_slice_height, &out_slice_height)); /* mli_in tensor contains batches of HWC tensors. so it is a 4 dimensional tensor. 
because the mli kernel will process one HWC tensor at a time, the 4 dimensional tensor needs to be sliced into nBatch 3 dimensional tensors. on top of that there could be a need to also slice in the Height dimension. for that the sliceHeight has been calculated. The tensor slicer is configured that it will completely slice the nBatch dimension (0) and slice the height dimension (1) in chunks of 'sliceHeight' */ - TensorSlicer in_slice(&mli_in, heightDimension, inSliceHeight, cfg.padding_top, cfg.padding_bottom, overlap); - TensorSlicer out_slice(&mli_out, heightDimension, outSliceHeight); + TensorSlicer in_slice(&mli_in, height_dimension, in_slice_height, cfg.padding_top, cfg.padding_bottom, overlap); + TensorSlicer out_slice(&mli_out, height_dimension, out_slice_height); + /* is_local indicates that the tensor is already in local memory, + so in that case the original tensor can be used, + and there is no need to copy it to the local tensor*/ mli_tensor *in_ptr = in_is_local ? in_slice.Sub() : &in_local; mli_tensor *out_ptr = out_is_local ? 
out_slice.Sub() : &out_local; @@ -177,7 +180,7 @@ TfLiteStatus AverageEvalInt8(TfLiteContext* context, const TfLiteNode* node, in_slice.Next(); out_slice.Next(); } - free_arc_scratch_buffers(); + } else { int32_t activation_min, activation_max; (void)CalculateActivationRangeQuantized(context, params->activation, output, diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc b/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc index e9adbb37e9e..5bd2d6aed22 100644 --- a/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc +++ b/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc @@ -23,21 +23,19 @@ namespace tflite { namespace ops { namespace micro { - - -void get_arc_two_buffer_sizes(int requestsize1, int requestsize2, int *grantsize1, int *grantsize2) { +static void get_arc_two_buffer_sizes(int request_size_1, int request_size_2, int *grant_size_1, int *grant_size_2) { int maxrequest = 0; int secondrequest = 0; int maxavailable = 0; int secondavail = 0; // determine the largest requested buffer. - if (requestsize1 > requestsize2) { - maxrequest = requestsize1; - secondrequest = requestsize2; + if (request_size_1 > request_size_2) { + maxrequest = request_size_1; + secondrequest = request_size_2; } else { - maxrequest = requestsize2; - secondrequest = requestsize1; + maxrequest = request_size_2; + secondrequest = request_size_1; } // find the two largest available buffers. @@ -45,40 +43,79 @@ void get_arc_two_buffer_sizes(int requestsize1, int requestsize2, int *grantsize // in case two buffers are available, the largest buffer can go to the largest request. if (secondavail > 0) { // this condition can be enhanced to prevent cases where the second buffer is so small that it is better to use one buffer and split it. 
- if (requestsize1 > requestsize2) { - *grantsize1 = maxavailable; - *grantsize2 = secondavail; + if (request_size_1 > request_size_2) { + *grant_size_1 = maxavailable; + *grant_size_2 = secondavail; } else { - *grantsize1 = secondavail; - *grantsize2 = maxavailable; + *grant_size_1 = secondavail; + *grant_size_2 = maxavailable; } } else { // In case only one buffer is available, // use only the max buffer, and split it. // TODO compute optimal split ratio based on request ratio. - *grantsize1 = maxavailable / 2; - *grantsize2 = maxavailable / 2; + *grant_size_1 = maxavailable / 2; + *grant_size_2 = maxavailable / 2; } } +static TfLiteStatus get_arc_scratch_buffer_for_io_tensors(TfLiteContext* context, + mli_tensor* in, + mli_tensor* out) { +#ifdef __Xxy + int request_size_in = 0; + int request_size_out = 0; + int grant_size_in = 0; + int grant_size_out = 0; + if (!inside_arc_ccm(in->data)) { + // In case the input tensor contains multiple batches, it has rank 4 + // because the mli kernel cannot operate on batches, we need to have the size + // of a single HWC tensor. that is why the start_rank is 1 in case of input rank 4 + int start_rank = in->rank - 3; + request_size_in = mli_hlp_count_elem_num(in, start_rank) * mli_hlp_tensor_element_size(in); + } + if (!inside_arc_ccm(out->data)) { + // In case the input tensor contains multiple batches, it has rank 4 + // because the mli kernel cannot operate on batches, we need to have the size + // of a single batch. 
that is why the start_rank is 1 in case of input rank 4 + int start_rank = out->rank - 3; + request_size_out = mli_hlp_count_elem_num(out, start_rank) * mli_hlp_tensor_element_size(out); + } + + get_arc_two_buffer_sizes(request_size_in, request_size_out, &grant_size_in, &grant_size_out); + + if (!inside_arc_ccm(in->data)) { + in->data = get_arc_scratch_buffer(grant_size_in); + in->capacity = grant_size_in; + if (in->data == NULL) return kTfLiteError; + } + if (!inside_arc_ccm(out->data)) { + out->data = get_arc_scratch_buffer(grant_size_out); + out->capacity = grant_size_out; + if (out->data == NULL) return kTfLiteError; + } +#endif + return kTfLiteOk; +} + TfLiteStatus get_arc_scratch_buffer_for_conv_tensors(TfLiteContext* context, - mli_tensor* in, - mli_tensor* weights, - mli_tensor* bias, + mli_tensor* in, + mli_tensor* weights, + mli_tensor* bias, mli_tensor* out) { TfLiteStatus ret_val = kTfLiteOk; #ifdef __Xxy - + init_arc_scratch_buffers(); if (!inside_arc_ccm(weights->data)) { int weights_size = mli_hlp_count_elem_num(weights, 0) * mli_hlp_tensor_element_size(weights); - int maxWeightsSize = 0; + int max_weights_size = 0; weights->data = get_arc_scratch_buffer(weights_size); weights->capacity = weights_size; if (weights->data == NULL) { - get_arc_scratch_buffer_max_size(&maxWeightsSize); - weights->data = get_arc_scratch_buffer(maxWeightsSize); - weights->capacity = maxWeightsSize; - if (maxWeightsSize == 0) ret_val = kTfLiteError; + get_arc_scratch_buffer_max_size(&max_weights_size); + weights->data = get_arc_scratch_buffer(max_weights_size); + weights->capacity = max_weights_size; + if (max_weights_size == 0) ret_val = kTfLiteError; } if (weights->data == NULL) ret_val = kTfLiteError; } @@ -88,15 +125,92 @@ TfLiteStatus ret_val = kTfLiteOk; bias->data = get_arc_scratch_buffer(bias_mem_requirements); bias->capacity = bias_mem_requirements; } + if (ret_val == kTfLiteOk) { ret_val = get_arc_scratch_buffer_for_io_tensors(context, in, out); } + if (bias->data 
== NULL) { - int maxBiasSize = 0; - get_arc_scratch_buffer_max_size(&maxBiasSize); - bias->data = get_arc_scratch_buffer(maxBiasSize); - bias->capacity = maxBiasSize; - if (maxBiasSize == 0) ret_val = kTfLiteError; + int max_bias_size = 0; + get_arc_scratch_buffer_max_size(&max_bias_size); + bias->data = get_arc_scratch_buffer(max_bias_size); + bias->capacity = max_bias_size; + if (max_bias_size == 0) ret_val = kTfLiteError; + } + if (bias->data == NULL) ret_val = kTfLiteError; + +#endif + return ret_val; +} + +TfLiteStatus get_arc_scratch_buffer_for_fully_connect_tensors(TfLiteContext* context, + mli_tensor* in, + mli_tensor* weights, + mli_tensor* bias, + mli_tensor* out) { +TfLiteStatus ret_val = kTfLiteOk; +#ifdef __Xxy + init_arc_scratch_buffers(); + /* strategy for FC kernels: + first allocate input, because this cannot be sliced. (in case of batch processing, only a single input needs to be allocated) + then weigths & bias because if fully loaded, they can be reused over batches. + then output. 
+ The number of output channels (for weights slicing) depends on size of output and size of weights&bias */ + + if (!inside_arc_ccm(in->data)) { + /* In case the input tensor contains multiple batches, + only count the size if the inner most dimension */ + int size_in = mli_hlp_count_elem_num(in, in->rank - 1) * mli_hlp_tensor_element_size(in); + in->data = get_arc_scratch_buffer(size_in); + in->capacity = size_in; + if (in->data == NULL) { + in->capacity = 0; + ret_val = kTfLiteError; + } + } + + if (!inside_arc_ccm(weights->data)) { + int weights_size = mli_hlp_count_elem_num(weights, 0) * mli_hlp_tensor_element_size(weights); + int max_weights_size = 0; + weights->data = get_arc_scratch_buffer(weights_size); + weights->capacity = weights_size; + if (weights->data == NULL) { + get_arc_scratch_buffer_max_size(&max_weights_size); + weights->data = get_arc_scratch_buffer(max_weights_size); + weights->capacity = max_weights_size; + if (max_weights_size == 0) ret_val = kTfLiteError; + } + if (weights->data == NULL) ret_val = kTfLiteError; + } + + if (!inside_arc_ccm(bias->data)) { + int bias_mem_requirements = mli_hlp_count_elem_num(bias, 0) * mli_hlp_tensor_element_size(bias); + bias->data = get_arc_scratch_buffer(bias_mem_requirements); + bias->capacity = bias_mem_requirements; + } + + if (!inside_arc_ccm(out->data)) { + /* In case the input tensor contains multiple batches, + only count the size if the inner most dimension */ + int out_size = mli_hlp_count_elem_num(out, out->rank - 1) * mli_hlp_tensor_element_size(out); + int max_out_size = 0; + out->data = get_arc_scratch_buffer(out_size); + out->capacity = out_size; + if (out->data == NULL) { + get_arc_scratch_buffer_max_size(&max_out_size); + out->data = get_arc_scratch_buffer(max_out_size); + out->capacity = max_out_size; + if (max_out_size == 0) ret_val = kTfLiteError; + } + if (out->data == NULL) ret_val = kTfLiteError; + } + + if (bias->data == NULL) { + int max_bias_size = 0; + 
get_arc_scratch_buffer_max_size(&max_bias_size); + bias->data = get_arc_scratch_buffer(max_bias_size); + bias->capacity = max_bias_size; + if (max_bias_size == 0) ret_val = kTfLiteError; } if (bias->data == NULL) ret_val = kTfLiteError; @@ -107,44 +221,44 @@ TfLiteStatus ret_val = kTfLiteOk; TfLiteStatus arc_scratch_buffer_calc_slice_size_io( const mli_tensor *in, const mli_tensor *out, - const int kernelHeight, - const int strideHeight, + const int kernel_height, + const int stride_height, const int padding_top, const int padding_bot, - int *inSliceHeight, - int *outSliceHeight) { - const int heightDimension = 1; // todo: compute from rank - const int inHeight = in->shape[heightDimension]; - const int outHeight = out->shape[heightDimension]; - const int lineSizeIn = mli_hlp_count_elem_num(in, heightDimension + 1) * mli_hlp_tensor_element_size(in); - const int lineSizeOut = mli_hlp_count_elem_num(out, heightDimension + 1) * mli_hlp_tensor_element_size(out); - int maxLinesIn = 0; - int maxLinesOut = 0; - int maxOutLinesForInput = 0; - bool fit = (in->capacity >= inHeight * lineSizeIn) && (out->capacity >= outHeight * lineSizeOut); + int *in_slice_height, + int *out_slice_height) { + const int height_dimension = 1; // todo: compute from rank + const int in_height = in->shape[height_dimension]; + const int out_height = out->shape[height_dimension]; + const int line_size_in = mli_hlp_count_elem_num(in, height_dimension + 1) * mli_hlp_tensor_element_size(in); + const int line_size_out = mli_hlp_count_elem_num(out, height_dimension + 1) * mli_hlp_tensor_element_size(out); + int max_lines_in = 0; + int max_lines_out = 0; + int max_out_lines_for_input = 0; + bool fit = (in->capacity >= in_height * line_size_in) && (out->capacity >= out_height * line_size_out); if (fit) { // in case both tensors completely fit in the capacity, there is no need for slicing - *inSliceHeight = inHeight; - *outSliceHeight = outHeight; + *in_slice_height = in_height; + *out_slice_height = 
out_height; } else { // First compute how many lines fit into the input tensor, and compute how many output lines can be computed with that. - maxLinesIn = MIN(inHeight, in->capacity / lineSizeIn); - if (maxLinesIn >= inHeight) { - maxOutLinesForInput = outHeight; - } else if (2 * maxLinesIn >= inHeight) { + max_lines_in = MIN(in_height, in->capacity / line_size_in); + if (max_lines_in >= in_height) { + max_out_lines_for_input = out_height; + } else if (2 * max_lines_in >= in_height) { // in this case only two slices are needed, so both could benefit from padding. take the MIN to get the worst case. - maxOutLinesForInput = (maxLinesIn + MIN(padding_top, padding_bot) - kernelHeight + 1) / strideHeight; + max_out_lines_for_input = (max_lines_in + MIN(padding_top, padding_bot) - kernel_height + 1) / stride_height; } else { - maxOutLinesForInput = (maxLinesIn - kernelHeight + 1) / strideHeight; // TODO add padding exceptions and test by makin fit=false; + max_out_lines_for_input = (max_lines_in - kernel_height + 1) / stride_height; // TODO add padding exceptions and test by makin fit=false; } // Ten compute how many ouput lines fit into the output tensor. - maxLinesOut = MIN(outHeight, out->capacity / lineSizeOut); + max_lines_out = MIN(out_height, out->capacity / line_size_out); // the smallest of the two determines the slice height for the output, and the derived sliceheight for the input. 
- *outSliceHeight = MIN(maxOutLinesForInput, maxLinesOut); - *inSliceHeight = *outSliceHeight * strideHeight; + *out_slice_height = MIN(max_out_lines_for_input, max_lines_out); + *in_slice_height = *out_slice_height * stride_height; } - if ((*inSliceHeight > 0) && (*outSliceHeight > 0)) { + if ((*in_slice_height > 0) && (*out_slice_height > 0)) { return kTfLiteOk; } else { return kTfLiteError; @@ -154,73 +268,43 @@ TfLiteStatus arc_scratch_buffer_calc_slice_size_io( TfLiteStatus arc_scratch_buffer_calc_slice_size_weights( const mli_tensor *weights, const mli_tensor *bias, - int *sliceChannels) { - const int weightOutChDimension = 0; // NHWC layout for weigths, output channel dimension is the first dimension. - const int channels = weights->shape[weightOutChDimension]; + const int weight_out_ch_dimension, + int *slice_channels) { + const int channels = weights->shape[weight_out_ch_dimension]; + const int ch_size_w = (mli_hlp_count_elem_num(weights, 0) / channels) * mli_hlp_tensor_element_size(weights); + const int ch_size_b = (mli_hlp_count_elem_num(bias, 0) / channels) * mli_hlp_tensor_element_size(bias); + int max_ch_weigths = 0; + int max_ch_bias = 0; - - const int chSizeW = mli_hlp_count_elem_num(weights, weightOutChDimension + 1) * mli_hlp_tensor_element_size(weights); - const int chSizeB = mli_hlp_count_elem_num(bias, weightOutChDimension + 1) * mli_hlp_tensor_element_size(bias); - int maxChWeights = 0; - int maxChBias = 0; - - bool fit = (weights->capacity >= channels * chSizeW) && (bias->capacity >= channels * chSizeB); + bool fit = (weights->capacity >= channels * ch_size_w) && (bias->capacity >= channels * ch_size_b); if (fit) { // in case both tensors completely fit in the capacity, there is no need for slicing - *sliceChannels = channels; + *slice_channels = channels; } else { // First compute how many channels fit into the weights tensor - maxChWeights = MIN(channels, weights->capacity / chSizeW); + max_ch_weigths = MIN(channels, weights->capacity / 
ch_size_w); // Ten compute how many channels fit into the bias tensor. - maxChBias = MIN(channels, bias->capacity / chSizeB); + max_ch_bias = MIN(channels, bias->capacity / ch_size_b); // the smallest of the two determines the slice size - *sliceChannels = MIN(maxChWeights, maxChBias); + *slice_channels = MIN(max_ch_weigths, max_ch_bias); } - if (*sliceChannels > 0) { + if (*slice_channels > 0) { return kTfLiteOk; } else { return kTfLiteError; } } -TfLiteStatus get_arc_scratch_buffer_for_io_tensors(TfLiteContext* context, +TfLiteStatus get_arc_scratch_buffer_for_pooling_tensors(TfLiteContext* context, mli_tensor* in, mli_tensor* out) { #ifdef __Xxy - int requestSizeIn = 0; - int requestSizeOut = 0; - int grantsizeIn = 0; - int grantsizeOut = 0; - if (!inside_arc_ccm(in->data)) { - // In case the input tensor contains multiple batches, it has rank 4 - // because the mli kernel cannot operate on batches, we need to have the size - // of a single HWC tensor. that is why the startRank is 1 in case of input rank 4 - int startRank = in->rank - 3; - requestSizeIn = mli_hlp_count_elem_num(in, startRank) * mli_hlp_tensor_element_size(in); - } - if (!inside_arc_ccm(out->data)) { - // In case the input tensor contains multiple batches, it has rank 4 - // because the mli kernel cannot operate on batches, we need to have the size - // of a single batch. 
that is why the startRank is 1 in case of input rank 4 - int startRank = out->rank - 3; - requestSizeOut = mli_hlp_count_elem_num(out, startRank) * mli_hlp_tensor_element_size(out); - } - - get_arc_two_buffer_sizes(requestSizeIn, requestSizeOut, &grantsizeIn, &grantsizeOut); - - if (!inside_arc_ccm(in->data)) { - in->data = get_arc_scratch_buffer(grantsizeIn); - in->capacity = grantsizeIn; - if (in->data == NULL) return kTfLiteError; - } - if (!inside_arc_ccm(out->data)) { - out->data = get_arc_scratch_buffer(grantsizeOut); - out->capacity = grantsizeOut; - if (out->data == NULL) return kTfLiteError; - } -#endif + init_arc_scratch_buffers(); + return get_arc_scratch_buffer_for_io_tensors(context, in, out); +#else return kTfLiteOk; +#endif } } // namespace micro diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h b/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h index fc348229235..276f976cf0f 100644 --- a/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h +++ b/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h @@ -38,13 +38,13 @@ namespace micro { * @return Tf Lite status code */ TfLiteStatus get_arc_scratch_buffer_for_conv_tensors(TfLiteContext* context, - mli_tensor* in, - mli_tensor* weights, - mli_tensor* bias, + mli_tensor* in, + mli_tensor* weights, + mli_tensor* bias, mli_tensor* out); /** - * @brief Function to allocate scratch buffers for kernels with only input and output buffers + * @brief Function to allocate scratch buffers for pooling kernels with only input and output buffers * * @detail This function will update the data pointers in the 2 tensors with pointers * to scratch buffers in fast local memory. 
@@ -55,10 +55,49 @@ TfLiteStatus get_arc_scratch_buffer_for_conv_tensors(TfLiteContext* context, * * @return Tf Lite status code */ -TfLiteStatus get_arc_scratch_buffer_for_io_tensors(TfLiteContext* context, +TfLiteStatus get_arc_scratch_buffer_for_pooling_tensors(TfLiteContext* context, mli_tensor* in, mli_tensor* out); +/** + * @brief Function to allocate scratch buffers for the fully connect tensors + * + * @detail This function will update the data pointers in the 4 tensors with pointers + * to scratch buffers in fast local memory. + * + * @param context [I] pointer to TfLite context (needed for error handling) + * @param in [IO] pointer to the input tensor + * @param weights [IO] pointer to the weights tensor + * @param bias [IO] pointer to the bias tensor + * @param output [IO] pointer to the output tensor + * + * @return Tf Lite status code + */ +TfLiteStatus get_arc_scratch_buffer_for_fully_connect_tensors(TfLiteContext* context, + mli_tensor* in, + mli_tensor* weights, + mli_tensor* bias, + mli_tensor* out); + +/** + * @brief Function to calculate slice size for io tensors + * + * @detail This function will calculate the slice size in the height dimension + * for input and output tensors. it takes into account the kernel size and the padding. + * the function will look at the capacity filed in the in and out tensor to + * determine the available buffersize. 
+ * + * @param in [I] pointer to the input tensor + * @param out [I] pointer to the output tensor + * @param kernelHeight [I] size of the kernel in height dimension + * @param strideHeight [I] input stride in height dimension + * @param padding_top [I] number of lines with zeros at the top + * @param padding_bot [I] number of lines with zeros at the bottom + * @param inSliceHeight [O] slice size in height dimension for the input tensor + * @param outSliceHeight [O] slice size in height dimension for the output tensor + * + * @return Tf Lite status code + */ TfLiteStatus arc_scratch_buffer_calc_slice_size_io( const mli_tensor *in, const mli_tensor *out, @@ -66,13 +105,29 @@ TfLiteStatus arc_scratch_buffer_calc_slice_size_io( const int strideHeight, const int padding_top, const int padding_bot, - int *inSliceHeight, - int *outSliceHeight); + int *in_slice_height, + int *out_slice_height); +/** + * @brief Function to calculate slice size for weight slicing + * + * @detail This function will calculate the slice size in the output channel dimension + * for weight and bias tensors. + * the function will look at the capacity filed in the weights and bias tensor to + * determine the available buffersize. 
+ * + * @param weights [I] pointer to the input tensor + * @param bias [I] pointer to the output tensor + * @param weightOutChDimension [I] dimension of the output channels in the weights tensor + * @param sliceChannels [O] slice size in output channel dimension + * + * @return Tf Lite status code + */ TfLiteStatus arc_scratch_buffer_calc_slice_size_weights( const mli_tensor *weights, const mli_tensor *bias, - int *sliceChannels); + const int weight_out_ch_dimension, + int *slice_channels); } // namespace micro } // namespace ops diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc b/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc index 106743cf471..f36059f82d2 100644 --- a/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc +++ b/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc @@ -74,9 +74,9 @@ void *get_arc_scratch_buffer(int size) { void *buf = NULL; int best_mem_idx = -1; int best_mem_delta = INT_MAX; - const int numMem = sizeof(scratch_mem)/sizeof(scratch_mem[0]); + const int num_mem = sizeof(scratch_mem)/sizeof(scratch_mem[0]); // find a local memory that fits the data size. - for (int mem_idx = 0; mem_idx < numMem; ++mem_idx) { + for (int mem_idx = 0; mem_idx < num_mem; ++mem_idx) { // Best Fit if ((size <= scratch_sizes[mem_idx]) && (scratch_sizes[mem_idx] - size < best_mem_delta)) { best_mem_idx = mem_idx; @@ -93,9 +93,9 @@ void *get_arc_scratch_buffer(int size) { void get_arc_scratch_buffer_max_size(int *size) { int maxavailable = 0; - const int numMem = sizeof(scratch_mem)/sizeof(scratch_mem[0]); + const int num_mem = sizeof(scratch_mem)/sizeof(scratch_mem[0]); // find the largest available buffer. 
- for (int i = 0; i < numMem; i++) { + for (int i = 0; i < num_mem; i++) { if (scratch_sizes[i] > maxavailable) { maxavailable = scratch_sizes[i]; } @@ -106,9 +106,9 @@ void get_arc_scratch_buffer_max_size(int *size) { void get_arc_scratch_buffer_two_max_sizes(int *size1, int *size2) { int maxavailable = 0; int secondavail = 0; - const int numMem = sizeof(scratch_mem)/sizeof(scratch_mem[0]); + const int num_mem = sizeof(scratch_mem)/sizeof(scratch_mem[0]); // find the two largest available buffers. - for (int i = 0; i < numMem; i++) { + for (int i = 0; i < num_mem; i++) { if (scratch_sizes[i] > maxavailable) { secondavail = maxavailable; maxavailable = scratch_sizes[i]; @@ -120,7 +120,7 @@ void get_arc_scratch_buffer_two_max_sizes(int *size1, int *size2) { *size2 = secondavail; } -void free_arc_scratch_buffers(void) { +void init_arc_scratch_buffers(void) { scratch_mem[0] = scratch_mem_x; scratch_mem[1] = scratch_mem_y; scratch_mem[2] = scratch_mem_z; diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buffers.h b/tensorflow/lite/micro/kernels/arc/scratch_buffers.h index 927e480da5a..703c164e077 100644 --- a/tensorflow/lite/micro/kernels/arc/scratch_buffers.h +++ b/tensorflow/lite/micro/kernels/arc/scratch_buffers.h @@ -24,7 +24,7 @@ namespace ops { namespace micro { -void free_arc_scratch_buffers(void); +void init_arc_scratch_buffers(void); void *get_arc_scratch_buffer(int size);// Function to assign fast memory from one of 3 scratch buffers. 
void get_arc_scratch_buffer_max_size(int *size); From 0b15d4264d6cc5695fca35b7f68dcf64e4353bcf Mon Sep 17 00:00:00 2001 From: jacco Date: Fri, 17 Jan 2020 19:30:30 +0300 Subject: [PATCH 13/45] Minor fixes to restore 'generate_projects' target functionality --- tensorflow/lite/micro/tools/make/targets/arc_makefile.inc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc index 5ce2e03bfc3..eb890ef1999 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc @@ -95,4 +95,10 @@ endif endif # USE_EMBARC_MLI +# These are microcontroller-specific rules for converting the ELF output +# of the linker into a binary image that can be loaded directly. + +# Not applicable for ARC, leaving it empty. +$(BINDIR)%.bin: + endif From e6f9f08acb00745c429baf199486cb8a6e07c08c Mon Sep 17 00:00:00 2001 From: jacco Date: Tue, 21 Jan 2020 20:11:27 +0300 Subject: [PATCH 14/45] Initial implementation of TCF and LCF files support for IoTDK and EMSDP platforms --- .../micro/tools/make/helper_functions.inc | 7 + .../tools/make/targets/arc/emsdp/emsdp.lcf | 47 + .../targets/arc/emsdp/emsdp_em11d_dfss.tcf | 4907 +++++++++++++++++ .../tools/make/targets/arc/iotdk/iotdk.lcf | 47 + .../tools/make/targets/arc/iotdk/iotdk.tcf | 4621 ++++++++++++++++ .../micro/tools/make/targets/arc_makefile.inc | 15 + 6 files changed, 9644 insertions(+) create mode 100644 tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp.lcf create mode 100644 tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp_em11d_dfss.tcf create mode 100644 tensorflow/lite/micro/tools/make/targets/arc/iotdk/iotdk.lcf create mode 100644 tensorflow/lite/micro/tools/make/targets/arc/iotdk/iotdk.tcf diff --git a/tensorflow/lite/micro/tools/make/helper_functions.inc b/tensorflow/lite/micro/tools/make/helper_functions.inc index 
09771419843..a7f9bd788e3 100644 --- a/tensorflow/lite/micro/tools/make/helper_functions.inc +++ b/tensorflow/lite/micro/tools/make/helper_functions.inc @@ -145,6 +145,13 @@ ifneq ($(TCF_FILE_NAME), ) $(PRJDIR)$(3)/$(1)/$(TCF_FILE_NAME): $(TCF_FILE) @cp $$< $$@ endif + +# Special rule to copy LCF in case the local filesystem file name has been defined +ifneq ($(LCF_FILE), ) +$(PRJDIR)$(3)/$(1)/$(notdir $(LCF_FILE)): $(LCF_FILE) + @cp $$< $$@ +endif + endif endef diff --git a/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp.lcf b/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp.lcf new file mode 100644 index 00000000000..fc34759d745 --- /dev/null +++ b/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp.lcf @@ -0,0 +1,47 @@ +# SYSTEM memory regions indicate where external memory might be located. +# The TCF has no specific knowledge of whether SYSTEM regions contain +# external memory or not. +# CCMWRAP memory regions indicate unusable portions of the address space +# due to CCM memory wrapping into upper addresses beyond its size + +MEMORY { + IVT : ORIGIN = 0x00000000, LENGTH = 0x60000000 + ICCM0 : ORIGIN = 0x60000000, LENGTH = 0x00020000 +# CCMWRAP0: ORIGIN = 0x60020000, LENGTH = 0x0ffe0000 +# SYSTEM1 : ORIGIN = 0x70000000, LENGTH = 0x10000000 + DCCM : ORIGIN = 0x80000000, LENGTH = 0x00020000 +# CCMWRAP1: ORIGIN = 0x80020000, LENGTH = 0x0ffe0000 + XCCM : ORIGIN = 0x90000000, LENGTH = 0x00004000 +# CCMWRAP2: ORIGIN = 0x90004000, LENGTH = 0x0fffc000 + YCCM : ORIGIN = 0xa0000000, LENGTH = 0x00004000 +# CCMWRAP3: ORIGIN = 0xa0004000, LENGTH = 0x0fffc000 + SYSTEM2 : ORIGIN = 0xb0000000, LENGTH = 0x50000000 + } +SECTIONS { + GROUP BLOCK(4): { + .text? : { *('.text$crt*') } + * (TEXT): {} + * (LIT): {} + } > ICCM0 + + GROUP BLOCK(4): { + /* _SDA_BASE_ computed implicitly */ + .sdata?: {} + .sbss?: {} + * (DATA): {} + * (BSS): {} + .stack ALIGN(4) SIZE(DEFINED _STACKSIZE?_STACKSIZE:65536): {} + .heap? 
ALIGN(4) SIZE(DEFINED _HEAPSIZE?_HEAPSIZE:0): {} + } > SYSTEM2 + GROUP BLOCK(4): { + .Xdata? : {} + } > XCCM + GROUP BLOCK(4): { + .Ydata? : {} + } > YCCM + GROUP BLOCK(4) : { + .vectors (TEXT) SIZE(DEFINED _IVTSIZE?_IVTSIZE:756): {} = FILL(0xa5a5a5a5,4) + } > IVT + } + + diff --git a/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp_em11d_dfss.tcf b/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp_em11d_dfss.tcf new file mode 100644 index 00000000000..833fa9ca9b9 --- /dev/null +++ b/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp_em11d_dfss.tcf @@ -0,0 +1,4907 @@ + + + + + + + + + + + + + + + + + + + + + + + +# +# option 16/L32/U32 Instructions +# ------ ---------- --------------------- +# +# none -/-/- None +# wlh1 1/1/1 MPYW/U, MPY/U, MPYH/U +# wlh2 2/2/2 MPYW/U, MPY/U, MPYH/U +# wlh3 2/3/3 MPYW/U, MPY/U, MPYH/U +# wlh4 2/4/5 MPYW/U, MPY/U, MPYH/U +# wlh5 5/9/9 MPYW/U, MPY/U, MPYH/U +# +# +-mpy_option none + +# code_protection --- The ARC EM architecture divides the memory into 16 regions, which can be protected individually. This feature adds a 16-bit input to the processor core, one bit per region. When the protect bit is set, the processor disables any load or store to the corresponding region. An attempt to access a protected region raises an EV_ProtV exception. +-code_protection false + +# stack_checking --- Stack checking is a mechanism for checking stack accesses and raising an exception when a stack overflow or underflow is detected. +-stack_checking true + +# unaligned_option --- This enables unaligned loads and stores. +-unaligned_option true + +# intvbase_preset --- This sets the interrupt vector base configuration register, VECBASE_AC_BUILD. The vector base address is aligned to a 1KB boundary, so the required address value should be divided by 1K (i.e. do not include the lower 10 bits). On reset, this register is loaded into the interrupt vector base address register, INT_VECTOR_BASE. 
+-intvbase_preset 0x0 + +# intvbase_preset_s --- This sets the secure interrupt vector base configuration register, VECBASE_AC_BUILD. The vector base address is aligned to a 1KB boundary, so the required address value should be divided by 1K (i.e. do not include the lower 10 bits). On reset, this register is loaded into the interrupt vector base address register, INT_VECTOR_BASE_S.This is effective only when 2+2 mode is enabled. +-intvbase_preset_s 0x0 + +# intvbase_ext --- Set this option to drive the upper 22 bits of the interrupt base vector externally, into signal intvbase_in. +-intvbase_ext false + +# nmi_option --- add Non-maskable external exception support +-nmi_option false + +# rgf_impl --- This defines whether the register file is implemented using flip-flops, or with a hard macro. +-rgf_impl flip_flops + +# rgf_num_regs --- This defines the size (in 32b register) of the processor register file. +-rgf_num_regs 32 + +# rgf_wr_ports --- This defines the number of write ports on the register file. +-rgf_wr_ports 2 + +# rgf_num_banks --- Dual register banks are useful if Fast IRQ has been configured, but may be selected even if not. +-rgf_num_banks 2 + +# rgf_banked_regs --- This selects the number of registers that are replicated in the second register-file bank. +-rgf_banked_regs 32 + +# turbo_boost --- This enables the Turbo Boost synthesis option. By enabling this option, the achievable clock frequency is increased, but at the cost of an additional cycle latency on branch instructions. +-turbo_boost false + +# infer_alu_adder --- infer: datapath is described as behavioral code: A + B +# instantiate: datapath is instantiated as a detailed multi-stage code of a carry-lookahead adder. It is generally preferable to use the infer option and add directives for your target synthesizer. 
+-infer_alu_adder infer + +# infer_mpy_wtree --- infer: datapath is described as behavioral code: A * B (applies to only wlh3, wlh4 and wlh5 designs) +# instantiate: datapath is instantiated as a detailed multi-stage code of a Wallace Tree multiplier It is generally preferable to use the infer option and add directives for your target synthesizer. +-infer_mpy_wtree instantiate + +# scantest_ram_bypass_mux --- This mux is used to make logic trapped between flops and memory (aka shadow logic) to be covered by scantest without requiring advanced sequential ATPG on the memory to be applied. Will add delay to functional access time +-scantest_ram_bypass_mux false + +# logic_bist --- This option will OR LBIST_EN with test_mode +-logic_bist false + +# power_domains --- Adds three separate power domains to the core, and propagates power-gate control signals to the top level of the core. Also generates UPF constraints and commands in the low-power scripts +-power_domains false + +# dvfs --- Adds logic to the core to allow dynamic controlling of voltage and frequency and propagates the associated control signals to the top level of core +-dvfs false + +# voltage_domains --- Creates a voltage domain split between RAM and std cell parts to support Ultra Low Voltage on cells and generates UPF constraints +-voltage_domains false + +# mem_bus_option --- The core supports two bus protocols for accessing external memory: AHB & AHB-Lite. AHB-Lite-single means instruction fetch and data access share a single AHB-Lite port. AHB-Lite-dual means separate AHB-Lite port for each initiator if present. +-mem_bus_option AHB + +# mem_bus_reg_interface --- Specifies whether the memory bus interface is registered. +-mem_bus_reg_interface true + +# dmi_burst_option --- This will enable high-throughput burst support on the DMI slave interfaces. 
By enabling this option, the peak DMI read throughput goes from 1 word per 3 cycles to N words per N+2 cycles, in which N is the AHB burst lengthDMI write throughput goes from 1 word per 3 cycles to 1 word per cycle. +-dmi_burst_option true + +# has_dmp_peripheral --- This option enables the redirection of load/store accesses to one segment (1/16) of the addressable space to a dedicated peripheral bus. This offers high system integration and reduces overall system cost. +-has_dmp_peripheral true + +# per0_base --- This option specifies the memory region assignment for this peripheral aperture +-per0_base 15 + +# per0_limit --- This option specifies the end of this peripheral aperture +-per0_limit 0 + +# per_bus_option --- The core supports one bus protocol for accessing the peripheral space, when enabled: AHB-Lite. +-per_bus_option AHB-Lite + +# per_bus_reg_interface --- Specifies whether the peripheral bus interface is registered. +-per_bus_reg_interface true + +# clock_gating --- This enables the insertion of architectural clock gate elements in the design. By enabling this option, the clocks to various parts of the design will be disabled when the logic they drive is not in use to save power. +-clock_gating false + +# back_compat --- This enables the addition of rst_a input in the clkgate module to support backward compatibility with the older EM and Subsystem releases. +-back_compat true + +# byte_parity --- If parity protection on the CCMs or Cache is configured, this option enables parity protection on a per-byte basis. Otherwise, parity is per word basis +-byte_parity false + +# prot_pipelined --- Check the box if CCM memories are configured for ECC, and you want single-bit errors to be corrected, written back to memory, and re-fetched. 
When unchecked, single bit errors are corrected when read from memory, but the offending memory location itself is not corrected with a writeback, no influence on Cache protection +-prot_pipelined false + +# cct_test_ena --- When ECC is configured, this option enables single bit error injection in CCT RAM models to demonstrate ECC protection on the RAMs. When enabled, the RAM models can only be used in HDL CCT simulation (no xCAM support) and are not intended for use in SoC level integration. +-cct_test_ena false + +# err_prot_ehce --- Enabled enhanced ECC architecture for CCM. Instruction fetch with single bit error is not replayed; ecc cac modules are shared to reduce area and timing opt. +-err_prot_ehce false + + +######## dsp_trig --- com.arc.hardware.dfss.dsp_trig.1_0 ######## + +# Create dsp_trig +-create com.arc.hardware.dfss.dsp_trig.1_0 System.CPUisle.ARCv2EM.dsp_trig + +# dsp_trig --- Command line option for EIA extension component 'dsp_trig'. +-dsp_trig true + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_gpio0 --- com.arc.hardware.dfss.io_gpio0.1_0 ######## + +# Create io_gpio0 +-create com.arc.hardware.dfss.io_gpio0.1_0 System.CPUisle.ARCv2EM.io_gpio0 + +# io_gpio0 --- Command line option for EIA extension component 'io_gpio0'. +-io_gpio0 true + +# io_gpio0_debounce --- Selects the inclusion of Debounce logic +-io_gpio0_debounce 1 + +# io_gpio0_readback_sync --- Selects the inclusion of metastability registers on the read back path when reading the external 'ext_porta' signal +-io_gpio0_readback_sync 1 + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + +# io_gpio0_direction_rst_value --- Reset value of the SWPORTA_DDR register, which determines the direction (input/output) of the GPIO interface. 0: input, 1: output. +-io_gpio0_direction_rst_value 0 + +# io_gpio0_output_rst_value --- Reset value of the SWPORTA_DR register, which determines the reset value of the GPIO output ports. Bits corresponding to input ports are ignored. +-io_gpio0_output_rst_value 0x0 + + +######## io_i2c_mst0 --- com.arc.hardware.dfss.io_i2c_mst0.1_0 ######## + +# Create io_i2c_mst0 +-create com.arc.hardware.dfss.io_i2c_mst0.1_0 System.CPUisle.ARCv2EM.io_i2c_mst0 + +# io_i2c_mst0 --- Command line option for APEX extension component 'io_i2c_mst0'. +-io_i2c_mst0 true + +# io_i2c_mst0_fs --- RX/TX FIFO size +-io_i2c_mst0_fs 16 + +# io_i2c_mst0_dma_support --- Specifies whether the DMA handshake interface is included +-io_i2c_mst0_dma_support None + +# io_i2c_mst0_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the serial clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than, lower than or equal to the serial clock frequency. +-io_i2c_mst0_cdc_included 0 + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_i2c_slv0 --- com.arc.hardware.dfss.io_i2c_slv0.1_0 ######## + +# Create io_i2c_slv0 +-create com.arc.hardware.dfss.io_i2c_slv0.1_0 System.CPUisle.ARCv2EM.io_i2c_slv0 + +# io_i2c_slv0 --- Command line option for APEX extension component 'io_i2c_slv0'. +-io_i2c_slv0 true + +# io_i2c_slv0_fs --- RX/TX FIFO size +-io_i2c_slv0_fs 16 + +# io_i2c_slv0_dma_support --- Specifies whether the DMA handshake interface is included +-io_i2c_slv0_dma_support None + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_spi_mst0 --- com.arc.hardware.dfss.io_spi_mst0.1_0 ######## + +# Create io_spi_mst0 +-create com.arc.hardware.dfss.io_spi_mst0.1_0 System.CPUisle.ARCv2EM.io_spi_mst0 + +# io_spi_mst0 --- Command line option for APEX extension component 'io_spi_mst0'. +-io_spi_mst0 true + +# io_spi_mst0_fz --- RX/TX FIFO depth +-io_spi_mst0_fs 16 + +# io_spi_mst0_max_xfer_size --- This defines the maximum number of bits per word at the serial data port, which determines the FIFO width. +-io_spi_mst0_max_xfer_size 16 + +# io_spi_mst0_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the peripheral clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than or equal to the peripheral clock frequency. +-io_spi_mst0_cdc_included 0 + +# io_spi_mst0_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. +-io_spi_mst0_dma_support Memory-Based + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## subsys_bcr --- com.arc.hardware.dfss.subsys_bcr.1_0 ######## + +# Create subsys_bcr +-create com.arc.hardware.dfss.subsys_bcr.1_0 System.CPUisle.ARCv2EM.subsys_bcr + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_spi_mst1 --- com.arc.hardware.dfss.io_spi_mst1.1_0 ######## + +# Create io_spi_mst1 +-create com.arc.hardware.dfss.io_spi_mst1.1_0 System.CPUisle.ARCv2EM.io_spi_mst1 + +# io_spi_mst1 --- Command line option for APEX extension component 'io_spi_mst1'. +-io_spi_mst1 true + +# io_spi_mst1_fz --- RX/TX FIFO depth +-io_spi_mst1_fs 16 + +# io_spi_mst1_max_xfer_size --- This defines the maximum number of bits per word at the serial data port, which determines the FIFO width. +-io_spi_mst1_max_xfer_size 16 + +# io_spi_mst1_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the peripheral clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than or equal to the peripheral clock frequency. +-io_spi_mst1_cdc_included 0 + +# io_spi_mst1_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. +-io_spi_mst1_dma_support Memory-Based + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_spi_mst2 --- com.arc.hardware.dfss.io_spi_mst2.1_0 ######## + +# Create io_spi_mst2 +-create com.arc.hardware.dfss.io_spi_mst2.1_0 System.CPUisle.ARCv2EM.io_spi_mst2 + +# io_spi_mst2 --- Command line option for APEX extension component 'io_spi_mst2'. +-io_spi_mst2 true + +# io_spi_mst2_fz --- RX/TX FIFO depth +-io_spi_mst2_fs 16 + +# io_spi_mst2_max_xfer_size --- This defines the maximum number of bits per word at the serial data port, which determines the FIFO width. +-io_spi_mst2_max_xfer_size 16 + +# io_spi_mst2_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the peripheral clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than or equal to the peripheral clock frequency. +-io_spi_mst2_cdc_included 0 + +# io_spi_mst2_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. +-io_spi_mst2_dma_support None + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_spi_slv0 --- com.arc.hardware.dfss.io_spi_slv0.1_0 ######## + +# Create io_spi_slv0 +-create com.arc.hardware.dfss.io_spi_slv0.1_0 System.CPUisle.ARCv2EM.io_spi_slv0 + +# io_spi_slv0 --- Command line option for APEX extension component 'io_spi_slv0'. +-io_spi_slv0 true + +# io_spi_slv0_fz --- RX/TX FIFO depth +-io_spi_slv0_fs 16 + +# io_spi_slv0_max_xfer_size --- This defines the maximum number of bits per word at the serial data port, which determines the FIFO width. +-io_spi_slv0_max_xfer_size 16 + +# io_spi_slv0_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. +-io_spi_slv0_dma_support Memory-Based + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_gpio1 --- com.arc.hardware.dfss.io_gpio1.1_0 ######## + +# Create io_gpio1 +-create com.arc.hardware.dfss.io_gpio1.1_0 System.CPUisle.ARCv2EM.io_gpio1 + +# io_gpio1 --- Command line option for EIA extension component 'io_gpio1'. +-io_gpio1 true + +# io_gpio1_debounce --- Selects the inclusion of Debounce logic +-io_gpio1_debounce 1 + +# io_gpio1_readback_sync --- Selects the inclusion of metastability registers on the read back path when reading the external 'ext_porta' signal +-io_gpio1_readback_sync 1 + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + +# io_gpio1_direction_rst_value --- Reset value of the SWPORTA_DDR register, which determines the direction (input/output) of the GPIO interface. 0: input, 1: output. +-io_gpio1_direction_rst_value 0 + +# io_gpio1_output_rst_value --- Reset value of the SWPORTA_DR register, which determines the reset value of the GPIO output ports. Bits corresponding to input ports are ignored. +-io_gpio1_output_rst_value 0x0 + + +######## io_gpio2 --- com.arc.hardware.dfss.io_gpio2.1_0 ######## + +# Create io_gpio2 +-create com.arc.hardware.dfss.io_gpio2.1_0 System.CPUisle.ARCv2EM.io_gpio2 + +# io_gpio2 --- Command line option for EIA extension component 'io_gpio2'. +-io_gpio2 true + +# io_gpio2_debounce --- Selects the inclusion of Debounce logic +-io_gpio2_debounce 1 + +# io_gpio2_readback_sync --- Selects the inclusion of metastability registers on the read back path when reading the external 'ext_porta' signal +-io_gpio2_readback_sync 1 + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + +# io_gpio2_direction_rst_value --- Reset value of the SWPORTA_DDR register, which determines the direction (input/output) of the GPIO interface. 0: input, 1: output. +-io_gpio2_direction_rst_value 0 + +# io_gpio2_output_rst_value --- Reset value of the SWPORTA_DR register, which determines the reset value of the GPIO output ports. Bits corresponding to input ports are ignored. +-io_gpio2_output_rst_value 0x0 + + +######## io_i2c_mst1 --- com.arc.hardware.dfss.io_i2c_mst1.1_0 ######## + +# Create io_i2c_mst1 +-create com.arc.hardware.dfss.io_i2c_mst1.1_0 System.CPUisle.ARCv2EM.io_i2c_mst1 + +# io_i2c_mst1 --- Command line option for APEX extension component 'io_i2c_mst1'. +-io_i2c_mst1 true + +# io_i2c_mst1_fs --- RX/TX FIFO size +-io_i2c_mst1_fs 16 + +# io_i2c_mst1_dma_support --- Specifies whether the DMA handshake interface is included +-io_i2c_mst1_dma_support None + +# io_i2c_mst1_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the serial clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than, lower than or equal to the serial clock frequency. +-io_i2c_mst1_cdc_included 0 + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_i2c_mst2 --- com.arc.hardware.dfss.io_i2c_mst2.1_0 ######## + +# Create io_i2c_mst2 +-create com.arc.hardware.dfss.io_i2c_mst2.1_0 System.CPUisle.ARCv2EM.io_i2c_mst2 + +# io_i2c_mst2 --- Command line option for APEX extension component 'io_i2c_mst2'. +-io_i2c_mst2 true + +# io_i2c_mst2_fs --- RX/TX FIFO size +-io_i2c_mst2_fs 16 + +# io_i2c_mst2_dma_support --- Specifies whether the DMA handshake interface is included +-io_i2c_mst2_dma_support None + +# io_i2c_mst2_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the serial clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than, lower than or equal to the serial clock frequency. +-io_i2c_mst2_cdc_included 0 + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_uart0 --- com.arc.hardware.dfss.io_uart0.1_0 ######## + +# Create io_uart0 +-create com.arc.hardware.dfss.io_uart0.1_0 System.CPUisle.ARCv2EM.io_uart0 + +# io_uart0 --- Command line option for EIA extension component 'io_uart0'. +-io_uart0 true + +# io_uart0_fifo_mode --- Set the UART FIFO mode +-io_uart0_fifo_mode 16 + +# io_uart0_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. +-io_uart0_dma_support None + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_uart1 --- com.arc.hardware.dfss.io_uart1.1_0 ######## + +# Create io_uart1 +-create com.arc.hardware.dfss.io_uart1.1_0 System.CPUisle.ARCv2EM.io_uart1 + +# io_uart1 --- Command line option for EIA extension component 'io_uart1'. +-io_uart1 true + +# io_uart1_fifo_mode --- Set the UART FIFO mode +-io_uart1_fifo_mode 16 + +# io_uart1_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. +-io_uart1_dma_support None + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_uart2 --- com.arc.hardware.dfss.io_uart2.1_0 ######## + +# Create io_uart2 +-create com.arc.hardware.dfss.io_uart2.1_0 System.CPUisle.ARCv2EM.io_uart2 + +# io_uart2 --- Command line option for EIA extension component 'io_uart2'. +-io_uart2 true + +# io_uart2_fifo_mode --- Set the UART FIFO mode +-io_uart2_fifo_mode 16 + +# io_uart2_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. +-io_uart2_dma_support None + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_uart3 --- com.arc.hardware.dfss.io_uart3.1_0 ######## + +# Create io_uart3 +-create com.arc.hardware.dfss.io_uart3.1_0 System.CPUisle.ARCv2EM.io_uart3 + +# io_uart3 --- Command line option for EIA extension component 'io_uart3'. +-io_uart3 true + +# io_uart3_fifo_mode --- Set the UART FIFO mode +-io_uart3_fifo_mode 16 + +# io_uart3_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. +-io_uart3_dma_support None + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_i2s_rx_mst0 --- com.arc.hardware.dfss.io_i2s_rx_mst0.1_0 ######## + +# Create io_i2s_rx_mst0 +-create com.arc.hardware.dfss.io_i2s_rx_mst0.1_0 System.CPUisle.ARCv2EM.io_i2s_rx_mst0 + +# io_i2s_rx_mst0 --- Command line option for APEX extension component 'io_i2s_rx_mst0'. +-io_i2s_rx_mst0 true + +# io_i2s_rx_mst0_fs --- RX FIFO size +-io_i2s_rx_mst0_fs 8 + +# io_i2s_rx_mst0_fw --- RX FIFO width +-io_i2s_rx_mst0_fw 16 + +# io_i2s_rx_mst0_dma_support --- Specifies whether the DMA handshake interface is included +-io_i2s_rx_mst0_dma_support Memory-Based + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_i2s_tx_mst0 --- com.arc.hardware.dfss.io_i2s_tx_mst0.1_0 ######## + +# Create io_i2s_tx_mst0 +-create com.arc.hardware.dfss.io_i2s_tx_mst0.1_0 System.CPUisle.ARCv2EM.io_i2s_tx_mst0 + +# io_i2s_tx_mst0 --- Command line option for APEX extension component 'io_i2s_tx_mst0'. +-io_i2s_tx_mst0 true + +# io_i2s_tx_mst0_fs --- TX FIFO size +-io_i2s_tx_mst0_fs 8 + +# io_i2s_tx_mst0_fw --- TX FIFO width +-io_i2s_tx_mst0_fw 16 + +# io_i2s_tx_mst0_dma_support --- Specifies whether the DMA handshake interface is included +-io_i2s_tx_mst0_dma_support Memory-Based + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_pdm_rx0 --- com.arc.hardware.dfss.io_pdm_rx0.1_0 ######## + +# Create io_pdm_rx0 +-create com.arc.hardware.dfss.io_pdm_rx0.1_0 System.CPUisle.ARCv2EM.io_pdm_rx0 + +# io_pdm_rx0 --- Command line option for APEX extension component 'io_pdm_rx0'. +-io_pdm_rx0 true + +# io_pdm_rx0_ch --- Number of Stereo Channels +-io_pdm_rx0_ch 1 + +# io_pdm_rx0_fs --- RX FIFO size +-io_pdm_rx0_fs 16 + +# io_pdm_rx0_ns --- Maximum number of CIC stages +-io_pdm_rx0_ns 4 + +# io_pdm_rx0_ds --- Maximum delay in the COMB filter of the CIC filter +-io_pdm_rx0_ds 2 + +# io_pdm_rx0_dma_support --- Specifies whether the DMA handshake interface is included +-io_pdm_rx0_dma_support Memory-Based + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## DCCM --- com.arc.hardware.DCCM.1_0 ######## + +# Create DCCM +-create com.arc.hardware.DCCM.1_0 System.CPUisle.ARCv2EM.DCCM + +# dccm_size --- This defines the size of the Data Closely Coupled Memory (DCCM) in bytes +-dccm_size 131072 + +# dccm_base --- Sets the initial memory region assignment for DCCM +-dccm_base 8 + +# dccm_interleave --- Split DCCM into even/odd memory banks. +-dccm_interleave false + +# dccm_prot --- Specifies the type of protection built for the DCCM. +-dccm_prot None + +# dccm_prot_level --- Specifies the level protection. +-dccm_prot_level Data_Only + +# dccm_prot_exceptions --- When the core is configured with ECC or Parity, cause exception generation hardware to be created for uncorrectable errors detected on the DCCM +-dccm_prot_exceptions true + +# dccm_sec_lvl --- Specifies the level of secure DCCM. +-dccm_sec_lvl Non_Secure + +# dccm_dmi --- This enables external access through a DMI (direct memory interface) port. +-dccm_dmi true + + +######## DMA Controller --- com.arc.hardware.DMA_Controller.1_0 ######## + +# Create DMA Controller +-create com.arc.hardware.DMA_Controller.1_0 "System.CPUisle.ARCv2EM.DMA Controller" + +# dmac_channels --- This options specifies the number of DMA channels implemented in the DMA controller +-dmac_channels 16 + +# dmac_fifo_depth --- This option specifies the DMA transfer FIFO depth in 32b words. 
+-dmac_fifo_depth 2 + +# dmac_int_config --- None: the DMA controller cannot raise an interrupt +# Single-External: single done and single error interrupt signal for all DMA channels, and the interrupt signals are routed to a port at the top of the EM logical hierarchy +# Multiple-External: each DMA channel can be configured to raise separate (per-channel) done and error interrupts, and the interrupt signals are routed to ports at the top of the EM logical hierarchy +# Single-Internal: single done and single error interrupt signals for all DMA channels, and the interrupt signals are internal to the EM core +# Multiple-Internal: each DMA channel can be configured to raise separate (per-channel) done and error interrupts, and the interrupt signals are internal to the EM core +-dmac_int_config Multiple-Internal + +# dmac_separate_error_interrupts --- This specifies whether there is a separate error interrupt per DMA channel, or just one. +-dmac_separate_error_interrupts false + +# dmac_registers --- This option defines the number of DMA channels with their registers located in auxiliary space. +-dmac_registers 0 + +# dmac_mem_if --- This option specifies whether the DMA controller system memory interface is integrated into the existing EM system memory interfaces or has its own interface. +-dmac_mem_if integrated + +# dmac_per_if --- Internal vs DW peripheral interface. Specify (in hex) which channels have the DW interface, where bit 0 corresponds to DMA channel 0, bit 1 for DMA channel 1, etc. +# Example: 4 channel DMA controller where -dmac_per_if is set to 0x9 = DMA Channels 0 and 3 configured with the DW req interface, DMA Channels 1 and 2 configured with the internal req interface. 
+-dmac_per_if 0x7e00 + + +######## DSP --- com.arc.hardware.DSP.1_0 ######## + +# Create DSP +-create com.arc.hardware.DSP.1_0 System.CPUisle.ARCv2EM.DSP + +# dsp_complex --- Enable/disable support for single cycle 16b+16b complex instructions and butterfly operations, else 2-cycle complex instructions only without butterfly support +-dsp_complex true + +# dsp_itu --- Enable/disable support for ITU bit-accurate 1 bit fractional shift before accumulation, else 1-bit fractional shift result after accumulation only +-dsp_itu true + +# dsp_divsqrt --- Enable/disable support for divide and square root operations: DIV(U), REM(U), SQRT +-dsp_divsqrt radix2 + +# dsp_accshift --- Select support for accumulator shift operations: no supported, limited shift support only or full shift support and convergent rounding +-dsp_accshift full + +# dsp_impl --- The datapath components may be inferred from Verilog for better area or optimized using carry-save components for better timing +-dsp_impl optimized + + +######## Data Cache --- com.arc.hardware.Data_Cache.1_0 ######## + +# Create Data Cache +-create com.arc.hardware.Data_Cache.1_0 "System.CPUisle.ARCv2EM.Data Cache" + +# dc_size --- This defines the total size of the Data Cache in bytes. +-dc_size 16384 + +# dc_ways --- This defines the number of cache ways. +-dc_ways 2 + +# dc_bsize --- This defines the cache line length in bytes. +-dc_bsize 32 + +# dc_feature_level --- Feature Level, indicates locking and debug feature level 00 = Basic cache, with no locking or debug features 01 = Lock and flush features supported 10 = Lock, flush and advanced debug features supported 11 = Reserved +-dc_feature_level 2 + +# dc_uncached_region --- Enable an uncached region defined by aux reg +-dc_uncached_region false + +# dc_prot --- Specifies the type of protection built for DCACHE. +-dc_prot None + +# dc_prot_level --- Specifies the level of protection. 
+-dc_prot_level Data_Only + +# dc_prot_exceptions --- Builds exception generation hardware for uncorrectable (fatal) errors detected on DCACHE. +-dc_prot_exceptions true + + +######## Debug Interface --- com.arc.hardware.Debug_Interface.1_0 ######## + +# Create Debug Interface +-create com.arc.hardware.Debug_Interface.1_0 "System.CPUisle.ARCv2EM.Debug Interface" + +# dbg_en_option --- Adds an enable pin to the existing debug interface +-dbg_en_option false + +# secure_debug --- This enables secure debug feature +-secure_debug false + +# scdbg_aux_unlk --- An internal demo module will be included when enable +-scdbg_aux_unlk false + +# dbg_apb_option --- Adds an additional APB debug port alongside the BVCI one +-dbg_apb_option false + + +######## ICCM0 --- com.arc.hardware.ICCM0.1_0 ######## + +# Create ICCM0 +-create com.arc.hardware.ICCM0.1_0 System.CPUisle.ARCv2EM.ICCM0 + +# iccm0_size --- This defines the size of ICCM0 in bytes.This ICCM has 0 wait states. +-iccm0_size 131072 + +# iccm0_base --- Sets the initial memory region assignment for ICCM0 +-iccm0_base 6 + +# iccm0_wide --- Creates ICCM0 as 64b memory to reduce accesses. +-iccm0_wide false + +# iccm0_prot --- Specifies the type of protection built for ICCM0. +-iccm0_prot None + +# iccm0_prot_level --- Specifies the level of protection. +-iccm0_prot_level Data_Only + +# iccm0_prot_exceptions --- When the core is configured with ECC or Parity, cause exception generation hardware to be created for uncorrectable errors detected on the ICCM0 +-iccm0_prot_exceptions true + +# iccm0_sec_lvl --- Specifies the level of secure ICCM0. +-iccm0_sec_lvl Non_Secure + +# iccm0_dmi --- This enables external access through a DMI (direct memory interface) port. 
+-iccm0_dmi true + + +######## Instruction Cache --- com.arc.hardware.Instruction_Cache.1_0 ######## + +# Create Instruction Cache +-create com.arc.hardware.Instruction_Cache.1_0 "System.CPUisle.ARCv2EM.Instruction Cache" + +# ic_size --- This defines the total size of the instruction cache in bytes. +-ic_size 16384 + +# ic_ways --- This defines the number of cache ways +-ic_ways 2 + +# ic_bsize --- This defines the cache line length in bytes. +-ic_bsize 64 + +# ic_disable_on_reset --- The instruction cache may be enabled immediately after reset, depending on this option. If this option is enabled, the last cache operation is set to failed, and the direct cache-RAM access is enabled. Furthermore, the instruction cache is invalidated all cache lines are invalidated and unlocked, and the tag RAM is cleared. +-ic_disable_on_reset false + +# ic_feature_level --- This defines the feature level of the cache. +-ic_feature_level 1 + +# ic_pwr_opt_level --- This selects power-optimization options in the micro-architecture of the instruction cache. +-ic_pwr_opt_level 0 + +# ic_prot --- Specifies the type of protection built for ICACHE. +-ic_prot None + +# ic_prot_level --- Specifies the level of protection. +-ic_prot_level Data_Only + +# ic_prot_exceptions --- Builds exception generation hardware for uncorrectable (fatal) errors detected on ICACHE. +-ic_prot_exceptions true + + +######## Interrupt Controller --- com.arc.hardware.Interrupt_Controller.1_0 ######## + +# Create Interrupt Controller +-create com.arc.hardware.Interrupt_Controller.1_0 "System.CPUisle.ARCv2EM.Interrupt Controller" + +# number_of_interrupts --- This is the total number of interrupts available to the core. Some interrupts are allocated statically to a specific interrupt line (for example, timer interrupts). For more information on Interrupt and register-file options, see DesignWare ARCv2 ISA Programmers Reference Manual. 
+-number_of_interrupts 96 + +# number_of_levels --- Priority levels in the interrupt controller. +-number_of_levels 4 + +# external_interrupts --- This is the total number of interrupt pins available for external system components. This parameter must be less than the total number of interrupts. +-external_interrupts 77 + +# firq_option --- This enables the fast-interrupts option, (priority level 0 interrupts), which uses an alternate register bank (if configured) instead of saving the context to memory. +-firq_option true + + +######## JTAG Interface --- com.arc.hardware.JTAG_Interface.1_0 ######## + +# Create JTAG Interface +-create com.arc.hardware.JTAG_Interface.1_0 "System.CPUisle.ARCv2EM.JTAG Interface" + +######## Timer 0 --- com.arc.hardware.Timer_0.1_0 ######## + +# Create Timer 0 +-create com.arc.hardware.Timer_0.1_0 "System.CPUisle.ARCv2EM.Timer 0" + +# timer_0_int_level --- This sets the interrupt level (and implicitly the priority: level 0 is highest) of timer 0. +-timer_0_int_level 1 + + +######## Watchdog Timer --- com.arc.hardware.Watchdog_Timer.1_0 ######## + +# Create Watchdog Timer +-create com.arc.hardware.Watchdog_Timer.1_0 "System.CPUisle.ARCv2EM.Watchdog Timer" + +# watchdog_size --- Specifies the bit width of timer's internal counter. +-watchdog_size 32 + +# watchdog_clk --- Specifies whether the timer should be driven from a separate clock. +-watchdog_clk false + + +######## Real-time Counter --- com.arc.hardware.Real_time_Counter.1_0 ######## + +# Create Real-time Counter +-create com.arc.hardware.Real_time_Counter.1_0 "System.CPUisle.ARCv2EM.Real-time Counter" + +######## Performance Monitor --- com.arc.hardware.Performance_Monitor.1_0 ######## + +# Create Performance Monitor +-create com.arc.hardware.Performance_Monitor.1_0 "System.CPUisle.ARCv2EM.Performance Monitor" + +# pct_counters --- Number of counters for performance monitoring. 
+-pct_counters 8 + + +######## SmaRT --- com.arc.hardware.SmaRT.1_0 ######## + +# Create SmaRT +-create com.arc.hardware.SmaRT.1_0 System.CPUisle.ARCv2EM.SmaRT + +# smart_stack_entries --- This specifies the number of entries in the trace buffer. +-smart_stack_entries 8 + +# smart_implementation --- Flip-flop = FF-based design. Memory = memory-based design (provides better density for larger trace buffers). +-smart_implementation flip-flop + + +######## XY --- com.arc.hardware.XY.1_0 ######## + +# Create XY +-create com.arc.hardware.XY.1_0 System.CPUisle.ARCv2EM.XY + +# xy_config --- XY memory configuration: +# One memory: DCCM only. +# Two memories: DCCM + Y. +# Three memories: DCCM + X + Y. +-xy_config dccm_x_y + +# xy_size --- Size of X and Y memories if included. +# X and Y memories both have the same configured size. +-xy_size 16384 + +# xy_interleave --- Split XY memories into odd/even instances to enable single cycle unaligned access. +-xy_interleave false + +# xy_x_base --- Base region for X memory. All accesses to this region will initiate a transfer on the X memory. +-xy_x_base 9 + +# xy_y_base --- Base region for Y memory. All accesses to this region will initiate a transfer on the Y memory. +-xy_y_base 10 + + +######## AGU --- com.arc.hardware.AGU.1_0 ######## + +# Create AGU +-create com.arc.hardware.AGU.1_0 System.CPUisle.ARCv2EM.AGU + +# agu_size --- Predefined configurations of modifiers, address +# pointers and offset registers +#

+# 
+#         address     address                     
+#         pointers    offset regs      modifiers  
+#        ----------- --------------- ------------ 
+# small:     4           2                 4      
+# medium:    8           4                 12     
+# large:     12          8                 24     
+# 
+# +-agu_size large + +# agu_accord --- Enable the accordion stage if operating frequency is critical +-agu_accord true + +# agu_wb_depth --- Write buffer depth +-agu_wb_depth 4 + + +######## Actionpoints --- com.arc.hardware.Actionpoints.1_0 ######## + +# Create Actionpoints +-create com.arc.hardware.Actionpoints.1_0 System.CPUisle.ARCv2EM.Actionpoints + +# num_actionpoints --- This is the number of trigger events available. +-num_actionpoints 8 + +# aps_feature --- Selects Actionpoint feature set +-aps_feature min + + +######## Bit stream --- com.arc.hardware.Bit_stream.1_0 ######## + +# Create Bit stream +-create com.arc.hardware.Bit_stream.1_0 "System.CPUisle.ARCv2EM.Bit stream" + +######## Floating-point unit --- com.arc.hardware.Floating_point_unit.1_0 ######## + +# Create Floating-point unit +-create com.arc.hardware.Floating_point_unit.1_0 "System.CPUisle.ARCv2EM.Floating-point unit" + +# fpu_dp_assist --- This enables double-precision acceleration instructions. +-fpu_dp_assist true + +# fpu_fma_option --- This enables the fused multiply-add & multiply-subtract instructions. +-fpu_fma_option true + +# fpu_mas_cycles --- Make mul/add/sub multicycle to achieve a higher clock speed. +-fpu_mas_cycles 2 + +# fpu_pipe_impl --- FPU pipelined implementation +-fpu_pipe_impl true + +# fpu_div_option --- This enables divide & square-root acceleration +-fpu_div_option true + +# fpu_div_cycles --- Controls div/sqrt implementation. +-fpu_div_cycles 17 + + +######## Memory Protection Unit --- com.arc.hardware.Memory_Protection_Unit.1_0 ######## + +# Create Memory Protection Unit +-create com.arc.hardware.Memory_Protection_Unit.1_0 "System.CPUisle.ARCv2EM.Memory Protection Unit" + +# mpu_num_regions --- Number of configured memory regions. +-mpu_num_regions 16 + +# mpu_32b --- Set the minimal region size to be 32 byte instead of 2KB. 
+-mpu_32b false + +# mpu_sid_option --- It will enable SID support in Secure Shield +-mpu_sid_option false + + +######## Real-time trace producer --- com.arc.hardware.Real_time_trace_producer.1_0 ######## + +# Create Real-time trace producer +-create com.arc.hardware.Real_time_trace_producer.1_0 "System.CPUisle.ARCv2EM.Real-time trace producer" + +# rtt_feature_level --- 'small' means that program trace only is available. `medium' adds data trace. `full' adds core and aux register trace. +-rtt_feature_level full + + +######## ARCv2EM CCT --- cct.1_0 ######## + +# Create ARCv2EM CCT +-create cct.1_0 "System.ARCv2EM CCT" + +# cct --- +# Option used to add a CCT to the design for command-line builds +# Without this architect can't add this component to a build +# via a cmdline -create command. +# with old scripts. +# +-cct true + +# no_hostlink --- +# This prevents the inclusion of the hostlink library when compiling +# C or C++ programs. The resultant executable, if it contains printfs, +# will print to an internal fixed buffer __mwwrite_buf. +# Other hostlink operations that require debugger assistance, such as file +# opens, will fail. +# +# Hostlink references incur memory cycles at unpredictable times and +# so can perturb cycle-timing results. Without hostlink, +# the debugger will not in any way interfere with the target while it is running. +# Therefore this option is useful for simulation in which you want precisely the +# same cycle timing to occur each time you run, or for accurate power consumption results. +# +-cct_no_hostlink false + +# has_subsystem_cct_flow --- +# The above option will check for the presence of subsystem component in the build configuration and suitably modifies the Makefile for the sub-system environment. 
+# +-has_subsystem_cct_flow false + + +######## BusFabric --- com.arc.hardware.ARCv2MSS.BusFabric.1_0 ######## + +# Create BusFabric +-create com.arc.hardware.ARCv2MSS.BusFabric.1_0 System.BusFabric + +######## ClkCtrl --- com.arc.hardware.ARCv2MSS.ClkCtrl.1_0 ######## + +# Create ClkCtrl +-create com.arc.hardware.ARCv2MSS.ClkCtrl.1_0 System.ClkCtrl + +######## DSP Software --- com.arc.software.dfss.sw_dsp.1_0 ######## + +# Create DSP Software +-create com.arc.software.dfss.sw_dsp.1_0 "System.DSP Software" + +# sw_dsp --- Command line option for Software element 'DSP Software' +-sw_dsp true + + +######## EMSDP_BOARD --- com.arc.hardware.ARCv2MSS.EMSDP_BOARD.1_0 ######## + +# Create EMSDP_BOARD +-create com.arc.hardware.ARCv2MSS.EMSDP_BOARD.1_0 System.EMSDP_BOARD + +# emsdp_sys_freq --- Select the core frequency. +-emsdp_sys_freq 40 + + +######## IO Software --- com.arc.software.dfss.sw_io.1_0 ######## + +# Create IO Software +-create com.arc.software.dfss.sw_io.1_0 "System.IO Software" + +# sw_io --- Command line option for Software element 'IO Software' +-sw_io true + + +######## Implementation --- com.arc.hardware.implementation.1_0 ######## + +# Create Implementation +-create com.arc.hardware.implementation.1_0 System.Implementation + +# ClockSpeed --- Target clock speed of the system +-clock_speed 10 + +# DDR2_clk_Ratio --- DDR2 Clock Vs System Clock Ratio +# 2x +# 3x +# 4x +-ddr2_clk_ratio 3x + +# ClockSkew --- The clock skew for the system +-clock_skew 0.2 + +# HoldMargin --- Margin for hold time checks +-hold_margin 0.05 + +# Floorplan --- Floorplan definition for relative placement of RAMs (at CPU-level) or the placement of the rams and CPU hard cores (at multicore level) +-floorplan em4_sensor + +# JTAGFrequency --- Select the frequency of the JTAG clock Tck (in MHz). +# +# The JTAG clock speed has to be less than 1/2 of the cpu clock otherwise the signals on the BVCI interface are not guaranteed to be valid. 
+# +# NOTE: The RTL simulations will work when the JTAG clock frequency is set to half the CPU clock, however this may not be the case when simulating at gate level due to delays on the IO pads. +# +# The default is set to 10 MHz so that there is no conflict when simulating with an ARCangel3 at 30MHz. (30 > 10*2) +# +# The speed of simulation can be greatly increased by using a faster JTAG clock, but a dependency will warn if it exceeds 1/2 of the cpu clock. +# +-jtag_tclk 4 + +# execution_trace_level --- +# This traces committed instructions as they execute, and gathers statistics +# visible in the debugger for counting instructions & cycle delays. +# At the "stats" level ony the statistics are gathered and no trace is printed. +# "file" is equivalent to "full", but the results go to a trace .txt file instead. +# +-execution_trace_level stats + +# tb_trace --- +# Enable instruction execution trace. +# This is available to arc_dev licensees (internal developers) only. +# +-tb_trace false + +# zero_based_arcnum --- +# In a multicore build, number ARCs from 0. +# If this is not selected, arcs are numbered from 1. +# (This provides the initial value to the arcnum signal.) +# +-zero_based_arcnum true + +# generate_ipxact --- +# Generate ipxact.xml file describing the CPUisle or archipelago frontier +# +-generate_ipxact false + +# ipxact_relative_path_names --- +# Use relative path names for Verilog files in the ipxact. +# Otherwise, absolute path names are used. +# +-ipxact_relative_path_names true + +# optional_encryption --- +# When selected, encrypted RTL output is generated. +# +-optional_encryption false + +# ignore_encrypt_license --- +# When selected, pretend the encryption license is missing. For testing. +# +-ignore_encrypt_license false + +# ignore_clear_license --- +# When selected, pretend the cleartest license is missing. For testing. +# +-ignore_clear_license false + +# OPTION_require_archipelago --- +# When selected, force use of archipelago. 
This is for testing purposes. +# +-require_archipelago false + + +######## Infrastructure Software --- com.arc.software.dfss.sw_infra.1_0 ######## + +# Create Infrastructure Software +-create com.arc.software.dfss.sw_infra.1_0 "System.Infrastructure Software" + +# sw_infra --- Command line option for Software element 'Infrastructure Software' +-sw_infra true + +# templateName --- Template name +-template_name siss_combo_sensor_dsp + + +######## subsys_infra --- com.arc.hardware.dfss.subsys_infra.1_0 ######## + +# Create subsys_infra +-create com.arc.hardware.dfss.subsys_infra.1_0 System.subsys_infra + +# subsys_infra --- Command line option for EIA glue logic. +-subsys_infra true + +# internal_interrupt --- Connect the IO interrupts internally +-internal_interrupt true + +# internal_dma_handshake --- Connect the DMA handshake signals internally +-internal_dma_handshake true + +# spi_tb_sw_test_mode --- +# This is a secret option, not seen by customers. +# If you check this, the SPI peripheral's testbenches will be set to SW test mode: +# The serial interface of the first SPI master io_spi_mstN peripheral is connected to all SPI slave peripherals io_spi_slvN. +# This is used for testing the SW drivers. +# +-spi_tb_sw_test_mode false + +# i3c_tb_sw_test_mode --- +# This is a secret option, not seen by customers. +# If you check this, the I3C peripheral's testbenches will be set to SW test mode: +# The serial interface of the io_i3cN peripheral is connected to the I2C slave peripherals io_i2c_slv0. +# This is used for testing the SW drivers. +# +-i3c_tb_sw_test_mode false + +# subsys_apex_offset --- Subsystem APEX address offset in the AUX address space. The aperture used by the subsystem is fixed to 0x0010_0000. In general, the APEX address offset must be in the range from 0x0010_0000 to 0xFFF0_0000. 
However, if your design includes the "UAUX Interface" component, then the APEX address offset must be in the range from 0x0010_0000 to 0x7FF0_0000 to avoid address conflicts with any UAUX components. +-subsys_apex_offset 0x8000_0000 + +# subsys_uaux_offset --- Subsystem UAUX address offset in the UAUX address space. The UAUX address offset must be an integer multiple of 0x0010_0000 in the range from 0x0000_0000 to 0x7FF0_0000. The aperture reserved for the subsystem is fixed to 0x0010_0000. +-subsys_uaux_offset 0x10_0000 + + +######## ARC_RTT --- com.arc.hardware.ARC_RTT.1_0 ######## + +# Create ARC_RTT +-create com.arc.hardware.ARC_RTT.1_0 System.ARC_RTT + +# has_nexus_if --- Please select Nexus interface to offload the data from RTT +-has_nexus_if true + +# has_on_chip_mem --- Please select the on-chip memory option to store the trace data in shared memory +-has_on_chip_mem true + +# nexus_data_wdt --- Please select the Nexus Data Width to offload the data from RTT +-nexus_data_wdt 16 + +# internal_memory_size --- Please select internal memory size to capture the trace data +-internal_memory_size 16k + +# ram_type --- Please select Types of internal memories to be inferred for the logic +-ram_type 1_PORT + +# power_domains --- Adds isolation signal inputs/power switch controls for use in UPF flow when configuring power domains. +-rtt_power_domains false + + +######## Tool Configuration --- cgen.1_0 ######## + +# Create Tool Configuration +-create cgen.1_0 "System.Tool Configuration" + +# mwdt_version --- Selects the MetaWare version to be used with the TCF file. +# Change from the default to an older or newer toolset version if you want the TCF file to be used with an older or newer version of the MetaWare tools. +-mwdt_version O-2018.09 + +# code_base_addr --- +# The base address to assign to the executable code segment in the linker command file when there is no ICCM in the build. This value is ignored when there is an ICCM. 
+# +-code_base_addr 0x0 + +# data_base_addr --- +# The base address to assign to the data segment in the linker command file when the data is not being mapped to a DCCM. This value is ignored when the data segment is mapped to a DCCM, as in that case the base address of the DCCM memory is used. +# +# A value of 0xffffffff means that the data segment will not be mapped to any specific address. +# +-data_base_addr 0xffff_ffff + +# underscores_in_numbers --- Use underscores in hex numbers to improve readability. +-underscores_in_numbers false + +# tcf_rebrand --- Alternate branding of TCF (not used) +-rebrand false + + +]]>
+
+ + + + + + + + + + + + + + ICCM0 + + GROUP BLOCK(4): { + /* _SDA_BASE_ computed implicitly */ + .sdata?: {} + .sbss?: {} + * (DATA): {} + * (BSS): {} + .stack ALIGN(4) SIZE(DEFINED _STACKSIZE?_STACKSIZE:65536): {} + .heap? ALIGN(4) SIZE(DEFINED _HEAPSIZE?_HEAPSIZE:0): {} + } > SYSTEM2 + GROUP BLOCK(4): { + .Xdata? : {} + } > XCCM + GROUP BLOCK(4): { + .Ydata? : {} + } > YCCM + GROUP BLOCK(4) : { + .vectors (TEXT) SIZE(DEFINED _IVTSIZE?_IVTSIZE:756): {} = FILL(0xa5a5a5a5,4) + } > IVT + } + +]]> + + + + + + 0x07, sub_opcode => 0x1E , latency_cycles => 8) + +// User extension instruction - dsp_sin +extern long dsp_sin(long); +#pragma intrinsic(dsp_sin, opcode => 0x07, sub_opcode => 0x1F , latency_cycles => 8) + +// User extension instruction - dsp_tan +extern long dsp_tan(long); +#pragma intrinsic(dsp_tan, opcode => 0x07, sub_opcode => 0x22 , latency_cycles => 11) + +// User extension instruction - dsp_acos +extern long dsp_acos(long); +#pragma intrinsic(dsp_acos, opcode => 0x07, sub_opcode => 0x23 , latency_cycles => 31) + +// User extension instruction - dsp_asin +extern long dsp_asin(long); +#pragma intrinsic(dsp_asin, opcode => 0x07, sub_opcode => 0x24 , latency_cycles => 31) + +// User extension instruction - dsp_atan +extern long dsp_atan(long); +#pragma intrinsic(dsp_atan, opcode => 0x07, sub_opcode => 0x25 , latency_cycles => 13) + +// User extension instruction - dsp_sqrt +extern long dsp_sqrt(long); +#pragma intrinsic(dsp_sqrt, opcode => 0x07, sub_opcode => 0x20 , latency_cycles => 31) + +// User extension instruction - dsp_sqrt15 +extern long dsp_sqrt15(long); +#pragma intrinsic(dsp_sqrt15, opcode => 0x07, sub_opcode => 0x21 , latency_cycles => 15) + +#define APEX_COM_ARC_HARDWARE_DFSS_DSP_TRIG_PRESENT 1 +#define APEX_COM_ARC_HARDWARE_DFSS_IO_GPIO0_PRESENT 1 + +// User extension aux register io_gpio0_debounce +#define AR_IO_GPIO0_DEBOUNCE 0x80017048 +#pragma Aux_register(0x80017048, name=>"io_gpio0_debounce") + +// User extension aux register 
io_gpio0_clken +#define AR_IO_GPIO0_CLKEN 0x80017080 +#pragma Aux_register(0x80017080, name=>"io_gpio0_clken") + +// User extension aux register io_gpio0_swporta_dr +#define AR_IO_GPIO0_SWPORTA_DR 0x80017000 +#pragma Aux_register(0x80017000, name=>"io_gpio0_swporta_dr") + +// User extension aux register io_gpio0_swporta_ddr +#define AR_IO_GPIO0_SWPORTA_DDR 0x80017004 +#pragma Aux_register(0x80017004, name=>"io_gpio0_swporta_ddr") + +// User extension aux register io_gpio0_inten +#define AR_IO_GPIO0_INTEN 0x80017030 +#pragma Aux_register(0x80017030, name=>"io_gpio0_inten") + +// User extension aux register io_gpio0_intmask +#define AR_IO_GPIO0_INTMASK 0x80017034 +#pragma Aux_register(0x80017034, name=>"io_gpio0_intmask") + +// User extension aux register io_gpio0_inttype_level +#define AR_IO_GPIO0_INTTYPE_LEVEL 0x80017038 +#pragma Aux_register(0x80017038, name=>"io_gpio0_inttype_level") + +// User extension aux register io_gpio0_int_polarity +#define AR_IO_GPIO0_INT_POLARITY 0x8001703c +#pragma Aux_register(0x8001703c, name=>"io_gpio0_int_polarity") + +// User extension aux register io_gpio0_intstatus +#define AR_IO_GPIO0_INTSTATUS 0x80017040 +#pragma Aux_register(0x80017040, name=>"io_gpio0_intstatus") + +// User extension aux register io_gpio0_raw_intstatus +#define AR_IO_GPIO0_RAW_INTSTATUS 0x80017044 +#pragma Aux_register(0x80017044, name=>"io_gpio0_raw_intstatus") + +// User extension aux register io_gpio0_porta_eoi +#define AR_IO_GPIO0_PORTA_EOI 0x8001704c +#pragma Aux_register(0x8001704c, name=>"io_gpio0_porta_eoi") + +// User extension aux register io_gpio0_ext_porta +#define AR_IO_GPIO0_EXT_PORTA 0x80017050 +#pragma Aux_register(0x80017050, name=>"io_gpio0_ext_porta") + +// User extension aux register io_gpio0_ls_sync +#define AR_IO_GPIO0_LS_SYNC 0x80017060 +#pragma Aux_register(0x80017060, name=>"io_gpio0_ls_sync") + +// User extension aux register io_gpio0_int_bothedge +#define AR_IO_GPIO0_INT_BOTHEDGE 0x80017068 +#pragma Aux_register(0x80017068, 
name=>"io_gpio0_int_bothedge") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_I2C_MST0_PRESENT 1 + +// User extension aux register io_i2c_mst0_clken +#define AR_IO_I2C_MST0_CLKEN 0x800120c0 +#pragma Aux_register(0x800120c0, name=>"io_i2c_mst0_clken") + +// User extension aux register io_i2c_mst0_con +#define AR_IO_I2C_MST0_CON 0x80012000 +#pragma Aux_register(0x80012000, name=>"io_i2c_mst0_con") + +// User extension aux register io_i2c_mst0_tar +#define AR_IO_I2C_MST0_TAR 0x80012004 +#pragma Aux_register(0x80012004, name=>"io_i2c_mst0_tar") + +// User extension aux register io_i2c_mst0_data_cmd +#define AR_IO_I2C_MST0_DATA_CMD 0x80012010 +#pragma Aux_register(0x80012010, name=>"io_i2c_mst0_data_cmd") + +// User extension aux register io_i2c_mst0_ss_scl_hcnt +#define AR_IO_I2C_MST0_SS_SCL_HCNT 0x80012014 +#pragma Aux_register(0x80012014, name=>"io_i2c_mst0_ss_scl_hcnt") + +// User extension aux register io_i2c_mst0_ss_scl_lcnt +#define AR_IO_I2C_MST0_SS_SCL_LCNT 0x80012018 +#pragma Aux_register(0x80012018, name=>"io_i2c_mst0_ss_scl_lcnt") + +// User extension aux register io_i2c_mst0_fs_scl_hcnt +#define AR_IO_I2C_MST0_FS_SCL_HCNT 0x8001201c +#pragma Aux_register(0x8001201c, name=>"io_i2c_mst0_fs_scl_hcnt") + +// User extension aux register io_i2c_mst0_fs_scl_lcnt +#define AR_IO_I2C_MST0_FS_SCL_LCNT 0x80012020 +#pragma Aux_register(0x80012020, name=>"io_i2c_mst0_fs_scl_lcnt") + +// User extension aux register io_i2c_mst0_intr_stat +#define AR_IO_I2C_MST0_INTR_STAT 0x8001202c +#pragma Aux_register(0x8001202c, name=>"io_i2c_mst0_intr_stat") + +// User extension aux register io_i2c_mst0_intr_mask +#define AR_IO_I2C_MST0_INTR_MASK 0x80012030 +#pragma Aux_register(0x80012030, name=>"io_i2c_mst0_intr_mask") + +// User extension aux register io_i2c_mst0_raw_intr_stat +#define AR_IO_I2C_MST0_RAW_INTR_STAT 0x80012034 +#pragma Aux_register(0x80012034, name=>"io_i2c_mst0_raw_intr_stat") + +// User extension aux register io_i2c_mst0_rx_tl +#define AR_IO_I2C_MST0_RX_TL 0x80012038 
+#pragma Aux_register(0x80012038, name=>"io_i2c_mst0_rx_tl") + +// User extension aux register io_i2c_mst0_tx_tl +#define AR_IO_I2C_MST0_TX_TL 0x8001203c +#pragma Aux_register(0x8001203c, name=>"io_i2c_mst0_tx_tl") + +// User extension aux register io_i2c_mst0_clr_intr +#define AR_IO_I2C_MST0_CLR_INTR 0x80012040 +#pragma Aux_register(0x80012040, name=>"io_i2c_mst0_clr_intr") + +// User extension aux register io_i2c_mst0_clr_rx_under +#define AR_IO_I2C_MST0_CLR_RX_UNDER 0x80012044 +#pragma Aux_register(0x80012044, name=>"io_i2c_mst0_clr_rx_under") + +// User extension aux register io_i2c_mst0_clr_rx_over +#define AR_IO_I2C_MST0_CLR_RX_OVER 0x80012048 +#pragma Aux_register(0x80012048, name=>"io_i2c_mst0_clr_rx_over") + +// User extension aux register io_i2c_mst0_clr_tx_over +#define AR_IO_I2C_MST0_CLR_TX_OVER 0x8001204c +#pragma Aux_register(0x8001204c, name=>"io_i2c_mst0_clr_tx_over") + +// User extension aux register io_i2c_mst0_clr_tx_abrt +#define AR_IO_I2C_MST0_CLR_TX_ABRT 0x80012054 +#pragma Aux_register(0x80012054, name=>"io_i2c_mst0_clr_tx_abrt") + +// User extension aux register io_i2c_mst0_clr_activity +#define AR_IO_I2C_MST0_CLR_ACTIVITY 0x8001205c +#pragma Aux_register(0x8001205c, name=>"io_i2c_mst0_clr_activity") + +// User extension aux register io_i2c_mst0_clr_stop_det +#define AR_IO_I2C_MST0_CLR_STOP_DET 0x80012060 +#pragma Aux_register(0x80012060, name=>"io_i2c_mst0_clr_stop_det") + +// User extension aux register io_i2c_mst0_clr_start_det +#define AR_IO_I2C_MST0_CLR_START_DET 0x80012064 +#pragma Aux_register(0x80012064, name=>"io_i2c_mst0_clr_start_det") + +// User extension aux register io_i2c_mst0_enable +#define AR_IO_I2C_MST0_ENABLE 0x8001206c +#pragma Aux_register(0x8001206c, name=>"io_i2c_mst0_enable") + +// User extension aux register io_i2c_mst0_status +#define AR_IO_I2C_MST0_STATUS 0x80012070 +#pragma Aux_register(0x80012070, name=>"io_i2c_mst0_status") + +// User extension aux register io_i2c_mst0_txflr +#define AR_IO_I2C_MST0_TXFLR 
0x80012074 +#pragma Aux_register(0x80012074, name=>"io_i2c_mst0_txflr") + +// User extension aux register io_i2c_mst0_rxflr +#define AR_IO_I2C_MST0_RXFLR 0x80012078 +#pragma Aux_register(0x80012078, name=>"io_i2c_mst0_rxflr") + +// User extension aux register io_i2c_mst0_sda_hold +#define AR_IO_I2C_MST0_SDA_HOLD 0x8001207c +#pragma Aux_register(0x8001207c, name=>"io_i2c_mst0_sda_hold") + +// User extension aux register io_i2c_mst0_tx_abrt_source +#define AR_IO_I2C_MST0_TX_ABRT_SOURCE 0x80012080 +#pragma Aux_register(0x80012080, name=>"io_i2c_mst0_tx_abrt_source") + +// User extension aux register io_i2c_mst0_enable_status +#define AR_IO_I2C_MST0_ENABLE_STATUS 0x8001209c +#pragma Aux_register(0x8001209c, name=>"io_i2c_mst0_enable_status") + +// User extension aux register io_i2c_mst0_fs_spklen +#define AR_IO_I2C_MST0_FS_SPKLEN 0x800120a0 +#pragma Aux_register(0x800120a0, name=>"io_i2c_mst0_fs_spklen") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_I2C_SLV0_PRESENT 1 + +// User extension aux register io_i2c_slv0_clken +#define AR_IO_I2C_SLV0_CLKEN 0x800130c0 +#pragma Aux_register(0x800130c0, name=>"io_i2c_slv0_clken") + +// User extension aux register io_i2c_slv0_con +#define AR_IO_I2C_SLV0_CON 0x80013000 +#pragma Aux_register(0x80013000, name=>"io_i2c_slv0_con") + +// User extension aux register io_i2c_slv0_sar +#define AR_IO_I2C_SLV0_SAR 0x80013008 +#pragma Aux_register(0x80013008, name=>"io_i2c_slv0_sar") + +// User extension aux register io_i2c_slv0_data_cmd +#define AR_IO_I2C_SLV0_DATA_CMD 0x80013010 +#pragma Aux_register(0x80013010, name=>"io_i2c_slv0_data_cmd") + +// User extension aux register io_i2c_slv0_intr_stat +#define AR_IO_I2C_SLV0_INTR_STAT 0x8001302c +#pragma Aux_register(0x8001302c, name=>"io_i2c_slv0_intr_stat") + +// User extension aux register io_i2c_slv0_intr_mask +#define AR_IO_I2C_SLV0_INTR_MASK 0x80013030 +#pragma Aux_register(0x80013030, name=>"io_i2c_slv0_intr_mask") + +// User extension aux register io_i2c_slv0_raw_intr_stat +#define 
AR_IO_I2C_SLV0_RAW_INTR_STAT 0x80013034 +#pragma Aux_register(0x80013034, name=>"io_i2c_slv0_raw_intr_stat") + +// User extension aux register io_i2c_slv0_rx_tl +#define AR_IO_I2C_SLV0_RX_TL 0x80013038 +#pragma Aux_register(0x80013038, name=>"io_i2c_slv0_rx_tl") + +// User extension aux register io_i2c_slv0_tx_tl +#define AR_IO_I2C_SLV0_TX_TL 0x8001303c +#pragma Aux_register(0x8001303c, name=>"io_i2c_slv0_tx_tl") + +// User extension aux register io_i2c_slv0_clr_intr +#define AR_IO_I2C_SLV0_CLR_INTR 0x80013040 +#pragma Aux_register(0x80013040, name=>"io_i2c_slv0_clr_intr") + +// User extension aux register io_i2c_slv0_clr_rx_under +#define AR_IO_I2C_SLV0_CLR_RX_UNDER 0x80013044 +#pragma Aux_register(0x80013044, name=>"io_i2c_slv0_clr_rx_under") + +// User extension aux register io_i2c_slv0_clr_rx_over +#define AR_IO_I2C_SLV0_CLR_RX_OVER 0x80013048 +#pragma Aux_register(0x80013048, name=>"io_i2c_slv0_clr_rx_over") + +// User extension aux register io_i2c_slv0_clr_tx_over +#define AR_IO_I2C_SLV0_CLR_TX_OVER 0x8001304c +#pragma Aux_register(0x8001304c, name=>"io_i2c_slv0_clr_tx_over") + +// User extension aux register io_i2c_slv0_clr_rd_req +#define AR_IO_I2C_SLV0_CLR_RD_REQ 0x80013050 +#pragma Aux_register(0x80013050, name=>"io_i2c_slv0_clr_rd_req") + +// User extension aux register io_i2c_slv0_clr_tx_abrt +#define AR_IO_I2C_SLV0_CLR_TX_ABRT 0x80013054 +#pragma Aux_register(0x80013054, name=>"io_i2c_slv0_clr_tx_abrt") + +// User extension aux register io_i2c_slv0_clr_rx_done +#define AR_IO_I2C_SLV0_CLR_RX_DONE 0x80013058 +#pragma Aux_register(0x80013058, name=>"io_i2c_slv0_clr_rx_done") + +// User extension aux register io_i2c_slv0_clr_activity +#define AR_IO_I2C_SLV0_CLR_ACTIVITY 0x8001305c +#pragma Aux_register(0x8001305c, name=>"io_i2c_slv0_clr_activity") + +// User extension aux register io_i2c_slv0_clr_stop_det +#define AR_IO_I2C_SLV0_CLR_STOP_DET 0x80013060 +#pragma Aux_register(0x80013060, name=>"io_i2c_slv0_clr_stop_det") + +// User extension aux register 
io_i2c_slv0_clr_start_det +#define AR_IO_I2C_SLV0_CLR_START_DET 0x80013064 +#pragma Aux_register(0x80013064, name=>"io_i2c_slv0_clr_start_det") + +// User extension aux register io_i2c_slv0_enable +#define AR_IO_I2C_SLV0_ENABLE 0x8001306c +#pragma Aux_register(0x8001306c, name=>"io_i2c_slv0_enable") + +// User extension aux register io_i2c_slv0_status +#define AR_IO_I2C_SLV0_STATUS 0x80013070 +#pragma Aux_register(0x80013070, name=>"io_i2c_slv0_status") + +// User extension aux register io_i2c_slv0_txflr +#define AR_IO_I2C_SLV0_TXFLR 0x80013074 +#pragma Aux_register(0x80013074, name=>"io_i2c_slv0_txflr") + +// User extension aux register io_i2c_slv0_rxflr +#define AR_IO_I2C_SLV0_RXFLR 0x80013078 +#pragma Aux_register(0x80013078, name=>"io_i2c_slv0_rxflr") + +// User extension aux register io_i2c_slv0_sda_hold +#define AR_IO_I2C_SLV0_SDA_HOLD 0x8001307c +#pragma Aux_register(0x8001307c, name=>"io_i2c_slv0_sda_hold") + +// User extension aux register io_i2c_slv0_tx_abrt_source +#define AR_IO_I2C_SLV0_TX_ABRT_SOURCE 0x80013080 +#pragma Aux_register(0x80013080, name=>"io_i2c_slv0_tx_abrt_source") + +// User extension aux register io_i2c_slv0_sda_setup +#define AR_IO_I2C_SLV0_SDA_SETUP 0x80013094 +#pragma Aux_register(0x80013094, name=>"io_i2c_slv0_sda_setup") + +// User extension aux register io_i2c_slv0_enable_status +#define AR_IO_I2C_SLV0_ENABLE_STATUS 0x8001309c +#pragma Aux_register(0x8001309c, name=>"io_i2c_slv0_enable_status") + +// User extension aux register io_i2c_slv0_fs_spklen +#define AR_IO_I2C_SLV0_FS_SPKLEN 0x800130a0 +#pragma Aux_register(0x800130a0, name=>"io_i2c_slv0_fs_spklen") + +// User extension aux register io_i2c_slv0_clr_restart_det +#define AR_IO_I2C_SLV0_CLR_RESTART_DET 0x800130a8 +#pragma Aux_register(0x800130a8, name=>"io_i2c_slv0_clr_restart_det") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_SPI_MST0_PRESENT 1 + +// User extension aux register io_spi_mst0_ctrlr0 +#define AR_IO_SPI_MST0_CTRLR0 0x80010000 +#pragma Aux_register(0x80010000, 
name=>"io_spi_mst0_ctrlr0") + +// User extension aux register io_spi_mst0_ctrlr1 +#define AR_IO_SPI_MST0_CTRLR1 0x80010001 +#pragma Aux_register(0x80010001, name=>"io_spi_mst0_ctrlr1") + +// User extension aux register io_spi_mst0_spien +#define AR_IO_SPI_MST0_SPIEN 0x80010002 +#pragma Aux_register(0x80010002, name=>"io_spi_mst0_spien") + +// User extension aux register io_spi_mst0_ser +#define AR_IO_SPI_MST0_SER 0x80010004 +#pragma Aux_register(0x80010004, name=>"io_spi_mst0_ser") + +// User extension aux register io_spi_mst0_baudr +#define AR_IO_SPI_MST0_BAUDR 0x80010005 +#pragma Aux_register(0x80010005, name=>"io_spi_mst0_baudr") + +// User extension aux register io_spi_mst0_txftlr +#define AR_IO_SPI_MST0_TXFTLR 0x80010006 +#pragma Aux_register(0x80010006, name=>"io_spi_mst0_txftlr") + +// User extension aux register io_spi_mst0_rxftlr +#define AR_IO_SPI_MST0_RXFTLR 0x80010007 +#pragma Aux_register(0x80010007, name=>"io_spi_mst0_rxftlr") + +// User extension aux register io_spi_mst0_txflr +#define AR_IO_SPI_MST0_TXFLR 0x80010008 +#pragma Aux_register(0x80010008, name=>"io_spi_mst0_txflr") + +// User extension aux register io_spi_mst0_rxflr +#define AR_IO_SPI_MST0_RXFLR 0x80010009 +#pragma Aux_register(0x80010009, name=>"io_spi_mst0_rxflr") + +// User extension aux register io_spi_mst0_sr +#define AR_IO_SPI_MST0_SR 0x8001000a +#pragma Aux_register(0x8001000a, name=>"io_spi_mst0_sr") + +// User extension aux register io_spi_mst0_imr +#define AR_IO_SPI_MST0_IMR 0x8001000b +#pragma Aux_register(0x8001000b, name=>"io_spi_mst0_imr") + +// User extension aux register io_spi_mst0_isr +#define AR_IO_SPI_MST0_ISR 0x8001000c +#pragma Aux_register(0x8001000c, name=>"io_spi_mst0_isr") + +// User extension aux register io_spi_mst0_risr +#define AR_IO_SPI_MST0_RISR 0x8001000d +#pragma Aux_register(0x8001000d, name=>"io_spi_mst0_risr") + +// User extension aux register io_spi_mst0_txoicr +#define AR_IO_SPI_MST0_TXOICR 0x8001000e +#pragma Aux_register(0x8001000e, 
name=>"io_spi_mst0_txoicr") + +// User extension aux register io_spi_mst0_rxoicr +#define AR_IO_SPI_MST0_RXOICR 0x8001000f +#pragma Aux_register(0x8001000f, name=>"io_spi_mst0_rxoicr") + +// User extension aux register io_spi_mst0_rxuicr +#define AR_IO_SPI_MST0_RXUICR 0x80010010 +#pragma Aux_register(0x80010010, name=>"io_spi_mst0_rxuicr") + +// User extension aux register io_spi_mst0_icr +#define AR_IO_SPI_MST0_ICR 0x80010012 +#pragma Aux_register(0x80010012, name=>"io_spi_mst0_icr") + +// User extension aux register io_spi_mst0_clken +#define AR_IO_SPI_MST0_CLKEN 0x80010016 +#pragma Aux_register(0x80010016, name=>"io_spi_mst0_clken") + +// User extension aux register io_spi_mst0_dr +#define AR_IO_SPI_MST0_DR 0x80010018 +#pragma Aux_register(0x80010018, name=>"io_spi_mst0_dr") + +// User extension aux register io_spi_mst0_rx_sample_dly +#define AR_IO_SPI_MST0_RX_SAMPLE_DLY 0x8001003c +#pragma Aux_register(0x8001003c, name=>"io_spi_mst0_rx_sample_dly") +#define APEX_COM_ARC_HARDWARE_DFSS_SUBSYS_BCR_PRESENT 1 + +// User extension aux register SUBSYS_BUILD +#define AR_SUBSYS_BUILD 0xf0 +#pragma Aux_register(0xf0, name=>"SUBSYS_BUILD") + +// User extension aux register SUBSYS_DSP_0_BUILD +#define AR_SUBSYS_DSP_0_BUILD 0xa00 +#pragma Aux_register(0xa00, name=>"SUBSYS_DSP_0_BUILD") + +// User extension aux register SUBSYS_DSP_0_CONFIG +#define AR_SUBSYS_DSP_0_CONFIG 0xa02 +#pragma Aux_register(0xa02, name=>"SUBSYS_DSP_0_CONFIG") + +// User extension aux register SUBSYS_IO_0_BUILD +#define AR_SUBSYS_IO_0_BUILD 0xa04 +#pragma Aux_register(0xa04, name=>"SUBSYS_IO_0_BUILD") + +// User extension aux register SUBSYS_IO_1_BUILD +#define AR_SUBSYS_IO_1_BUILD 0xa05 +#pragma Aux_register(0xa05, name=>"SUBSYS_IO_1_BUILD") + +// User extension aux register SUBSYS_IO_2_BUILD +#define AR_SUBSYS_IO_2_BUILD 0xa06 +#pragma Aux_register(0xa06, name=>"SUBSYS_IO_2_BUILD") + +// User extension aux register SUBSYS_UAUX_OFFSET +#define AR_SUBSYS_UAUX_OFFSET 0xa1e +#pragma Aux_register(0xa1e, 
name=>"SUBSYS_UAUX_OFFSET") + +// User extension aux register SUBSYS_APEX_OFFSET +#define AR_SUBSYS_APEX_OFFSET 0xa1f +#pragma Aux_register(0xa1f, name=>"SUBSYS_APEX_OFFSET") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_SPI_MST1_PRESENT 1 + +// User extension aux register io_spi_mst1_ctrlr0 +#define AR_IO_SPI_MST1_CTRLR0 0x80010100 +#pragma Aux_register(0x80010100, name=>"io_spi_mst1_ctrlr0") + +// User extension aux register io_spi_mst1_ctrlr1 +#define AR_IO_SPI_MST1_CTRLR1 0x80010101 +#pragma Aux_register(0x80010101, name=>"io_spi_mst1_ctrlr1") + +// User extension aux register io_spi_mst1_spien +#define AR_IO_SPI_MST1_SPIEN 0x80010102 +#pragma Aux_register(0x80010102, name=>"io_spi_mst1_spien") + +// User extension aux register io_spi_mst1_ser +#define AR_IO_SPI_MST1_SER 0x80010104 +#pragma Aux_register(0x80010104, name=>"io_spi_mst1_ser") + +// User extension aux register io_spi_mst1_baudr +#define AR_IO_SPI_MST1_BAUDR 0x80010105 +#pragma Aux_register(0x80010105, name=>"io_spi_mst1_baudr") + +// User extension aux register io_spi_mst1_txftlr +#define AR_IO_SPI_MST1_TXFTLR 0x80010106 +#pragma Aux_register(0x80010106, name=>"io_spi_mst1_txftlr") + +// User extension aux register io_spi_mst1_rxftlr +#define AR_IO_SPI_MST1_RXFTLR 0x80010107 +#pragma Aux_register(0x80010107, name=>"io_spi_mst1_rxftlr") + +// User extension aux register io_spi_mst1_txflr +#define AR_IO_SPI_MST1_TXFLR 0x80010108 +#pragma Aux_register(0x80010108, name=>"io_spi_mst1_txflr") + +// User extension aux register io_spi_mst1_rxflr +#define AR_IO_SPI_MST1_RXFLR 0x80010109 +#pragma Aux_register(0x80010109, name=>"io_spi_mst1_rxflr") + +// User extension aux register io_spi_mst1_sr +#define AR_IO_SPI_MST1_SR 0x8001010a +#pragma Aux_register(0x8001010a, name=>"io_spi_mst1_sr") + +// User extension aux register io_spi_mst1_imr +#define AR_IO_SPI_MST1_IMR 0x8001010b +#pragma Aux_register(0x8001010b, name=>"io_spi_mst1_imr") + +// User extension aux register io_spi_mst1_isr +#define AR_IO_SPI_MST1_ISR 
0x8001010c +#pragma Aux_register(0x8001010c, name=>"io_spi_mst1_isr") + +// User extension aux register io_spi_mst1_risr +#define AR_IO_SPI_MST1_RISR 0x8001010d +#pragma Aux_register(0x8001010d, name=>"io_spi_mst1_risr") + +// User extension aux register io_spi_mst1_txoicr +#define AR_IO_SPI_MST1_TXOICR 0x8001010e +#pragma Aux_register(0x8001010e, name=>"io_spi_mst1_txoicr") + +// User extension aux register io_spi_mst1_rxoicr +#define AR_IO_SPI_MST1_RXOICR 0x8001010f +#pragma Aux_register(0x8001010f, name=>"io_spi_mst1_rxoicr") + +// User extension aux register io_spi_mst1_rxuicr +#define AR_IO_SPI_MST1_RXUICR 0x80010110 +#pragma Aux_register(0x80010110, name=>"io_spi_mst1_rxuicr") + +// User extension aux register io_spi_mst1_icr +#define AR_IO_SPI_MST1_ICR 0x80010112 +#pragma Aux_register(0x80010112, name=>"io_spi_mst1_icr") + +// User extension aux register io_spi_mst1_clken +#define AR_IO_SPI_MST1_CLKEN 0x80010116 +#pragma Aux_register(0x80010116, name=>"io_spi_mst1_clken") + +// User extension aux register io_spi_mst1_dr +#define AR_IO_SPI_MST1_DR 0x80010118 +#pragma Aux_register(0x80010118, name=>"io_spi_mst1_dr") + +// User extension aux register io_spi_mst1_rx_sample_dly +#define AR_IO_SPI_MST1_RX_SAMPLE_DLY 0x8001013c +#pragma Aux_register(0x8001013c, name=>"io_spi_mst1_rx_sample_dly") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_SPI_MST2_PRESENT 1 + +// User extension aux register io_spi_mst2_ctrlr0 +#define AR_IO_SPI_MST2_CTRLR0 0x80010200 +#pragma Aux_register(0x80010200, name=>"io_spi_mst2_ctrlr0") + +// User extension aux register io_spi_mst2_ctrlr1 +#define AR_IO_SPI_MST2_CTRLR1 0x80010201 +#pragma Aux_register(0x80010201, name=>"io_spi_mst2_ctrlr1") + +// User extension aux register io_spi_mst2_spien +#define AR_IO_SPI_MST2_SPIEN 0x80010202 +#pragma Aux_register(0x80010202, name=>"io_spi_mst2_spien") + +// User extension aux register io_spi_mst2_ser +#define AR_IO_SPI_MST2_SER 0x80010204 +#pragma Aux_register(0x80010204, name=>"io_spi_mst2_ser") + +// 
User extension aux register io_spi_mst2_baudr +#define AR_IO_SPI_MST2_BAUDR 0x80010205 +#pragma Aux_register(0x80010205, name=>"io_spi_mst2_baudr") + +// User extension aux register io_spi_mst2_txftlr +#define AR_IO_SPI_MST2_TXFTLR 0x80010206 +#pragma Aux_register(0x80010206, name=>"io_spi_mst2_txftlr") + +// User extension aux register io_spi_mst2_rxftlr +#define AR_IO_SPI_MST2_RXFTLR 0x80010207 +#pragma Aux_register(0x80010207, name=>"io_spi_mst2_rxftlr") + +// User extension aux register io_spi_mst2_txflr +#define AR_IO_SPI_MST2_TXFLR 0x80010208 +#pragma Aux_register(0x80010208, name=>"io_spi_mst2_txflr") + +// User extension aux register io_spi_mst2_rxflr +#define AR_IO_SPI_MST2_RXFLR 0x80010209 +#pragma Aux_register(0x80010209, name=>"io_spi_mst2_rxflr") + +// User extension aux register io_spi_mst2_sr +#define AR_IO_SPI_MST2_SR 0x8001020a +#pragma Aux_register(0x8001020a, name=>"io_spi_mst2_sr") + +// User extension aux register io_spi_mst2_imr +#define AR_IO_SPI_MST2_IMR 0x8001020b +#pragma Aux_register(0x8001020b, name=>"io_spi_mst2_imr") + +// User extension aux register io_spi_mst2_isr +#define AR_IO_SPI_MST2_ISR 0x8001020c +#pragma Aux_register(0x8001020c, name=>"io_spi_mst2_isr") + +// User extension aux register io_spi_mst2_risr +#define AR_IO_SPI_MST2_RISR 0x8001020d +#pragma Aux_register(0x8001020d, name=>"io_spi_mst2_risr") + +// User extension aux register io_spi_mst2_txoicr +#define AR_IO_SPI_MST2_TXOICR 0x8001020e +#pragma Aux_register(0x8001020e, name=>"io_spi_mst2_txoicr") + +// User extension aux register io_spi_mst2_rxoicr +#define AR_IO_SPI_MST2_RXOICR 0x8001020f +#pragma Aux_register(0x8001020f, name=>"io_spi_mst2_rxoicr") + +// User extension aux register io_spi_mst2_rxuicr +#define AR_IO_SPI_MST2_RXUICR 0x80010210 +#pragma Aux_register(0x80010210, name=>"io_spi_mst2_rxuicr") + +// User extension aux register io_spi_mst2_icr +#define AR_IO_SPI_MST2_ICR 0x80010212 +#pragma Aux_register(0x80010212, name=>"io_spi_mst2_icr") + +// User 
extension aux register io_spi_mst2_clken +#define AR_IO_SPI_MST2_CLKEN 0x80010216 +#pragma Aux_register(0x80010216, name=>"io_spi_mst2_clken") + +// User extension aux register io_spi_mst2_dr +#define AR_IO_SPI_MST2_DR 0x80010218 +#pragma Aux_register(0x80010218, name=>"io_spi_mst2_dr") + +// User extension aux register io_spi_mst2_rx_sample_dly +#define AR_IO_SPI_MST2_RX_SAMPLE_DLY 0x8001023c +#pragma Aux_register(0x8001023c, name=>"io_spi_mst2_rx_sample_dly") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_SPI_SLV0_PRESENT 1 + +// User extension aux register io_spi_slv0_ctrlr0 +#define AR_IO_SPI_SLV0_CTRLR0 0x80011000 +#pragma Aux_register(0x80011000, name=>"io_spi_slv0_ctrlr0") + +// User extension aux register io_spi_slv0_spien +#define AR_IO_SPI_SLV0_SPIEN 0x80011002 +#pragma Aux_register(0x80011002, name=>"io_spi_slv0_spien") + +// User extension aux register io_spi_slv0_txftlr +#define AR_IO_SPI_SLV0_TXFTLR 0x80011006 +#pragma Aux_register(0x80011006, name=>"io_spi_slv0_txftlr") + +// User extension aux register io_spi_slv0_rxftlr +#define AR_IO_SPI_SLV0_RXFTLR 0x80011007 +#pragma Aux_register(0x80011007, name=>"io_spi_slv0_rxftlr") + +// User extension aux register io_spi_slv0_txflr +#define AR_IO_SPI_SLV0_TXFLR 0x80011008 +#pragma Aux_register(0x80011008, name=>"io_spi_slv0_txflr") + +// User extension aux register io_spi_slv0_rxflr +#define AR_IO_SPI_SLV0_RXFLR 0x80011009 +#pragma Aux_register(0x80011009, name=>"io_spi_slv0_rxflr") + +// User extension aux register io_spi_slv0_sr +#define AR_IO_SPI_SLV0_SR 0x8001100a +#pragma Aux_register(0x8001100a, name=>"io_spi_slv0_sr") + +// User extension aux register io_spi_slv0_imr +#define AR_IO_SPI_SLV0_IMR 0x8001100b +#pragma Aux_register(0x8001100b, name=>"io_spi_slv0_imr") + +// User extension aux register io_spi_slv0_isr +#define AR_IO_SPI_SLV0_ISR 0x8001100c +#pragma Aux_register(0x8001100c, name=>"io_spi_slv0_isr") + +// User extension aux register io_spi_slv0_risr +#define AR_IO_SPI_SLV0_RISR 0x8001100d +#pragma 
Aux_register(0x8001100d, name=>"io_spi_slv0_risr") + +// User extension aux register io_spi_slv0_txoicr +#define AR_IO_SPI_SLV0_TXOICR 0x8001100e +#pragma Aux_register(0x8001100e, name=>"io_spi_slv0_txoicr") + +// User extension aux register io_spi_slv0_rxoicr +#define AR_IO_SPI_SLV0_RXOICR 0x8001100f +#pragma Aux_register(0x8001100f, name=>"io_spi_slv0_rxoicr") + +// User extension aux register io_spi_slv0_rxuicr +#define AR_IO_SPI_SLV0_RXUICR 0x80011010 +#pragma Aux_register(0x80011010, name=>"io_spi_slv0_rxuicr") + +// User extension aux register io_spi_slv0_icr +#define AR_IO_SPI_SLV0_ICR 0x80011012 +#pragma Aux_register(0x80011012, name=>"io_spi_slv0_icr") + +// User extension aux register io_spi_slv0_clken +#define AR_IO_SPI_SLV0_CLKEN 0x80011016 +#pragma Aux_register(0x80011016, name=>"io_spi_slv0_clken") + +// User extension aux register io_spi_slv0_dr +#define AR_IO_SPI_SLV0_DR 0x80011018 +#pragma Aux_register(0x80011018, name=>"io_spi_slv0_dr") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_GPIO1_PRESENT 1 + +// User extension aux register io_gpio1_debounce +#define AR_IO_GPIO1_DEBOUNCE 0x80017148 +#pragma Aux_register(0x80017148, name=>"io_gpio1_debounce") + +// User extension aux register io_gpio1_clken +#define AR_IO_GPIO1_CLKEN 0x80017180 +#pragma Aux_register(0x80017180, name=>"io_gpio1_clken") + +// User extension aux register io_gpio1_swporta_dr +#define AR_IO_GPIO1_SWPORTA_DR 0x80017100 +#pragma Aux_register(0x80017100, name=>"io_gpio1_swporta_dr") + +// User extension aux register io_gpio1_swporta_ddr +#define AR_IO_GPIO1_SWPORTA_DDR 0x80017104 +#pragma Aux_register(0x80017104, name=>"io_gpio1_swporta_ddr") + +// User extension aux register io_gpio1_inten +#define AR_IO_GPIO1_INTEN 0x80017130 +#pragma Aux_register(0x80017130, name=>"io_gpio1_inten") + +// User extension aux register io_gpio1_intmask +#define AR_IO_GPIO1_INTMASK 0x80017134 +#pragma Aux_register(0x80017134, name=>"io_gpio1_intmask") + +// User extension aux register io_gpio1_inttype_level 
+#define AR_IO_GPIO1_INTTYPE_LEVEL 0x80017138 +#pragma Aux_register(0x80017138, name=>"io_gpio1_inttype_level") + +// User extension aux register io_gpio1_int_polarity +#define AR_IO_GPIO1_INT_POLARITY 0x8001713c +#pragma Aux_register(0x8001713c, name=>"io_gpio1_int_polarity") + +// User extension aux register io_gpio1_intstatus +#define AR_IO_GPIO1_INTSTATUS 0x80017140 +#pragma Aux_register(0x80017140, name=>"io_gpio1_intstatus") + +// User extension aux register io_gpio1_raw_intstatus +#define AR_IO_GPIO1_RAW_INTSTATUS 0x80017144 +#pragma Aux_register(0x80017144, name=>"io_gpio1_raw_intstatus") + +// User extension aux register io_gpio1_porta_eoi +#define AR_IO_GPIO1_PORTA_EOI 0x8001714c +#pragma Aux_register(0x8001714c, name=>"io_gpio1_porta_eoi") + +// User extension aux register io_gpio1_ext_porta +#define AR_IO_GPIO1_EXT_PORTA 0x80017150 +#pragma Aux_register(0x80017150, name=>"io_gpio1_ext_porta") + +// User extension aux register io_gpio1_ls_sync +#define AR_IO_GPIO1_LS_SYNC 0x80017160 +#pragma Aux_register(0x80017160, name=>"io_gpio1_ls_sync") + +// User extension aux register io_gpio1_int_bothedge +#define AR_IO_GPIO1_INT_BOTHEDGE 0x80017168 +#pragma Aux_register(0x80017168, name=>"io_gpio1_int_bothedge") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_GPIO2_PRESENT 1 + +// User extension aux register io_gpio2_debounce +#define AR_IO_GPIO2_DEBOUNCE 0x80017248 +#pragma Aux_register(0x80017248, name=>"io_gpio2_debounce") + +// User extension aux register io_gpio2_clken +#define AR_IO_GPIO2_CLKEN 0x80017280 +#pragma Aux_register(0x80017280, name=>"io_gpio2_clken") + +// User extension aux register io_gpio2_swporta_dr +#define AR_IO_GPIO2_SWPORTA_DR 0x80017200 +#pragma Aux_register(0x80017200, name=>"io_gpio2_swporta_dr") + +// User extension aux register io_gpio2_swporta_ddr +#define AR_IO_GPIO2_SWPORTA_DDR 0x80017204 +#pragma Aux_register(0x80017204, name=>"io_gpio2_swporta_ddr") + +// User extension aux register io_gpio2_inten +#define AR_IO_GPIO2_INTEN 0x80017230 
+#pragma Aux_register(0x80017230, name=>"io_gpio2_inten") + +// User extension aux register io_gpio2_intmask +#define AR_IO_GPIO2_INTMASK 0x80017234 +#pragma Aux_register(0x80017234, name=>"io_gpio2_intmask") + +// User extension aux register io_gpio2_inttype_level +#define AR_IO_GPIO2_INTTYPE_LEVEL 0x80017238 +#pragma Aux_register(0x80017238, name=>"io_gpio2_inttype_level") + +// User extension aux register io_gpio2_int_polarity +#define AR_IO_GPIO2_INT_POLARITY 0x8001723c +#pragma Aux_register(0x8001723c, name=>"io_gpio2_int_polarity") + +// User extension aux register io_gpio2_intstatus +#define AR_IO_GPIO2_INTSTATUS 0x80017240 +#pragma Aux_register(0x80017240, name=>"io_gpio2_intstatus") + +// User extension aux register io_gpio2_raw_intstatus +#define AR_IO_GPIO2_RAW_INTSTATUS 0x80017244 +#pragma Aux_register(0x80017244, name=>"io_gpio2_raw_intstatus") + +// User extension aux register io_gpio2_porta_eoi +#define AR_IO_GPIO2_PORTA_EOI 0x8001724c +#pragma Aux_register(0x8001724c, name=>"io_gpio2_porta_eoi") + +// User extension aux register io_gpio2_ext_porta +#define AR_IO_GPIO2_EXT_PORTA 0x80017250 +#pragma Aux_register(0x80017250, name=>"io_gpio2_ext_porta") + +// User extension aux register io_gpio2_ls_sync +#define AR_IO_GPIO2_LS_SYNC 0x80017260 +#pragma Aux_register(0x80017260, name=>"io_gpio2_ls_sync") + +// User extension aux register io_gpio2_int_bothedge +#define AR_IO_GPIO2_INT_BOTHEDGE 0x80017268 +#pragma Aux_register(0x80017268, name=>"io_gpio2_int_bothedge") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_I2C_MST1_PRESENT 1 + +// User extension aux register io_i2c_mst1_clken +#define AR_IO_I2C_MST1_CLKEN 0x800121c0 +#pragma Aux_register(0x800121c0, name=>"io_i2c_mst1_clken") + +// User extension aux register io_i2c_mst1_con +#define AR_IO_I2C_MST1_CON 0x80012100 +#pragma Aux_register(0x80012100, name=>"io_i2c_mst1_con") + +// User extension aux register io_i2c_mst1_tar +#define AR_IO_I2C_MST1_TAR 0x80012104 +#pragma Aux_register(0x80012104, 
name=>"io_i2c_mst1_tar") + +// User extension aux register io_i2c_mst1_data_cmd +#define AR_IO_I2C_MST1_DATA_CMD 0x80012110 +#pragma Aux_register(0x80012110, name=>"io_i2c_mst1_data_cmd") + +// User extension aux register io_i2c_mst1_ss_scl_hcnt +#define AR_IO_I2C_MST1_SS_SCL_HCNT 0x80012114 +#pragma Aux_register(0x80012114, name=>"io_i2c_mst1_ss_scl_hcnt") + +// User extension aux register io_i2c_mst1_ss_scl_lcnt +#define AR_IO_I2C_MST1_SS_SCL_LCNT 0x80012118 +#pragma Aux_register(0x80012118, name=>"io_i2c_mst1_ss_scl_lcnt") + +// User extension aux register io_i2c_mst1_fs_scl_hcnt +#define AR_IO_I2C_MST1_FS_SCL_HCNT 0x8001211c +#pragma Aux_register(0x8001211c, name=>"io_i2c_mst1_fs_scl_hcnt") + +// User extension aux register io_i2c_mst1_fs_scl_lcnt +#define AR_IO_I2C_MST1_FS_SCL_LCNT 0x80012120 +#pragma Aux_register(0x80012120, name=>"io_i2c_mst1_fs_scl_lcnt") + +// User extension aux register io_i2c_mst1_intr_stat +#define AR_IO_I2C_MST1_INTR_STAT 0x8001212c +#pragma Aux_register(0x8001212c, name=>"io_i2c_mst1_intr_stat") + +// User extension aux register io_i2c_mst1_intr_mask +#define AR_IO_I2C_MST1_INTR_MASK 0x80012130 +#pragma Aux_register(0x80012130, name=>"io_i2c_mst1_intr_mask") + +// User extension aux register io_i2c_mst1_raw_intr_stat +#define AR_IO_I2C_MST1_RAW_INTR_STAT 0x80012134 +#pragma Aux_register(0x80012134, name=>"io_i2c_mst1_raw_intr_stat") + +// User extension aux register io_i2c_mst1_rx_tl +#define AR_IO_I2C_MST1_RX_TL 0x80012138 +#pragma Aux_register(0x80012138, name=>"io_i2c_mst1_rx_tl") + +// User extension aux register io_i2c_mst1_tx_tl +#define AR_IO_I2C_MST1_TX_TL 0x8001213c +#pragma Aux_register(0x8001213c, name=>"io_i2c_mst1_tx_tl") + +// User extension aux register io_i2c_mst1_clr_intr +#define AR_IO_I2C_MST1_CLR_INTR 0x80012140 +#pragma Aux_register(0x80012140, name=>"io_i2c_mst1_clr_intr") + +// User extension aux register io_i2c_mst1_clr_rx_under +#define AR_IO_I2C_MST1_CLR_RX_UNDER 0x80012144 +#pragma Aux_register(0x80012144, 
name=>"io_i2c_mst1_clr_rx_under") + +// User extension aux register io_i2c_mst1_clr_rx_over +#define AR_IO_I2C_MST1_CLR_RX_OVER 0x80012148 +#pragma Aux_register(0x80012148, name=>"io_i2c_mst1_clr_rx_over") + +// User extension aux register io_i2c_mst1_clr_tx_over +#define AR_IO_I2C_MST1_CLR_TX_OVER 0x8001214c +#pragma Aux_register(0x8001214c, name=>"io_i2c_mst1_clr_tx_over") + +// User extension aux register io_i2c_mst1_clr_tx_abrt +#define AR_IO_I2C_MST1_CLR_TX_ABRT 0x80012154 +#pragma Aux_register(0x80012154, name=>"io_i2c_mst1_clr_tx_abrt") + +// User extension aux register io_i2c_mst1_clr_activity +#define AR_IO_I2C_MST1_CLR_ACTIVITY 0x8001215c +#pragma Aux_register(0x8001215c, name=>"io_i2c_mst1_clr_activity") + +// User extension aux register io_i2c_mst1_clr_stop_det +#define AR_IO_I2C_MST1_CLR_STOP_DET 0x80012160 +#pragma Aux_register(0x80012160, name=>"io_i2c_mst1_clr_stop_det") + +// User extension aux register io_i2c_mst1_clr_start_det +#define AR_IO_I2C_MST1_CLR_START_DET 0x80012164 +#pragma Aux_register(0x80012164, name=>"io_i2c_mst1_clr_start_det") + +// User extension aux register io_i2c_mst1_enable +#define AR_IO_I2C_MST1_ENABLE 0x8001216c +#pragma Aux_register(0x8001216c, name=>"io_i2c_mst1_enable") + +// User extension aux register io_i2c_mst1_status +#define AR_IO_I2C_MST1_STATUS 0x80012170 +#pragma Aux_register(0x80012170, name=>"io_i2c_mst1_status") + +// User extension aux register io_i2c_mst1_txflr +#define AR_IO_I2C_MST1_TXFLR 0x80012174 +#pragma Aux_register(0x80012174, name=>"io_i2c_mst1_txflr") + +// User extension aux register io_i2c_mst1_rxflr +#define AR_IO_I2C_MST1_RXFLR 0x80012178 +#pragma Aux_register(0x80012178, name=>"io_i2c_mst1_rxflr") + +// User extension aux register io_i2c_mst1_sda_hold +#define AR_IO_I2C_MST1_SDA_HOLD 0x8001217c +#pragma Aux_register(0x8001217c, name=>"io_i2c_mst1_sda_hold") + +// User extension aux register io_i2c_mst1_tx_abrt_source +#define AR_IO_I2C_MST1_TX_ABRT_SOURCE 0x80012180 +#pragma 
Aux_register(0x80012180, name=>"io_i2c_mst1_tx_abrt_source") + +// User extension aux register io_i2c_mst1_enable_status +#define AR_IO_I2C_MST1_ENABLE_STATUS 0x8001219c +#pragma Aux_register(0x8001219c, name=>"io_i2c_mst1_enable_status") + +// User extension aux register io_i2c_mst1_fs_spklen +#define AR_IO_I2C_MST1_FS_SPKLEN 0x800121a0 +#pragma Aux_register(0x800121a0, name=>"io_i2c_mst1_fs_spklen") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_I2C_MST2_PRESENT 1 + +// User extension aux register io_i2c_mst2_clken +#define AR_IO_I2C_MST2_CLKEN 0x800122c0 +#pragma Aux_register(0x800122c0, name=>"io_i2c_mst2_clken") + +// User extension aux register io_i2c_mst2_con +#define AR_IO_I2C_MST2_CON 0x80012200 +#pragma Aux_register(0x80012200, name=>"io_i2c_mst2_con") + +// User extension aux register io_i2c_mst2_tar +#define AR_IO_I2C_MST2_TAR 0x80012204 +#pragma Aux_register(0x80012204, name=>"io_i2c_mst2_tar") + +// User extension aux register io_i2c_mst2_data_cmd +#define AR_IO_I2C_MST2_DATA_CMD 0x80012210 +#pragma Aux_register(0x80012210, name=>"io_i2c_mst2_data_cmd") + +// User extension aux register io_i2c_mst2_ss_scl_hcnt +#define AR_IO_I2C_MST2_SS_SCL_HCNT 0x80012214 +#pragma Aux_register(0x80012214, name=>"io_i2c_mst2_ss_scl_hcnt") + +// User extension aux register io_i2c_mst2_ss_scl_lcnt +#define AR_IO_I2C_MST2_SS_SCL_LCNT 0x80012218 +#pragma Aux_register(0x80012218, name=>"io_i2c_mst2_ss_scl_lcnt") + +// User extension aux register io_i2c_mst2_fs_scl_hcnt +#define AR_IO_I2C_MST2_FS_SCL_HCNT 0x8001221c +#pragma Aux_register(0x8001221c, name=>"io_i2c_mst2_fs_scl_hcnt") + +// User extension aux register io_i2c_mst2_fs_scl_lcnt +#define AR_IO_I2C_MST2_FS_SCL_LCNT 0x80012220 +#pragma Aux_register(0x80012220, name=>"io_i2c_mst2_fs_scl_lcnt") + +// User extension aux register io_i2c_mst2_intr_stat +#define AR_IO_I2C_MST2_INTR_STAT 0x8001222c +#pragma Aux_register(0x8001222c, name=>"io_i2c_mst2_intr_stat") + +// User extension aux register io_i2c_mst2_intr_mask +#define 
AR_IO_I2C_MST2_INTR_MASK 0x80012230 +#pragma Aux_register(0x80012230, name=>"io_i2c_mst2_intr_mask") + +// User extension aux register io_i2c_mst2_raw_intr_stat +#define AR_IO_I2C_MST2_RAW_INTR_STAT 0x80012234 +#pragma Aux_register(0x80012234, name=>"io_i2c_mst2_raw_intr_stat") + +// User extension aux register io_i2c_mst2_rx_tl +#define AR_IO_I2C_MST2_RX_TL 0x80012238 +#pragma Aux_register(0x80012238, name=>"io_i2c_mst2_rx_tl") + +// User extension aux register io_i2c_mst2_tx_tl +#define AR_IO_I2C_MST2_TX_TL 0x8001223c +#pragma Aux_register(0x8001223c, name=>"io_i2c_mst2_tx_tl") + +// User extension aux register io_i2c_mst2_clr_intr +#define AR_IO_I2C_MST2_CLR_INTR 0x80012240 +#pragma Aux_register(0x80012240, name=>"io_i2c_mst2_clr_intr") + +// User extension aux register io_i2c_mst2_clr_rx_under +#define AR_IO_I2C_MST2_CLR_RX_UNDER 0x80012244 +#pragma Aux_register(0x80012244, name=>"io_i2c_mst2_clr_rx_under") + +// User extension aux register io_i2c_mst2_clr_rx_over +#define AR_IO_I2C_MST2_CLR_RX_OVER 0x80012248 +#pragma Aux_register(0x80012248, name=>"io_i2c_mst2_clr_rx_over") + +// User extension aux register io_i2c_mst2_clr_tx_over +#define AR_IO_I2C_MST2_CLR_TX_OVER 0x8001224c +#pragma Aux_register(0x8001224c, name=>"io_i2c_mst2_clr_tx_over") + +// User extension aux register io_i2c_mst2_clr_tx_abrt +#define AR_IO_I2C_MST2_CLR_TX_ABRT 0x80012254 +#pragma Aux_register(0x80012254, name=>"io_i2c_mst2_clr_tx_abrt") + +// User extension aux register io_i2c_mst2_clr_activity +#define AR_IO_I2C_MST2_CLR_ACTIVITY 0x8001225c +#pragma Aux_register(0x8001225c, name=>"io_i2c_mst2_clr_activity") + +// User extension aux register io_i2c_mst2_clr_stop_det +#define AR_IO_I2C_MST2_CLR_STOP_DET 0x80012260 +#pragma Aux_register(0x80012260, name=>"io_i2c_mst2_clr_stop_det") + +// User extension aux register io_i2c_mst2_clr_start_det +#define AR_IO_I2C_MST2_CLR_START_DET 0x80012264 +#pragma Aux_register(0x80012264, name=>"io_i2c_mst2_clr_start_det") + +// User extension aux 
register io_i2c_mst2_enable +#define AR_IO_I2C_MST2_ENABLE 0x8001226c +#pragma Aux_register(0x8001226c, name=>"io_i2c_mst2_enable") + +// User extension aux register io_i2c_mst2_status +#define AR_IO_I2C_MST2_STATUS 0x80012270 +#pragma Aux_register(0x80012270, name=>"io_i2c_mst2_status") + +// User extension aux register io_i2c_mst2_txflr +#define AR_IO_I2C_MST2_TXFLR 0x80012274 +#pragma Aux_register(0x80012274, name=>"io_i2c_mst2_txflr") + +// User extension aux register io_i2c_mst2_rxflr +#define AR_IO_I2C_MST2_RXFLR 0x80012278 +#pragma Aux_register(0x80012278, name=>"io_i2c_mst2_rxflr") + +// User extension aux register io_i2c_mst2_sda_hold +#define AR_IO_I2C_MST2_SDA_HOLD 0x8001227c +#pragma Aux_register(0x8001227c, name=>"io_i2c_mst2_sda_hold") + +// User extension aux register io_i2c_mst2_tx_abrt_source +#define AR_IO_I2C_MST2_TX_ABRT_SOURCE 0x80012280 +#pragma Aux_register(0x80012280, name=>"io_i2c_mst2_tx_abrt_source") + +// User extension aux register io_i2c_mst2_enable_status +#define AR_IO_I2C_MST2_ENABLE_STATUS 0x8001229c +#pragma Aux_register(0x8001229c, name=>"io_i2c_mst2_enable_status") + +// User extension aux register io_i2c_mst2_fs_spklen +#define AR_IO_I2C_MST2_FS_SPKLEN 0x800122a0 +#pragma Aux_register(0x800122a0, name=>"io_i2c_mst2_fs_spklen") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_UART0_PRESENT 1 + +// User extension aux register io_uart0_clken +#define AR_IO_UART0_CLKEN 0x800140c0 +#pragma Aux_register(0x800140c0, name=>"io_uart0_clken") + +// User extension aux register io_uart0_rbr_thr_dll +#define AR_IO_UART0_RBR_THR_DLL 0x80014000 +#pragma Aux_register(0x80014000, name=>"io_uart0_rbr_thr_dll") + +// User extension aux register io_uart0_ier_dlh +#define AR_IO_UART0_IER_DLH 0x80014004 +#pragma Aux_register(0x80014004, name=>"io_uart0_ier_dlh") + +// User extension aux register io_uart0_iir_fcr +#define AR_IO_UART0_IIR_FCR 0x80014008 +#pragma Aux_register(0x80014008, name=>"io_uart0_iir_fcr") + +// User extension aux register io_uart0_lcr 
+#define AR_IO_UART0_LCR 0x8001400c +#pragma Aux_register(0x8001400c, name=>"io_uart0_lcr") + +// User extension aux register io_uart0_mcr +#define AR_IO_UART0_MCR 0x80014010 +#pragma Aux_register(0x80014010, name=>"io_uart0_mcr") + +// User extension aux register io_uart0_lsr +#define AR_IO_UART0_LSR 0x80014014 +#pragma Aux_register(0x80014014, name=>"io_uart0_lsr") + +// User extension aux register io_uart0_msr +#define AR_IO_UART0_MSR 0x80014018 +#pragma Aux_register(0x80014018, name=>"io_uart0_msr") + +// User extension aux register io_uart0_usr +#define AR_IO_UART0_USR 0x8001407c +#pragma Aux_register(0x8001407c, name=>"io_uart0_usr") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_UART1_PRESENT 1 + +// User extension aux register io_uart1_clken +#define AR_IO_UART1_CLKEN 0x800141c0 +#pragma Aux_register(0x800141c0, name=>"io_uart1_clken") + +// User extension aux register io_uart1_rbr_thr_dll +#define AR_IO_UART1_RBR_THR_DLL 0x80014100 +#pragma Aux_register(0x80014100, name=>"io_uart1_rbr_thr_dll") + +// User extension aux register io_uart1_ier_dlh +#define AR_IO_UART1_IER_DLH 0x80014104 +#pragma Aux_register(0x80014104, name=>"io_uart1_ier_dlh") + +// User extension aux register io_uart1_iir_fcr +#define AR_IO_UART1_IIR_FCR 0x80014108 +#pragma Aux_register(0x80014108, name=>"io_uart1_iir_fcr") + +// User extension aux register io_uart1_lcr +#define AR_IO_UART1_LCR 0x8001410c +#pragma Aux_register(0x8001410c, name=>"io_uart1_lcr") + +// User extension aux register io_uart1_mcr +#define AR_IO_UART1_MCR 0x80014110 +#pragma Aux_register(0x80014110, name=>"io_uart1_mcr") + +// User extension aux register io_uart1_lsr +#define AR_IO_UART1_LSR 0x80014114 +#pragma Aux_register(0x80014114, name=>"io_uart1_lsr") + +// User extension aux register io_uart1_msr +#define AR_IO_UART1_MSR 0x80014118 +#pragma Aux_register(0x80014118, name=>"io_uart1_msr") + +// User extension aux register io_uart1_usr +#define AR_IO_UART1_USR 0x8001417c +#pragma Aux_register(0x8001417c, 
name=>"io_uart1_usr") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_UART2_PRESENT 1 + +// User extension aux register io_uart2_clken +#define AR_IO_UART2_CLKEN 0x800142c0 +#pragma Aux_register(0x800142c0, name=>"io_uart2_clken") + +// User extension aux register io_uart2_rbr_thr_dll +#define AR_IO_UART2_RBR_THR_DLL 0x80014200 +#pragma Aux_register(0x80014200, name=>"io_uart2_rbr_thr_dll") + +// User extension aux register io_uart2_ier_dlh +#define AR_IO_UART2_IER_DLH 0x80014204 +#pragma Aux_register(0x80014204, name=>"io_uart2_ier_dlh") + +// User extension aux register io_uart2_iir_fcr +#define AR_IO_UART2_IIR_FCR 0x80014208 +#pragma Aux_register(0x80014208, name=>"io_uart2_iir_fcr") + +// User extension aux register io_uart2_lcr +#define AR_IO_UART2_LCR 0x8001420c +#pragma Aux_register(0x8001420c, name=>"io_uart2_lcr") + +// User extension aux register io_uart2_mcr +#define AR_IO_UART2_MCR 0x80014210 +#pragma Aux_register(0x80014210, name=>"io_uart2_mcr") + +// User extension aux register io_uart2_lsr +#define AR_IO_UART2_LSR 0x80014214 +#pragma Aux_register(0x80014214, name=>"io_uart2_lsr") + +// User extension aux register io_uart2_msr +#define AR_IO_UART2_MSR 0x80014218 +#pragma Aux_register(0x80014218, name=>"io_uart2_msr") + +// User extension aux register io_uart2_usr +#define AR_IO_UART2_USR 0x8001427c +#pragma Aux_register(0x8001427c, name=>"io_uart2_usr") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_UART3_PRESENT 1 + +// User extension aux register io_uart3_clken +#define AR_IO_UART3_CLKEN 0x800143c0 +#pragma Aux_register(0x800143c0, name=>"io_uart3_clken") + +// User extension aux register io_uart3_rbr_thr_dll +#define AR_IO_UART3_RBR_THR_DLL 0x80014300 +#pragma Aux_register(0x80014300, name=>"io_uart3_rbr_thr_dll") + +// User extension aux register io_uart3_ier_dlh +#define AR_IO_UART3_IER_DLH 0x80014304 +#pragma Aux_register(0x80014304, name=>"io_uart3_ier_dlh") + +// User extension aux register io_uart3_iir_fcr +#define AR_IO_UART3_IIR_FCR 0x80014308 +#pragma 
Aux_register(0x80014308, name=>"io_uart3_iir_fcr") + +// User extension aux register io_uart3_lcr +#define AR_IO_UART3_LCR 0x8001430c +#pragma Aux_register(0x8001430c, name=>"io_uart3_lcr") + +// User extension aux register io_uart3_mcr +#define AR_IO_UART3_MCR 0x80014310 +#pragma Aux_register(0x80014310, name=>"io_uart3_mcr") + +// User extension aux register io_uart3_lsr +#define AR_IO_UART3_LSR 0x80014314 +#pragma Aux_register(0x80014314, name=>"io_uart3_lsr") + +// User extension aux register io_uart3_msr +#define AR_IO_UART3_MSR 0x80014318 +#pragma Aux_register(0x80014318, name=>"io_uart3_msr") + +// User extension aux register io_uart3_usr +#define AR_IO_UART3_USR 0x8001437c +#pragma Aux_register(0x8001437c, name=>"io_uart3_usr") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_I2S_RX_MST0_PRESENT 1 + +// User extension aux register io_i2s_rx_mst0_ier +#define AR_IO_I2S_RX_MST0_IER 0x8001a000 +#pragma Aux_register(0x8001a000, name=>"io_i2s_rx_mst0_ier") + +// User extension aux register io_i2s_rx_mst0_irer +#define AR_IO_I2S_RX_MST0_IRER 0x8001a004 +#pragma Aux_register(0x8001a004, name=>"io_i2s_rx_mst0_irer") + +// User extension aux register io_i2s_rx_mst0_cer +#define AR_IO_I2S_RX_MST0_CER 0x8001a00c +#pragma Aux_register(0x8001a00c, name=>"io_i2s_rx_mst0_cer") + +// User extension aux register io_i2s_rx_mst0_ccr +#define AR_IO_I2S_RX_MST0_CCR 0x8001a010 +#pragma Aux_register(0x8001a010, name=>"io_i2s_rx_mst0_ccr") + +// User extension aux register io_i2s_rx_mst0_rxffr +#define AR_IO_I2S_RX_MST0_RXFFR 0x8001a014 +#pragma Aux_register(0x8001a014, name=>"io_i2s_rx_mst0_rxffr") + +// User extension aux register io_i2s_rx_mst0_lrbr +#define AR_IO_I2S_RX_MST0_LRBR 0x8001a020 +#pragma Aux_register(0x8001a020, name=>"io_i2s_rx_mst0_lrbr") + +// User extension aux register io_i2s_rx_mst0_rrbr +#define AR_IO_I2S_RX_MST0_RRBR 0x8001a024 +#pragma Aux_register(0x8001a024, name=>"io_i2s_rx_mst0_rrbr") + +// User extension aux register io_i2s_rx_mst0_rer +#define 
AR_IO_I2S_RX_MST0_RER 0x8001a028 +#pragma Aux_register(0x8001a028, name=>"io_i2s_rx_mst0_rer") + +// User extension aux register io_i2s_rx_mst0_rcr +#define AR_IO_I2S_RX_MST0_RCR 0x8001a030 +#pragma Aux_register(0x8001a030, name=>"io_i2s_rx_mst0_rcr") + +// User extension aux register io_i2s_rx_mst0_isr +#define AR_IO_I2S_RX_MST0_ISR 0x8001a038 +#pragma Aux_register(0x8001a038, name=>"io_i2s_rx_mst0_isr") + +// User extension aux register io_i2s_rx_mst0_imr +#define AR_IO_I2S_RX_MST0_IMR 0x8001a03c +#pragma Aux_register(0x8001a03c, name=>"io_i2s_rx_mst0_imr") + +// User extension aux register io_i2s_rx_mst0_ror +#define AR_IO_I2S_RX_MST0_ROR 0x8001a040 +#pragma Aux_register(0x8001a040, name=>"io_i2s_rx_mst0_ror") + +// User extension aux register io_i2s_rx_mst0_rfcr +#define AR_IO_I2S_RX_MST0_RFCR 0x8001a048 +#pragma Aux_register(0x8001a048, name=>"io_i2s_rx_mst0_rfcr") + +// User extension aux register io_i2s_rx_mst0_rff +#define AR_IO_I2S_RX_MST0_RFF 0x8001a050 +#pragma Aux_register(0x8001a050, name=>"io_i2s_rx_mst0_rff") + +// User extension aux register io_i2s_rx_mst0_rxdma +#define AR_IO_I2S_RX_MST0_RXDMA 0x8001a1c0 +#pragma Aux_register(0x8001a1c0, name=>"io_i2s_rx_mst0_rxdma") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_I2S_TX_MST0_PRESENT 1 + +// User extension aux register io_i2s_tx_mst0_ier +#define AR_IO_I2S_TX_MST0_IER 0x80019000 +#pragma Aux_register(0x80019000, name=>"io_i2s_tx_mst0_ier") + +// User extension aux register io_i2s_tx_mst0_iter +#define AR_IO_I2S_TX_MST0_ITER 0x80019008 +#pragma Aux_register(0x80019008, name=>"io_i2s_tx_mst0_iter") + +// User extension aux register io_i2s_tx_mst0_cer +#define AR_IO_I2S_TX_MST0_CER 0x8001900c +#pragma Aux_register(0x8001900c, name=>"io_i2s_tx_mst0_cer") + +// User extension aux register io_i2s_tx_mst0_ccr +#define AR_IO_I2S_TX_MST0_CCR 0x80019010 +#pragma Aux_register(0x80019010, name=>"io_i2s_tx_mst0_ccr") + +// User extension aux register io_i2s_tx_mst0_txffr +#define AR_IO_I2S_TX_MST0_TXFFR 0x80019018 
+#pragma Aux_register(0x80019018, name=>"io_i2s_tx_mst0_txffr") + +// User extension aux register io_i2s_tx_mst0_lthr +#define AR_IO_I2S_TX_MST0_LTHR 0x80019020 +#pragma Aux_register(0x80019020, name=>"io_i2s_tx_mst0_lthr") + +// User extension aux register io_i2s_tx_mst0_rthr +#define AR_IO_I2S_TX_MST0_RTHR 0x80019024 +#pragma Aux_register(0x80019024, name=>"io_i2s_tx_mst0_rthr") + +// User extension aux register io_i2s_tx_mst0_ter +#define AR_IO_I2S_TX_MST0_TER 0x8001902c +#pragma Aux_register(0x8001902c, name=>"io_i2s_tx_mst0_ter") + +// User extension aux register io_i2s_tx_mst0_tcr +#define AR_IO_I2S_TX_MST0_TCR 0x80019034 +#pragma Aux_register(0x80019034, name=>"io_i2s_tx_mst0_tcr") + +// User extension aux register io_i2s_tx_mst0_isr +#define AR_IO_I2S_TX_MST0_ISR 0x80019038 +#pragma Aux_register(0x80019038, name=>"io_i2s_tx_mst0_isr") + +// User extension aux register io_i2s_tx_mst0_imr +#define AR_IO_I2S_TX_MST0_IMR 0x8001903c +#pragma Aux_register(0x8001903c, name=>"io_i2s_tx_mst0_imr") + +// User extension aux register io_i2s_tx_mst0_tor +#define AR_IO_I2S_TX_MST0_TOR 0x80019044 +#pragma Aux_register(0x80019044, name=>"io_i2s_tx_mst0_tor") + +// User extension aux register io_i2s_tx_mst0_tfcr +#define AR_IO_I2S_TX_MST0_TFCR 0x8001904c +#pragma Aux_register(0x8001904c, name=>"io_i2s_tx_mst0_tfcr") + +// User extension aux register io_i2s_tx_mst0_tff +#define AR_IO_I2S_TX_MST0_TFF 0x80019054 +#pragma Aux_register(0x80019054, name=>"io_i2s_tx_mst0_tff") + +// User extension aux register io_i2s_tx_mst0_txdma +#define AR_IO_I2S_TX_MST0_TXDMA 0x800191c8 +#pragma Aux_register(0x800191c8, name=>"io_i2s_tx_mst0_txdma") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_PDM_RX0_PRESENT 1 + +// User extension aux register io_pdm_rx0_pdm_en +#define AR_IO_PDM_RX0_PDM_EN 0x8001b000 +#pragma Aux_register(0x8001b000, name=>"io_pdm_rx0_pdm_en") + +// User extension aux register io_pdm_rx0_pdm_ren +#define AR_IO_PDM_RX0_PDM_REN 0x8001b004 +#pragma Aux_register(0x8001b004, 
name=>"io_pdm_rx0_pdm_ren") + +// User extension aux register io_pdm_rx0_cer +#define AR_IO_PDM_RX0_CER 0x8001b00c +#pragma Aux_register(0x8001b00c, name=>"io_pdm_rx0_cer") + +// User extension aux register io_pdm_rx0_rxffr +#define AR_IO_PDM_RX0_RXFFR 0x8001b014 +#pragma Aux_register(0x8001b014, name=>"io_pdm_rx0_rxffr") + +// User extension aux register io_pdm_rx0_rer0 +#define AR_IO_PDM_RX0_RER0 0x8001b028 +#pragma Aux_register(0x8001b028, name=>"io_pdm_rx0_rer0") + +// User extension aux register io_pdm_rx0_isr +#define AR_IO_PDM_RX0_ISR 0x8001b038 +#pragma Aux_register(0x8001b038, name=>"io_pdm_rx0_isr") + +// User extension aux register io_pdm_rx0_imr +#define AR_IO_PDM_RX0_IMR 0x8001b03c +#pragma Aux_register(0x8001b03c, name=>"io_pdm_rx0_imr") + +// User extension aux register io_pdm_rx0_ror +#define AR_IO_PDM_RX0_ROR 0x8001b040 +#pragma Aux_register(0x8001b040, name=>"io_pdm_rx0_ror") + +// User extension aux register io_pdm_rx0_rfcr +#define AR_IO_PDM_RX0_RFCR 0x8001b048 +#pragma Aux_register(0x8001b048, name=>"io_pdm_rx0_rfcr") + +// User extension aux register io_pdm_rx0_rxdma +#define AR_IO_PDM_RX0_RXDMA 0x8001b1c0 +#pragma Aux_register(0x8001b1c0, name=>"io_pdm_rx0_rxdma") + +// User extension aux register io_pdm_rx0_pdm_rr +#define AR_IO_PDM_RX0_PDM_RR 0x8001b1d0 +#pragma Aux_register(0x8001b1d0, name=>"io_pdm_rx0_pdm_rr") + +// User extension aux register io_pdm_rx0_cic_n +#define AR_IO_PDM_RX0_CIC_N 0x8001b1d4 +#pragma Aux_register(0x8001b1d4, name=>"io_pdm_rx0_cic_n") + +// User extension aux register io_pdm_rx0_cic_d +#define AR_IO_PDM_RX0_CIC_D 0x8001b1d8 +#pragma Aux_register(0x8001b1d8, name=>"io_pdm_rx0_cic_d") + +// User extension aux register io_pdm_rx0_dcrc +#define AR_IO_PDM_RX0_DCRC 0x8001b1dc +#pragma Aux_register(0x8001b1dc, name=>"io_pdm_rx0_dcrc") + +// User extension aux register io_pdm_rx0_brc_b0 +#define AR_IO_PDM_RX0_BRC_B0 0x8001b1e0 +#pragma Aux_register(0x8001b1e0, name=>"io_pdm_rx0_brc_b0") + +// User extension aux register 
io_pdm_rx0_brc_clp +#define AR_IO_PDM_RX0_BRC_CLP 0x8001b1f0 +#pragma Aux_register(0x8001b1f0, name=>"io_pdm_rx0_brc_clp") +#define APEX_COM_ARC_HARDWARE_FLOATING_POINT_UNIT_FPU_PRESENT 1 + +// User extension aux register fpu_build +#define AR_FPU_BUILD 0xc8 +#pragma Aux_register(0xc8, name=>"fpu_build") + +// User extension aux register fpu_ctrl +#define AR_FPU_CTRL 0x300 +#pragma Aux_register(0x300, name=>"fpu_ctrl") + +// User extension aux register fpu_status +#define AR_FPU_STATUS 0x301 +#pragma Aux_register(0x301, name=>"fpu_status") + +// User extension instruction fsmadd +extern int fsmadd(int,int); +#pragma intrinsic(fsmadd,opcode=>6,sub_opcode=>5, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fsmsub +extern int fsmsub(int,int); +#pragma intrinsic(fsmsub,opcode=>6,sub_opcode=>6, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fsmul +extern int fsmul(int,int); +#pragma intrinsic(fsmul,opcode=>6,sub_opcode=>0, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fsadd +extern int fsadd(int,int); +#pragma intrinsic(fsadd,opcode=>6,sub_opcode=>1, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fssub +extern int fssub(int,int); +#pragma intrinsic(fssub,opcode=>6,sub_opcode=>2, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fcvt32 +extern int fcvt32(int,int); +#pragma intrinsic(fcvt32,opcode=>6,sub_opcode=>8, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fsdiv +extern int fsdiv(int,int); +#pragma 
intrinsic(fsdiv,opcode=>6,sub_opcode=>7, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fscmp +extern int fscmp(int,int); +#pragma intrinsic(fscmp,opcode=>6,sub_opcode=>3, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fscmp +extern int fscmp_f(int,int); +#pragma intrinsic(fscmp_f,opcode=>6,sub_opcode=>3, set_flags => 1, flags => "zncv", effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fscmpf +extern int fscmpf(int,int); +#pragma intrinsic(fscmpf,opcode=>6,sub_opcode=>4, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fscmpf +extern int fscmpf_f(int,int); +#pragma intrinsic(fscmpf_f,opcode=>6,sub_opcode=>4, set_flags => 1, flags => "zncv", effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fssqrt +extern int fssqrt(int); +#pragma intrinsic(fssqrt,opcode=>6,sub_opcode=>0, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") +#define APEX_COM_ARC_HARDWARE_FLOATING_POINT_UNIT_FPU_DP_ASSIST_PRESENT 1 + +// User extension aux register aux_dpfp1l +#define AR_AUX_DPFP1L 0x302 +#pragma Aux_register(0x302, name=>"aux_dpfp1l") + +// User extension aux register aux_dpfp1h +#define AR_AUX_DPFP1H 0x303 +#pragma Aux_register(0x303, name=>"aux_dpfp1h") + +// User extension aux register aux_dpfp2l +#define AR_AUX_DPFP2L 0x304 +#pragma Aux_register(0x304, name=>"aux_dpfp2l") + +// User extension aux register aux_dpfp2h +#define AR_AUX_DPFP2H 0x305 +#pragma Aux_register(0x305, name=>"aux_dpfp2h") + +// User extension instruction dmulh11 +extern int dmulh11(int,int); 
+#pragma intrinsic(dmulh11,opcode=>6,sub_opcode=>48,blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dmulh11 +extern int dmulh11_f(int,int); +#pragma intrinsic(dmulh11_f,opcode=>6,sub_opcode=>48, set_flags => 1, flags => "zncv",blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dmulh12 +extern int dmulh12(int,int); +#pragma intrinsic(dmulh12,opcode=>6,sub_opcode=>49,blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dmulh12 +extern int dmulh12_f(int,int); +#pragma intrinsic(dmulh12_f,opcode=>6,sub_opcode=>49, set_flags => 1, flags => "zncv",blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dmulh21 +extern int dmulh21(int,int); +#pragma intrinsic(dmulh21,opcode=>6,sub_opcode=>50,blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dmulh21 +extern int dmulh21_f(int,int); +#pragma intrinsic(dmulh21_f,opcode=>6,sub_opcode=>50, set_flags => 1, flags => "zncv",blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dmulh22 +extern int dmulh22(int,int); +#pragma intrinsic(dmulh22,opcode=>6,sub_opcode=>51,blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; 
auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dmulh22 +extern int dmulh22_f(int,int); +#pragma intrinsic(dmulh22_f,opcode=>6,sub_opcode=>51, set_flags => 1, flags => "zncv",blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction daddh11 +extern int daddh11(int,int); +#pragma intrinsic(daddh11,opcode=>6,sub_opcode=>52,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction daddh11 +extern int daddh11_f(int,int); +#pragma intrinsic(daddh11_f,opcode=>6,sub_opcode=>52, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction daddh12 +extern int daddh12(int,int); +#pragma intrinsic(daddh12,opcode=>6,sub_opcode=>53,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction daddh12 +extern int daddh12_f(int,int); +#pragma intrinsic(daddh12_f,opcode=>6,sub_opcode=>53, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction daddh21 +extern int daddh21(int,int); +#pragma intrinsic(daddh21,opcode=>6,sub_opcode=>54,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction daddh21 +extern int daddh21_f(int,int); +#pragma 
intrinsic(daddh21_f,opcode=>6,sub_opcode=>54, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction daddh22 +extern int daddh22(int,int); +#pragma intrinsic(daddh22,opcode=>6,sub_opcode=>55,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction daddh22 +extern int daddh22_f(int,int); +#pragma intrinsic(daddh22_f,opcode=>6,sub_opcode=>55, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dsubh11 +extern int dsubh11(int,int); +#pragma intrinsic(dsubh11,opcode=>6,sub_opcode=>56,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dsubh11 +extern int dsubh11_f(int,int); +#pragma intrinsic(dsubh11_f,opcode=>6,sub_opcode=>56, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dsubh12 +extern int dsubh12(int,int); +#pragma intrinsic(dsubh12,opcode=>6,sub_opcode=>57,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dsubh12 +extern int dsubh12_f(int,int); +#pragma intrinsic(dsubh12_f,opcode=>6,sub_opcode=>57, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; 
auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dsubh21 +extern int dsubh21(int,int); +#pragma intrinsic(dsubh21,opcode=>6,sub_opcode=>58,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dsubh21 +extern int dsubh21_f(int,int); +#pragma intrinsic(dsubh21_f,opcode=>6,sub_opcode=>58, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dsubh22 +extern int dsubh22(int,int); +#pragma intrinsic(dsubh22,opcode=>6,sub_opcode=>59,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dsubh22 +extern int dsubh22_f(int,int); +#pragma intrinsic(dsubh22_f,opcode=>6,sub_opcode=>59, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dexcl1 +extern int dexcl1(int,int); +#pragma intrinsic(dexcl1,opcode=>6,sub_opcode=>60, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dexcl2 +extern int dexcl2(int,int); +#pragma intrinsic(dexcl2,opcode=>6,sub_opcode=>61, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + + +#endif + + +]]> + + + + +
+ diff --git a/tensorflow/lite/micro/tools/make/targets/arc/iotdk/iotdk.lcf b/tensorflow/lite/micro/tools/make/targets/arc/iotdk/iotdk.lcf new file mode 100644 index 00000000000..da39ae911ff --- /dev/null +++ b/tensorflow/lite/micro/tools/make/targets/arc/iotdk/iotdk.lcf @@ -0,0 +1,47 @@ +# SYSTEM memory regions indicate where external memory might be located. +# The TCF has no specific knowledge of whether SYSTEM regions contain +# external memory or not. +# CCMWRAP memory regions indicate unusable portions of the address space +# due to CCM memory wrapping into upper addresses beyond its size + +MEMORY { +# SYSTEM0 : ORIGIN = 0x00000000, LENGTH = 0x20000000 + ICCM0 : ORIGIN = 0x20000000, LENGTH = 0x00040000 +# CCMWRAP0: ORIGIN = 0x20040000, LENGTH = 0x0ffc0000 +# SYSTEM1 : ORIGIN = 0x30000000, LENGTH = 0x50000000 + DCCM : ORIGIN = 0x80000000, LENGTH = 0x00020000 +# CCMWRAP1: ORIGIN = 0x80020000, LENGTH = 0x0ffe0000 +# SYSTEM2 : ORIGIN = 0x90000000, LENGTH = 0x30000000 + XCCM : ORIGIN = 0xc0000000, LENGTH = 0x00008000 +# CCMWRAP2: ORIGIN = 0xc0008000, LENGTH = 0x0fff8000 +# SYSTEM3 : ORIGIN = 0xd0000000, LENGTH = 0x10000000 + YCCM : ORIGIN = 0xe0000000, LENGTH = 0x00008000 +# CCMWRAP3: ORIGIN = 0xe0008000, LENGTH = 0x0fff8000 +# SYSTEM4 : ORIGIN = 0xf0000000, LENGTH = 0x10000000 + } +SECTIONS { + GROUP: { + .text? : { *('.text$crt*') } + * (TEXT): {} + * (LIT): {} + } > ICCM0 + + GROUP: { + /* _SDA_BASE_ computed implicitly */ + .sdata?: {} + .sbss?: {} + * (DATA): {} + * (BSS): {} + .stack ALIGN(4) SIZE(DEFINED _STACKSIZE?_STACKSIZE:32768): {} + .heap? ALIGN(4) SIZE(DEFINED _HEAPSIZE?_HEAPSIZE:0): {} + } > DCCM + GROUP: { + .Xdata? : {} + } > XCCM + GROUP: { + .Ydata? 
: {} + } > YCCM + GROUP BIND(0x0): { + .vectors (TEXT) SIZE(DEFINED _IVTSIZE?_IVTSIZE:684): {} = FILL(0xa5a5a5a5,4) + } + } diff --git a/tensorflow/lite/micro/tools/make/targets/arc/iotdk/iotdk.tcf b/tensorflow/lite/micro/tools/make/targets/arc/iotdk/iotdk.tcf new file mode 100644 index 00000000000..004215a2f6a --- /dev/null +++ b/tensorflow/lite/micro/tools/make/targets/arc/iotdk/iotdk.tcf @@ -0,0 +1,4621 @@ + + + + + + + + + + + + + + + + + + + + 10*2) +# +# The speed of simulation can be greatly increased by using a faster JTAG clock, but a dependency will warn if it exceeds 1/2 of the cpu clock. +# +-jtag_tclk 4 + +# execution_trace_level --- +# This traces committed instructions as they execute, and gathers statistics +# visible in the debugger for counting instructions & cycle delays. +# At the "stats" level ony the statistics are gathered and no trace is printed. +# "file" is equivalent to "full", but the results go to a trace .txt file instead. +# +-execution_trace_level stats + +# generate_ipxact --- +# Generate ipxact.xml file describing the CPUisle or archipelago frontier +# +-generate_ipxact false + +# ipxact_relative_path_names --- +# Use relative path names for Verilog files in the ipxact. +# Otherwise, absolute path names are used. +# +-ipxact_relative_path_names true + +# optional_encryption --- +# When selected, encrypted RTL output is generated. +# +-optional_encryption false + +# ignore_encrypt_license --- +# When selected, pretend the encryption license is missing. For testing. +# +-ignore_encrypt_license false + +# ignore_clear_license --- +# When selected, pretend the cleartest license is missing. For testing. +# +-ignore_clear_license false + + +######## Tool Configuration --- cgen.1_0 ######## + +# Create Tool Configuration +-create cgen.1_0 "System.Tool Configuration" + +# mwdt_version --- Selects the MetaWare version to be used with the TCF file. 
+# Change from the default to an older or newer toolset version if you want the TCF file to be used with an older or newer version of the MetaWare tools. +-mwdt_version K-2015.09 + +# code_base_addr --- +# The base address to assign to the executable code segment in the linker command file when there is no ICCM in the build. This value is ignored when there is an ICCM. +# +-code_base_addr 0 + +# data_base_addr --- +# The base address to assign to the data segment in the linker command file when the data is not being mapped to a DCCM. This value is ignored when the data segment is mapped to a DCCM, as in that case the base address of the DCCM memory is used. +# +# A value of 0xffffffff means that the data segment will not be mapped to any specific address. +# +-data_base_addr 4294967295 + + +######## IO Software --- com.arc.software.dfss.sw_io.1_0 ######## + +# Create IO Software +-create com.arc.software.dfss.sw_io.1_0 "System.IO Software" + +# sw_io --- Command line option for Software element 'IO Software' +-sw_io true + + +######## DSP Software --- com.arc.software.dfss.sw_dsp.1_0 ######## + +# Create DSP Software +-create com.arc.software.dfss.sw_dsp.1_0 "System.DSP Software" + +# sw_dsp --- Command line option for Software element 'DSP Software' +-sw_dsp true + + +######## Infrastructure Software --- com.arc.software.dfss.sw_infra.1_0 ######## + +# Create Infrastructure Software +-create com.arc.software.dfss.sw_infra.1_0 "System.Infrastructure Software" + +# sw_infra --- Command line option for Software element 'Infrastructure Software' +-sw_infra true + + +######## CPUisle --- com.arc.hardware.CPU_isle.1_0 ######## + +# Create CPUisle +-create com.arc.hardware.CPU_isle.1_0 System.CPUisle + +# unique_name --- verilog module modifier prefix +-unique_name "" + +# ArcNum --- The processor number as read back in the ARCNUM field of the IDENTITY register. +-arc_num 1 + +# instances --- +# The number of instantiations of this core. 
+# +-instances 1 + +# CPUFloorplan --- Floorplan giving relative placement of the RAMs for the given configuration of ARCv2HS or ARCv2EM in this CPUisle +-cpu_floorplan em9d_xyccm + +# userCPUFloorplanPath --- Pathname of user floorplan for the CPU when using a hierarchical implementation +-usercpufloorplan_path "" + +# pinLocationConstraintsFile --- Pathname+filename of the physical pin location constraints file or just "side1" (all pins on l.h.s) or "side2" (pins on top only) or "side3" (pins on r.h.s. only) or "side4" (pins on bottom only) to get a template file generated +-pin_location_constraints_file "" + + +######## ARCv2EM --- com.arc.hardware.ARCv2EM.1_0 ######## + +# Create ARCv2EM +-create com.arc.hardware.ARCv2EM.1_0 System.CPUisle.ARCv2EM + +# arcv2em --- Description to follow +-arcv2em true + +# def_div2ref --- This specifies the clock division factor at reset. It is used for mss clock controller to generate core clock, and the value N means core is running at (1/N) x ref_clk. +-def_div2ref 1 + +# addr_size --- This defines the address bus width (in bits). +-addr_size 32 + +# pc_size --- This defines the program counter (in bits). +-pc_size 32 + +# lpc_size --- This defines the size of the loop counter (in bits). +-lpc_size 32 + +# halt_on_reset --- This defines whether the core is halted initially on reset. +-halt_on_reset true + +# byte_order --- This defines the endianness of the core. +-byte_order little + +# code_density_option --- This reduces the size of program memory by adding instructions that condense commonly used instruction patterns with some marginal increase in processor gate count. The added instructions are ENTER_S, LEAVE_S, JLI_S, BI, BIH. +-code_density_option true + +# bitscan_option --- This adds instructions for efficient search of bits within a 32 bit word, including normalize (NORM, NORMH, NORMW) and find first or last set bit (FFS, FLS) instructions. 
+-bitscan_option true + +# shift_option --- The Shift ISA option adds variable and multi-length shift rotation instructions: (0) No shift/rotation instructions (1) ASR16, ASR8, LSR8, LSL8, ROL8, ROR8 (2) ASRM, ASLM, LSRM, RORM (3) ASR16, ASR8, LSR8, LSL8, ROL8, ROR8, ASRM, ASLM, LSRM, RORM +-shift_option 3 + +# swap_option --- This adds two instructions used to swap half-words or bytes in a 32b word. Useful for converting between little to big endianess and vice-versa. +-swap_option true + +# div_rem_option --- The DIV/REM option adds non-blocking multi-cycle implementation of integer divide/remainder functions. Added instructions are DIV, DIVU (integer divide), REM and REMU (integer divide remainder).radix2 takes 33 cycles. radix4_enhanced takes 3 to 19 cycles per operation. +-div_rem_option none + +# mpy_option --- The Multiplier ISA option allows selection between several multiplier configurations to tradeoff performance with silicon area. +# For select multiply options, when the DIV/REM option is also selected, some datapath resources will be shared between the multiply and divide pipeline to minimize total area. +# +# Cycle count (16-bit, lower 32-bit or upper 32-bit) for the different configurations is as follows: +#
+# 
+# option  16/L32/U32  Instructions
+# ------  ----------  ---------------------
+#       
+# none	  -/-/-     None
+# wlh1	  1/1/1     MPYW/U, MPY/U, MPYH/U
+# wlh2	  2/2/2     MPYW/U, MPY/U, MPYH/U
+# wlh3	  2/3/3     MPYW/U, MPY/U, MPYH/U
+# wlh4	  2/4/5     MPYW/U, MPY/U, MPYH/U
+# wlh5	  5/9/9     MPYW/U, MPY/U, MPYH/U
+# 
+# +-mpy_option none + +# code_protection --- The ARC EM architecture divides the memory into 16 regions, which can be protected individually. This feature adds a 16-bit input to the processor core, one bit per region. When the protect bit is set, the processor disables any load or store to the corresponding region. An attempt to access a protected region raises an EV_ProtV exception. +-code_protection true + +# stack_checking --- Stack checking is a mechanism for checking stack accesses and raising an exception when a stack overflow or underflow is detected. +-stack_checking true + +# unaligned_option --- This enables unaligned loads and stores. +-unaligned_option true + +# intvbase_preset --- This sets the interrupt vector base configuration register, VECBASE_AC_BUILD. The vector base address is aligned to a 1KB boundary, so the required address value should be divided by 1K (i.e. do not include the lower 10 bits). On reset, this register is loaded into the interrupt vector base address register, INT_VECTOR_BASE. +-intvbase_preset 0 + +# rgf_impl --- This defines whether the register file is implemented using flip-flops, or with a hard macro. +-rgf_impl flip_flops + +# rgf_num_regs --- This defines the size (in 32b register) of the processor register file. +-rgf_num_regs 32 + +# rgf_wr_ports --- This defines the number of write ports on the register file. +-rgf_wr_ports 2 + +# rgf_num_banks --- Dual register banks are useful if Fast IRQ has been configured, but may be selected even if not. +-rgf_num_banks 2 + +# rgf_banked_regs --- This selects the number of registers that are replicated in the second register-file bank. +-rgf_banked_regs 32 + +# turbo_boost --- This enables the Turbo Boost synthesis option. By enabling this option, the achievable clock frequency is increased, but at the cost of an additional cycle latency on branch instructions. 
+-turbo_boost false + +# infer_alu_adder --- infer: datapath is described as behavioral code: A + B +# instantiate: datapath is instantiated as a detailed multi-stage code of a carry-lookahead adder. It is generally preferable to use the infer option and add directives for your target synthesizer. +-infer_alu_adder infer + +# infer_mpy_wtree --- infer: datapath is described as behavioral code: A * B (applies to only wlh3, wlh4 and wlh5 designs) +# instantiate: datapath is instantiated as a detailed multi-stage code of a Wallace Tree multiplier It is generally preferable to use the infer option and add directives for your target synthesizer. +-infer_mpy_wtree instantiate + +# power_domains --- Adds three separate power domains to the core, and propagates power-gate control signals to the top level of the core. Also generates UPF constraints and commands in the low-power scripts +-power_domains true + +# dvfs --- Adds logic to the core to allow dynamic controlling of voltage and frequency and propagates the associated control signals to the top level of core +-dvfs true + +# voltage_domains --- Creates a voltage domain split between RAM and std cell parts to support Ultra Low Voltage on cells and generates UPF constraints +-voltage_domains false + +# mem_bus_option --- The core supports three bus protocols for accessing external memory: AHB & AHB-Lite. AHB-Lite-single means instruction fetch and data access share a single AHB-Lite port. AHB-Lite-dual means separate AHB-Lite port for each initiator. +-mem_bus_option AHB-Lite-dual + +# mem_bus_reg_interface --- Specifies whether the memory bus interface is registered. +-mem_bus_reg_interface true + +# dmi_burst_option --- This will enable high-throughput burst support on the DMI slave interfaces. By enabling this option, the peak DMI read throughput goes from 1 word per 3 cycles to N words per N+2 cycles, in which N is the AHB burst lengthDMI write throughput goes from 1 word per 3 cycles to 1 word per cycle. 
+-dmi_burst_option false + +# has_dmp_peripheral --- This option enables the redirection of load/store accesses to one segment (1/16) of the addressable space to a dedicated peripheral bus. This offers high system integration and reduces overall system cost. +-has_dmp_peripheral false + +# per_bus_option --- The core supports one bus protocol for accessing the peripheral space, when enabled: AHB-Lite. +-per_bus_option AHB-Lite + +# per_bus_reg_interface --- Specifies whether the peripheral bus interface is registered. +-per_bus_reg_interface false + +# clock_gating --- This enables the insertion of architectural clock gate elements in the design. By enabling this option, the clocks to various parts of the design will be disabled when the logic they drive is not in use to save power. +-clock_gating true + +# byte_parity --- If parity protection on the CCMs is configured, this option is used to enable parity protection on a per-byte basis. Otherwise, parity will be per word basis +-byte_parity false + +# prot_pipelined --- Check the box if CCM memories are configured for ECC, and you want single-bit errors to be corrected, written back to memory, and re-fetched. When unchecked, single bit errors are corrected when read from memory, but the offending memory location itself is not corrected with a writeback +-prot_pipelined false + +# cct_test_ena --- When ECC is configured, this option enables automatic generation of error conditions in relevant testbench memories to exercise error detection and correction features +-cct_test_ena false + + +######## AGU --- com.arc.hardware.AGU.1_0 ######## + +# Create AGU +-create com.arc.hardware.AGU.1_0 System.CPUisle.ARCv2EM.AGU + +# agu_size --- Predefined configurations of modifiers, address +# pointers and offset registers +#
+# 
+#         address     address                     
+#         pointers    offset regs      modifiers  
+#        ----------- --------------- ------------ 
+# small:     4           2                 4      
+# medium:    8           4                 12     
+# large:     12          8                 24     
+# 
+# +-agu_size small + +# agu_accord --- Enable the accordion stage if operating frequency is critical +-agu_accord true + +# agu_wb_depth --- Write buffer depth +-agu_wb_depth 2 + + +######## DSP --- com.arc.hardware.DSP.1_0 ######## + +# Create DSP +-create com.arc.hardware.DSP.1_0 System.CPUisle.ARCv2EM.DSP + +# dsp_complex --- Enable/disable support for single cycle 16b+16b complex instructions and butterfly operations, else 2-cycle complex instructions only without butterfly support +-dsp_complex true + +# dsp_itu --- Enable/disable support for ITU bit-accurate 1 bit fractional shift before accumulation, else 1-bit fractional shift result after accumulation only +-dsp_itu true + +# dsp_divsqrt --- Enable/disable support for divide and square root operations: DIV(U), REM(U), SQRT +-dsp_divsqrt radix2 + +# dsp_accshift --- Select support for accumulator shift operations: no supported, limited shift support only or full shift support and convergent rounding +-dsp_accshift full + +# dsp_impl --- The datapath components may be inferred from Verilog for better area or optimized using carry-save components for better timing +-dsp_impl optimized + + +######## Interrupt Controller --- com.arc.hardware.Interrupt_Controller.1_0 ######## + +# Create Interrupt Controller +-create com.arc.hardware.Interrupt_Controller.1_0 "System.CPUisle.ARCv2EM.Interrupt Controller" + +# number_of_interrupts --- This is the total number of interrupts available to the core. Some interrupts are allocated statically to a specific interrupt line (for example, timer interrupts). For more information on Interrupt and register-file options, see DesignWare ARCv2 ISA Programmers Reference Manual. +-number_of_interrupts 95 + +# number_of_levels --- Priority levels in the interrupt controller. +-number_of_levels 4 + +# external_interrupts --- This is the total number of interrupt pins available for external system components. This parameter must be less than the total number of interrupts. 
+-external_interrupts 60 + +# firq_option --- This enables the fast-interrupts option, (priority level 0 interrupts), which uses an alternate register bank (if configured) instead of saving the context to memory. +-firq_option true + + +######## Timer 0 --- com.arc.hardware.Timer_0.1_0 ######## + +# Create Timer 0 +-create com.arc.hardware.Timer_0.1_0 "System.CPUisle.ARCv2EM.Timer 0" + +# timer_0_int_level --- This sets the interrupt level (and implicitly the priority: level 0 is highest) of timer 0. +-timer_0_int_level 1 + + +######## Timer 1 --- com.arc.hardware.Timer_1.1_0 ######## + +# Create Timer 1 +-create com.arc.hardware.Timer_1.1_0 "System.CPUisle.ARCv2EM.Timer 1" + +# timer_1_int_level --- This sets the interrupt level (and implicitly the priority: level 0 is highest) of timer 1. +-timer_1_int_level 0 + + +######## Watchdog Timer --- com.arc.hardware.Watchdog_Timer.1_0 ######## + +# Create Watchdog Timer +-create com.arc.hardware.Watchdog_Timer.1_0 "System.CPUisle.ARCv2EM.Watchdog Timer" + +# watchdog_size --- Specifies the bit width of the internal counter used within the timer. +-watchdog_size 16 + +# watchdog_clk --- Specifies whether the timer should be driven from a separate clock. +-watchdog_clk true + + +######## Data Memory Initiator --- com.arc.hardware.Data_Memory_Initiator.1_0 ######## + +# Create Data Memory Initiator +-create com.arc.hardware.Data_Memory_Initiator.1_0 "System.CPUisle.ARCv2EM.Data Memory Initiator" + +######## Instruction Fetch Queue --- com.arc.hardware.Instruction_Fetch_Queue.1_0 ######## + +# Create Instruction Fetch Queue +-create com.arc.hardware.Instruction_Fetch_Queue.1_0 "System.CPUisle.ARCv2EM.Instruction Fetch Queue" + +# ifqueue_size --- This defines the number of entires in the Instruction Fetch Queue. +-ifqueue_size 4 + +# ifqueue_burst_size --- This sets the burst size for bus data transfers (in 32-bit words). It cannot exceed the number of entries. 
+-ifqueue_burst_size 2 + + +######## DCCM --- com.arc.hardware.DCCM.1_0 ######## + +# Create DCCM +-create com.arc.hardware.DCCM.1_0 System.CPUisle.ARCv2EM.DCCM + +# dccm_size --- This defines the size of the Data Closely Coupled Memory (DCCM) in bytes +-dccm_size 131072 + +# dccm_base --- Sets the initial memory region assignment for DCCM +-dccm_base 8 + +# dccm_interleave --- Split DCCM into even/odd memory banks. +-dccm_interleave false + +# dccm_prot --- Specifies the type of protection built for the DCCM. +-dccm_prot None + +# dccm_prot_level --- Specifies the level protection. +-dccm_prot_level Data_Only + +# dccm_prot_exceptions --- When the core is configured with ECC or Parity, cause exception generation hardware to be created for uncorrectable errors detected on the DCCM +-dccm_prot_exceptions true + +# dccm_dmi --- This enables external access through a DMI (direct memory interface) port. +-dccm_dmi true + + +######## ICCM0 --- com.arc.hardware.ICCM0.1_0 ######## + +# Create ICCM0 +-create com.arc.hardware.ICCM0.1_0 System.CPUisle.ARCv2EM.ICCM0 + +# iccm0_size --- This defines the size of ICCM0 in bytes.This ICCM has 0 wait states. +-iccm0_size 262144 + +# iccm0_base --- Sets the initial memory region assignment for ICCM0 +-iccm0_base 2 + +# iccm0_wide --- Creates ICCM0 as 64b memory to reduce accesses. +-iccm0_wide true + +# iccm0_prot --- Specifies the type of protection built for ICCM0. +-iccm0_prot None + +# iccm0_prot_level --- Specifies the level of protection. +-iccm0_prot_level Data_Only + +# iccm0_prot_exceptions --- When the core is configured with ECC or Parity, cause exception generation hardware to be created for uncorrectable errors detected on the ICCM0 +-iccm0_prot_exceptions true + +# iccm0_dmi --- This enables external access through a DMI (direct memory interface) port. 
+-iccm0_dmi true + + +######## XY --- com.arc.hardware.XY.1_0 ######## + +# Create XY +-create com.arc.hardware.XY.1_0 System.CPUisle.ARCv2EM.XY + +# xy_config --- XY memory configuration: +# One memory: DCCM only. +# Two memories: DCCM + Y. +# Three memories: DCCM + X + Y. +-xy_config dccm_x_y + +# xy_size --- Size of X and Y memories if included. +# X and Y memories both have the same configured size. +-xy_size 32768 + +# xy_interleave --- Split XY memories into odd/even instances to enable single cycle unaligned access. +-xy_interleave true + +# xy_x_base --- Base region for X memory. All accesses to this region will initiate a transfer on the X memory. +-xy_x_base 12 + +# xy_y_base --- Base region for Y memory. All accesses to this region will initiate a transfer on the Y memory. +-xy_y_base 14 + + +######## DMA Controller --- com.arc.hardware.DMA_Controller.1_0 ######## + +# Create DMA Controller +-create com.arc.hardware.DMA_Controller.1_0 "System.CPUisle.ARCv2EM.DMA Controller" + +# dmac_channels --- This options specifies the number of DMA channels implemented in the DMA controller +-dmac_channels 16 + +# dmac_fifo_depth --- This option specifies the DMA transfer FIFO depth in 32b words. 
+-dmac_fifo_depth 4 + +# dmac_int_config --- None: the DMA controller cannot raise an interrupt +# Single-External: single done and single error interrupt signal for all DMA channels, and the interrupt signals are routed to a port at the top of the EM logical hierarchy +# Multiple-External: each DMA channel can be configured to raise separate (per-channel) done and error interrupts, and the interrupt signals are routed to ports at the top of the EM logical hierarchy +# Single-Internal: single done and single error interrupt signals for all DMA channels, and the interrupt signals are internal to the EM core +# Multiple-Internal: each DMA channel can be configured to raise separate (per-channel) done and error interrupts, and the interrupt signals are internal to the EM core +-dmac_int_config Multiple-Internal + +# dmac_registers --- This option defines the number of DMA channels with their registers located in auxiliary space. +-dmac_registers 16 + +# dmac_mem_if --- This option specifies whether the DMA controller system memory interface is integrated into the existing EM system memory interfaces or has its own interface. +-dmac_mem_if separate + + +######## JTAG Interface --- com.arc.hardware.JTAG_Interface.1_0 ######## + +# Create JTAG Interface +-create com.arc.hardware.JTAG_Interface.1_0 "System.CPUisle.ARCv2EM.JTAG Interface" + +######## Debug Interface --- com.arc.hardware.Debug_Interface.1_0 ######## + +# Create Debug Interface +-create com.arc.hardware.Debug_Interface.1_0 "System.CPUisle.ARCv2EM.Debug Interface" + +######## Actionpoints --- com.arc.hardware.Actionpoints.1_0 ######## + +# Create Actionpoints +-create com.arc.hardware.Actionpoints.1_0 System.CPUisle.ARCv2EM.Actionpoints + +# num_actionpoints --- This is the number of trigger events available. 
+-num_actionpoints 8 + +# aps_feature --- Selects Actionpoint feature set +-aps_feature min + + +######## SmaRT --- com.arc.hardware.SmaRT.1_0 ######## + +# Create SmaRT +-create com.arc.hardware.SmaRT.1_0 System.CPUisle.ARCv2EM.SmaRT + +# smart_stack_entries --- This specifies the number of entries in the trace buffer. +-smart_stack_entries 64 + +# smart_implementation --- Flip-flop = FF-based design. Memory = memory-based design (provides better density for larger trace buffers). +-smart_implementation memory + + +######## Memory Protection Unit --- com.arc.hardware.Memory_Protection_Unit.1_0 ######## + +# Create Memory Protection Unit +-create com.arc.hardware.Memory_Protection_Unit.1_0 "System.CPUisle.ARCv2EM.Memory Protection Unit" + +# mpu_num_regions --- Number of configured memory regions. +-mpu_num_regions 16 + +# mpu_32b --- Set the minimal region size to be 32 byte instead of 2KB. +-mpu_32b false + + +######## Floating-point unit --- com.arc.hardware.Floating_point_unit.1_0 ######## + +# Create Floating-point unit +-create com.arc.hardware.Floating_point_unit.1_0 "System.CPUisle.ARCv2EM.Floating-point unit" + +# fpu_dp_assist --- This enables double-precision acceleration instructions. +-fpu_dp_assist true + +# fpu_fma_option --- This enables the fused multiply-add & multiply-subtract instructions. +-fpu_fma_option true + +# fpu_mas_cycles --- Make mul/add/sub multicycle to achieve a higher clock speed. 
+-fpu_mas_cycles 2 + +# fpu_div_option --- This enables divide & square-root acceleration +-fpu_div_option true + +# fpu_div_cycles --- "inferred" option infers DSP datapath elements from verilog operators for better area and "optimized" option selects hardware for better timing +-fpu_div_cycles 17 + + +######## Performance Monitor --- com.arc.hardware.Performance_Monitor.1_0 ######## + +# Create Performance Monitor +-create com.arc.hardware.Performance_Monitor.1_0 "System.CPUisle.ARCv2EM.Performance Monitor" + +# pct_counters --- Number of counters for performance monitoring. +-pct_counters 8 + + +######## dsp_trig --- com.arc.hardware.dfss.dsp_trig.1_0 ######## + +# Create dsp_trig +-create com.arc.hardware.dfss.dsp_trig.1_0 System.CPUisle.ARCv2EM.dsp_trig + +# dsp_trig --- Command line option for EIA extension component 'dsp_trig'. +-dsp_trig true + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_gpio_4b0 --- com.arc.hardware.dfss.io_gpio_4b0.1_0 ######## + +# Create io_gpio_4b0 +-create com.arc.hardware.dfss.io_gpio_4b0.1_0 System.CPUisle.ARCv2EM.io_gpio_4b0 + +# io_gpio_4b0 --- Command line option for EIA extension component 'io_gpio_4b0'. +-io_gpio_4b0 true + +# io_gpio_4b0_debounce --- Selects the inclusion of Debounce logic +-io_gpio_4b0_debounce 1 + +# io_gpio_4b0_readback_sync --- Selects the inclusion of metastability registers on the read back path when reading the external 'ext_porta' signal +-io_gpio_4b0_readback_sync 1 + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_gpio_4b1 --- com.arc.hardware.dfss.io_gpio_4b1.1_0 ######## + +# Create io_gpio_4b1 +-create com.arc.hardware.dfss.io_gpio_4b1.1_0 System.CPUisle.ARCv2EM.io_gpio_4b1 + +# io_gpio_4b1 --- Command line option for EIA extension component 'io_gpio_4b1'. +-io_gpio_4b1 true + +# io_gpio_4b1_debounce --- Selects the inclusion of Debounce logic +-io_gpio_4b1_debounce 1 + +# io_gpio_4b1_readback_sync --- Selects the inclusion of metastability registers on the read back path when reading the external 'ext_porta' signal +-io_gpio_4b1_readback_sync 1 + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_gpio_4b2 --- com.arc.hardware.dfss.io_gpio_4b2.1_0 ######## + +# Create io_gpio_4b2 +-create com.arc.hardware.dfss.io_gpio_4b2.1_0 System.CPUisle.ARCv2EM.io_gpio_4b2 + +# io_gpio_4b2 --- Command line option for EIA extension component 'io_gpio_4b2'. +-io_gpio_4b2 true + +# io_gpio_4b2_debounce --- Selects the inclusion of Debounce logic +-io_gpio_4b2_debounce 1 + +# io_gpio_4b2_readback_sync --- Selects the inclusion of metastability registers on the read back path when reading the external 'ext_porta' signal +-io_gpio_4b2_readback_sync 1 + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_gpio_8b0 --- com.arc.hardware.dfss.io_gpio_8b0.1_0 ######## + +# Create io_gpio_8b0 +-create com.arc.hardware.dfss.io_gpio_8b0.1_0 System.CPUisle.ARCv2EM.io_gpio_8b0 + +# io_gpio_8b0 --- Command line option for EIA extension component 'io_gpio_8b0'. +-io_gpio_8b0 true + +# io_gpio_8b0_debounce --- Selects the inclusion of Debounce logic +-io_gpio_8b0_debounce 1 + +# io_gpio_8b0_readback_sync --- Selects the inclusion of metastability registers on the read back path when reading the external 'ext_porta' signal +-io_gpio_8b0_readback_sync 1 + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_gpio_8b1 --- com.arc.hardware.dfss.io_gpio_8b1.1_0 ######## + +# Create io_gpio_8b1 +-create com.arc.hardware.dfss.io_gpio_8b1.1_0 System.CPUisle.ARCv2EM.io_gpio_8b1 + +# io_gpio_8b1 --- Command line option for EIA extension component 'io_gpio_8b1'. +-io_gpio_8b1 true + +# io_gpio_8b1_debounce --- Selects the inclusion of Debounce logic +-io_gpio_8b1_debounce 1 + +# io_gpio_8b1_readback_sync --- Selects the inclusion of metastability registers on the read back path when reading the external 'ext_porta' signal +-io_gpio_8b1_readback_sync 1 + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_gpio_8b2 --- com.arc.hardware.dfss.io_gpio_8b2.1_0 ######## + +# Create io_gpio_8b2 +-create com.arc.hardware.dfss.io_gpio_8b2.1_0 System.CPUisle.ARCv2EM.io_gpio_8b2 + +# io_gpio_8b2 --- Command line option for EIA extension component 'io_gpio_8b2'. +-io_gpio_8b2 true + +# io_gpio_8b2_debounce --- Selects the inclusion of Debounce logic +-io_gpio_8b2_debounce 1 + +# io_gpio_8b2_readback_sync --- Selects the inclusion of metastability registers on the read back path when reading the external 'ext_porta' signal +-io_gpio_8b2_readback_sync 1 + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_gpio_8b3 --- com.arc.hardware.dfss.io_gpio_8b3.1_0 ######## + +# Create io_gpio_8b3 +-create com.arc.hardware.dfss.io_gpio_8b3.1_0 System.CPUisle.ARCv2EM.io_gpio_8b3 + +# io_gpio_8b3 --- Command line option for EIA extension component 'io_gpio_8b3'. +-io_gpio_8b3 true + +# io_gpio_8b3_debounce --- Selects the inclusion of Debounce logic +-io_gpio_8b3_debounce 1 + +# io_gpio_8b3_readback_sync --- Selects the inclusion of metastability registers on the read back path when reading the external 'ext_porta' signal +-io_gpio_8b3_readback_sync 1 + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_i2c_mst0 --- com.arc.hardware.dfss.io_i2c_mst0.1_0 ######## + +# Create io_i2c_mst0 +-create com.arc.hardware.dfss.io_i2c_mst0.1_0 System.CPUisle.ARCv2EM.io_i2c_mst0 + +# io_i2c_mst0 --- Command line option for APEX extension component 'io_i2c_mst0'. +-io_i2c_mst0 true + +# io_i2c_mst0_fs --- RX/TX FIFO size +-io_i2c_mst0_fs 16 + +# io_i2c_mst0_dma_support --- Specifies whether the DMA handshake interface is included +-io_i2c_mst0_dma_support None + +# io_i2c_mst0_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the serial clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than, lower than or equal to the serial clock frequency. +-io_i2c_mst0_cdc_included 1 + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_i2c_mst1 --- com.arc.hardware.dfss.io_i2c_mst1.1_0 ######## + +# Create io_i2c_mst1 +-create com.arc.hardware.dfss.io_i2c_mst1.1_0 System.CPUisle.ARCv2EM.io_i2c_mst1 + +# io_i2c_mst1 --- Command line option for APEX extension component 'io_i2c_mst1'. +-io_i2c_mst1 true + +# io_i2c_mst1_fs --- RX/TX FIFO size +-io_i2c_mst1_fs 16 + +# io_i2c_mst1_dma_support --- Specifies whether the DMA handshake interface is included +-io_i2c_mst1_dma_support None + +# io_i2c_mst1_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the serial clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than, lower than or equal to the serial clock frequency. +-io_i2c_mst1_cdc_included 1 + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_i2c_mst2 --- com.arc.hardware.dfss.io_i2c_mst2.1_0 ######## + +# Create io_i2c_mst2 +-create com.arc.hardware.dfss.io_i2c_mst2.1_0 System.CPUisle.ARCv2EM.io_i2c_mst2 + +# io_i2c_mst2 --- Command line option for APEX extension component 'io_i2c_mst2'. +-io_i2c_mst2 true + +# io_i2c_mst2_fs --- RX/TX FIFO size +-io_i2c_mst2_fs 16 + +# io_i2c_mst2_dma_support --- Specifies whether the DMA handshake interface is included +-io_i2c_mst2_dma_support None + +# io_i2c_mst2_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the serial clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than, lower than or equal to the serial clock frequency. +-io_i2c_mst2_cdc_included 1 + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_spi_mst0 --- com.arc.hardware.dfss.io_spi_mst0.1_0 ######## + +# Create io_spi_mst0 +-create com.arc.hardware.dfss.io_spi_mst0.1_0 System.CPUisle.ARCv2EM.io_spi_mst0 + +# io_spi_mst0 --- Command line option for APEX extension component 'io_spi_mst0'. +-io_spi_mst0 true + +# io_spi_mst0_fs --- RX/TX FIFO depth +-io_spi_mst0_fs 16 + +# io_spi_mst0_max_xfer_size --- This defines the maximum number of bits per word at the serial data port, which determines the FIFO width. +-io_spi_mst0_max_xfer_size 16 + +# io_spi_mst0_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the peripheral clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than or equal to the peripheral clock frequency. +-io_spi_mst0_cdc_included 1 + +# io_spi_mst0_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. +-io_spi_mst0_dma_support Aux-Based + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_spi_mst1 --- com.arc.hardware.dfss.io_spi_mst1.1_0 ######## + +# Create io_spi_mst1 +-create com.arc.hardware.dfss.io_spi_mst1.1_0 System.CPUisle.ARCv2EM.io_spi_mst1 + +# io_spi_mst1 --- Command line option for APEX extension component 'io_spi_mst1'. +-io_spi_mst1 true + +# io_spi_mst1_fs --- RX/TX FIFO depth +-io_spi_mst1_fs 16 + +# io_spi_mst1_max_xfer_size --- This defines the maximum number of bits per word at the serial data port, which determines the FIFO width. +-io_spi_mst1_max_xfer_size 16 + +# io_spi_mst1_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the peripheral clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than or equal to the peripheral clock frequency. +-io_spi_mst1_cdc_included 1 + +# io_spi_mst1_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. +-io_spi_mst1_dma_support Aux-Based + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_spi_mst2 --- com.arc.hardware.dfss.io_spi_mst2.1_0 ######## + +# Create io_spi_mst2 +-create com.arc.hardware.dfss.io_spi_mst2.1_0 System.CPUisle.ARCv2EM.io_spi_mst2 + +# io_spi_mst2 --- Command line option for APEX extension component 'io_spi_mst2'. +-io_spi_mst2 true + +# io_spi_mst2_fs --- RX/TX FIFO depth +-io_spi_mst2_fs 16 + +# io_spi_mst2_max_xfer_size --- This defines the maximum number of bits per word at the serial data port, which determines the FIFO width. +-io_spi_mst2_max_xfer_size 16 + +# io_spi_mst2_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the peripheral clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than or equal to the peripheral clock frequency. +-io_spi_mst2_cdc_included 1 + +# io_spi_mst2_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. +-io_spi_mst2_dma_support Aux-Based + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_spi_slv0 --- com.arc.hardware.dfss.io_spi_slv0.1_0 ######## + +# Create io_spi_slv0 +-create com.arc.hardware.dfss.io_spi_slv0.1_0 System.CPUisle.ARCv2EM.io_spi_slv0 + +# io_spi_slv0 --- Command line option for APEX extension component 'io_spi_slv0'. +-io_spi_slv0 true + +# io_spi_slv0_fs --- RX/TX FIFO depth +-io_spi_slv0_fs 16 + +# io_spi_slv0_max_xfer_size --- This defines the maximum number of bits per word at the serial data port, which determines the FIFO width. +-io_spi_slv0_max_xfer_size 16 + +# io_spi_slv0_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. +-io_spi_slv0_dma_support None + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_uart0 --- com.arc.hardware.dfss.io_uart0.1_0 ######## + +# Create io_uart0 +-create com.arc.hardware.dfss.io_uart0.1_0 System.CPUisle.ARCv2EM.io_uart0 + +# io_uart0 --- Command line option for EIA extension component 'io_uart0'. +-io_uart0 true + +# io_uart0_fifo_mode --- Set the UART FIFO mode +-io_uart0_fifo_mode 16 + +# io_uart0_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. +-io_uart0_dma_support None + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_uart1 --- com.arc.hardware.dfss.io_uart1.1_0 ######## + +# Create io_uart1 +-create com.arc.hardware.dfss.io_uart1.1_0 System.CPUisle.ARCv2EM.io_uart1 + +# io_uart1 --- Command line option for EIA extension component 'io_uart1'. +-io_uart1 true + +# io_uart1_fifo_mode --- Set the UART FIFO mode +-io_uart1_fifo_mode 16 + +# io_uart1_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. +-io_uart1_dma_support Aux-Based + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_uart2 --- com.arc.hardware.dfss.io_uart2.1_0 ######## + +# Create io_uart2 +-create com.arc.hardware.dfss.io_uart2.1_0 System.CPUisle.ARCv2EM.io_uart2 + +# io_uart2 --- Command line option for EIA extension component 'io_uart2'. +-io_uart2 true + +# io_uart2_fifo_mode --- Set the UART FIFO mode +-io_uart2_fifo_mode 16 + +# io_uart2_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. +-io_uart2_dma_support Aux-Based + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_uart3 --- com.arc.hardware.dfss.io_uart3.1_0 ######## + +# Create io_uart3 +-create com.arc.hardware.dfss.io_uart3.1_0 System.CPUisle.ARCv2EM.io_uart3 + +# io_uart3 --- Command line option for EIA extension component 'io_uart3'. +-io_uart3 true + +# io_uart3_fifo_mode --- Set the UART FIFO mode +-io_uart3_fifo_mode 16 + +# io_uart3_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. +-io_uart3_dma_support Aux-Based + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_creg_mst0 --- com.arc.hardware.dfss.io_creg_mst0.1_0 ######## + +# Create io_creg_mst0 +-create com.arc.hardware.dfss.io_creg_mst0.1_0 System.CPUisle.ARCv2EM.io_creg_mst0 + +# io_creg_mst0 --- Command line option for EIA extension component 'io_creg_mst0'. +-io_creg_mst0 true + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## io_creg_slv0 --- com.arc.hardware.dfss.io_creg_slv0.1_0 ######## + +# Create io_creg_slv0 +-create com.arc.hardware.dfss.io_creg_slv0.1_0 System.CPUisle.ARCv2EM.io_creg_slv0 + +# io_creg_slv0 --- Command line option for EIA extension component 'io_creg_slv0'. +-io_creg_slv0 true + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## subsys_bcr --- com.arc.hardware.dfss.subsys_bcr.1_0 ######## + +# Create subsys_bcr +-create com.arc.hardware.dfss.subsys_bcr.1_0 System.CPUisle.ARCv2EM.subsys_bcr + +# assign_xpubit --- +# +# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. +#

+# By default an extension is not assigned a bit in this register. This means the extension is always available. +#

+# If you wish to assign an XPU bit number, select this option. +# +# +-assign_xpubit false + +# xpubit --- +# The XPU bit number for this extension. +# +-xpubit 0 + + +######## subsys_infra --- com.arc.hardware.dfss.subsys_infra.1_0 ######## + +# Create subsys_infra +-create com.arc.hardware.dfss.subsys_infra.1_0 System.subsys_infra + +# subsys_infra --- Command line option for EIA glue logic. +-subsys_infra true + +# internal_interrupt --- Connect the IO interrupts internally +-internal_interrupt true + +# internal_dma_handshake --- Connect the DMA handshake signals internally +-internal_dma_handshake true + + +######## ARConnect --- com.arc.hardware.ARConnect.1_0 ######## + +# Create ARConnect +-create com.arc.hardware.ARConnect.1_0 System.ARConnect + +# mcip_def_div2ref --- This specifies the clock division factor at reset. It is used for mss clock controller to generate ARConnect clock, and the value N means ARConnect is running at (1/N) x ref_clk. +-mcip_def_div2ref 1 + +# mcip_has_intrpt --- This specifies whether the Inter-core Interrupt Unit exists +-mcip_has_intrpt false + +# mcip_has_sema --- This specifies whether the Inter-core Semaphore Unit exists +-mcip_has_sema false + +# mcip_sema_num --- This specifies the number of semaphores in the Inter-core Semaphores Unit +-mcip_sema_num 16 + +# mcip_has_msg_sram --- This specifies whether the Inter-core Message Unit exists +-mcip_has_msg_sram false + +# mcip_msg_sram_size --- This specifies the bytes of SRAM in the Inter-core Message Unit +-mcip_msg_sram_size 512 + +# mcip_msg_1cycle --- True: The access path to message SRAM is 1 clock cycle; False: The access path to message SRAM 1.5 cycles. Note: The 1.5 cycles path uses the clock negative edge for SRAM, but can achieve higher frequency. 
No performance difference caused by the value of this option +-mcip_msg_1cycle false + +# mcip_has_debug --- This specifies whether the Inter-core Debug Unit exists +-mcip_has_debug false + +# mcip_has_grtc --- This specifies whether the Global Real-Time Counter Unit exists +-mcip_has_grtc false + +# mcip_has_pmu --- This specifies whether the external Power Management Unit exists +-mcip_has_pmu true + +# mcip_power_domains --- This specifies whether the ARConnect Power Domain Management Unit exists +-mcip_power_domains true + +# mcip_llm_size --- This specifies the KBytes of SRAM in the Low Latency Memory Unit +-mcip_llm_size 32 + +# mcip_llm_base --- This specifies the default memory region of Low Latency Memory Unit +-mcip_llm_base 2 + +# mcip_llm_ecc --- This specifies the ECC mode of SRAM in Low Latency Memory Unit. none = No checking; parity = Parity only; SECDED = single-error correction and double-error detection (SECDED) +-mcip_llm_ecc SECDED + +# mcip_idu_cirq_num --- This specifies the number of common interrupts supported by IDU +-mcip_idu_cirq_num 4 + +# mcip_bsu_dbw --- This specifies the data bus width of Bus Slave Unit +-mcip_bsu_dbw 64 + +# mcip_bsu_type --- This specifies the bus protocol of Bus Slave Unit +-mcip_bsu_type AXI + + +]]> + + + + + + + + + + + + + + + ICCM0 + + GROUP: { + /* _SDA_BASE_ computed implicitly */ + .sdata?: {} + .sbss?: {} + * (DATA): {} + * (BSS): {} + .stack ALIGN(4) SIZE(DEFINED _STACKSIZE?_STACKSIZE:32768): {} + .heap? ALIGN(4) SIZE(DEFINED _HEAPSIZE?_HEAPSIZE:0): {} + } > DCCM + GROUP: { + .Xdata? : {} + } > XCCM + GROUP: { + .Ydata? 
: {} + } > YCCM + GROUP BIND(0x0): { + .vectors (TEXT) SIZE(DEFINED _IVTSIZE?_IVTSIZE:684): {} = FILL(0xa5a5a5a5,4) + } + } + +]]> + + + + + + 0x07, sub_opcode => 0x1E , latency_cycles => 8) + +// User extension instruction - dsp_sin +extern long dsp_sin(long); +#pragma intrinsic(dsp_sin, opcode => 0x07, sub_opcode => 0x1F , latency_cycles => 8) + +// User extension instruction - dsp_tan +extern long dsp_tan(long); +#pragma intrinsic(dsp_tan, opcode => 0x07, sub_opcode => 0x22 , latency_cycles => 11) + +// User extension instruction - dsp_acos +extern long dsp_acos(long); +#pragma intrinsic(dsp_acos, opcode => 0x07, sub_opcode => 0x23 , latency_cycles => 31) + +// User extension instruction - dsp_asin +extern long dsp_asin(long); +#pragma intrinsic(dsp_asin, opcode => 0x07, sub_opcode => 0x24 , latency_cycles => 31) + +// User extension instruction - dsp_atan +extern long dsp_atan(long); +#pragma intrinsic(dsp_atan, opcode => 0x07, sub_opcode => 0x25 , latency_cycles => 13) + +// User extension instruction - dsp_sqrt +extern long dsp_sqrt(long); +#pragma intrinsic(dsp_sqrt, opcode => 0x07, sub_opcode => 0x20 , latency_cycles => 31) + +// User extension instruction - dsp_sqrt15 +extern long dsp_sqrt15(long); +#pragma intrinsic(dsp_sqrt15, opcode => 0x07, sub_opcode => 0x21 , latency_cycles => 15) + +#define APEX_COM_ARC_HARDWARE_DFSS_DSP_TRIG_PRESENT 1 +#define APEX_COM_ARC_HARDWARE_DFSS_IO_GPIO_4B0_IO_GPIO_4B0_PRESENT 1 + +// User extension aux register io_gpio_4b0_debounce +#define AR_IO_GPIO_4B0_DEBOUNCE 0x80017c48 +#pragma Aux_register(0x80017c48, name=>"io_gpio_4b0_debounce") + +// User extension aux register io_gpio_4b0_clken +#define AR_IO_GPIO_4B0_CLKEN 0x80017c80 +#pragma Aux_register(0x80017c80, name=>"io_gpio_4b0_clken") + +// User extension aux register io_gpio_4b0_swporta_dr +#define AR_IO_GPIO_4B0_SWPORTA_DR 0x80017c00 +#pragma Aux_register(0x80017c00, name=>"io_gpio_4b0_swporta_dr") + +// User extension aux register io_gpio_4b0_swporta_ddr +#define 
AR_IO_GPIO_4B0_SWPORTA_DDR 0x80017c04 +#pragma Aux_register(0x80017c04, name=>"io_gpio_4b0_swporta_ddr") + +// User extension aux register io_gpio_4b0_inten +#define AR_IO_GPIO_4B0_INTEN 0x80017c30 +#pragma Aux_register(0x80017c30, name=>"io_gpio_4b0_inten") + +// User extension aux register io_gpio_4b0_intmask +#define AR_IO_GPIO_4B0_INTMASK 0x80017c34 +#pragma Aux_register(0x80017c34, name=>"io_gpio_4b0_intmask") + +// User extension aux register io_gpio_4b0_inttype_level +#define AR_IO_GPIO_4B0_INTTYPE_LEVEL 0x80017c38 +#pragma Aux_register(0x80017c38, name=>"io_gpio_4b0_inttype_level") + +// User extension aux register io_gpio_4b0_int_polarity +#define AR_IO_GPIO_4B0_INT_POLARITY 0x80017c3c +#pragma Aux_register(0x80017c3c, name=>"io_gpio_4b0_int_polarity") + +// User extension aux register io_gpio_4b0_intstatus +#define AR_IO_GPIO_4B0_INTSTATUS 0x80017c40 +#pragma Aux_register(0x80017c40, name=>"io_gpio_4b0_intstatus") + +// User extension aux register io_gpio_4b0_raw_intstatus +#define AR_IO_GPIO_4B0_RAW_INTSTATUS 0x80017c44 +#pragma Aux_register(0x80017c44, name=>"io_gpio_4b0_raw_intstatus") + +// User extension aux register io_gpio_4b0_porta_eoi +#define AR_IO_GPIO_4B0_PORTA_EOI 0x80017c4c +#pragma Aux_register(0x80017c4c, name=>"io_gpio_4b0_porta_eoi") + +// User extension aux register io_gpio_4b0_ext_porta +#define AR_IO_GPIO_4B0_EXT_PORTA 0x80017c50 +#pragma Aux_register(0x80017c50, name=>"io_gpio_4b0_ext_porta") + +// User extension aux register io_gpio_4b0_ls_sync +#define AR_IO_GPIO_4B0_LS_SYNC 0x80017c60 +#pragma Aux_register(0x80017c60, name=>"io_gpio_4b0_ls_sync") + +// User extension aux register io_gpio_4b0_int_bothedge +#define AR_IO_GPIO_4B0_INT_BOTHEDGE 0x80017c68 +#pragma Aux_register(0x80017c68, name=>"io_gpio_4b0_int_bothedge") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_GPIO_4B1_IO_GPIO_4B1_PRESENT 1 + +// User extension aux register io_gpio_4b1_debounce +#define AR_IO_GPIO_4B1_DEBOUNCE 0x80017d48 +#pragma Aux_register(0x80017d48, 
name=>"io_gpio_4b1_debounce") + +// User extension aux register io_gpio_4b1_clken +#define AR_IO_GPIO_4B1_CLKEN 0x80017d80 +#pragma Aux_register(0x80017d80, name=>"io_gpio_4b1_clken") + +// User extension aux register io_gpio_4b1_swporta_dr +#define AR_IO_GPIO_4B1_SWPORTA_DR 0x80017d00 +#pragma Aux_register(0x80017d00, name=>"io_gpio_4b1_swporta_dr") + +// User extension aux register io_gpio_4b1_swporta_ddr +#define AR_IO_GPIO_4B1_SWPORTA_DDR 0x80017d04 +#pragma Aux_register(0x80017d04, name=>"io_gpio_4b1_swporta_ddr") + +// User extension aux register io_gpio_4b1_inten +#define AR_IO_GPIO_4B1_INTEN 0x80017d30 +#pragma Aux_register(0x80017d30, name=>"io_gpio_4b1_inten") + +// User extension aux register io_gpio_4b1_intmask +#define AR_IO_GPIO_4B1_INTMASK 0x80017d34 +#pragma Aux_register(0x80017d34, name=>"io_gpio_4b1_intmask") + +// User extension aux register io_gpio_4b1_inttype_level +#define AR_IO_GPIO_4B1_INTTYPE_LEVEL 0x80017d38 +#pragma Aux_register(0x80017d38, name=>"io_gpio_4b1_inttype_level") + +// User extension aux register io_gpio_4b1_int_polarity +#define AR_IO_GPIO_4B1_INT_POLARITY 0x80017d3c +#pragma Aux_register(0x80017d3c, name=>"io_gpio_4b1_int_polarity") + +// User extension aux register io_gpio_4b1_intstatus +#define AR_IO_GPIO_4B1_INTSTATUS 0x80017d40 +#pragma Aux_register(0x80017d40, name=>"io_gpio_4b1_intstatus") + +// User extension aux register io_gpio_4b1_raw_intstatus +#define AR_IO_GPIO_4B1_RAW_INTSTATUS 0x80017d44 +#pragma Aux_register(0x80017d44, name=>"io_gpio_4b1_raw_intstatus") + +// User extension aux register io_gpio_4b1_porta_eoi +#define AR_IO_GPIO_4B1_PORTA_EOI 0x80017d4c +#pragma Aux_register(0x80017d4c, name=>"io_gpio_4b1_porta_eoi") + +// User extension aux register io_gpio_4b1_ext_porta +#define AR_IO_GPIO_4B1_EXT_PORTA 0x80017d50 +#pragma Aux_register(0x80017d50, name=>"io_gpio_4b1_ext_porta") + +// User extension aux register io_gpio_4b1_ls_sync +#define AR_IO_GPIO_4B1_LS_SYNC 0x80017d60 +#pragma Aux_register(0x80017d60, 
name=>"io_gpio_4b1_ls_sync") + +// User extension aux register io_gpio_4b1_int_bothedge +#define AR_IO_GPIO_4B1_INT_BOTHEDGE 0x80017d68 +#pragma Aux_register(0x80017d68, name=>"io_gpio_4b1_int_bothedge") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_GPIO_4B2_IO_GPIO_4B2_PRESENT 1 + +// User extension aux register io_gpio_4b2_debounce +#define AR_IO_GPIO_4B2_DEBOUNCE 0x80017e48 +#pragma Aux_register(0x80017e48, name=>"io_gpio_4b2_debounce") + +// User extension aux register io_gpio_4b2_clken +#define AR_IO_GPIO_4B2_CLKEN 0x80017e80 +#pragma Aux_register(0x80017e80, name=>"io_gpio_4b2_clken") + +// User extension aux register io_gpio_4b2_swporta_dr +#define AR_IO_GPIO_4B2_SWPORTA_DR 0x80017e00 +#pragma Aux_register(0x80017e00, name=>"io_gpio_4b2_swporta_dr") + +// User extension aux register io_gpio_4b2_swporta_ddr +#define AR_IO_GPIO_4B2_SWPORTA_DDR 0x80017e04 +#pragma Aux_register(0x80017e04, name=>"io_gpio_4b2_swporta_ddr") + +// User extension aux register io_gpio_4b2_inten +#define AR_IO_GPIO_4B2_INTEN 0x80017e30 +#pragma Aux_register(0x80017e30, name=>"io_gpio_4b2_inten") + +// User extension aux register io_gpio_4b2_intmask +#define AR_IO_GPIO_4B2_INTMASK 0x80017e34 +#pragma Aux_register(0x80017e34, name=>"io_gpio_4b2_intmask") + +// User extension aux register io_gpio_4b2_inttype_level +#define AR_IO_GPIO_4B2_INTTYPE_LEVEL 0x80017e38 +#pragma Aux_register(0x80017e38, name=>"io_gpio_4b2_inttype_level") + +// User extension aux register io_gpio_4b2_int_polarity +#define AR_IO_GPIO_4B2_INT_POLARITY 0x80017e3c +#pragma Aux_register(0x80017e3c, name=>"io_gpio_4b2_int_polarity") + +// User extension aux register io_gpio_4b2_intstatus +#define AR_IO_GPIO_4B2_INTSTATUS 0x80017e40 +#pragma Aux_register(0x80017e40, name=>"io_gpio_4b2_intstatus") + +// User extension aux register io_gpio_4b2_raw_intstatus +#define AR_IO_GPIO_4B2_RAW_INTSTATUS 0x80017e44 +#pragma Aux_register(0x80017e44, name=>"io_gpio_4b2_raw_intstatus") + +// User extension aux register io_gpio_4b2_porta_eoi 
+#define AR_IO_GPIO_4B2_PORTA_EOI 0x80017e4c +#pragma Aux_register(0x80017e4c, name=>"io_gpio_4b2_porta_eoi") + +// User extension aux register io_gpio_4b2_ext_porta +#define AR_IO_GPIO_4B2_EXT_PORTA 0x80017e50 +#pragma Aux_register(0x80017e50, name=>"io_gpio_4b2_ext_porta") + +// User extension aux register io_gpio_4b2_ls_sync +#define AR_IO_GPIO_4B2_LS_SYNC 0x80017e60 +#pragma Aux_register(0x80017e60, name=>"io_gpio_4b2_ls_sync") + +// User extension aux register io_gpio_4b2_int_bothedge +#define AR_IO_GPIO_4B2_INT_BOTHEDGE 0x80017e68 +#pragma Aux_register(0x80017e68, name=>"io_gpio_4b2_int_bothedge") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_GPIO_8B0_IO_GPIO_8B0_PRESENT 1 + +// User extension aux register io_gpio_8b0_debounce +#define AR_IO_GPIO_8B0_DEBOUNCE 0x80017848 +#pragma Aux_register(0x80017848, name=>"io_gpio_8b0_debounce") + +// User extension aux register io_gpio_8b0_clken +#define AR_IO_GPIO_8B0_CLKEN 0x80017880 +#pragma Aux_register(0x80017880, name=>"io_gpio_8b0_clken") + +// User extension aux register io_gpio_8b0_swporta_dr +#define AR_IO_GPIO_8B0_SWPORTA_DR 0x80017800 +#pragma Aux_register(0x80017800, name=>"io_gpio_8b0_swporta_dr") + +// User extension aux register io_gpio_8b0_swporta_ddr +#define AR_IO_GPIO_8B0_SWPORTA_DDR 0x80017804 +#pragma Aux_register(0x80017804, name=>"io_gpio_8b0_swporta_ddr") + +// User extension aux register io_gpio_8b0_inten +#define AR_IO_GPIO_8B0_INTEN 0x80017830 +#pragma Aux_register(0x80017830, name=>"io_gpio_8b0_inten") + +// User extension aux register io_gpio_8b0_intmask +#define AR_IO_GPIO_8B0_INTMASK 0x80017834 +#pragma Aux_register(0x80017834, name=>"io_gpio_8b0_intmask") + +// User extension aux register io_gpio_8b0_inttype_level +#define AR_IO_GPIO_8B0_INTTYPE_LEVEL 0x80017838 +#pragma Aux_register(0x80017838, name=>"io_gpio_8b0_inttype_level") + +// User extension aux register io_gpio_8b0_int_polarity +#define AR_IO_GPIO_8B0_INT_POLARITY 0x8001783c +#pragma Aux_register(0x8001783c, 
name=>"io_gpio_8b0_int_polarity") + +// User extension aux register io_gpio_8b0_intstatus +#define AR_IO_GPIO_8B0_INTSTATUS 0x80017840 +#pragma Aux_register(0x80017840, name=>"io_gpio_8b0_intstatus") + +// User extension aux register io_gpio_8b0_raw_intstatus +#define AR_IO_GPIO_8B0_RAW_INTSTATUS 0x80017844 +#pragma Aux_register(0x80017844, name=>"io_gpio_8b0_raw_intstatus") + +// User extension aux register io_gpio_8b0_porta_eoi +#define AR_IO_GPIO_8B0_PORTA_EOI 0x8001784c +#pragma Aux_register(0x8001784c, name=>"io_gpio_8b0_porta_eoi") + +// User extension aux register io_gpio_8b0_ext_porta +#define AR_IO_GPIO_8B0_EXT_PORTA 0x80017850 +#pragma Aux_register(0x80017850, name=>"io_gpio_8b0_ext_porta") + +// User extension aux register io_gpio_8b0_ls_sync +#define AR_IO_GPIO_8B0_LS_SYNC 0x80017860 +#pragma Aux_register(0x80017860, name=>"io_gpio_8b0_ls_sync") + +// User extension aux register io_gpio_8b0_int_bothedge +#define AR_IO_GPIO_8B0_INT_BOTHEDGE 0x80017868 +#pragma Aux_register(0x80017868, name=>"io_gpio_8b0_int_bothedge") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_GPIO_8B1_IO_GPIO_8B1_PRESENT 1 + +// User extension aux register io_gpio_8b1_debounce +#define AR_IO_GPIO_8B1_DEBOUNCE 0x80017948 +#pragma Aux_register(0x80017948, name=>"io_gpio_8b1_debounce") + +// User extension aux register io_gpio_8b1_clken +#define AR_IO_GPIO_8B1_CLKEN 0x80017980 +#pragma Aux_register(0x80017980, name=>"io_gpio_8b1_clken") + +// User extension aux register io_gpio_8b1_swporta_dr +#define AR_IO_GPIO_8B1_SWPORTA_DR 0x80017900 +#pragma Aux_register(0x80017900, name=>"io_gpio_8b1_swporta_dr") + +// User extension aux register io_gpio_8b1_swporta_ddr +#define AR_IO_GPIO_8B1_SWPORTA_DDR 0x80017904 +#pragma Aux_register(0x80017904, name=>"io_gpio_8b1_swporta_ddr") + +// User extension aux register io_gpio_8b1_inten +#define AR_IO_GPIO_8B1_INTEN 0x80017930 +#pragma Aux_register(0x80017930, name=>"io_gpio_8b1_inten") + +// User extension aux register io_gpio_8b1_intmask +#define 
AR_IO_GPIO_8B1_INTMASK 0x80017934 +#pragma Aux_register(0x80017934, name=>"io_gpio_8b1_intmask") + +// User extension aux register io_gpio_8b1_inttype_level +#define AR_IO_GPIO_8B1_INTTYPE_LEVEL 0x80017938 +#pragma Aux_register(0x80017938, name=>"io_gpio_8b1_inttype_level") + +// User extension aux register io_gpio_8b1_int_polarity +#define AR_IO_GPIO_8B1_INT_POLARITY 0x8001793c +#pragma Aux_register(0x8001793c, name=>"io_gpio_8b1_int_polarity") + +// User extension aux register io_gpio_8b1_intstatus +#define AR_IO_GPIO_8B1_INTSTATUS 0x80017940 +#pragma Aux_register(0x80017940, name=>"io_gpio_8b1_intstatus") + +// User extension aux register io_gpio_8b1_raw_intstatus +#define AR_IO_GPIO_8B1_RAW_INTSTATUS 0x80017944 +#pragma Aux_register(0x80017944, name=>"io_gpio_8b1_raw_intstatus") + +// User extension aux register io_gpio_8b1_porta_eoi +#define AR_IO_GPIO_8B1_PORTA_EOI 0x8001794c +#pragma Aux_register(0x8001794c, name=>"io_gpio_8b1_porta_eoi") + +// User extension aux register io_gpio_8b1_ext_porta +#define AR_IO_GPIO_8B1_EXT_PORTA 0x80017950 +#pragma Aux_register(0x80017950, name=>"io_gpio_8b1_ext_porta") + +// User extension aux register io_gpio_8b1_ls_sync +#define AR_IO_GPIO_8B1_LS_SYNC 0x80017960 +#pragma Aux_register(0x80017960, name=>"io_gpio_8b1_ls_sync") + +// User extension aux register io_gpio_8b1_int_bothedge +#define AR_IO_GPIO_8B1_INT_BOTHEDGE 0x80017968 +#pragma Aux_register(0x80017968, name=>"io_gpio_8b1_int_bothedge") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_GPIO_8B2_IO_GPIO_8B2_PRESENT 1 + +// User extension aux register io_gpio_8b2_debounce +#define AR_IO_GPIO_8B2_DEBOUNCE 0x80017a48 +#pragma Aux_register(0x80017a48, name=>"io_gpio_8b2_debounce") + +// User extension aux register io_gpio_8b2_clken +#define AR_IO_GPIO_8B2_CLKEN 0x80017a80 +#pragma Aux_register(0x80017a80, name=>"io_gpio_8b2_clken") + +// User extension aux register io_gpio_8b2_swporta_dr +#define AR_IO_GPIO_8B2_SWPORTA_DR 0x80017a00 +#pragma Aux_register(0x80017a00, 
name=>"io_gpio_8b2_swporta_dr") + +// User extension aux register io_gpio_8b2_swporta_ddr +#define AR_IO_GPIO_8B2_SWPORTA_DDR 0x80017a04 +#pragma Aux_register(0x80017a04, name=>"io_gpio_8b2_swporta_ddr") + +// User extension aux register io_gpio_8b2_inten +#define AR_IO_GPIO_8B2_INTEN 0x80017a30 +#pragma Aux_register(0x80017a30, name=>"io_gpio_8b2_inten") + +// User extension aux register io_gpio_8b2_intmask +#define AR_IO_GPIO_8B2_INTMASK 0x80017a34 +#pragma Aux_register(0x80017a34, name=>"io_gpio_8b2_intmask") + +// User extension aux register io_gpio_8b2_inttype_level +#define AR_IO_GPIO_8B2_INTTYPE_LEVEL 0x80017a38 +#pragma Aux_register(0x80017a38, name=>"io_gpio_8b2_inttype_level") + +// User extension aux register io_gpio_8b2_int_polarity +#define AR_IO_GPIO_8B2_INT_POLARITY 0x80017a3c +#pragma Aux_register(0x80017a3c, name=>"io_gpio_8b2_int_polarity") + +// User extension aux register io_gpio_8b2_intstatus +#define AR_IO_GPIO_8B2_INTSTATUS 0x80017a40 +#pragma Aux_register(0x80017a40, name=>"io_gpio_8b2_intstatus") + +// User extension aux register io_gpio_8b2_raw_intstatus +#define AR_IO_GPIO_8B2_RAW_INTSTATUS 0x80017a44 +#pragma Aux_register(0x80017a44, name=>"io_gpio_8b2_raw_intstatus") + +// User extension aux register io_gpio_8b2_porta_eoi +#define AR_IO_GPIO_8B2_PORTA_EOI 0x80017a4c +#pragma Aux_register(0x80017a4c, name=>"io_gpio_8b2_porta_eoi") + +// User extension aux register io_gpio_8b2_ext_porta +#define AR_IO_GPIO_8B2_EXT_PORTA 0x80017a50 +#pragma Aux_register(0x80017a50, name=>"io_gpio_8b2_ext_porta") + +// User extension aux register io_gpio_8b2_ls_sync +#define AR_IO_GPIO_8B2_LS_SYNC 0x80017a60 +#pragma Aux_register(0x80017a60, name=>"io_gpio_8b2_ls_sync") + +// User extension aux register io_gpio_8b2_int_bothedge +#define AR_IO_GPIO_8B2_INT_BOTHEDGE 0x80017a68 +#pragma Aux_register(0x80017a68, name=>"io_gpio_8b2_int_bothedge") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_GPIO_8B3_IO_GPIO_8B3_PRESENT 1 + +// User extension aux register 
io_gpio_8b3_debounce +#define AR_IO_GPIO_8B3_DEBOUNCE 0x80017b48 +#pragma Aux_register(0x80017b48, name=>"io_gpio_8b3_debounce") + +// User extension aux register io_gpio_8b3_clken +#define AR_IO_GPIO_8B3_CLKEN 0x80017b80 +#pragma Aux_register(0x80017b80, name=>"io_gpio_8b3_clken") + +// User extension aux register io_gpio_8b3_swporta_dr +#define AR_IO_GPIO_8B3_SWPORTA_DR 0x80017b00 +#pragma Aux_register(0x80017b00, name=>"io_gpio_8b3_swporta_dr") + +// User extension aux register io_gpio_8b3_swporta_ddr +#define AR_IO_GPIO_8B3_SWPORTA_DDR 0x80017b04 +#pragma Aux_register(0x80017b04, name=>"io_gpio_8b3_swporta_ddr") + +// User extension aux register io_gpio_8b3_inten +#define AR_IO_GPIO_8B3_INTEN 0x80017b30 +#pragma Aux_register(0x80017b30, name=>"io_gpio_8b3_inten") + +// User extension aux register io_gpio_8b3_intmask +#define AR_IO_GPIO_8B3_INTMASK 0x80017b34 +#pragma Aux_register(0x80017b34, name=>"io_gpio_8b3_intmask") + +// User extension aux register io_gpio_8b3_inttype_level +#define AR_IO_GPIO_8B3_INTTYPE_LEVEL 0x80017b38 +#pragma Aux_register(0x80017b38, name=>"io_gpio_8b3_inttype_level") + +// User extension aux register io_gpio_8b3_int_polarity +#define AR_IO_GPIO_8B3_INT_POLARITY 0x80017b3c +#pragma Aux_register(0x80017b3c, name=>"io_gpio_8b3_int_polarity") + +// User extension aux register io_gpio_8b3_intstatus +#define AR_IO_GPIO_8B3_INTSTATUS 0x80017b40 +#pragma Aux_register(0x80017b40, name=>"io_gpio_8b3_intstatus") + +// User extension aux register io_gpio_8b3_raw_intstatus +#define AR_IO_GPIO_8B3_RAW_INTSTATUS 0x80017b44 +#pragma Aux_register(0x80017b44, name=>"io_gpio_8b3_raw_intstatus") + +// User extension aux register io_gpio_8b3_porta_eoi +#define AR_IO_GPIO_8B3_PORTA_EOI 0x80017b4c +#pragma Aux_register(0x80017b4c, name=>"io_gpio_8b3_porta_eoi") + +// User extension aux register io_gpio_8b3_ext_porta +#define AR_IO_GPIO_8B3_EXT_PORTA 0x80017b50 +#pragma Aux_register(0x80017b50, name=>"io_gpio_8b3_ext_porta") + +// User extension aux 
register io_gpio_8b3_ls_sync +#define AR_IO_GPIO_8B3_LS_SYNC 0x80017b60 +#pragma Aux_register(0x80017b60, name=>"io_gpio_8b3_ls_sync") + +// User extension aux register io_gpio_8b3_int_bothedge +#define AR_IO_GPIO_8B3_INT_BOTHEDGE 0x80017b68 +#pragma Aux_register(0x80017b68, name=>"io_gpio_8b3_int_bothedge") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_I2C_MST0_IO_I2C_MST0_PRESENT 1 + +// User extension aux register io_i2c_mst0_clken +#define AR_IO_I2C_MST0_CLKEN 0x800120c0 +#pragma Aux_register(0x800120c0, name=>"io_i2c_mst0_clken") + +// User extension aux register io_i2c_mst0_con +#define AR_IO_I2C_MST0_CON 0x80012000 +#pragma Aux_register(0x80012000, name=>"io_i2c_mst0_con") + +// User extension aux register io_i2c_mst0_tar +#define AR_IO_I2C_MST0_TAR 0x80012004 +#pragma Aux_register(0x80012004, name=>"io_i2c_mst0_tar") + +// User extension aux register io_i2c_mst0_data_cmd +#define AR_IO_I2C_MST0_DATA_CMD 0x80012010 +#pragma Aux_register(0x80012010, name=>"io_i2c_mst0_data_cmd") + +// User extension aux register io_i2c_mst0_ss_scl_hcnt +#define AR_IO_I2C_MST0_SS_SCL_HCNT 0x80012014 +#pragma Aux_register(0x80012014, name=>"io_i2c_mst0_ss_scl_hcnt") + +// User extension aux register io_i2c_mst0_ss_scl_lcnt +#define AR_IO_I2C_MST0_SS_SCL_LCNT 0x80012018 +#pragma Aux_register(0x80012018, name=>"io_i2c_mst0_ss_scl_lcnt") + +// User extension aux register io_i2c_mst0_fs_scl_hcnt +#define AR_IO_I2C_MST0_FS_SCL_HCNT 0x8001201c +#pragma Aux_register(0x8001201c, name=>"io_i2c_mst0_fs_scl_hcnt") + +// User extension aux register io_i2c_mst0_fs_scl_lcnt +#define AR_IO_I2C_MST0_FS_SCL_LCNT 0x80012020 +#pragma Aux_register(0x80012020, name=>"io_i2c_mst0_fs_scl_lcnt") + +// User extension aux register io_i2c_mst0_intr_stat +#define AR_IO_I2C_MST0_INTR_STAT 0x8001202c +#pragma Aux_register(0x8001202c, name=>"io_i2c_mst0_intr_stat") + +// User extension aux register io_i2c_mst0_intr_mask +#define AR_IO_I2C_MST0_INTR_MASK 0x80012030 +#pragma Aux_register(0x80012030, 
name=>"io_i2c_mst0_intr_mask") + +// User extension aux register io_i2c_mst0_raw_intr_stat +#define AR_IO_I2C_MST0_RAW_INTR_STAT 0x80012034 +#pragma Aux_register(0x80012034, name=>"io_i2c_mst0_raw_intr_stat") + +// User extension aux register io_i2c_mst0_rx_tl +#define AR_IO_I2C_MST0_RX_TL 0x80012038 +#pragma Aux_register(0x80012038, name=>"io_i2c_mst0_rx_tl") + +// User extension aux register io_i2c_mst0_tx_tl +#define AR_IO_I2C_MST0_TX_TL 0x8001203c +#pragma Aux_register(0x8001203c, name=>"io_i2c_mst0_tx_tl") + +// User extension aux register io_i2c_mst0_clr_intr +#define AR_IO_I2C_MST0_CLR_INTR 0x80012040 +#pragma Aux_register(0x80012040, name=>"io_i2c_mst0_clr_intr") + +// User extension aux register io_i2c_mst0_clr_rx_under +#define AR_IO_I2C_MST0_CLR_RX_UNDER 0x80012044 +#pragma Aux_register(0x80012044, name=>"io_i2c_mst0_clr_rx_under") + +// User extension aux register io_i2c_mst0_clr_rx_over +#define AR_IO_I2C_MST0_CLR_RX_OVER 0x80012048 +#pragma Aux_register(0x80012048, name=>"io_i2c_mst0_clr_rx_over") + +// User extension aux register io_i2c_mst0_clr_tx_over +#define AR_IO_I2C_MST0_CLR_TX_OVER 0x8001204c +#pragma Aux_register(0x8001204c, name=>"io_i2c_mst0_clr_tx_over") + +// User extension aux register io_i2c_mst0_clr_tx_abrt +#define AR_IO_I2C_MST0_CLR_TX_ABRT 0x80012054 +#pragma Aux_register(0x80012054, name=>"io_i2c_mst0_clr_tx_abrt") + +// User extension aux register io_i2c_mst0_clr_activity +#define AR_IO_I2C_MST0_CLR_ACTIVITY 0x8001205c +#pragma Aux_register(0x8001205c, name=>"io_i2c_mst0_clr_activity") + +// User extension aux register io_i2c_mst0_clr_stop_det +#define AR_IO_I2C_MST0_CLR_STOP_DET 0x80012060 +#pragma Aux_register(0x80012060, name=>"io_i2c_mst0_clr_stop_det") + +// User extension aux register io_i2c_mst0_clr_start_det +#define AR_IO_I2C_MST0_CLR_START_DET 0x80012064 +#pragma Aux_register(0x80012064, name=>"io_i2c_mst0_clr_start_det") + +// User extension aux register io_i2c_mst0_enable +#define AR_IO_I2C_MST0_ENABLE 0x8001206c 
+#pragma Aux_register(0x8001206c, name=>"io_i2c_mst0_enable") + +// User extension aux register io_i2c_mst0_status +#define AR_IO_I2C_MST0_STATUS 0x80012070 +#pragma Aux_register(0x80012070, name=>"io_i2c_mst0_status") + +// User extension aux register io_i2c_mst0_txflr +#define AR_IO_I2C_MST0_TXFLR 0x80012074 +#pragma Aux_register(0x80012074, name=>"io_i2c_mst0_txflr") + +// User extension aux register io_i2c_mst0_rxflr +#define AR_IO_I2C_MST0_RXFLR 0x80012078 +#pragma Aux_register(0x80012078, name=>"io_i2c_mst0_rxflr") + +// User extension aux register io_i2c_mst0_sda_hold +#define AR_IO_I2C_MST0_SDA_HOLD 0x8001207c +#pragma Aux_register(0x8001207c, name=>"io_i2c_mst0_sda_hold") + +// User extension aux register io_i2c_mst0_tx_abrt_source +#define AR_IO_I2C_MST0_TX_ABRT_SOURCE 0x80012080 +#pragma Aux_register(0x80012080, name=>"io_i2c_mst0_tx_abrt_source") + +// User extension aux register io_i2c_mst0_enable_status +#define AR_IO_I2C_MST0_ENABLE_STATUS 0x8001209c +#pragma Aux_register(0x8001209c, name=>"io_i2c_mst0_enable_status") + +// User extension aux register io_i2c_mst0_fs_spklen +#define AR_IO_I2C_MST0_FS_SPKLEN 0x800120a0 +#pragma Aux_register(0x800120a0, name=>"io_i2c_mst0_fs_spklen") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_I2C_MST1_IO_I2C_MST1_PRESENT 1 + +// User extension aux register io_i2c_mst1_clken +#define AR_IO_I2C_MST1_CLKEN 0x800121c0 +#pragma Aux_register(0x800121c0, name=>"io_i2c_mst1_clken") + +// User extension aux register io_i2c_mst1_con +#define AR_IO_I2C_MST1_CON 0x80012100 +#pragma Aux_register(0x80012100, name=>"io_i2c_mst1_con") + +// User extension aux register io_i2c_mst1_tar +#define AR_IO_I2C_MST1_TAR 0x80012104 +#pragma Aux_register(0x80012104, name=>"io_i2c_mst1_tar") + +// User extension aux register io_i2c_mst1_data_cmd +#define AR_IO_I2C_MST1_DATA_CMD 0x80012110 +#pragma Aux_register(0x80012110, name=>"io_i2c_mst1_data_cmd") + +// User extension aux register io_i2c_mst1_ss_scl_hcnt +#define AR_IO_I2C_MST1_SS_SCL_HCNT 
0x80012114 +#pragma Aux_register(0x80012114, name=>"io_i2c_mst1_ss_scl_hcnt") + +// User extension aux register io_i2c_mst1_ss_scl_lcnt +#define AR_IO_I2C_MST1_SS_SCL_LCNT 0x80012118 +#pragma Aux_register(0x80012118, name=>"io_i2c_mst1_ss_scl_lcnt") + +// User extension aux register io_i2c_mst1_fs_scl_hcnt +#define AR_IO_I2C_MST1_FS_SCL_HCNT 0x8001211c +#pragma Aux_register(0x8001211c, name=>"io_i2c_mst1_fs_scl_hcnt") + +// User extension aux register io_i2c_mst1_fs_scl_lcnt +#define AR_IO_I2C_MST1_FS_SCL_LCNT 0x80012120 +#pragma Aux_register(0x80012120, name=>"io_i2c_mst1_fs_scl_lcnt") + +// User extension aux register io_i2c_mst1_intr_stat +#define AR_IO_I2C_MST1_INTR_STAT 0x8001212c +#pragma Aux_register(0x8001212c, name=>"io_i2c_mst1_intr_stat") + +// User extension aux register io_i2c_mst1_intr_mask +#define AR_IO_I2C_MST1_INTR_MASK 0x80012130 +#pragma Aux_register(0x80012130, name=>"io_i2c_mst1_intr_mask") + +// User extension aux register io_i2c_mst1_raw_intr_stat +#define AR_IO_I2C_MST1_RAW_INTR_STAT 0x80012134 +#pragma Aux_register(0x80012134, name=>"io_i2c_mst1_raw_intr_stat") + +// User extension aux register io_i2c_mst1_rx_tl +#define AR_IO_I2C_MST1_RX_TL 0x80012138 +#pragma Aux_register(0x80012138, name=>"io_i2c_mst1_rx_tl") + +// User extension aux register io_i2c_mst1_tx_tl +#define AR_IO_I2C_MST1_TX_TL 0x8001213c +#pragma Aux_register(0x8001213c, name=>"io_i2c_mst1_tx_tl") + +// User extension aux register io_i2c_mst1_clr_intr +#define AR_IO_I2C_MST1_CLR_INTR 0x80012140 +#pragma Aux_register(0x80012140, name=>"io_i2c_mst1_clr_intr") + +// User extension aux register io_i2c_mst1_clr_rx_under +#define AR_IO_I2C_MST1_CLR_RX_UNDER 0x80012144 +#pragma Aux_register(0x80012144, name=>"io_i2c_mst1_clr_rx_under") + +// User extension aux register io_i2c_mst1_clr_rx_over +#define AR_IO_I2C_MST1_CLR_RX_OVER 0x80012148 +#pragma Aux_register(0x80012148, name=>"io_i2c_mst1_clr_rx_over") + +// User extension aux register io_i2c_mst1_clr_tx_over +#define 
AR_IO_I2C_MST1_CLR_TX_OVER 0x8001214c +#pragma Aux_register(0x8001214c, name=>"io_i2c_mst1_clr_tx_over") + +// User extension aux register io_i2c_mst1_clr_tx_abrt +#define AR_IO_I2C_MST1_CLR_TX_ABRT 0x80012154 +#pragma Aux_register(0x80012154, name=>"io_i2c_mst1_clr_tx_abrt") + +// User extension aux register io_i2c_mst1_clr_activity +#define AR_IO_I2C_MST1_CLR_ACTIVITY 0x8001215c +#pragma Aux_register(0x8001215c, name=>"io_i2c_mst1_clr_activity") + +// User extension aux register io_i2c_mst1_clr_stop_det +#define AR_IO_I2C_MST1_CLR_STOP_DET 0x80012160 +#pragma Aux_register(0x80012160, name=>"io_i2c_mst1_clr_stop_det") + +// User extension aux register io_i2c_mst1_clr_start_det +#define AR_IO_I2C_MST1_CLR_START_DET 0x80012164 +#pragma Aux_register(0x80012164, name=>"io_i2c_mst1_clr_start_det") + +// User extension aux register io_i2c_mst1_enable +#define AR_IO_I2C_MST1_ENABLE 0x8001216c +#pragma Aux_register(0x8001216c, name=>"io_i2c_mst1_enable") + +// User extension aux register io_i2c_mst1_status +#define AR_IO_I2C_MST1_STATUS 0x80012170 +#pragma Aux_register(0x80012170, name=>"io_i2c_mst1_status") + +// User extension aux register io_i2c_mst1_txflr +#define AR_IO_I2C_MST1_TXFLR 0x80012174 +#pragma Aux_register(0x80012174, name=>"io_i2c_mst1_txflr") + +// User extension aux register io_i2c_mst1_rxflr +#define AR_IO_I2C_MST1_RXFLR 0x80012178 +#pragma Aux_register(0x80012178, name=>"io_i2c_mst1_rxflr") + +// User extension aux register io_i2c_mst1_sda_hold +#define AR_IO_I2C_MST1_SDA_HOLD 0x8001217c +#pragma Aux_register(0x8001217c, name=>"io_i2c_mst1_sda_hold") + +// User extension aux register io_i2c_mst1_tx_abrt_source +#define AR_IO_I2C_MST1_TX_ABRT_SOURCE 0x80012180 +#pragma Aux_register(0x80012180, name=>"io_i2c_mst1_tx_abrt_source") + +// User extension aux register io_i2c_mst1_enable_status +#define AR_IO_I2C_MST1_ENABLE_STATUS 0x8001219c +#pragma Aux_register(0x8001219c, name=>"io_i2c_mst1_enable_status") + +// User extension aux register 
io_i2c_mst1_fs_spklen +#define AR_IO_I2C_MST1_FS_SPKLEN 0x800121a0 +#pragma Aux_register(0x800121a0, name=>"io_i2c_mst1_fs_spklen") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_I2C_MST2_IO_I2C_MST2_PRESENT 1 + +// User extension aux register io_i2c_mst2_clken +#define AR_IO_I2C_MST2_CLKEN 0x800122c0 +#pragma Aux_register(0x800122c0, name=>"io_i2c_mst2_clken") + +// User extension aux register io_i2c_mst2_con +#define AR_IO_I2C_MST2_CON 0x80012200 +#pragma Aux_register(0x80012200, name=>"io_i2c_mst2_con") + +// User extension aux register io_i2c_mst2_tar +#define AR_IO_I2C_MST2_TAR 0x80012204 +#pragma Aux_register(0x80012204, name=>"io_i2c_mst2_tar") + +// User extension aux register io_i2c_mst2_data_cmd +#define AR_IO_I2C_MST2_DATA_CMD 0x80012210 +#pragma Aux_register(0x80012210, name=>"io_i2c_mst2_data_cmd") + +// User extension aux register io_i2c_mst2_ss_scl_hcnt +#define AR_IO_I2C_MST2_SS_SCL_HCNT 0x80012214 +#pragma Aux_register(0x80012214, name=>"io_i2c_mst2_ss_scl_hcnt") + +// User extension aux register io_i2c_mst2_ss_scl_lcnt +#define AR_IO_I2C_MST2_SS_SCL_LCNT 0x80012218 +#pragma Aux_register(0x80012218, name=>"io_i2c_mst2_ss_scl_lcnt") + +// User extension aux register io_i2c_mst2_fs_scl_hcnt +#define AR_IO_I2C_MST2_FS_SCL_HCNT 0x8001221c +#pragma Aux_register(0x8001221c, name=>"io_i2c_mst2_fs_scl_hcnt") + +// User extension aux register io_i2c_mst2_fs_scl_lcnt +#define AR_IO_I2C_MST2_FS_SCL_LCNT 0x80012220 +#pragma Aux_register(0x80012220, name=>"io_i2c_mst2_fs_scl_lcnt") + +// User extension aux register io_i2c_mst2_intr_stat +#define AR_IO_I2C_MST2_INTR_STAT 0x8001222c +#pragma Aux_register(0x8001222c, name=>"io_i2c_mst2_intr_stat") + +// User extension aux register io_i2c_mst2_intr_mask +#define AR_IO_I2C_MST2_INTR_MASK 0x80012230 +#pragma Aux_register(0x80012230, name=>"io_i2c_mst2_intr_mask") + +// User extension aux register io_i2c_mst2_raw_intr_stat +#define AR_IO_I2C_MST2_RAW_INTR_STAT 0x80012234 +#pragma Aux_register(0x80012234, 
name=>"io_i2c_mst2_raw_intr_stat") + +// User extension aux register io_i2c_mst2_rx_tl +#define AR_IO_I2C_MST2_RX_TL 0x80012238 +#pragma Aux_register(0x80012238, name=>"io_i2c_mst2_rx_tl") + +// User extension aux register io_i2c_mst2_tx_tl +#define AR_IO_I2C_MST2_TX_TL 0x8001223c +#pragma Aux_register(0x8001223c, name=>"io_i2c_mst2_tx_tl") + +// User extension aux register io_i2c_mst2_clr_intr +#define AR_IO_I2C_MST2_CLR_INTR 0x80012240 +#pragma Aux_register(0x80012240, name=>"io_i2c_mst2_clr_intr") + +// User extension aux register io_i2c_mst2_clr_rx_under +#define AR_IO_I2C_MST2_CLR_RX_UNDER 0x80012244 +#pragma Aux_register(0x80012244, name=>"io_i2c_mst2_clr_rx_under") + +// User extension aux register io_i2c_mst2_clr_rx_over +#define AR_IO_I2C_MST2_CLR_RX_OVER 0x80012248 +#pragma Aux_register(0x80012248, name=>"io_i2c_mst2_clr_rx_over") + +// User extension aux register io_i2c_mst2_clr_tx_over +#define AR_IO_I2C_MST2_CLR_TX_OVER 0x8001224c +#pragma Aux_register(0x8001224c, name=>"io_i2c_mst2_clr_tx_over") + +// User extension aux register io_i2c_mst2_clr_tx_abrt +#define AR_IO_I2C_MST2_CLR_TX_ABRT 0x80012254 +#pragma Aux_register(0x80012254, name=>"io_i2c_mst2_clr_tx_abrt") + +// User extension aux register io_i2c_mst2_clr_activity +#define AR_IO_I2C_MST2_CLR_ACTIVITY 0x8001225c +#pragma Aux_register(0x8001225c, name=>"io_i2c_mst2_clr_activity") + +// User extension aux register io_i2c_mst2_clr_stop_det +#define AR_IO_I2C_MST2_CLR_STOP_DET 0x80012260 +#pragma Aux_register(0x80012260, name=>"io_i2c_mst2_clr_stop_det") + +// User extension aux register io_i2c_mst2_clr_start_det +#define AR_IO_I2C_MST2_CLR_START_DET 0x80012264 +#pragma Aux_register(0x80012264, name=>"io_i2c_mst2_clr_start_det") + +// User extension aux register io_i2c_mst2_enable +#define AR_IO_I2C_MST2_ENABLE 0x8001226c +#pragma Aux_register(0x8001226c, name=>"io_i2c_mst2_enable") + +// User extension aux register io_i2c_mst2_status +#define AR_IO_I2C_MST2_STATUS 0x80012270 +#pragma 
Aux_register(0x80012270, name=>"io_i2c_mst2_status") + +// User extension aux register io_i2c_mst2_txflr +#define AR_IO_I2C_MST2_TXFLR 0x80012274 +#pragma Aux_register(0x80012274, name=>"io_i2c_mst2_txflr") + +// User extension aux register io_i2c_mst2_rxflr +#define AR_IO_I2C_MST2_RXFLR 0x80012278 +#pragma Aux_register(0x80012278, name=>"io_i2c_mst2_rxflr") + +// User extension aux register io_i2c_mst2_sda_hold +#define AR_IO_I2C_MST2_SDA_HOLD 0x8001227c +#pragma Aux_register(0x8001227c, name=>"io_i2c_mst2_sda_hold") + +// User extension aux register io_i2c_mst2_tx_abrt_source +#define AR_IO_I2C_MST2_TX_ABRT_SOURCE 0x80012280 +#pragma Aux_register(0x80012280, name=>"io_i2c_mst2_tx_abrt_source") + +// User extension aux register io_i2c_mst2_enable_status +#define AR_IO_I2C_MST2_ENABLE_STATUS 0x8001229c +#pragma Aux_register(0x8001229c, name=>"io_i2c_mst2_enable_status") + +// User extension aux register io_i2c_mst2_fs_spklen +#define AR_IO_I2C_MST2_FS_SPKLEN 0x800122a0 +#pragma Aux_register(0x800122a0, name=>"io_i2c_mst2_fs_spklen") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_SPI_MST0_IO_SPI_MST0_PRESENT 1 + +// User extension aux register io_spi_mst0_ctrlr0 +#define AR_IO_SPI_MST0_CTRLR0 0x80010000 +#pragma Aux_register(0x80010000, name=>"io_spi_mst0_ctrlr0") + +// User extension aux register io_spi_mst0_ctrlr1 +#define AR_IO_SPI_MST0_CTRLR1 0x80010001 +#pragma Aux_register(0x80010001, name=>"io_spi_mst0_ctrlr1") + +// User extension aux register io_spi_mst0_spien +#define AR_IO_SPI_MST0_SPIEN 0x80010002 +#pragma Aux_register(0x80010002, name=>"io_spi_mst0_spien") + +// User extension aux register io_spi_mst0_ser +#define AR_IO_SPI_MST0_SER 0x80010004 +#pragma Aux_register(0x80010004, name=>"io_spi_mst0_ser") + +// User extension aux register io_spi_mst0_baudr +#define AR_IO_SPI_MST0_BAUDR 0x80010005 +#pragma Aux_register(0x80010005, name=>"io_spi_mst0_baudr") + +// User extension aux register io_spi_mst0_txftlr +#define AR_IO_SPI_MST0_TXFTLR 0x80010006 +#pragma 
Aux_register(0x80010006, name=>"io_spi_mst0_txftlr") + +// User extension aux register io_spi_mst0_rxftlr +#define AR_IO_SPI_MST0_RXFTLR 0x80010007 +#pragma Aux_register(0x80010007, name=>"io_spi_mst0_rxftlr") + +// User extension aux register io_spi_mst0_txflr +#define AR_IO_SPI_MST0_TXFLR 0x80010008 +#pragma Aux_register(0x80010008, name=>"io_spi_mst0_txflr") + +// User extension aux register io_spi_mst0_rxflr +#define AR_IO_SPI_MST0_RXFLR 0x80010009 +#pragma Aux_register(0x80010009, name=>"io_spi_mst0_rxflr") + +// User extension aux register io_spi_mst0_sr +#define AR_IO_SPI_MST0_SR 0x8001000a +#pragma Aux_register(0x8001000a, name=>"io_spi_mst0_sr") + +// User extension aux register io_spi_mst0_imr +#define AR_IO_SPI_MST0_IMR 0x8001000b +#pragma Aux_register(0x8001000b, name=>"io_spi_mst0_imr") + +// User extension aux register io_spi_mst0_isr +#define AR_IO_SPI_MST0_ISR 0x8001000c +#pragma Aux_register(0x8001000c, name=>"io_spi_mst0_isr") + +// User extension aux register io_spi_mst0_risr +#define AR_IO_SPI_MST0_RISR 0x8001000d +#pragma Aux_register(0x8001000d, name=>"io_spi_mst0_risr") + +// User extension aux register io_spi_mst0_txoicr +#define AR_IO_SPI_MST0_TXOICR 0x8001000e +#pragma Aux_register(0x8001000e, name=>"io_spi_mst0_txoicr") + +// User extension aux register io_spi_mst0_rxoicr +#define AR_IO_SPI_MST0_RXOICR 0x8001000f +#pragma Aux_register(0x8001000f, name=>"io_spi_mst0_rxoicr") + +// User extension aux register io_spi_mst0_rxuicr +#define AR_IO_SPI_MST0_RXUICR 0x80010010 +#pragma Aux_register(0x80010010, name=>"io_spi_mst0_rxuicr") + +// User extension aux register io_spi_mst0_icr +#define AR_IO_SPI_MST0_ICR 0x80010012 +#pragma Aux_register(0x80010012, name=>"io_spi_mst0_icr") + +// User extension aux register io_spi_mst0_clken +#define AR_IO_SPI_MST0_CLKEN 0x80010016 +#pragma Aux_register(0x80010016, name=>"io_spi_mst0_clken") + +// User extension aux register io_spi_mst0_dr +#define AR_IO_SPI_MST0_DR 0x80010018 +#pragma 
Aux_register(0x80010018, name=>"io_spi_mst0_dr") + +// User extension aux register io_spi_mst0_rx_sample_dly +#define AR_IO_SPI_MST0_RX_SAMPLE_DLY 0x8001003c +#pragma Aux_register(0x8001003c, name=>"io_spi_mst0_rx_sample_dly") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_SPI_MST1_IO_SPI_MST1_PRESENT 1 + +// User extension aux register io_spi_mst1_ctrlr0 +#define AR_IO_SPI_MST1_CTRLR0 0x80010100 +#pragma Aux_register(0x80010100, name=>"io_spi_mst1_ctrlr0") + +// User extension aux register io_spi_mst1_ctrlr1 +#define AR_IO_SPI_MST1_CTRLR1 0x80010101 +#pragma Aux_register(0x80010101, name=>"io_spi_mst1_ctrlr1") + +// User extension aux register io_spi_mst1_spien +#define AR_IO_SPI_MST1_SPIEN 0x80010102 +#pragma Aux_register(0x80010102, name=>"io_spi_mst1_spien") + +// User extension aux register io_spi_mst1_ser +#define AR_IO_SPI_MST1_SER 0x80010104 +#pragma Aux_register(0x80010104, name=>"io_spi_mst1_ser") + +// User extension aux register io_spi_mst1_baudr +#define AR_IO_SPI_MST1_BAUDR 0x80010105 +#pragma Aux_register(0x80010105, name=>"io_spi_mst1_baudr") + +// User extension aux register io_spi_mst1_txftlr +#define AR_IO_SPI_MST1_TXFTLR 0x80010106 +#pragma Aux_register(0x80010106, name=>"io_spi_mst1_txftlr") + +// User extension aux register io_spi_mst1_rxftlr +#define AR_IO_SPI_MST1_RXFTLR 0x80010107 +#pragma Aux_register(0x80010107, name=>"io_spi_mst1_rxftlr") + +// User extension aux register io_spi_mst1_txflr +#define AR_IO_SPI_MST1_TXFLR 0x80010108 +#pragma Aux_register(0x80010108, name=>"io_spi_mst1_txflr") + +// User extension aux register io_spi_mst1_rxflr +#define AR_IO_SPI_MST1_RXFLR 0x80010109 +#pragma Aux_register(0x80010109, name=>"io_spi_mst1_rxflr") + +// User extension aux register io_spi_mst1_sr +#define AR_IO_SPI_MST1_SR 0x8001010a +#pragma Aux_register(0x8001010a, name=>"io_spi_mst1_sr") + +// User extension aux register io_spi_mst1_imr +#define AR_IO_SPI_MST1_IMR 0x8001010b +#pragma Aux_register(0x8001010b, name=>"io_spi_mst1_imr") + +// User 
extension aux register io_spi_mst1_isr +#define AR_IO_SPI_MST1_ISR 0x8001010c +#pragma Aux_register(0x8001010c, name=>"io_spi_mst1_isr") + +// User extension aux register io_spi_mst1_risr +#define AR_IO_SPI_MST1_RISR 0x8001010d +#pragma Aux_register(0x8001010d, name=>"io_spi_mst1_risr") + +// User extension aux register io_spi_mst1_txoicr +#define AR_IO_SPI_MST1_TXOICR 0x8001010e +#pragma Aux_register(0x8001010e, name=>"io_spi_mst1_txoicr") + +// User extension aux register io_spi_mst1_rxoicr +#define AR_IO_SPI_MST1_RXOICR 0x8001010f +#pragma Aux_register(0x8001010f, name=>"io_spi_mst1_rxoicr") + +// User extension aux register io_spi_mst1_rxuicr +#define AR_IO_SPI_MST1_RXUICR 0x80010110 +#pragma Aux_register(0x80010110, name=>"io_spi_mst1_rxuicr") + +// User extension aux register io_spi_mst1_icr +#define AR_IO_SPI_MST1_ICR 0x80010112 +#pragma Aux_register(0x80010112, name=>"io_spi_mst1_icr") + +// User extension aux register io_spi_mst1_clken +#define AR_IO_SPI_MST1_CLKEN 0x80010116 +#pragma Aux_register(0x80010116, name=>"io_spi_mst1_clken") + +// User extension aux register io_spi_mst1_dr +#define AR_IO_SPI_MST1_DR 0x80010118 +#pragma Aux_register(0x80010118, name=>"io_spi_mst1_dr") + +// User extension aux register io_spi_mst1_rx_sample_dly +#define AR_IO_SPI_MST1_RX_SAMPLE_DLY 0x8001013c +#pragma Aux_register(0x8001013c, name=>"io_spi_mst1_rx_sample_dly") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_SPI_MST2_IO_SPI_MST2_PRESENT 1 + +// User extension aux register io_spi_mst2_ctrlr0 +#define AR_IO_SPI_MST2_CTRLR0 0x80010200 +#pragma Aux_register(0x80010200, name=>"io_spi_mst2_ctrlr0") + +// User extension aux register io_spi_mst2_ctrlr1 +#define AR_IO_SPI_MST2_CTRLR1 0x80010201 +#pragma Aux_register(0x80010201, name=>"io_spi_mst2_ctrlr1") + +// User extension aux register io_spi_mst2_spien +#define AR_IO_SPI_MST2_SPIEN 0x80010202 +#pragma Aux_register(0x80010202, name=>"io_spi_mst2_spien") + +// User extension aux register io_spi_mst2_ser +#define AR_IO_SPI_MST2_SER 
0x80010204 +#pragma Aux_register(0x80010204, name=>"io_spi_mst2_ser") + +// User extension aux register io_spi_mst2_baudr +#define AR_IO_SPI_MST2_BAUDR 0x80010205 +#pragma Aux_register(0x80010205, name=>"io_spi_mst2_baudr") + +// User extension aux register io_spi_mst2_txftlr +#define AR_IO_SPI_MST2_TXFTLR 0x80010206 +#pragma Aux_register(0x80010206, name=>"io_spi_mst2_txftlr") + +// User extension aux register io_spi_mst2_rxftlr +#define AR_IO_SPI_MST2_RXFTLR 0x80010207 +#pragma Aux_register(0x80010207, name=>"io_spi_mst2_rxftlr") + +// User extension aux register io_spi_mst2_txflr +#define AR_IO_SPI_MST2_TXFLR 0x80010208 +#pragma Aux_register(0x80010208, name=>"io_spi_mst2_txflr") + +// User extension aux register io_spi_mst2_rxflr +#define AR_IO_SPI_MST2_RXFLR 0x80010209 +#pragma Aux_register(0x80010209, name=>"io_spi_mst2_rxflr") + +// User extension aux register io_spi_mst2_sr +#define AR_IO_SPI_MST2_SR 0x8001020a +#pragma Aux_register(0x8001020a, name=>"io_spi_mst2_sr") + +// User extension aux register io_spi_mst2_imr +#define AR_IO_SPI_MST2_IMR 0x8001020b +#pragma Aux_register(0x8001020b, name=>"io_spi_mst2_imr") + +// User extension aux register io_spi_mst2_isr +#define AR_IO_SPI_MST2_ISR 0x8001020c +#pragma Aux_register(0x8001020c, name=>"io_spi_mst2_isr") + +// User extension aux register io_spi_mst2_risr +#define AR_IO_SPI_MST2_RISR 0x8001020d +#pragma Aux_register(0x8001020d, name=>"io_spi_mst2_risr") + +// User extension aux register io_spi_mst2_txoicr +#define AR_IO_SPI_MST2_TXOICR 0x8001020e +#pragma Aux_register(0x8001020e, name=>"io_spi_mst2_txoicr") + +// User extension aux register io_spi_mst2_rxoicr +#define AR_IO_SPI_MST2_RXOICR 0x8001020f +#pragma Aux_register(0x8001020f, name=>"io_spi_mst2_rxoicr") + +// User extension aux register io_spi_mst2_rxuicr +#define AR_IO_SPI_MST2_RXUICR 0x80010210 +#pragma Aux_register(0x80010210, name=>"io_spi_mst2_rxuicr") + +// User extension aux register io_spi_mst2_icr +#define AR_IO_SPI_MST2_ICR 0x80010212 
+#pragma Aux_register(0x80010212, name=>"io_spi_mst2_icr") + +// User extension aux register io_spi_mst2_clken +#define AR_IO_SPI_MST2_CLKEN 0x80010216 +#pragma Aux_register(0x80010216, name=>"io_spi_mst2_clken") + +// User extension aux register io_spi_mst2_dr +#define AR_IO_SPI_MST2_DR 0x80010218 +#pragma Aux_register(0x80010218, name=>"io_spi_mst2_dr") + +// User extension aux register io_spi_mst2_rx_sample_dly +#define AR_IO_SPI_MST2_RX_SAMPLE_DLY 0x8001023c +#pragma Aux_register(0x8001023c, name=>"io_spi_mst2_rx_sample_dly") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_SPI_SLV0_IO_SPI_SLV0_PRESENT 1 + +// User extension aux register io_spi_slv0_ctrlr0 +#define AR_IO_SPI_SLV0_CTRLR0 0x80011000 +#pragma Aux_register(0x80011000, name=>"io_spi_slv0_ctrlr0") + +// User extension aux register io_spi_slv0_spien +#define AR_IO_SPI_SLV0_SPIEN 0x80011002 +#pragma Aux_register(0x80011002, name=>"io_spi_slv0_spien") + +// User extension aux register io_spi_slv0_txftlr +#define AR_IO_SPI_SLV0_TXFTLR 0x80011006 +#pragma Aux_register(0x80011006, name=>"io_spi_slv0_txftlr") + +// User extension aux register io_spi_slv0_rxftlr +#define AR_IO_SPI_SLV0_RXFTLR 0x80011007 +#pragma Aux_register(0x80011007, name=>"io_spi_slv0_rxftlr") + +// User extension aux register io_spi_slv0_txflr +#define AR_IO_SPI_SLV0_TXFLR 0x80011008 +#pragma Aux_register(0x80011008, name=>"io_spi_slv0_txflr") + +// User extension aux register io_spi_slv0_rxflr +#define AR_IO_SPI_SLV0_RXFLR 0x80011009 +#pragma Aux_register(0x80011009, name=>"io_spi_slv0_rxflr") + +// User extension aux register io_spi_slv0_sr +#define AR_IO_SPI_SLV0_SR 0x8001100a +#pragma Aux_register(0x8001100a, name=>"io_spi_slv0_sr") + +// User extension aux register io_spi_slv0_imr +#define AR_IO_SPI_SLV0_IMR 0x8001100b +#pragma Aux_register(0x8001100b, name=>"io_spi_slv0_imr") + +// User extension aux register io_spi_slv0_isr +#define AR_IO_SPI_SLV0_ISR 0x8001100c +#pragma Aux_register(0x8001100c, name=>"io_spi_slv0_isr") + +// User 
extension aux register io_spi_slv0_risr +#define AR_IO_SPI_SLV0_RISR 0x8001100d +#pragma Aux_register(0x8001100d, name=>"io_spi_slv0_risr") + +// User extension aux register io_spi_slv0_txoicr +#define AR_IO_SPI_SLV0_TXOICR 0x8001100e +#pragma Aux_register(0x8001100e, name=>"io_spi_slv0_txoicr") + +// User extension aux register io_spi_slv0_rxoicr +#define AR_IO_SPI_SLV0_RXOICR 0x8001100f +#pragma Aux_register(0x8001100f, name=>"io_spi_slv0_rxoicr") + +// User extension aux register io_spi_slv0_rxuicr +#define AR_IO_SPI_SLV0_RXUICR 0x80011010 +#pragma Aux_register(0x80011010, name=>"io_spi_slv0_rxuicr") + +// User extension aux register io_spi_slv0_icr +#define AR_IO_SPI_SLV0_ICR 0x80011012 +#pragma Aux_register(0x80011012, name=>"io_spi_slv0_icr") + +// User extension aux register io_spi_slv0_clken +#define AR_IO_SPI_SLV0_CLKEN 0x80011016 +#pragma Aux_register(0x80011016, name=>"io_spi_slv0_clken") + +// User extension aux register io_spi_slv0_dr +#define AR_IO_SPI_SLV0_DR 0x80011018 +#pragma Aux_register(0x80011018, name=>"io_spi_slv0_dr") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_UART0_IO_UART0_PRESENT 1 + +// User extension aux register io_uart0_clken +#define AR_IO_UART0_CLKEN 0x800140c0 +#pragma Aux_register(0x800140c0, name=>"io_uart0_clken") + +// User extension aux register io_uart0_rbr_thr_dll +#define AR_IO_UART0_RBR_THR_DLL 0x80014000 +#pragma Aux_register(0x80014000, name=>"io_uart0_rbr_thr_dll") + +// User extension aux register io_uart0_ier_dlh +#define AR_IO_UART0_IER_DLH 0x80014004 +#pragma Aux_register(0x80014004, name=>"io_uart0_ier_dlh") + +// User extension aux register io_uart0_iir_fcr +#define AR_IO_UART0_IIR_FCR 0x80014008 +#pragma Aux_register(0x80014008, name=>"io_uart0_iir_fcr") + +// User extension aux register io_uart0_lcr +#define AR_IO_UART0_LCR 0x8001400c +#pragma Aux_register(0x8001400c, name=>"io_uart0_lcr") + +// User extension aux register io_uart0_mcr +#define AR_IO_UART0_MCR 0x80014010 +#pragma Aux_register(0x80014010, 
name=>"io_uart0_mcr") + +// User extension aux register io_uart0_lsr +#define AR_IO_UART0_LSR 0x80014014 +#pragma Aux_register(0x80014014, name=>"io_uart0_lsr") + +// User extension aux register io_uart0_msr +#define AR_IO_UART0_MSR 0x80014018 +#pragma Aux_register(0x80014018, name=>"io_uart0_msr") + +// User extension aux register io_uart0_usr +#define AR_IO_UART0_USR 0x8001407c +#pragma Aux_register(0x8001407c, name=>"io_uart0_usr") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_UART1_IO_UART1_PRESENT 1 + +// User extension aux register io_uart1_clken +#define AR_IO_UART1_CLKEN 0x800141c0 +#pragma Aux_register(0x800141c0, name=>"io_uart1_clken") + +// User extension aux register io_uart1_rbr_thr_dll +#define AR_IO_UART1_RBR_THR_DLL 0x80014100 +#pragma Aux_register(0x80014100, name=>"io_uart1_rbr_thr_dll") + +// User extension aux register io_uart1_ier_dlh +#define AR_IO_UART1_IER_DLH 0x80014104 +#pragma Aux_register(0x80014104, name=>"io_uart1_ier_dlh") + +// User extension aux register io_uart1_iir_fcr +#define AR_IO_UART1_IIR_FCR 0x80014108 +#pragma Aux_register(0x80014108, name=>"io_uart1_iir_fcr") + +// User extension aux register io_uart1_lcr +#define AR_IO_UART1_LCR 0x8001410c +#pragma Aux_register(0x8001410c, name=>"io_uart1_lcr") + +// User extension aux register io_uart1_mcr +#define AR_IO_UART1_MCR 0x80014110 +#pragma Aux_register(0x80014110, name=>"io_uart1_mcr") + +// User extension aux register io_uart1_lsr +#define AR_IO_UART1_LSR 0x80014114 +#pragma Aux_register(0x80014114, name=>"io_uart1_lsr") + +// User extension aux register io_uart1_msr +#define AR_IO_UART1_MSR 0x80014118 +#pragma Aux_register(0x80014118, name=>"io_uart1_msr") + +// User extension aux register io_uart1_usr +#define AR_IO_UART1_USR 0x8001417c +#pragma Aux_register(0x8001417c, name=>"io_uart1_usr") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_UART2_IO_UART2_PRESENT 1 + +// User extension aux register io_uart2_clken +#define AR_IO_UART2_CLKEN 0x800142c0 +#pragma Aux_register(0x800142c0, 
name=>"io_uart2_clken") + +// User extension aux register io_uart2_rbr_thr_dll +#define AR_IO_UART2_RBR_THR_DLL 0x80014200 +#pragma Aux_register(0x80014200, name=>"io_uart2_rbr_thr_dll") + +// User extension aux register io_uart2_ier_dlh +#define AR_IO_UART2_IER_DLH 0x80014204 +#pragma Aux_register(0x80014204, name=>"io_uart2_ier_dlh") + +// User extension aux register io_uart2_iir_fcr +#define AR_IO_UART2_IIR_FCR 0x80014208 +#pragma Aux_register(0x80014208, name=>"io_uart2_iir_fcr") + +// User extension aux register io_uart2_lcr +#define AR_IO_UART2_LCR 0x8001420c +#pragma Aux_register(0x8001420c, name=>"io_uart2_lcr") + +// User extension aux register io_uart2_mcr +#define AR_IO_UART2_MCR 0x80014210 +#pragma Aux_register(0x80014210, name=>"io_uart2_mcr") + +// User extension aux register io_uart2_lsr +#define AR_IO_UART2_LSR 0x80014214 +#pragma Aux_register(0x80014214, name=>"io_uart2_lsr") + +// User extension aux register io_uart2_msr +#define AR_IO_UART2_MSR 0x80014218 +#pragma Aux_register(0x80014218, name=>"io_uart2_msr") + +// User extension aux register io_uart2_usr +#define AR_IO_UART2_USR 0x8001427c +#pragma Aux_register(0x8001427c, name=>"io_uart2_usr") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_UART3_IO_UART3_PRESENT 1 + +// User extension aux register io_uart3_clken +#define AR_IO_UART3_CLKEN 0x800143c0 +#pragma Aux_register(0x800143c0, name=>"io_uart3_clken") + +// User extension aux register io_uart3_rbr_thr_dll +#define AR_IO_UART3_RBR_THR_DLL 0x80014300 +#pragma Aux_register(0x80014300, name=>"io_uart3_rbr_thr_dll") + +// User extension aux register io_uart3_ier_dlh +#define AR_IO_UART3_IER_DLH 0x80014304 +#pragma Aux_register(0x80014304, name=>"io_uart3_ier_dlh") + +// User extension aux register io_uart3_iir_fcr +#define AR_IO_UART3_IIR_FCR 0x80014308 +#pragma Aux_register(0x80014308, name=>"io_uart3_iir_fcr") + +// User extension aux register io_uart3_lcr +#define AR_IO_UART3_LCR 0x8001430c +#pragma Aux_register(0x8001430c, name=>"io_uart3_lcr") + 
+// User extension aux register io_uart3_mcr +#define AR_IO_UART3_MCR 0x80014310 +#pragma Aux_register(0x80014310, name=>"io_uart3_mcr") + +// User extension aux register io_uart3_lsr +#define AR_IO_UART3_LSR 0x80014314 +#pragma Aux_register(0x80014314, name=>"io_uart3_lsr") + +// User extension aux register io_uart3_msr +#define AR_IO_UART3_MSR 0x80014318 +#pragma Aux_register(0x80014318, name=>"io_uart3_msr") + +// User extension aux register io_uart3_usr +#define AR_IO_UART3_USR 0x8001437c +#pragma Aux_register(0x8001437c, name=>"io_uart3_usr") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_CREG_MST0_IO_CREG_MST0_PRESENT 1 + +// User extension aux register io_creg_mst0_ctrl +#define AR_IO_CREG_MST0_CTRL 0x80018000 +#pragma Aux_register(0x80018000, name=>"io_creg_mst0_ctrl") +#define APEX_COM_ARC_HARDWARE_DFSS_IO_CREG_SLV0_IO_CREG_SLV0_PRESENT 1 + +// User extension aux register io_creg_slv0_obsr +#define AR_IO_CREG_SLV0_OBSR 0x80018080 +#pragma Aux_register(0x80018080, name=>"io_creg_slv0_obsr") +#define APEX_COM_ARC_HARDWARE_DFSS_SUBSYS_BCR_SUBSYS_BCR_PRESENT 1 + +// User extension aux register SUBSYS_BUILD +#define AR_SUBSYS_BUILD 0xf0 +#pragma Aux_register(0xf0, name=>"SUBSYS_BUILD") + +// User extension aux register SUBSYS_DSP_0_BUILD +#define AR_SUBSYS_DSP_0_BUILD 0xa00 +#pragma Aux_register(0xa00, name=>"SUBSYS_DSP_0_BUILD") + +// User extension aux register SUBSYS_DSP_0_CONFIG +#define AR_SUBSYS_DSP_0_CONFIG 0xa02 +#pragma Aux_register(0xa02, name=>"SUBSYS_DSP_0_CONFIG") + +// User extension aux register SUBSYS_IO_0_BUILD +#define AR_SUBSYS_IO_0_BUILD 0xa04 +#pragma Aux_register(0xa04, name=>"SUBSYS_IO_0_BUILD") + +// User extension aux register SUBSYS_IO_1_BUILD +#define AR_SUBSYS_IO_1_BUILD 0xa05 +#pragma Aux_register(0xa05, name=>"SUBSYS_IO_1_BUILD") +#define APEX_COM_ARC_HARDWARE_FLOATING_POINT_UNIT_FPU_PRESENT 1 + +// User extension aux register fpu_build +#define AR_FPU_BUILD 0xc8 +#pragma Aux_register(0xc8, name=>"fpu_build") + +// User extension aux 
register fpu_ctrl +#define AR_FPU_CTRL 0x300 +#pragma Aux_register(0x300, name=>"fpu_ctrl") + +// User extension aux register fpu_status +#define AR_FPU_STATUS 0x301 +#pragma Aux_register(0x301, name=>"fpu_status") + +// User extension instruction fsmadd +extern long fsmadd(long,long); +#pragma intrinsic(fsmadd,opcode=>6,sub_opcode=>5, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fsmsub +extern long fsmsub(long,long); +#pragma intrinsic(fsmsub,opcode=>6,sub_opcode=>6, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fsmul +extern long fsmul(long,long); +#pragma intrinsic(fsmul,opcode=>6,sub_opcode=>0, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fsadd +extern long fsadd(long,long); +#pragma intrinsic(fsadd,opcode=>6,sub_opcode=>1, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fssub +extern long fssub(long,long); +#pragma intrinsic(fssub,opcode=>6,sub_opcode=>2, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fcvt32 +extern long fcvt32(long,long); +#pragma intrinsic(fcvt32,opcode=>6,sub_opcode=>8, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fsdiv +extern long fsdiv(long,long); +#pragma intrinsic(fsdiv,opcode=>6,sub_opcode=>7, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fscmp +extern long fscmp(long,long); +#pragma intrinsic(fscmp,opcode=>6,sub_opcode=>3, effects=>"auxreg=0xc8:is_read:is_written; 
auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fscmp +extern long fscmp_f(long,long); +#pragma intrinsic(fscmp_f,opcode=>6,sub_opcode=>3, set_flags => 1, flags => "zncv", effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fscmpf +extern long fscmpf(long,long); +#pragma intrinsic(fscmpf,opcode=>6,sub_opcode=>4, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fscmpf +extern long fscmpf_f(long,long); +#pragma intrinsic(fscmpf_f,opcode=>6,sub_opcode=>4, set_flags => 1, flags => "zncv", effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") + +// User extension instruction fssqrt +extern long fssqrt(long); +#pragma intrinsic(fssqrt,opcode=>6,sub_opcode=>0, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") +#define APEX_COM_ARC_HARDWARE_FLOATING_POINT_UNIT_FPU_DP_ASSIST_PRESENT 1 + +// User extension aux register aux_dpfp1l +#define AR_AUX_DPFP1L 0x302 +#pragma Aux_register(0x302, name=>"aux_dpfp1l") + +// User extension aux register aux_dpfp1h +#define AR_AUX_DPFP1H 0x303 +#pragma Aux_register(0x303, name=>"aux_dpfp1h") + +// User extension aux register aux_dpfp2l +#define AR_AUX_DPFP2L 0x304 +#pragma Aux_register(0x304, name=>"aux_dpfp2l") + +// User extension aux register aux_dpfp2h +#define AR_AUX_DPFP2H 0x305 +#pragma Aux_register(0x305, name=>"aux_dpfp2h") + +// User extension instruction dmulh11 +extern long dmulh11(long,long); +#pragma intrinsic(dmulh11,opcode=>6,sub_opcode=>48,blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dmulh11 +extern long dmulh11_f(long,long); 
+#pragma intrinsic(dmulh11_f,opcode=>6,sub_opcode=>48, set_flags => 1, flags => "zncv",blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dmulh12 +extern long dmulh12(long,long); +#pragma intrinsic(dmulh12,opcode=>6,sub_opcode=>49,blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dmulh12 +extern long dmulh12_f(long,long); +#pragma intrinsic(dmulh12_f,opcode=>6,sub_opcode=>49, set_flags => 1, flags => "zncv",blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dmulh21 +extern long dmulh21(long,long); +#pragma intrinsic(dmulh21,opcode=>6,sub_opcode=>50,blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dmulh21 +extern long dmulh21_f(long,long); +#pragma intrinsic(dmulh21_f,opcode=>6,sub_opcode=>50, set_flags => 1, flags => "zncv",blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dmulh22 +extern long dmulh22(long,long); +#pragma intrinsic(dmulh22,opcode=>6,sub_opcode=>51,blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dmulh22 +extern long dmulh22_f(long,long); +#pragma intrinsic(dmulh22_f,opcode=>6,sub_opcode=>51, set_flags => 1, flags => "zncv",blocking_cycles=> 7, 
effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction daddh11 +extern long daddh11(long,long); +#pragma intrinsic(daddh11,opcode=>6,sub_opcode=>52,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction daddh11 +extern long daddh11_f(long,long); +#pragma intrinsic(daddh11_f,opcode=>6,sub_opcode=>52, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction daddh12 +extern long daddh12(long,long); +#pragma intrinsic(daddh12,opcode=>6,sub_opcode=>53,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction daddh12 +extern long daddh12_f(long,long); +#pragma intrinsic(daddh12_f,opcode=>6,sub_opcode=>53, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction daddh21 +extern long daddh21(long,long); +#pragma intrinsic(daddh21,opcode=>6,sub_opcode=>54,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction daddh21 +extern long daddh21_f(long,long); +#pragma intrinsic(daddh21_f,opcode=>6,sub_opcode=>54, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// 
User extension instruction daddh22 +extern long daddh22(long,long); +#pragma intrinsic(daddh22,opcode=>6,sub_opcode=>55,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction daddh22 +extern long daddh22_f(long,long); +#pragma intrinsic(daddh22_f,opcode=>6,sub_opcode=>55, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dsubh11 +extern long dsubh11(long,long); +#pragma intrinsic(dsubh11,opcode=>6,sub_opcode=>56,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dsubh11 +extern long dsubh11_f(long,long); +#pragma intrinsic(dsubh11_f,opcode=>6,sub_opcode=>56, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dsubh12 +extern long dsubh12(long,long); +#pragma intrinsic(dsubh12,opcode=>6,sub_opcode=>57,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dsubh12 +extern long dsubh12_f(long,long); +#pragma intrinsic(dsubh12_f,opcode=>6,sub_opcode=>57, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dsubh21 +extern long dsubh21(long,long); +#pragma intrinsic(dsubh21,opcode=>6,sub_opcode=>58,blocking_cycles=> 5, 
effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dsubh21 +extern long dsubh21_f(long,long); +#pragma intrinsic(dsubh21_f,opcode=>6,sub_opcode=>58, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dsubh22 +extern long dsubh22(long,long); +#pragma intrinsic(dsubh22,opcode=>6,sub_opcode=>59,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dsubh22 +extern long dsubh22_f(long,long); +#pragma intrinsic(dsubh22_f,opcode=>6,sub_opcode=>59, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dexcl1 +extern long dexcl1(long,long); +#pragma intrinsic(dexcl1,opcode=>6,sub_opcode=>60, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + +// User extension instruction dexcl2 +extern long dexcl2(long,long); +#pragma intrinsic(dexcl2,opcode=>6,sub_opcode=>61, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") + + +#endif + + +]]> + + + + + diff --git a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc index eb890ef1999..d6b6d604ac7 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc @@ -5,6 +5,16 @@ ifeq ($(TARGET_ARCH), arc) 
AR_TOOL = arac CXX_TOOL = ccac +ifeq ($(TARGET), iotdk) + TCF_FILE = $(PWD)/$(MAKEFILE_DIR)/targets/arc/iotdk/iotdk.tcf + LCF_FILE = $(PWD)/$(MAKEFILE_DIR)/targets/arc/iotdk/iotdk.lcf +endif + +ifeq ($(TARGET), emsdp) + TCF_FILE = $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp/emsdp_em11d_dfss.tcf + LCF_FILE = $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp/emsdp.lcf +endif + ifneq ($(TCF_FILE), ) TARGET = $(basename $(notdir $(TCF_FILE))) else @@ -25,6 +35,11 @@ endif PLATFORM_FLAGS += -tcf_core_config PLATFORM_LDFLAGS = -tcf=$(TCF_FILE_NAME) -Hnocopyr -m -Hldopt=-Coutput=memory.map -default_lcf=$(MAKEFILE_DIR)/targets/arc/memory.lcf +ifneq ($(LCF_FILE), ) + PLATFORM_LDFLAGS += $(notdir $(LCF_FILE)) + THIRD_PARTY_CC_HDRS += $(notdir $(LCF_FILE)) +endif + CXXFLAGS += $(PLATFORM_FLAGS) CXXFLAGS:=$(filter-out -std=c++11,$(CXXFLAGS)) CCFLAGS += $(PLATFORM_FLAGS) From ced5b5bebb526e3e08804f4ccf49b530b9098c31 Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Wed, 11 Mar 2020 14:47:28 +0300 Subject: [PATCH 15/45] Updated LCF for EMSDP and fixes for arc build process --- .../micro/tools/make/download_and_extract.sh | 2 +- .../tools/make/targets/arc/emsdp/emsdp.lcf | 51 ++++++++++++------- .../micro/tools/make/targets/arc_makefile.inc | 2 +- 3 files changed, 35 insertions(+), 20 deletions(-) diff --git a/tensorflow/lite/micro/tools/make/download_and_extract.sh b/tensorflow/lite/micro/tools/make/download_and_extract.sh index 2248031f6d1..4a75b6b24cd 100755 --- a/tensorflow/lite/micro/tools/make/download_and_extract.sh +++ b/tensorflow/lite/micro/tools/make/download_and_extract.sh @@ -90,7 +90,7 @@ patch_cifar10_dataset() { } build_embarc_mli() { - gmake -j 4 -C ${1}/lib/make TCF_FILE=${2} + make -j 4 -C ${1}/lib/make TCF_FILE=${2} } # Main function handling the download, verify, extract, and patch process. 
diff --git a/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp.lcf b/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp.lcf index fc34759d745..d2d1b4220f8 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp.lcf +++ b/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp.lcf @@ -5,43 +5,58 @@ # due to CCM memory wrapping into upper addresses beyond its size MEMORY { - IVT : ORIGIN = 0x00000000, LENGTH = 0x60000000 - ICCM0 : ORIGIN = 0x60000000, LENGTH = 0x00020000 + PSRAM : ORIGIN = 0x10000000, LENGTH = 0x01000000 + SRAM : ORIGIN = 0x20000000, LENGTH = 0x00040000 + IVT : ORIGIN = 0x60000000, LENGTH = 0x400 + ICCM0 : ORIGIN = 0x60000400, LENGTH = (0x00020000 - 0x400) # CCMWRAP0: ORIGIN = 0x60020000, LENGTH = 0x0ffe0000 -# SYSTEM1 : ORIGIN = 0x70000000, LENGTH = 0x10000000 DCCM : ORIGIN = 0x80000000, LENGTH = 0x00020000 # CCMWRAP1: ORIGIN = 0x80020000, LENGTH = 0x0ffe0000 XCCM : ORIGIN = 0x90000000, LENGTH = 0x00004000 # CCMWRAP2: ORIGIN = 0x90004000, LENGTH = 0x0fffc000 YCCM : ORIGIN = 0xa0000000, LENGTH = 0x00004000 # CCMWRAP3: ORIGIN = 0xa0004000, LENGTH = 0x0fffc000 - SYSTEM2 : ORIGIN = 0xb0000000, LENGTH = 0x50000000 } + SECTIONS { - GROUP BLOCK(4): { - .text? : { *('.text$crt*') } - * (TEXT): {} - * (LIT): {} - } > ICCM0 + + GROUP BLOCK(4) : { + .vectors (TEXT) SIZE(DEFINED _IVTSIZE?_IVTSIZE:756): {} = FILL(0xa5a5a5a5,4) + } > IVT GROUP BLOCK(4): { - /* _SDA_BASE_ computed implicitly */ + .text? : { *('.text$crt*') } + * (TEXT): {} + * (LIT): {} + } > ICCM0 + + GROUP BLOCK(4): { + /* _SDA_BASE_ computed implicitly */ .sdata?: {} .sbss?: {} * (DATA): {} * (BSS): {} - .stack ALIGN(4) SIZE(DEFINED _STACKSIZE?_STACKSIZE:65536): {} - .heap? ALIGN(4) SIZE(DEFINED _HEAPSIZE?_HEAPSIZE:0): {} - } > SYSTEM2 + .stack ALIGN(4) SIZE(DEFINED _STACKSIZE?_STACKSIZE:16K): {} + .heap? ALIGN(4) SIZE(DEFINED _HEAPSIZE?_HEAPSIZE:8K): {} + } > DCCM + + GROUP BLOCK(4): { + .rodata_in_data? : {} + } > PSRAM + GROUP BLOCK(4): { .Xdata? 
: {} - } > XCCM + } > XCCM + GROUP BLOCK(4): { .Ydata? : {} - } > YCCM - GROUP BLOCK(4) : { - .vectors (TEXT) SIZE(DEFINED _IVTSIZE?_IVTSIZE:756): {} = FILL(0xa5a5a5a5,4) - } > IVT + } > YCCM + + GROUP BLOCK(4): { + .Zdata? : {} + } > DCCM + + } diff --git a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc index d6b6d604ac7..29ad5f5347a 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc @@ -31,7 +31,7 @@ else TCF_FILE_NAME = $(TCF_FILE) endif - PLATFORM_FLAGS = -tcf=$(TCF_FILE_NAME) -Hnocopyr -O3 -Hpurge -Hcl -fslp-vectorize-aggressive -ffunction-sections -fdata-sections + PLATFORM_FLAGS = -tcf=$(TCF_FILE_NAME) -Hnocopyr -O3 -Hpurge -Hcl -Hcrt_fast_memcpy -Hcrt_fast_memset -fslp-vectorize-aggressive -ffunction-sections -fdata-sections PLATFORM_FLAGS += -tcf_core_config PLATFORM_LDFLAGS = -tcf=$(TCF_FILE_NAME) -Hnocopyr -m -Hldopt=-Coutput=memory.map -default_lcf=$(MAKEFILE_DIR)/targets/arc/memory.lcf From 503f98f88c2d8a7a636ef4ed920e059196ac9b09 Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Mon, 30 Mar 2020 18:08:12 +0300 Subject: [PATCH 16/45] ARC EMSDP board specific debug log --- tensorflow/lite/micro/emsdp/debug_log.cc | 108 ++++++++++++++++++ .../tools/make/targets/arc/emsdp/emsdp.lcf | 36 +++--- 2 files changed, 127 insertions(+), 17 deletions(-) create mode 100644 tensorflow/lite/micro/emsdp/debug_log.cc diff --git a/tensorflow/lite/micro/emsdp/debug_log.cc b/tensorflow/lite/micro/emsdp/debug_log.cc new file mode 100644 index 00000000000..7d932939a0b --- /dev/null +++ b/tensorflow/lite/micro/emsdp/debug_log.cc @@ -0,0 +1,108 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/micro/debug_log.h"
+
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+
+// Print to debug console by default. One can define next to extend destinations set:
+// EMSDP_LOG_TO_MEMORY
+// : fill .debug_log memory region (data section) with passed chars.
+// EMSDP_LOG_TO_HOST
+// : Use hostlink to print output log.
+// EMSDP_LOG_TO_UART
+// : use default debug UART (out to FTDI channel 0). The same USB Port is used for JTAG.
+#define EMSDP_LOG_TO_UART
+
+
+// For simplicity we assume U-boot has already initialized debug console during
+// application loading (or on reset). Hence we use only status and data registers
+// to organize blocking loop for printing symbols. No input and no IRQ handling.
+// See embarc_osp repository for full EMSDP uart driver.
+// TODO: Consider U-Boot API to do it in a less "hacky" way.
+void DbgUartSendStr(const char* s) { +#define EMSDP_DBG_UART_BASE (0xF0004000U) +#define DW_UART_CPR_FIFO_STAT (1<<10) +#define DW_UART_USR_TFNF (0x02) +#define DW_UART_LSR_TXD_EMPTY (0x20) + + typedef volatile struct dw_uart_reg { + uint32_t DATA; /*!< data in/out and DLL */ + uint32_t RES1[4]; + uint32_t LSR; /*!< Line Status Register */ + uint32_t RES2[25]; + uint32_t USR; /*!< UART status register */ + uint32_t RES3[29]; + uint32_t CPR; /*!< Component parameter register */ + } DW_UART_REG; + + DW_UART_REG* uart_reg_ptr = (DW_UART_REG*)(EMSDP_DBG_UART_BASE); + const char* src = s; + while (*src) { + // Check uart status to send char + bool uart_is_ready = false; + if (uart_reg_ptr->CPR & DW_UART_CPR_FIFO_STAT) + uart_is_ready = ((uart_reg_ptr->USR & DW_UART_USR_TFNF) != 0); + else + uart_is_ready = ((uart_reg_ptr->LSR & DW_UART_LSR_TXD_EMPTY) != 0); + + // Send char if uart is ready. + if (uart_is_ready) + uart_reg_ptr->DATA = *src++; + } +} + +// Simple symbols dump to a pre-allocated memory region. +// The memory region can be viewed afterward with debugger. +// It can be viewed/read with debugger afterward. +void LogToMem(const char* s) { + constexpr int kDebugLogMemChars = 2 * 1024; + static int cursor = 0; +#pragma Bss(".debug_log") + volatile static char debug_log_mem[kDebugLogMemChars]; +#pragma Bss() + + const char* src = s; + while (*src) { + debug_log_mem[cursor] = *src++; + cursor = (cursor < kDebugLogMemChars) ? cursor + 1 : 0; + } + debug_log_mem[cursor] = '^'; +} + + +extern "C" void DebugLog(const char* s) { +#ifndef TF_LITE_STRIP_ERROR_STRINGS + +#if defined EMSDP_LOG_TO_UART + DbgUartSendStr(s); +#endif + +#if defined EMSDP_LOG_TO_MEMORY +#warning "EMSDP_LOG_TO_MEMORY is defined. View .debug_log memory region for stdout" + LogToMem(s); +#endif + +#if defined EMSDP_LOG_TO_HOST +#warning "EMSDP_LOG_TO_HOST is defined. Ensure hostlib is linked." 
+ fprintf(stderr, "%s", s); +#endif + +#endif // TF_LITE_STRIP_ERROR_STRINGS +} + + diff --git a/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp.lcf b/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp.lcf index d2d1b4220f8..d17c807e250 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp.lcf +++ b/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp.lcf @@ -5,7 +5,7 @@ # due to CCM memory wrapping into upper addresses beyond its size MEMORY { - PSRAM : ORIGIN = 0x10000000, LENGTH = 0x01000000 + PSRAM : ORIGIN = 0x10000400, LENGTH = (0x01000000 >> 1) - 0x400 SRAM : ORIGIN = 0x20000000, LENGTH = 0x00040000 IVT : ORIGIN = 0x60000000, LENGTH = 0x400 ICCM0 : ORIGIN = 0x60000400, LENGTH = (0x00020000 - 0x400) @@ -31,19 +31,11 @@ SECTIONS { } > ICCM0 GROUP BLOCK(4): { - /* _SDA_BASE_ computed implicitly */ - .sdata?: {} - .sbss?: {} - * (DATA): {} - * (BSS): {} - .stack ALIGN(4) SIZE(DEFINED _STACKSIZE?_STACKSIZE:16K): {} + .Zdata? : {} + .stack ALIGN(4) SIZE(DEFINED _STACKSIZE?_STACKSIZE:8K): {} .heap? ALIGN(4) SIZE(DEFINED _HEAPSIZE?_HEAPSIZE:8K): {} } > DCCM - - GROUP BLOCK(4): { - .rodata_in_data? : {} - } > PSRAM - + GROUP BLOCK(4): { .Xdata? : {} } > XCCM @@ -53,10 +45,20 @@ SECTIONS { } > YCCM GROUP BLOCK(4): { - .Zdata? : {} - } > DCCM - - - } + /* _SDA_BASE_ computed implicitly */ + .sdata?: {} + .sbss?: {} + * (DATA): {} + * (BSS): {} + } > PSRAM + + GROUP BLOCK(4): { + .rodata_in_data? : {} + } > PSRAM + + GROUP BLOCK(4): { + .debug_log? 
: {} + } > SRAM +} From 2d8e1a45ec34649d216566514d7c062ae985023a Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Wed, 1 Apr 2020 17:33:16 +0300 Subject: [PATCH 17/45] ARC EMSDB Board integration: Project generation --- .../micro/tools/make/helper_functions.inc | 32 ++-- .../tools/make/targets/arc/emsdp/uboot.env | Bin 0 -> 4096 bytes .../tools/make/targets/emsdp_makefile.inc | 155 ++++++++++++++++++ .../make/templates/arc/arc_app_makefile.tpl | 134 +++++++++++++++ 4 files changed, 307 insertions(+), 14 deletions(-) create mode 100644 tensorflow/lite/micro/tools/make/targets/arc/emsdp/uboot.env create mode 100644 tensorflow/lite/micro/tools/make/targets/emsdp_makefile.inc create mode 100644 tensorflow/lite/micro/tools/make/templates/arc/arc_app_makefile.tpl diff --git a/tensorflow/lite/micro/tools/make/helper_functions.inc b/tensorflow/lite/micro/tools/make/helper_functions.inc index a7f9bd788e3..0c398be2118 100644 --- a/tensorflow/lite/micro/tools/make/helper_functions.inc +++ b/tensorflow/lite/micro/tools/make/helper_functions.inc @@ -130,31 +130,35 @@ endef define generate_arc_project ifeq ($(TARGET_ARCH), arc) -$(PRJDIR)$(3)/$(1)/Makefile: tensorflow/lite/micro/tools/make/templates/Makefile.tpl + +$(PRJDIR)$(3)/$(1)/Makefile: tensorflow/lite/micro/tools/make/templates/arc/arc_app_makefile.tpl @mkdir -p $$(dir $$@) @sed -E 's#\%\{SRCS\}\%#$(4)#g' $$< | \ - sed -E '1 i\CC = ccac\nCXX = ccac\nLD = ccac\n' | \ + sed -E 's#\%\{CC\}\%#$(CC_TOOL)#g' | \ + sed -E 's#\%\{CXX\}\%#$(CXX_TOOL)#g' | \ + sed -E 's#\%\{LD\}\%#$(LD_TOOL)#g' | \ sed -E 's#\%\{EXECUTABLE\}\%#$(3).elf#g' | \ sed -E 's#\%\{LINKER_FLAGS\}\%#$(6)#g' | \ sed -E 's#\%\{CXX_FLAGS\}\%#$(7)#g' | \ - sed -E 's#\%\{CC_FLAGS\}\%#$(8)#g' > $$@ + sed -E 's#\%\{CC_FLAGS\}\%#$(8)#g' | \ + sed -E 's#\%\{EXTRA_APP_SETTINGS\}\%#$(ARC_EXTRA_APP_SETTINGS)#g' | \ + sed -E 's#\%\{EXTRA_APP_RULES\}\%#$(ARC_EXTRA_APP_RULES)#g' | \ + sed -E 's#\%\{BIN_DEPEND\}\%#$(ARC_BIN_DEPEND)#g' | \ + sed -E 
's#\%\{BIN_RULE\}\%#$(ARC_BIN_RULE)#g' | \ + sed -E 's#\%\{EXTRA_RM_TARGETS\}\%#$(ARC_EXTRA_RM_TARGETS)#g' | \ + sed -E 's#\%\{APP_RUN_CMD\}\%#$(ARC_APP_RUN_CMD)#g' | \ + sed -E 's#\%\{APP_DEBUG_CMD\}\%#$(ARC_APP_RUN_CMD)#g' | \ + sed -E 's#\%\{EXTRA_EXECUTE_RULES\}\%#$(ARC_EXTRA_EXECUTE_RULES)#g' > $$@ -# Special rule to copy TCF in case the local filesystem file name has been defined -ifneq ($(TCF_FILE_NAME), ) -$(PRJDIR)$(3)/$(1)/$(TCF_FILE_NAME): $(TCF_FILE) - @cp $$< $$@ -endif - -# Special rule to copy LCF in case the local filesystem file name has been defined -ifneq ($(LCF_FILE), ) -$(PRJDIR)$(3)/$(1)/$(notdir $(LCF_FILE)): $(LCF_FILE) - @cp $$< $$@ -endif +$(foreach var,$(ARC_TARGET_DIRS),$(eval $(call path_changing_copy_file,$(PRJDIR)$(3)/$(1),$(var)))) endif endef + + + # Creates a set of rules to build a standalone Arduino project for an # executable, including all of the source and header files required in a # separate folder and a simple makefile. diff --git a/tensorflow/lite/micro/tools/make/targets/arc/emsdp/uboot.env b/tensorflow/lite/micro/tools/make/targets/arc/emsdp/uboot.env new file mode 100644 index 0000000000000000000000000000000000000000..c336b6c8733f90b8fbaab75fc705f787ad141607 GIT binary patch literal 4096 zcmX?bt34nou{5P9u_V>j(9qP#z_2B}ZW%t@^UDFks+bJD0rFmPW)(z5N^)XyMyf(uPHAxl$b5vIAR8G_jI=G)1qGR2Qf3}QS!$7O zsIF6feu;vSfuV(-p`n6?Uw)Z_kvW*JpsQeDVP#}&Wo)LPZD43%pvf=_MnhmU1V%$( gGz3ONU^E0qLtr!nMnhmU1V%$(Gz3ONfT$1v05jr}EC2ui literal 0 HcmV?d00001 diff --git a/tensorflow/lite/micro/tools/make/targets/emsdp_makefile.inc b/tensorflow/lite/micro/tools/make/targets/emsdp_makefile.inc new file mode 100644 index 00000000000..c7286329651 --- /dev/null +++ b/tensorflow/lite/micro/tools/make/targets/emsdp_makefile.inc @@ -0,0 +1,155 @@ +# Settings for arc processors +ifeq ($(TARGET), emsdp) + + TARGET_ARCH = arc + + CC_TOOL = ccac + AR_TOOL = arac + CXX_TOOL = ccac + LD_TOOL = ccac + + DLR = $$$$ + ARC_EXTRA_APP_SETTINGS = \ + BIN_DIR = 
.$(DLR)\(PS\)bin\n\ + BIN_FILE = $(DLR)\(BIN_DIR\)$(DLR)\(PS\)app.elf\n + + ARC_EXTRA_APP_RULES = \ + $(DLR)\(BIN_FILE\): $(DLR)\(BIN_DIR\) $(DLR)\(OUT_NAME\)\ + \n\t\@$(DLR)\(CP\) $(DLR)\(OUT_NAME\) $(DLR)\(BIN_FILE\)\ + \n\t\@$(DLR)\(CP\) uboot.env $(DLR)\(BIN_DIR\)$(DLR)\(PS)uboot.env\ + \n \ + \n$(DLR)\(BIN_DIR\):\ + \n\t\@$(DLR)\(MKDIR\) $(DLR)\(BIN_DIR\)\ + + + ARC_EXTRA_RM_TARGETS = $(DLR)\(BIN_DIR\) + + ARC_BIN_DEPEND = $(DLR)\(BIN_DIR\) $(DLR)\(BIN_FILE\) + ARC_BIN_RULE = \t@echo Copy content of $(DLR)\(BIN_DIR\) into the root of SD card and follow instructions + + ARC_APP_RUN_CMD = mdb -run -digilent -nooptions $(DLR)\(DBG_ARGS) + ARC_APP_DEBUG_CMD = mdb -OK -digilent -nooptions $(DLR)\(DBG_ARGS) + ARC_EXTRA_EXECUTE_RULES = + + + + TCF_FILE = $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp/emsdp_em11d_dfss.tcf + LCF_FILE = $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp/emsdp.lcf + + MAKE_PROJECT_FILES += emsdp_em11d_dfss.tcf emsdp.lcf uboot.env + + ARC_TARGET_DIRS := $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp $(PWD)/$(MAKEFILE_DIR)/targets/arc + # ARC_TARGET_DIRS := $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp + +# The variable TCF_FILE_NAME stores the TCF file name (including .tcf extension), this variable is used later to add the option to the linker/compiler flags. +# This condition also handles the case when the user/makefile specifies the configuration bundled with MWDT (usually without .tcf extension) and that doesn't require copying. + TCF_FILE_NAME = $(notdir $(TCF_FILE)) + + THIRD_PARTY_CC_HDRS += $(TCF_FILE_NAME) + + PLATFORM_FLAGS = -tcf=$(TCF_FILE_NAME) -Hnocopyr -O3 -Hpurge -Hcl -Hcrt_fast_memcpy -Hcrt_fast_memset -fslp-vectorize-aggressive -ffunction-sections -fdata-sections + PLATFORM_FLAGS += -tcf_core_config + PLATFORM_LDFLAGS = -tcf=$(TCF_FILE_NAME) -Hnocopyr -m -Hldopt=-Coutput=memory.map + +# DMITRYZ: I think we need to move it to target specific LCF file. 
+ PLATFORM_LDFLAGS += $(notdir $(LCF_FILE)) + # THIRD_PARTY_CC_HDRS += $(notdir $(LCF_FILE)) + + CXXFLAGS += $(PLATFORM_FLAGS) + CXXFLAGS:=$(filter-out -std=c++11,$(CXXFLAGS)) + CCFLAGS += $(PLATFORM_FLAGS) + CCFLAGS:=$(filter-out -std=c11,$(CCFLAGS)) + LDFLAGS += $(PLATFORM_LDFLAGS) + + MICROLITE_LIBS := $(filter-out -lm,$(MICROLITE_LIBS)) + + # DMITRYZ: Here we need to check tags on "no_embarc_mli". + USE_EMBARC_MLI ?= true + +ifeq ($(USE_EMBARC_MLI), true) + ALL_TAGS += arc + +ifeq ($(PRE_COMPILED_MLI),true) + $(eval $(call add_third_party_download,$(EMBARC_OSP_URL),$(EMBARC_OSP_MD5),embarc_osp,)) + + MLI_INCLUDE_FOLDER = embarc_osp/library/embarc_mli/include + MLI_LIB = third_party/embarc_osp/library/embarc_mli/lib/arcem9d/libmli_iotdk.a + + THIRD_PARTY_CC_HDRS += \ + third_party/embarc_osp/LICENSE +else + MLI_LIB_DIR = embarc_mli_$(basename $(TCF_FILE_NAME)) + + $(eval $(call add_third_party_download,$(EMBARC_MLI_URL),$(EMBARC_MLI_MD5),$(MLI_LIB_DIR),build_embarc_mli,$(TCF_FILE))) + + MLI_INCLUDE_FOLDER = $(MLI_LIB_DIR)/include + MLI_LIB = third_party/$(MLI_LIB_DIR)/bin/libmli.a + MICROLITE_LIBS += $(MAKEFILE_DIR)/downloads/$(MLI_LIB_DIR)/bin/libmli.a + + THIRD_PARTY_CC_HDRS += \ + third_party/$(MLI_LIB_DIR)/LICENSE +endif + + THIRD_PARTY_CC_HDRS += $(MLI_LIB) + GENERATED_PROJECT_LIBS += $(MLI_LIB) + + INCLUDES += \ + -I$(MAKEFILE_DIR)/downloads/$(MLI_INCLUDE_FOLDER) \ + -I$(MAKEFILE_DIR)/downloads/$(MLI_INCLUDE_FOLDER)/api + + GENERATED_PROJECT_INCLUDES += \ + -I. 
\ + -I./third_party/$(MLI_INCLUDE_FOLDER) \ + -I./third_party/$(MLI_INCLUDE_FOLDER)/api + + + THIRD_PARTY_CC_HDRS += \ + third_party/$(MLI_INCLUDE_FOLDER)/mli_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/mli_config.h \ + third_party/$(MLI_INCLUDE_FOLDER)/mli_types.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_helpers_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_kernels_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_avepool_spec_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_conv2d_spec_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_depthwise_conv2d_spec_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_maxpool_spec_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_mov_api.h + + MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/scratch_buffers.h + MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/scratch_buffers.cc + MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h + MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc + MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/mli_slicers.h + MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/mli_slicers.cc + MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/mli_tf_utils.h + + MICROLITE_CC_HDRS += tensorflow/lite/micro/tools/make/targets/arc/memory.lcf + +endif # USE_EMBARC_MLI + +# We overwrite project generator to exclude everything not relevant to ARC platform +define generate_microlite_projects +$(call generate_project,make,$(MAKE_PROJECT_FILES),$(1),$(MICROLITE_CC_SRCS) $(THIRD_PARTY_CC_SRCS) $(2),$(MICROLITE_CC_HDRS) $(THIRD_PARTY_CC_HDRS) $(MICROLITE_TEST_HDRS) $(3),$(LDFLAGS) $(MICROLITE_LIBS),$(CXXFLAGS) $(GENERATED_PROJECT_INCLUDES), $(CCFLAGS) $(GENERATED_PROJECT_INCLUDES)) +$(call generate_arc_project,make,$(MAKE_PROJECT_FILES),$(1),$(MICROLITE_CC_SRCS) $(THIRD_PARTY_CC_SRCS) $(2),$(MICROLITE_CC_HDRS) $(THIRD_PARTY_CC_HDRS) $(MICROLITE_TEST_HDRS) $(3),$(LDFLAGS) $(GENERATED_PROJECT_LIBS),$(CXXFLAGS) 
$(GENERATED_PROJECT_INCLUDES), $(CCFLAGS) $(GENERATED_PROJECT_INCLUDES)) +endef + +# Copy rule generator to do file copies with changing paths in generated project +# Arguments are: +# 1 - Path files in generated project. +# 2 - Path files in the source repo +# Used in helper_functions.inc for arc projects to copy files +define path_changing_copy_file +$(1)/%: $(2)/% + @mkdir -p $$(dir $$@) + @cp $$< $$@ +endef + +$(foreach var,$(ARC_TARGET_DIRS),$(eval $(call copy_arc_project_file,$(PRJDIR)$(3)/$(1),$(var)))) + +# These are microcontroller-specific rules for converting the ELF output +# of the linker into a binary image that can be loaded directly. + +# Not applicable for ARC, leaving it empty. +$(BINDIR)%.bin: + +endif diff --git a/tensorflow/lite/micro/tools/make/templates/arc/arc_app_makefile.tpl b/tensorflow/lite/micro/tools/make/templates/arc/arc_app_makefile.tpl new file mode 100644 index 00000000000..5bbcb7d3f71 --- /dev/null +++ b/tensorflow/lite/micro/tools/make/templates/arc/arc_app_makefile.tpl @@ -0,0 +1,134 @@ +#============================================================= +# OS-specific definitions +#============================================================= +COMMA=, +OPEN_PAREN=( +CLOSE_PAREN=) +BACKSLASH=\$(nullstring) +ifneq ($(ComSpec)$(COMSPEC),) + O_SYS=Windows + RM=del /F /Q + MKDIR=mkdir + CP=copy /Y + TYPE=type + PS=$(BACKSLASH) + Q= + coQ=\$(nullstring) + fix_platform_path = $(subst /,$(PS), $(1)) + DEV_NULL = nul +else + O_SYS=Unix + RM=rm -rf + MKDIR=mkdir -p + CP=cp + TYPE=cat + PS=/ + Q=$(BACKSLASH) + coQ= + fix_platform_path=$(1) + DEV_NULL=/dev/null +endif + +# Note: Windows escaping rules are very cumbersome +# initially I tried to use Q=^, but this depends on the context and (looks like) on Win version. +# Another especially ugly thing is that in quoted strings the quotes themselves remain. 
+# Batch has special parameter expansion syntax to remove quotes, +# but many tools themselves remove quotes (unless escaped with backslash) +# So finally we've found that in our use cases we may not escape any symbols but prepend backslashes before quotes. + +quote=$(subst %,$(Q)%, \ + $(subst &,$(Q)&, \ + $(subst <,$(Q)<, \ + $(subst >,$(Q)>, \ + $(subst |,$(Q)|, \ + $(subst ',$(Q)', \ + $(subst $(COMMA),$(Q)$(COMMA), \ + $(subst =,$(Q)=, \ + $(subst $(OPEN_PAREN),$(Q)$(OPEN_PAREN), \ + $(subst $(CLOSE_PAREN),$(Q)$(CLOSE_PAREN), \ + $(subst !,$(Q)!, \ + $(subst ",$(BACKSLASH)", \ + $(subst $(Q),$(Q)$(Q), \ + $(1) ))))))))))))) + +#============================================================= +# Toolchain definitions +#============================================================= +CC = %{CC}% +CXX = %{CXX}% +LD = %{LD}% + + +#============================================================= +# Applications settings +#============================================================= +OUT_NAME = %{EXECUTABLE}% + +DBG_ARGS ?= + +RUN_ARGS ?= + +CXXFLAGS += %{CXX_FLAGS}% + +CCFLAGS += %{CC_FLAGS}% + +LDFLAGS += %{LINKER_FLAGS}% + +%{EXTRA_APP_SETTINGS}% + + +#============================================================= +# Files and directories +#============================================================= +SRCS := \ +%{SRCS}% + +OBJS := \ +$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(SRCS))) + + +#============================================================= +# Common rules +#============================================================= +.PHONY: all app flash clean run debug + +%.o: %.cc + $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ + +%.o: %.c + $(CC) $(CCFLAGS) $(INCLUDES) -c $< -o $@ + +$(OUT_NAME): $(OBJS) + $(LD) $(CXXFLAGS) -o $@ -Ccrossref $(OBJS) $(LDFLAGS) + +%{EXTRA_APP_RULES}% + + +#================================================================= +# Global rules +#================================================================= +all: $(OUT_NAME) + +app: $(OUT_NAME) 
+ +flash: %{BIN_DEPEND}% +%{BIN_RULE}% + +clean: + -@$(RM) $(call fix_platform_path,$(OBJS)) + -@$(RM) $(OUT_NAME) %{EXTRA_RM_TARGETS}% + +#================================================================= +# Execution rules +#================================================================= + +APP_RUN := %{APP_RUN_CMD}% +APP_DEBUG := %{APP_DEBUG_CMD}% + +run: $(OUT_NAME) + $(APP_RUN) $(OUT_NAME) $(RUN_ARGS) + +debug: $(OUT_NAME) + $(APP_DEBUG) $(OUT_NAME) $(RUN_ARGS) + +%{EXTRA_EXECUTE_RULES}% From 1977bd0442998f7a1d8724d54e5a892d9df0daba Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Thu, 2 Apr 2020 15:52:03 +0300 Subject: [PATCH 18/45] Update project generation for custom ARC target (*.tcf) --- .../micro/tools/make/helper_functions.inc | 2 +- .../micro/tools/make/targets/arc_makefile.inc | 85 ++++++++++++++----- .../tools/make/targets/emsdp_makefile.inc | 15 ++-- 3 files changed, 71 insertions(+), 31 deletions(-) diff --git a/tensorflow/lite/micro/tools/make/helper_functions.inc b/tensorflow/lite/micro/tools/make/helper_functions.inc index 0c398be2118..0e21e02bc07 100644 --- a/tensorflow/lite/micro/tools/make/helper_functions.inc +++ b/tensorflow/lite/micro/tools/make/helper_functions.inc @@ -151,7 +151,7 @@ $(PRJDIR)$(3)/$(1)/Makefile: tensorflow/lite/micro/tools/make/templates/arc/arc_ sed -E 's#\%\{EXTRA_EXECUTE_RULES\}\%#$(ARC_EXTRA_EXECUTE_RULES)#g' > $$@ -$(foreach var,$(ARC_TARGET_DIRS),$(eval $(call path_changing_copy_file,$(PRJDIR)$(3)/$(1),$(var)))) +$(foreach var,$(ARC_TARGET_FILES_DIRS),$(eval $(call path_changing_copy_file,$(PRJDIR)$(3)/$(1),$(var)))) endif endef diff --git a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc index 29ad5f5347a..e6505cd187b 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc @@ -1,19 +1,12 @@ -# Settings for arc processors +# Settings for not pre-defined ARC 
processors. +# User needs to specify ARC target with Tool Configuration File (*.tcf). +# Path to this file must be passed through TCF_FILE variable. +# Otherwise, default em7d_voice_audio configuration is used + ifeq ($(TARGET_ARCH), arc) - CC_TOOL = ccac - AR_TOOL = arac - CXX_TOOL = ccac - -ifeq ($(TARGET), iotdk) - TCF_FILE = $(PWD)/$(MAKEFILE_DIR)/targets/arc/iotdk/iotdk.tcf - LCF_FILE = $(PWD)/$(MAKEFILE_DIR)/targets/arc/iotdk/iotdk.lcf -endif - -ifeq ($(TARGET), emsdp) - TCF_FILE = $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp/emsdp_em11d_dfss.tcf - LCF_FILE = $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp/emsdp.lcf -endif +# Known targets are specified with their own make configurations. +ifeq ($(filter $(TARGET), emsdp iotdk),) ifneq ($(TCF_FILE), ) TARGET = $(basename $(notdir $(TCF_FILE))) @@ -26,30 +19,61 @@ endif # This condition also handles the case when the user/makefile specifies the configuration bundled with MWDT (usually without .tcf extension) and that doesn't require copying. ifneq (,$(findstring .tcf,$(TCF_FILE))) TCF_FILE_NAME = $(notdir $(TCF_FILE)) - THIRD_PARTY_CC_HDRS += $(TCF_FILE_NAME) + ARC_TARGET_FILES_DIRS := $(dir $(TCF_FILE)) + MAKE_PROJECT_FILES += $(TCF_FILE_NAME) else TCF_FILE_NAME = $(TCF_FILE) endif - PLATFORM_FLAGS = -tcf=$(TCF_FILE_NAME) -Hnocopyr -O3 -Hpurge -Hcl -Hcrt_fast_memcpy -Hcrt_fast_memset -fslp-vectorize-aggressive -ffunction-sections -fdata-sections - PLATFORM_FLAGS += -tcf_core_config - PLATFORM_LDFLAGS = -tcf=$(TCF_FILE_NAME) -Hnocopyr -m -Hldopt=-Coutput=memory.map -default_lcf=$(MAKEFILE_DIR)/targets/arc/memory.lcf + CC_TOOL = ccac + AR_TOOL = arac + CXX_TOOL = ccac + LD_TOOL = ccac + + # TODO: Move this to a common arc/arc_common.inc file to share this with other targets + DLR = $$$$ + ARC_EXTRA_APP_SETTINGS = + ARC_EXTRA_APP_RULES = + + ARC_EXTRA_RM_TARGETS = + + ARC_BIN_DEPEND = + ARC_BIN_RULE = \t$(DLR)\(error Flash rule isnt defined for this ARC target\) + + ARC_APP_RUN_CMD = mdb -run -jit -tcf=$(TCF_FILE_NAME) 
$(DLR)\(DBG_ARGS) + ARC_APP_DEBUG_CMD = mdb -OK -jit -tcf=$(TCF_FILE_NAME) $(DLR)\(DBG_ARGS) + ARC_EXTRA_EXECUTE_RULES = + + + PLATFORM_FLAGS = -tcf=$(TCF_FILE_NAME) + PLATFORM_FLAGS += -Hnocopyr -O3 -Hpurge -Hcl -Hcrt_fast_memcpy -Hcrt_fast_memset -fslp-vectorize-aggressive -ffunction-sections -fdata-sections + PLATFORM_FLAGS += -tcf_core_config + + PLATFORM_LDFLAGS = -tcf=$(TCF_FILE_NAME) + PLATFORM_LDFLAGS = -Hnocopyr -m -Hldopt=-Coutput=memory.map ifneq ($(LCF_FILE), ) PLATFORM_LDFLAGS += $(notdir $(LCF_FILE)) - THIRD_PARTY_CC_HDRS += $(notdir $(LCF_FILE)) + MAKE_PROJECT_FILES += $(notdir $(LCF_FILE)) +ifeq ($(filter $(ARC_TARGET_FILES_DIRS), $(dir LCF_FILE)),) + ARC_TARGET_FILES_DIRS += $(dir $(LCF_FILE)) +endif endif CXXFLAGS += $(PLATFORM_FLAGS) CXXFLAGS:=$(filter-out -std=c++11,$(CXXFLAGS)) CCFLAGS += $(PLATFORM_FLAGS) + CCFLAGS:=$(filter-out -std=c11,$(CCFLAGS)) LDFLAGS += $(PLATFORM_LDFLAGS) + MICROLITE_LIBS := $(filter-out -lm,$(MICROLITE_LIBS)) + # TODO: Move/organize embarc_mli usage in an implied way (see ext_libs/cmsis.inc for example USE_EMBARC_MLI ?= true ifeq ($(USE_EMBARC_MLI), true) + # TODO: To understand why it's done here. The same is performed in the higher level MakeFile. 
ALL_TAGS += arc ifeq ($(PRE_COMPILED_MLI),true) @@ -110,10 +134,29 @@ endif endif # USE_EMBARC_MLI +# We overwrite project generator to exclude everything not relevant to ARC platform +define generate_microlite_projects +$(call generate_project,make,$(MAKE_PROJECT_FILES),$(1),$(MICROLITE_CC_SRCS) $(THIRD_PARTY_CC_SRCS) $(2),$(MICROLITE_CC_HDRS) $(THIRD_PARTY_CC_HDRS) $(MICROLITE_TEST_HDRS) $(3),$(LDFLAGS) $(MICROLITE_LIBS),$(CXXFLAGS) $(GENERATED_PROJECT_INCLUDES), $(CCFLAGS) $(GENERATED_PROJECT_INCLUDES)) +$(call generate_arc_project,make,$(MAKE_PROJECT_FILES),$(1),$(MICROLITE_CC_SRCS) $(THIRD_PARTY_CC_SRCS) $(2),$(MICROLITE_CC_HDRS) $(THIRD_PARTY_CC_HDRS) $(MICROLITE_TEST_HDRS) $(3),$(LDFLAGS) $(GENERATED_PROJECT_LIBS),$(CXXFLAGS) $(GENERATED_PROJECT_INCLUDES), $(CCFLAGS) $(GENERATED_PROJECT_INCLUDES)) +endef + +# Copy rule generator to do file copyes with changing paths in generated project +# Arguments are: +# 1 - Path files in generated project. +# 2 - Path files in the source repo +# Used in helper_functions.inc for arc projects to copy files +define path_changing_copy_file +$(1)/%: $(2)/% + @mkdir -p $$(dir $$@) + @cp $$< $$@ +endef + # These are microcontroller-specific rules for converting the ELF output # of the linker into a binary image that can be loaded directly. - # Not applicable for ARC, leaving it empty. 
$(BINDIR)%.bin: -endif + +endif # ifeq ($(filter $(TARGET),$(ARC_PREDEFINED_TARGETS)),) +endif # ifeq ($(TARGET_ARCH), arc) + diff --git a/tensorflow/lite/micro/tools/make/targets/emsdp_makefile.inc b/tensorflow/lite/micro/tools/make/targets/emsdp_makefile.inc index c7286329651..aeeb7fc178f 100644 --- a/tensorflow/lite/micro/tools/make/targets/emsdp_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/emsdp_makefile.inc @@ -21,7 +21,6 @@ ifeq ($(TARGET), emsdp) \n$(DLR)\(BIN_DIR\):\ \n\t\@$(DLR)\(MKDIR\) $(DLR)\(BIN_DIR\)\ - ARC_EXTRA_RM_TARGETS = $(DLR)\(BIN_DIR\) ARC_BIN_DEPEND = $(DLR)\(BIN_DIR\) $(DLR)\(BIN_FILE\) @@ -31,21 +30,19 @@ ifeq ($(TARGET), emsdp) ARC_APP_DEBUG_CMD = mdb -OK -digilent -nooptions $(DLR)\(DBG_ARGS) ARC_EXTRA_EXECUTE_RULES = - - TCF_FILE = $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp/emsdp_em11d_dfss.tcf LCF_FILE = $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp/emsdp.lcf MAKE_PROJECT_FILES += emsdp_em11d_dfss.tcf emsdp.lcf uboot.env - ARC_TARGET_DIRS := $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp $(PWD)/$(MAKEFILE_DIR)/targets/arc - # ARC_TARGET_DIRS := $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp + ARC_TARGET_FILES_DIRS := $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp +# TODO: LESS TCF/LCF Variables # The variable TCF_FILE_NAME stores the TCF file name (including .tcf extension), this variable is used later to add the option to the linker/compiler flags. # This condition also handles the case when the user/makefile specifies the configuration bundled with MWDT (usually without .tcf extension) and that doesn't require copying. TCF_FILE_NAME = $(notdir $(TCF_FILE)) - THIRD_PARTY_CC_HDRS += $(TCF_FILE_NAME) +# THIRD_PARTY_CC_HDRS += $(TCF_FILE_NAME) PLATFORM_FLAGS = -tcf=$(TCF_FILE_NAME) -Hnocopyr -O3 -Hpurge -Hcl -Hcrt_fast_memcpy -Hcrt_fast_memset -fslp-vectorize-aggressive -ffunction-sections -fdata-sections PLATFORM_FLAGS += -tcf_core_config @@ -53,7 +50,7 @@ ifeq ($(TARGET), emsdp) # DMITRYZ: I think we need to move it to target specific LCF file. 
PLATFORM_LDFLAGS += $(notdir $(LCF_FILE)) - # THIRD_PARTY_CC_HDRS += $(notdir $(LCF_FILE)) +# THIRD_PARTY_CC_HDRS += $(notdir $(LCF_FILE)) CXXFLAGS += $(PLATFORM_FLAGS) CXXFLAGS:=$(filter-out -std=c++11,$(CXXFLAGS)) @@ -133,7 +130,7 @@ $(call generate_project,make,$(MAKE_PROJECT_FILES),$(1),$(MICROLITE_CC_SRCS) $(T $(call generate_arc_project,make,$(MAKE_PROJECT_FILES),$(1),$(MICROLITE_CC_SRCS) $(THIRD_PARTY_CC_SRCS) $(2),$(MICROLITE_CC_HDRS) $(THIRD_PARTY_CC_HDRS) $(MICROLITE_TEST_HDRS) $(3),$(LDFLAGS) $(GENERATED_PROJECT_LIBS),$(CXXFLAGS) $(GENERATED_PROJECT_INCLUDES), $(CCFLAGS) $(GENERATED_PROJECT_INCLUDES)) endef -# Copy rule generator to do file copyes with changing paths in generated project +# Copy rule generator to do file copyes changing paths in generated project # Arguments are: # 1 - Path files in generated project. # 2 - Path files in the source repo @@ -144,7 +141,7 @@ $(1)/%: $(2)/% @cp $$< $$@ endef -$(foreach var,$(ARC_TARGET_DIRS),$(eval $(call copy_arc_project_file,$(PRJDIR)$(3)/$(1),$(var)))) + # These are microcontroller-specific rules for converting the ELF output # of the linker into a binary image that can be loaded directly. 
From 984457fd69a2615db8f2d1e5c5848b3b3c7ef27f Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Fri, 3 Apr 2020 11:41:58 +0300 Subject: [PATCH 19/45] Update platform flags and debug command template --- .../micro/tools/make/helper_functions.inc | 2 +- .../micro/tools/make/targets/arc_makefile.inc | 10 +++++---- .../tools/make/targets/emsdp_makefile.inc | 22 +++++++++++++------ .../make/templates/arc/arc_app_makefile.tpl | 6 +++-- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/tensorflow/lite/micro/tools/make/helper_functions.inc b/tensorflow/lite/micro/tools/make/helper_functions.inc index 0e21e02bc07..8d321d42490 100644 --- a/tensorflow/lite/micro/tools/make/helper_functions.inc +++ b/tensorflow/lite/micro/tools/make/helper_functions.inc @@ -147,7 +147,7 @@ $(PRJDIR)$(3)/$(1)/Makefile: tensorflow/lite/micro/tools/make/templates/arc/arc_ sed -E 's#\%\{BIN_RULE\}\%#$(ARC_BIN_RULE)#g' | \ sed -E 's#\%\{EXTRA_RM_TARGETS\}\%#$(ARC_EXTRA_RM_TARGETS)#g' | \ sed -E 's#\%\{APP_RUN_CMD\}\%#$(ARC_APP_RUN_CMD)#g' | \ - sed -E 's#\%\{APP_DEBUG_CMD\}\%#$(ARC_APP_RUN_CMD)#g' | \ + sed -E 's#\%\{APP_DEBUG_CMD\}\%#$(ARC_APP_DEBUG_CMD)#g' | \ sed -E 's#\%\{EXTRA_EXECUTE_RULES\}\%#$(ARC_EXTRA_EXECUTE_RULES)#g' > $$@ diff --git a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc index e6505cd187b..1b30e6ac6d0 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc @@ -46,12 +46,14 @@ endif ARC_EXTRA_EXECUTE_RULES = - PLATFORM_FLAGS = -tcf=$(TCF_FILE_NAME) - PLATFORM_FLAGS += -Hnocopyr -O3 -Hpurge -Hcl -Hcrt_fast_memcpy -Hcrt_fast_memset -fslp-vectorize-aggressive -ffunction-sections -fdata-sections - PLATFORM_FLAGS += -tcf_core_config + PLATFORM_FLAGS = -tcf=$(TCF_FILE_NAME) -tcf_core_config + PLATFORM_FLAGS += -Hnocopyr -O3 -Hpurge -Hdense_prologue -Hon=Long_enums -fslp-vectorize-aggressive -ffunction-sections 
-fdata-sections + + # Use compact CRT. It requires pre-defined heap size + PLATFORM_FLAGS += -Hcl -Hcrt_fast_memcpy -Hcrt_fast_memset PLATFORM_LDFLAGS = -tcf=$(TCF_FILE_NAME) - PLATFORM_LDFLAGS = -Hnocopyr -m -Hldopt=-Coutput=memory.map + PLATFORM_LDFLAGS += -Hnocopyr -m -Hldopt=-Coutput=memory.map -Hheap=2K ifneq ($(LCF_FILE), ) PLATFORM_LDFLAGS += $(notdir $(LCF_FILE)) MAKE_PROJECT_FILES += $(notdir $(LCF_FILE)) diff --git a/tensorflow/lite/micro/tools/make/targets/emsdp_makefile.inc b/tensorflow/lite/micro/tools/make/targets/emsdp_makefile.inc index aeeb7fc178f..86e9d9e7379 100644 --- a/tensorflow/lite/micro/tools/make/targets/emsdp_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/emsdp_makefile.inc @@ -33,24 +33,32 @@ ifeq ($(TARGET), emsdp) TCF_FILE = $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp/emsdp_em11d_dfss.tcf LCF_FILE = $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp/emsdp.lcf - MAKE_PROJECT_FILES += emsdp_em11d_dfss.tcf emsdp.lcf uboot.env + MAKE_PROJECT_FILES += $(notdir $(TCF_FILE)) $(notdir $(LCF_FILE)) uboot.env - ARC_TARGET_FILES_DIRS := $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp + ARC_TARGET_FILES_DIRS = $(dir $(TCF_FILE)) +ifneq ($(dir $(TCF_FILE)), $(dir $(LCF_FILE))) + ARC_TARGET_FILES_DIRS += $(dir $(LCF_FILE)) +endif # TODO: LESS TCF/LCF Variables # The variable TCF_FILE_NAME stores the TCF file name (including .tcf extension), this variable is used later to add the option to the linker/compiler flags. # This condition also handles the case when the user/makefile specifies the configuration bundled with MWDT (usually without .tcf extension) and that doesn't require copying. TCF_FILE_NAME = $(notdir $(TCF_FILE)) -# THIRD_PARTY_CC_HDRS += $(TCF_FILE_NAME) + PLATFORM_FLAGS = -tcf=$(TCF_FILE_NAME) -tcf_core_config + PLATFORM_FLAGS += -Hnocopyr -O3 -Hpurge -Hdense_prologue -Hon=Long_enums -fslp-vectorize-aggressive -ffunction-sections -fdata-sections + + # Use compact CRT. 
It requires pre-defined heap size + PLATFORM_FLAGS += -Hcl -Hcrt_fast_memcpy -Hcrt_fast_memset + + PLATFORM_LDFLAGS = -tcf=$(TCF_FILE_NAME) -Hnocopyr -m -Hldopt=-Coutput=memory.map -Hheap=2K - PLATFORM_FLAGS = -tcf=$(TCF_FILE_NAME) -Hnocopyr -O3 -Hpurge -Hcl -Hcrt_fast_memcpy -Hcrt_fast_memset -fslp-vectorize-aggressive -ffunction-sections -fdata-sections - PLATFORM_FLAGS += -tcf_core_config - PLATFORM_LDFLAGS = -tcf=$(TCF_FILE_NAME) -Hnocopyr -m -Hldopt=-Coutput=memory.map + # for default EMSD configuration we can use defaul em9d rt libs + # for better performance runime should be rebuilt for emsdp configuration + PLATFORM_LDFLAGS += -Hlib=em9d_voice_audio # DMITRYZ: I think we need to move it to target specific LCF file. PLATFORM_LDFLAGS += $(notdir $(LCF_FILE)) -# THIRD_PARTY_CC_HDRS += $(notdir $(LCF_FILE)) CXXFLAGS += $(PLATFORM_FLAGS) CXXFLAGS:=$(filter-out -std=c++11,$(CXXFLAGS)) diff --git a/tensorflow/lite/micro/tools/make/templates/arc/arc_app_makefile.tpl b/tensorflow/lite/micro/tools/make/templates/arc/arc_app_makefile.tpl index 5bbcb7d3f71..f79d04b26d1 100644 --- a/tensorflow/lite/micro/tools/make/templates/arc/arc_app_makefile.tpl +++ b/tensorflow/lite/micro/tools/make/templates/arc/arc_app_makefile.tpl @@ -68,6 +68,8 @@ DBG_ARGS ?= RUN_ARGS ?= +EXT_CFLAGS ?= + CXXFLAGS += %{CXX_FLAGS}% CCFLAGS += %{CC_FLAGS}% @@ -93,10 +95,10 @@ $(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(SRCS))) .PHONY: all app flash clean run debug %.o: %.cc - $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@ + $(CXX) $(CXXFLAGS) $(EXT_CFLAGS) $(INCLUDES) -c $< -o $@ %.o: %.c - $(CC) $(CCFLAGS) $(INCLUDES) -c $< -o $@ + $(CC) $(CCFLAGS) $(EXT_CFLAGS) $(INCLUDES) -c $< -o $@ $(OUT_NAME): $(OBJS) $(LD) $(CXXFLAGS) -o $@ -Ccrossref $(OBJS) $(LDFLAGS) From 7c15ad0e98c1ba9234117fb160c082ef11108b46 Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Fri, 3 Apr 2020 16:10:34 +0300 Subject: [PATCH 20/45] ARC platform common make parts was moved to a separate file --- 
.../tools/make/targets/arc/arc_common.inc | 185 ++++++++++++++++++ .../micro/tools/make/targets/arc_makefile.inc | 151 +------------- .../tools/make/targets/emsdp_makefile.inc | 148 ++------------ 3 files changed, 207 insertions(+), 277 deletions(-) create mode 100644 tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc diff --git a/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc b/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc new file mode 100644 index 00000000000..e20887abb07 --- /dev/null +++ b/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc @@ -0,0 +1,185 @@ +# Common Settings for ARC platform and it's projects. +# Might be reused across different targets + +ifeq ($(TARGET_ARCH), arc) + + DLR := $$$$ + + # List of folders to search project files for copy with path changing + # For instance, TCF and LCF files are copyed into the root of generated project + ARC_TARGET_FILES_DIRS ?= + + # For the following variables see arc_app_makefile.tpl for usage + + # Additional text into application settings section of arc makefile project + ARC_EXTRA_APP_SETTINGS ?= + + # Additional text into application general rules of arc makefile project + ARC_EXTRA_APP_RULES ?= + + # additional arguments for RM command of "clean" target rule ("make clean" command) + ARC_EXTRA_RM_TARGETS ?= + + # Dependencies of "flash" target rule ("make flash" command) + ARC_BIN_DEPEND ?= + + # Commands in "flash" target rule ("make flash" command) + ARC_BIN_RULE ?= \t$(DLR)\(error Flash rule isnt defined for this ARC target\) + + # Command to run app on "make run" command of generated project + ARC_APP_RUN_CMD ?= + + # Command to run app on "make debug" command of generated project + ARC_APP_DEBUG_CMD ?= + + # Additional text into application execution rules of arc makefile project + ARC_EXTRA_EXECUTE_RULES ?= + +# We overwrite project generator to exclude everything not relevant to ARC platform. 
+# ARC targets doesn't can't work with mbed, keil or other architecture specific development tools +# Basic make project is updated to be applicable for general ARC platform +define generate_microlite_projects +$(call generate_project,make,$(MAKE_PROJECT_FILES),$(1),$(MICROLITE_CC_SRCS) $(THIRD_PARTY_CC_SRCS) $(2),$(MICROLITE_CC_HDRS) $(THIRD_PARTY_CC_HDRS) $(MICROLITE_TEST_HDRS) $(3),$(LDFLAGS) $(MICROLITE_LIBS),$(CXXFLAGS) $(GENERATED_PROJECT_INCLUDES), $(CCFLAGS) $(GENERATED_PROJECT_INCLUDES)) +$(call generate_arc_project,make,$(MAKE_PROJECT_FILES),$(1),$(MICROLITE_CC_SRCS) $(THIRD_PARTY_CC_SRCS) $(2),$(MICROLITE_CC_HDRS) $(THIRD_PARTY_CC_HDRS) $(MICROLITE_TEST_HDRS) $(3),$(LDFLAGS) $(GENERATED_PROJECT_LIBS),$(CXXFLAGS) $(GENERATED_PROJECT_INCLUDES), $(CCFLAGS) $(GENERATED_PROJECT_INCLUDES)) +endef + +# Copy rule generator to do file copyes with changing paths in generated project +# Arguments are: +# 1 - Path files in generated project. +# 2 - Path files in the source repo +# Used in helper_functions.inc for arc projects to copy files +define path_changing_copy_file +$(1)/%: $(2)/% + @mkdir -p $$(dir $$@) + @cp $$< $$@ +endef + +# These are microcontroller-specific rules for converting the ELF output +# of the linker into a binary image that can be loaded directly. +# Not applicable for ARC, leaving it empty. +$(BINDIR)%.bin: + + +ifeq ($(ARC_TOOLCHAIN), mwdt) + CC_TOOL := ccac + AR_TOOL := arac + CXX_TOOL := ccac + LD_TOOL := ccac + + ARC_APP_RUN_CMD = mdb -run -jit -tcf=$(TCF_FILE_NAME) $(DLR)\(DBG_ARGS\) + ARC_APP_DEBUG_CMD = mdb -OK -jit -tcf=$(TCF_FILE_NAME) $(DLR)\(DBG_ARGS\) + + # The variable TCF_FILE stores path to Tool Configuration File (*.tcf). + # This file is used by MWDT toolchain to properly compile/run code + TCF_FILE ?= + + LCF_FILE ?= + +# The variable TCF_FILE_NAME stores the TCF file name (including .tcf extension), +# this variable is used later to add the option to the linker/compiler flags. 
+# This condition also handles the case when the user/makefile specifies +# the configuration bundled with MWDT (usually without .tcf extension) and that doesn't require copying. +ifneq (,$(findstring .tcf,$(TCF_FILE))) + TCF_FILE_NAME = $(notdir $(TCF_FILE)) + ARC_TARGET_FILES_DIRS = $(dir $(TCF_FILE)) + MAKE_PROJECT_FILES += $(TCF_FILE_NAME) +else + TCF_FILE_NAME = $(TCF_FILE) +endif + + PLATFORM_FLAGS = -tcf=$(TCF_FILE_NAME) -tcf_core_config + + PLATFORM_FLAGS += -Hnocopyr -O3 -Hpurge -Hdense_prologue -Hon=Long_enums -fslp-vectorize-aggressive -ffunction-sections -fdata-sections + + # Use compact CRT. It requires pre-defined heap size + PLATFORM_FLAGS += -Hcl -Hcrt_fast_memcpy -Hcrt_fast_memset + + PLATFORM_LDFLAGS = -tcf=$(TCF_FILE_NAME) + + PLATFORM_LDFLAGS += -Hnocopyr -m -Hldopt=-Coutput=memory.map -Hheap=2K + +ifneq ($(LCF_FILE), ) + PLATFORM_LDFLAGS += $(notdir $(LCF_FILE)) + MAKE_PROJECT_FILES += $(notdir $(LCF_FILE)) +ifeq ($(filter $(ARC_TARGET_FILES_DIRS), $(dir $(LCF_FILE))),) + ARC_TARGET_FILES_DIRS += $(dir $(LCF_FILE)) +endif +endif + + CXXFLAGS := $(filter-out -std=c++11,$(CXXFLAGS)) + CCFLAGS := $(filter-out -std=c11,$(CCFLAGS)) + MICROLITE_LIBS := $(filter-out -lm,$(MICROLITE_LIBS)) + + CXXFLAGS += $(PLATFORM_FLAGS) + CCFLAGS += $(PLATFORM_FLAGS) + LDFLAGS += $(PLATFORM_LDFLAGS) + + + # TODO: Move/organize embarc_mli usage in an implied way (see ext_libs/cmsis.inc for example + USE_EMBARC_MLI ?= true + +ifeq ($(USE_EMBARC_MLI), true) + # TODO: To understand why it's done here. The same is performed in the higher level MakeFile. 
+ ALL_TAGS += arc + +ifeq ($(PRE_COMPILED_MLI),true) + $(eval $(call add_third_party_download,$(EMBARC_OSP_URL),$(EMBARC_OSP_MD5),embarc_osp,)) + + MLI_INCLUDE_FOLDER = embarc_osp/library/embarc_mli/include + MLI_LIB = third_party/embarc_osp/library/embarc_mli/lib/arcem9d/libmli_iotdk.a + + THIRD_PARTY_CC_HDRS += \ + third_party/embarc_osp/LICENSE +else + MLI_LIB_DIR = embarc_mli_$(basename $(TCF_FILE_NAME)) + + $(eval $(call add_third_party_download,$(EMBARC_MLI_URL),$(EMBARC_MLI_MD5),$(MLI_LIB_DIR),build_embarc_mli,$(TCF_FILE))) + + MLI_INCLUDE_FOLDER = $(MLI_LIB_DIR)/include + MLI_LIB = third_party/$(MLI_LIB_DIR)/bin/libmli.a + MICROLITE_LIBS += $(MAKEFILE_DIR)/downloads/$(MLI_LIB_DIR)/bin/libmli.a + + THIRD_PARTY_CC_HDRS += \ + third_party/$(MLI_LIB_DIR)/LICENSE +endif + + THIRD_PARTY_CC_HDRS += $(MLI_LIB) + GENERATED_PROJECT_LIBS += $(MLI_LIB) + + INCLUDES += \ + -I$(MAKEFILE_DIR)/downloads/$(MLI_INCLUDE_FOLDER) \ + -I$(MAKEFILE_DIR)/downloads/$(MLI_INCLUDE_FOLDER)/api + + GENERATED_PROJECT_INCLUDES += \ + -I. 
\ + -I./third_party/$(MLI_INCLUDE_FOLDER) \ + -I./third_party/$(MLI_INCLUDE_FOLDER)/api + + + THIRD_PARTY_CC_HDRS += \ + third_party/$(MLI_INCLUDE_FOLDER)/mli_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/mli_config.h \ + third_party/$(MLI_INCLUDE_FOLDER)/mli_types.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_helpers_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_kernels_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_avepool_spec_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_conv2d_spec_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_depthwise_conv2d_spec_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_maxpool_spec_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_mov_api.h + + MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/scratch_buffers.h + MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/scratch_buffers.cc + MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h + MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc + MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/mli_slicers.h + MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/mli_slicers.cc + MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/mli_tf_utils.h + + MICROLITE_CC_HDRS += tensorflow/lite/micro/tools/make/targets/arc/memory.lcf + +endif # USE_EMBARC_MLI + +endif # ARC_TOOLCHAIN +endif # TARGET_ARCH + diff --git a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc index 1b30e6ac6d0..87d1b736807 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc @@ -8,157 +8,18 @@ ifeq ($(TARGET_ARCH), arc) # Known target are specifyed with their own make configurations. ifeq ($(filter $(TARGET), emsdp iotdk),) +ARC_TOOLCHAIN := mwdt + ifneq ($(TCF_FILE), ) TARGET = $(basename $(notdir $(TCF_FILE))) else + $(warning TCF_FILE variable is not specifyed. 
Use default em7d_voice_audio configuration) TARGET = em7d_voice_audio TCF_FILE = em7d_voice_audio endif -# The variable TCF_FILE_NAME stores the TCF file name (including .tcf extension), this variable is used later to add the option to the linker/compiler flags. -# This condition also handles the case when the user/makefile specifies the configuration bundled with MWDT (usually without .tcf extension) and that doesn't require copying. -ifneq (,$(findstring .tcf,$(TCF_FILE))) - TCF_FILE_NAME = $(notdir $(TCF_FILE)) - ARC_TARGET_FILES_DIRS := $(dir $(TCF_FILE)) - MAKE_PROJECT_FILES += $(TCF_FILE_NAME) -else - TCF_FILE_NAME = $(TCF_FILE) -endif +include $(MAKEFILE_DIR)/targets/arc/arc_common.inc - CC_TOOL = ccac - AR_TOOL = arac - CXX_TOOL = ccac - LD_TOOL = ccac - - # TODO: Move this to a common arc/arc_common.inc file to share this with other targets - DLR = $$$$ - ARC_EXTRA_APP_SETTINGS = - - ARC_EXTRA_APP_RULES = - - ARC_EXTRA_RM_TARGETS = - - ARC_BIN_DEPEND = - ARC_BIN_RULE = \t$(DLR)\(error Flash rule isnt defined for this ARC target\) - - ARC_APP_RUN_CMD = mdb -run -jit -tcf=$(TCF_FILE_NAME) $(DLR)\(DBG_ARGS) - ARC_APP_DEBUG_CMD = mdb -OK -jit -tcf=$(TCF_FILE_NAME) $(DLR)\(DBG_ARGS) - ARC_EXTRA_EXECUTE_RULES = - - - PLATFORM_FLAGS = -tcf=$(TCF_FILE_NAME) -tcf_core_config - PLATFORM_FLAGS += -Hnocopyr -O3 -Hpurge -Hdense_prologue -Hon=Long_enums -fslp-vectorize-aggressive -ffunction-sections -fdata-sections - - # Use compact CRT. 
It requires pre-defined heap size - PLATFORM_FLAGS += -Hcl -Hcrt_fast_memcpy -Hcrt_fast_memset - - PLATFORM_LDFLAGS = -tcf=$(TCF_FILE_NAME) - PLATFORM_LDFLAGS += -Hnocopyr -m -Hldopt=-Coutput=memory.map -Hheap=2K -ifneq ($(LCF_FILE), ) - PLATFORM_LDFLAGS += $(notdir $(LCF_FILE)) - MAKE_PROJECT_FILES += $(notdir $(LCF_FILE)) -ifeq ($(filter $(ARC_TARGET_FILES_DIRS), $(dir LCF_FILE)),) - ARC_TARGET_FILES_DIRS += $(dir $(LCF_FILE)) -endif -endif - - CXXFLAGS += $(PLATFORM_FLAGS) - CXXFLAGS:=$(filter-out -std=c++11,$(CXXFLAGS)) - CCFLAGS += $(PLATFORM_FLAGS) - CCFLAGS:=$(filter-out -std=c11,$(CCFLAGS)) - LDFLAGS += $(PLATFORM_LDFLAGS) - - - MICROLITE_LIBS := $(filter-out -lm,$(MICROLITE_LIBS)) - - # TODO: Move/organize embarc_mli usage in an implied way (see ext_libs/cmsis.inc for example - USE_EMBARC_MLI ?= true - -ifeq ($(USE_EMBARC_MLI), true) - # TODO: To understand why it's done here. The same is performed in the higher level MakeFile. - ALL_TAGS += arc - -ifeq ($(PRE_COMPILED_MLI),true) - $(eval $(call add_third_party_download,$(EMBARC_OSP_URL),$(EMBARC_OSP_MD5),embarc_osp,)) - - MLI_INCLUDE_FOLDER = embarc_osp/library/embarc_mli/include - MLI_LIB = third_party/embarc_osp/library/embarc_mli/lib/arcem9d/libmli_iotdk.a - - THIRD_PARTY_CC_HDRS += \ - third_party/embarc_osp/LICENSE -else - MLI_LIB_DIR = embarc_mli_$(basename $(TCF_FILE_NAME)) - - $(eval $(call add_third_party_download,$(EMBARC_MLI_URL),$(EMBARC_MLI_MD5),$(MLI_LIB_DIR),build_embarc_mli,$(TCF_FILE))) - - MLI_INCLUDE_FOLDER = $(MLI_LIB_DIR)/include - MLI_LIB = third_party/$(MLI_LIB_DIR)/bin/libmli.a - MICROLITE_LIBS += $(MAKEFILE_DIR)/downloads/$(MLI_LIB_DIR)/bin/libmli.a - - THIRD_PARTY_CC_HDRS += \ - third_party/$(MLI_LIB_DIR)/LICENSE -endif - - THIRD_PARTY_CC_HDRS += $(MLI_LIB) - GENERATED_PROJECT_LIBS += $(MLI_LIB) - - INCLUDES += \ - -I$(MAKEFILE_DIR)/downloads/$(MLI_INCLUDE_FOLDER) \ - -I$(MAKEFILE_DIR)/downloads/$(MLI_INCLUDE_FOLDER)/api - - GENERATED_PROJECT_INCLUDES += \ - -I. 
\ - -I./third_party/$(MLI_INCLUDE_FOLDER) \ - -I./third_party/$(MLI_INCLUDE_FOLDER)/api - - - THIRD_PARTY_CC_HDRS += \ - third_party/$(MLI_INCLUDE_FOLDER)/mli_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/mli_config.h \ - third_party/$(MLI_INCLUDE_FOLDER)/mli_types.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_helpers_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_kernels_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_avepool_spec_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_conv2d_spec_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_depthwise_conv2d_spec_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_maxpool_spec_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_mov_api.h - - MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/scratch_buffers.h - MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/scratch_buffers.cc - MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h - MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc - MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/mli_slicers.h - MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/mli_slicers.cc - MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/mli_tf_utils.h - - MICROLITE_CC_HDRS += tensorflow/lite/micro/tools/make/targets/arc/memory.lcf - -endif # USE_EMBARC_MLI - -# We overwrite project generator to exclude everything not relevant to ARC platform -define generate_microlite_projects -$(call generate_project,make,$(MAKE_PROJECT_FILES),$(1),$(MICROLITE_CC_SRCS) $(THIRD_PARTY_CC_SRCS) $(2),$(MICROLITE_CC_HDRS) $(THIRD_PARTY_CC_HDRS) $(MICROLITE_TEST_HDRS) $(3),$(LDFLAGS) $(MICROLITE_LIBS),$(CXXFLAGS) $(GENERATED_PROJECT_INCLUDES), $(CCFLAGS) $(GENERATED_PROJECT_INCLUDES)) -$(call generate_arc_project,make,$(MAKE_PROJECT_FILES),$(1),$(MICROLITE_CC_SRCS) $(THIRD_PARTY_CC_SRCS) $(2),$(MICROLITE_CC_HDRS) $(THIRD_PARTY_CC_HDRS) $(MICROLITE_TEST_HDRS) $(3),$(LDFLAGS) $(GENERATED_PROJECT_LIBS),$(CXXFLAGS) 
$(GENERATED_PROJECT_INCLUDES), $(CCFLAGS) $(GENERATED_PROJECT_INCLUDES)) -endef - -# Copy rule generator to do file copyes with changing paths in generated project -# Arguments are: -# 1 - Path files in generated project. -# 2 - Path files in the source repo -# Used in helper_functions.inc for arc projects to copy files -define path_changing_copy_file -$(1)/%: $(2)/% - @mkdir -p $$(dir $$@) - @cp $$< $$@ -endef - -# These are microcontroller-specific rules for converting the ELF output -# of the linker into a binary image that can be loaded directly. -# Not applicable for ARC, leaving it empty. -$(BINDIR)%.bin: - - -endif # ifeq ($(filter $(TARGET),$(ARC_PREDEFINED_TARGETS)),) -endif # ifeq ($(TARGET_ARCH), arc) +endif # $(TARGET) +endif # $(TARGET_ARCH)... diff --git a/tensorflow/lite/micro/tools/make/targets/emsdp_makefile.inc b/tensorflow/lite/micro/tools/make/targets/emsdp_makefile.inc index 86e9d9e7379..9901fd82b07 100644 --- a/tensorflow/lite/micro/tools/make/targets/emsdp_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/emsdp_makefile.inc @@ -1,14 +1,16 @@ # Settings for arc processors ifeq ($(TARGET), emsdp) - TARGET_ARCH = arc + TARGET_ARCH := arc + ARC_TOOLCHAIN := mwdt + + TCF_FILE := $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp/emsdp_em11d_dfss.tcf + LCF_FILE := $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp/emsdp.lcf + UBOOT_FILE := $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp/uboot.env + UBOOT_FILE_NAME := $(notdir $(UBOOT_FILE)) + +include $(MAKEFILE_DIR)/targets/arc/arc_common.inc - CC_TOOL = ccac - AR_TOOL = arac - CXX_TOOL = ccac - LD_TOOL = ccac - - DLR = $$$$ ARC_EXTRA_APP_SETTINGS = \ BIN_DIR = .$(DLR)\(PS\)bin\n\ BIN_FILE = $(DLR)\(BIN_DIR\)$(DLR)\(PS\)app.elf\n @@ -16,7 +18,7 @@ ifeq ($(TARGET), emsdp) ARC_EXTRA_APP_RULES = \ $(DLR)\(BIN_FILE\): $(DLR)\(BIN_DIR\) $(DLR)\(OUT_NAME\)\ \n\t\@$(DLR)\(CP\) $(DLR)\(OUT_NAME\) $(DLR)\(BIN_FILE\)\ - \n\t\@$(DLR)\(CP\) uboot.env $(DLR)\(BIN_DIR\)$(DLR)\(PS)uboot.env\ + \n\t\@$(DLR)\(CP\) 
$(UBOOT_FILE_NAME) $(DLR)\(BIN_DIR\)$(DLR)\(PS\)$(UBOOT_FILE_NAME)\ \n \ \n$(DLR)\(BIN_DIR\):\ \n\t\@$(DLR)\(MKDIR\) $(DLR)\(BIN_DIR\)\ @@ -26,135 +28,17 @@ ifeq ($(TARGET), emsdp) ARC_BIN_DEPEND = $(DLR)\(BIN_DIR\) $(DLR)\(BIN_FILE\) ARC_BIN_RULE = \t@echo Copy content of $(DLR)\(BIN_DIR\) into the root of SD card and follow instructions - ARC_APP_RUN_CMD = mdb -run -digilent -nooptions $(DLR)\(DBG_ARGS) - ARC_APP_DEBUG_CMD = mdb -OK -digilent -nooptions $(DLR)\(DBG_ARGS) + ARC_APP_RUN_CMD = mdb -run -digilent -nooptions $(DLR)\(DBG_ARGS\) + ARC_APP_DEBUG_CMD = mdb -OK -digilent -nooptions $(DLR)\(DBG_ARGS\) ARC_EXTRA_EXECUTE_RULES = - TCF_FILE = $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp/emsdp_em11d_dfss.tcf - LCF_FILE = $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp/emsdp.lcf - - MAKE_PROJECT_FILES += $(notdir $(TCF_FILE)) $(notdir $(LCF_FILE)) uboot.env - - ARC_TARGET_FILES_DIRS = $(dir $(TCF_FILE)) -ifneq ($(dir $(TCF_FILE)), $(dir $(LCF_FILE))) - ARC_TARGET_FILES_DIRS += $(dir $(LCF_FILE)) + MAKE_PROJECT_FILES += $(UBOOT_FILE_NAME) +ifeq ($(filter $(ARC_TARGET_FILES_DIRS), $(dir $(UBOOT_FILE))),) + ARC_TARGET_FILES_DIRS += $(dir $(UBOOT_FILE)) endif -# TODO: LESS TCF/LCF Variables -# The variable TCF_FILE_NAME stores the TCF file name (including .tcf extension), this variable is used later to add the option to the linker/compiler flags. -# This condition also handles the case when the user/makefile specifies the configuration bundled with MWDT (usually without .tcf extension) and that doesn't require copying. - TCF_FILE_NAME = $(notdir $(TCF_FILE)) - - PLATFORM_FLAGS = -tcf=$(TCF_FILE_NAME) -tcf_core_config - PLATFORM_FLAGS += -Hnocopyr -O3 -Hpurge -Hdense_prologue -Hon=Long_enums -fslp-vectorize-aggressive -ffunction-sections -fdata-sections - - # Use compact CRT. 
It requires pre-defined heap size - PLATFORM_FLAGS += -Hcl -Hcrt_fast_memcpy -Hcrt_fast_memset - - PLATFORM_LDFLAGS = -tcf=$(TCF_FILE_NAME) -Hnocopyr -m -Hldopt=-Coutput=memory.map -Hheap=2K - # for default EMSD configuration we can use defaul em9d rt libs - # for better performance runime should be rebuilt for emsdp configuration + # for better performance runtime should be built for emsdp configuration PLATFORM_LDFLAGS += -Hlib=em9d_voice_audio -# DMITRYZ: I think we need to move it to target specific LCF file. - PLATFORM_LDFLAGS += $(notdir $(LCF_FILE)) - - CXXFLAGS += $(PLATFORM_FLAGS) - CXXFLAGS:=$(filter-out -std=c++11,$(CXXFLAGS)) - CCFLAGS += $(PLATFORM_FLAGS) - CCFLAGS:=$(filter-out -std=c11,$(CCFLAGS)) - LDFLAGS += $(PLATFORM_LDFLAGS) - - MICROLITE_LIBS := $(filter-out -lm,$(MICROLITE_LIBS)) - - # DMITRYZ: Here we need to check tags on "no_embarc_mli". - USE_EMBARC_MLI ?= true - -ifeq ($(USE_EMBARC_MLI), true) - ALL_TAGS += arc - -ifeq ($(PRE_COMPILED_MLI),true) - $(eval $(call add_third_party_download,$(EMBARC_OSP_URL),$(EMBARC_OSP_MD5),embarc_osp,)) - - MLI_INCLUDE_FOLDER = embarc_osp/library/embarc_mli/include - MLI_LIB = third_party/embarc_osp/library/embarc_mli/lib/arcem9d/libmli_iotdk.a - - THIRD_PARTY_CC_HDRS += \ - third_party/embarc_osp/LICENSE -else - MLI_LIB_DIR = embarc_mli_$(basename $(TCF_FILE_NAME)) - - $(eval $(call add_third_party_download,$(EMBARC_MLI_URL),$(EMBARC_MLI_MD5),$(MLI_LIB_DIR),build_embarc_mli,$(TCF_FILE))) - - MLI_INCLUDE_FOLDER = $(MLI_LIB_DIR)/include - MLI_LIB = third_party/$(MLI_LIB_DIR)/bin/libmli.a - MICROLITE_LIBS += $(MAKEFILE_DIR)/downloads/$(MLI_LIB_DIR)/bin/libmli.a - - THIRD_PARTY_CC_HDRS += \ - third_party/$(MLI_LIB_DIR)/LICENSE -endif - - THIRD_PARTY_CC_HDRS += $(MLI_LIB) - GENERATED_PROJECT_LIBS += $(MLI_LIB) - - INCLUDES += \ - -I$(MAKEFILE_DIR)/downloads/$(MLI_INCLUDE_FOLDER) \ - -I$(MAKEFILE_DIR)/downloads/$(MLI_INCLUDE_FOLDER)/api - - GENERATED_PROJECT_INCLUDES += \ - -I. 
\ - -I./third_party/$(MLI_INCLUDE_FOLDER) \ - -I./third_party/$(MLI_INCLUDE_FOLDER)/api - - - THIRD_PARTY_CC_HDRS += \ - third_party/$(MLI_INCLUDE_FOLDER)/mli_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/mli_config.h \ - third_party/$(MLI_INCLUDE_FOLDER)/mli_types.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_helpers_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_kernels_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_avepool_spec_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_conv2d_spec_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_depthwise_conv2d_spec_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_maxpool_spec_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_mov_api.h - - MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/scratch_buffers.h - MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/scratch_buffers.cc - MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h - MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc - MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/mli_slicers.h - MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/mli_slicers.cc - MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/mli_tf_utils.h - - MICROLITE_CC_HDRS += tensorflow/lite/micro/tools/make/targets/arc/memory.lcf - -endif # USE_EMBARC_MLI - -# We overwrite project generator to exclude everything not relevant to ARC platform -define generate_microlite_projects -$(call generate_project,make,$(MAKE_PROJECT_FILES),$(1),$(MICROLITE_CC_SRCS) $(THIRD_PARTY_CC_SRCS) $(2),$(MICROLITE_CC_HDRS) $(THIRD_PARTY_CC_HDRS) $(MICROLITE_TEST_HDRS) $(3),$(LDFLAGS) $(MICROLITE_LIBS),$(CXXFLAGS) $(GENERATED_PROJECT_INCLUDES), $(CCFLAGS) $(GENERATED_PROJECT_INCLUDES)) -$(call generate_arc_project,make,$(MAKE_PROJECT_FILES),$(1),$(MICROLITE_CC_SRCS) $(THIRD_PARTY_CC_SRCS) $(2),$(MICROLITE_CC_HDRS) $(THIRD_PARTY_CC_HDRS) $(MICROLITE_TEST_HDRS) $(3),$(LDFLAGS) $(GENERATED_PROJECT_LIBS),$(CXXFLAGS) 
$(GENERATED_PROJECT_INCLUDES), $(CCFLAGS) $(GENERATED_PROJECT_INCLUDES)) -endef - -# Copy rule generator to do file copyes changing paths in generated project -# Arguments are: -# 1 - Path files in generated project. -# 2 - Path files in the source repo -# Used in helper_functions.inc for arc projects to copy files -define path_changing_copy_file -$(1)/%: $(2)/% - @mkdir -p $$(dir $$@) - @cp $$< $$@ -endef - - - -# These are microcontroller-specific rules for converting the ELF output -# of the linker into a binary image that can be loaded directly. - -# Not applicable for ARC, leaving it empty. -$(BINDIR)%.bin: - endif From 2226b67dc3bb0a55b30a6599a94454715afba102 Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Tue, 7 Apr 2020 12:53:32 +0300 Subject: [PATCH 21/45] changed EMSDP to ARC_EMSDP and other minor fixes regarding guidline --- .../micro/{emsdp => arc_emsdp}/debug_log.cc | 82 ++++++++++--------- .../micro/tools/make/download_and_extract.sh | 3 +- .../tools/make/targets/arc/arc_common.inc | 22 ++++- ...dp_makefile.inc => arc_emsdp_makefile.inc} | 20 ++++- .../micro/tools/make/targets/arc_makefile.inc | 21 ++++- .../make/templates/arc/arc_app_makefile.tpl | 22 ----- 6 files changed, 97 insertions(+), 73 deletions(-) rename tensorflow/lite/micro/{emsdp => arc_emsdp}/debug_log.cc (55%) rename tensorflow/lite/micro/tools/make/targets/{emsdp_makefile.inc => arc_emsdp_makefile.inc} (66%) diff --git a/tensorflow/lite/micro/emsdp/debug_log.cc b/tensorflow/lite/micro/arc_emsdp/debug_log.cc similarity index 55% rename from tensorflow/lite/micro/emsdp/debug_log.cc rename to tensorflow/lite/micro/arc_emsdp/debug_log.cc index 7d932939a0b..57eea6a5579 100644 --- a/tensorflow/lite/micro/emsdp/debug_log.cc +++ b/tensorflow/lite/micro/arc_emsdp/debug_log.cc @@ -1,4 +1,4 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -23,16 +23,20 @@ limitations under the License. // EMSDP_LOG_TO_MEMORY // : fill .debug_log memory region (data section) with passed chars. // EMSDP_LOG_TO_HOST -// : Use hostlink to print output log. +// : Use MetaWare HostLink to print output log. Requires Synopsys MetaWare debugger // EMSDP_LOG_TO_UART // : use default debug UART (out to FTDI channel 0). The same USB Port is used for JTAG. #define EMSDP_LOG_TO_UART +// Memory size for symbols dump in EMSDP_LOG_TO_MEMORY destination +#define EMSDP_LOG_TO_MEMORY_SIZE (2 * 1024) -// For simplicity we assume U-boot has already initialized debug console durion -// application loading (or on reset). Hence we use only status and data registers + +// For simplicity we assume U-boot has already initialized debug console during +// application loading (or on reset). Hence, we use only status and data registers // to organize blocking loop for printing symbols. No input and no IRQ handling. // See embarc_osp repository for full EMSDP uart driver. +// (https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_osp) // TODO: Consider U-Boot API to do it in a less "hacky" way. 
void DbgUartSendStr(const char* s) { #define EMSDP_DBG_UART_BASE (0xF0004000U) @@ -40,48 +44,48 @@ void DbgUartSendStr(const char* s) { #define DW_UART_USR_TFNF (0x02) #define DW_UART_LSR_TXD_EMPTY (0x20) - typedef volatile struct dw_uart_reg { - uint32_t DATA; /*!< data in/out and DLL */ - uint32_t RES1[4]; - uint32_t LSR; /*!< Line Status Register */ - uint32_t RES2[25]; - uint32_t USR; /*!< UART status register */ - uint32_t RES3[29]; - uint32_t CPR; /*!< Component parameter register */ - } DW_UART_REG; + typedef volatile struct dw_uart_reg { + uint32_t DATA; /*!< data in/out and DLL */ + uint32_t RES1[4]; + uint32_t LSR; /*!< Line Status Register */ + uint32_t RES2[25]; + uint32_t USR; /*!< UART status register */ + uint32_t RES3[29]; + uint32_t CPR; /*!< Component parameter register */ + } DW_UART_REG; - DW_UART_REG* uart_reg_ptr = (DW_UART_REG*)(EMSDP_DBG_UART_BASE); - const char* src = s; - while (*src) { - // Check uart status to send char - bool uart_is_ready = false; - if (uart_reg_ptr->CPR & DW_UART_CPR_FIFO_STAT) - uart_is_ready = ((uart_reg_ptr->USR & DW_UART_USR_TFNF) != 0); - else - uart_is_ready = ((uart_reg_ptr->LSR & DW_UART_LSR_TXD_EMPTY) != 0); + DW_UART_REG* uart_reg_ptr = (DW_UART_REG*)(EMSDP_DBG_UART_BASE); + const char* src = s; + while (*src) { + // Check uart status to send char + bool uart_is_ready = false; + if (uart_reg_ptr->CPR & DW_UART_CPR_FIFO_STAT) + uart_is_ready = ((uart_reg_ptr->USR & DW_UART_USR_TFNF) != 0); + else + uart_is_ready = ((uart_reg_ptr->LSR & DW_UART_LSR_TXD_EMPTY) != 0); - // Send char if uart is ready. - if (uart_is_ready) - uart_reg_ptr->DATA = *src++; - } + // Send char if uart is ready. + if (uart_is_ready) + uart_reg_ptr->DATA = *src++; + } } -// Simple symbols dump to a pre-allocated memory region. +// Simple dump of symbols to a pre-allocated memory region. +// When total log exceeds memory region size, cursor is moved to its begining. // The memory region can be viewed afterward with debugger. 
// It can be viewed/read with debugger afterward. void LogToMem(const char* s) { - constexpr int kDebugLogMemChars = 2 * 1024; - static int cursor = 0; + static int cursor = 0; #pragma Bss(".debug_log") - volatile static char debug_log_mem[kDebugLogMemChars]; + volatile static char debug_log_mem[EMSDP_LOG_TO_MEMORY_SIZE]; #pragma Bss() - const char* src = s; - while (*src) { - debug_log_mem[cursor] = *src++; - cursor = (cursor < kDebugLogMemChars) ? cursor + 1 : 0; - } - debug_log_mem[cursor] = '^'; + const char* src = s; + while (*src) { + debug_log_mem[cursor] = *src++; + cursor = (cursor < EMSDP_LOG_TO_MEMORY_SIZE) ? cursor + 1 : 0; + } + debug_log_mem[cursor] = '^'; } @@ -89,17 +93,17 @@ extern "C" void DebugLog(const char* s) { #ifndef TF_LITE_STRIP_ERROR_STRINGS #if defined EMSDP_LOG_TO_UART - DbgUartSendStr(s); + DbgUartSendStr(s); #endif #if defined EMSDP_LOG_TO_MEMORY #warning "EMSDP_LOG_TO_MEMORY is defined. View .debug_log memory region for stdout" - LogToMem(s); + LogToMem(s); #endif #if defined EMSDP_LOG_TO_HOST #warning "EMSDP_LOG_TO_HOST is defined. Ensure hostlib is linked." 
- fprintf(stderr, "%s", s); + fprintf(stderr, "%s", s); #endif #endif // TF_LITE_STRIP_ERROR_STRINGS diff --git a/tensorflow/lite/micro/tools/make/download_and_extract.sh b/tensorflow/lite/micro/tools/make/download_and_extract.sh index 4a75b6b24cd..5b06e4e819a 100755 --- a/tensorflow/lite/micro/tools/make/download_and_extract.sh +++ b/tensorflow/lite/micro/tools/make/download_and_extract.sh @@ -170,7 +170,8 @@ download_and_extract() { elif [[ ${action} == "patch_cifar10_dataset" ]]; then patch_cifar10_dataset ${dir} elif [[ ${action} == "build_embarc_mli" ]]; then - build_embarc_mli ${dir} ${action_param1} + cp ${action_param1} ${dir}/hw/arc.tcf + build_embarc_mli ${dir} ../../hw/arc.tcf elif [[ ${action} ]]; then echo "Unknown action '${action}'" exit 1 diff --git a/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc b/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc index e20887abb07..50bb5c96799 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc @@ -1,4 +1,18 @@ -# Common Settings for ARC platform and it's projects. +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Common Settings for ARC platform and its projects. 
# Might be reused across different targets ifeq ($(TARGET_ARCH), arc) @@ -6,7 +20,7 @@ ifeq ($(TARGET_ARCH), arc) DLR := $$$$ # List of folders to search project files for copy with path changing - # For instance, TCF and LCF files are copyed into the root of generated project + # For instance, TCF and LCF files are copied into the root of generated project ARC_TARGET_FILES_DIRS ?= # For the following variables see arc_app_makefile.tpl for usage @@ -36,14 +50,14 @@ ifeq ($(TARGET_ARCH), arc) ARC_EXTRA_EXECUTE_RULES ?= # We overwrite project generator to exclude everything not relevant to ARC platform. -# ARC targets doesn't can't work with mbed, keil or other architecture specific development tools +# ARC targets cannot work with non-ARC development tools. # Basic make project is updated to be applicable for general ARC platform define generate_microlite_projects $(call generate_project,make,$(MAKE_PROJECT_FILES),$(1),$(MICROLITE_CC_SRCS) $(THIRD_PARTY_CC_SRCS) $(2),$(MICROLITE_CC_HDRS) $(THIRD_PARTY_CC_HDRS) $(MICROLITE_TEST_HDRS) $(3),$(LDFLAGS) $(MICROLITE_LIBS),$(CXXFLAGS) $(GENERATED_PROJECT_INCLUDES), $(CCFLAGS) $(GENERATED_PROJECT_INCLUDES)) $(call generate_arc_project,make,$(MAKE_PROJECT_FILES),$(1),$(MICROLITE_CC_SRCS) $(THIRD_PARTY_CC_SRCS) $(2),$(MICROLITE_CC_HDRS) $(THIRD_PARTY_CC_HDRS) $(MICROLITE_TEST_HDRS) $(3),$(LDFLAGS) $(GENERATED_PROJECT_LIBS),$(CXXFLAGS) $(GENERATED_PROJECT_INCLUDES), $(CCFLAGS) $(GENERATED_PROJECT_INCLUDES)) endef -# Copy rule generator to do file copyes with changing paths in generated project +# Copy rule generator to do file copies with changing paths in generated project # Arguments are: # 1 - Path files in generated project. 
# 2 - Path files in the source repo diff --git a/tensorflow/lite/micro/tools/make/targets/emsdp_makefile.inc b/tensorflow/lite/micro/tools/make/targets/arc_emsdp_makefile.inc similarity index 66% rename from tensorflow/lite/micro/tools/make/targets/emsdp_makefile.inc rename to tensorflow/lite/micro/tools/make/targets/arc_emsdp_makefile.inc index 9901fd82b07..a84dd15e4e8 100644 --- a/tensorflow/lite/micro/tools/make/targets/emsdp_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc_emsdp_makefile.inc @@ -1,5 +1,19 @@ -# Settings for arc processors -ifeq ($(TARGET), emsdp) +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Settings for EMSDP target (ARC processor) +ifeq ($(TARGET), arc_emsdp) TARGET_ARCH := arc ARC_TOOLCHAIN := mwdt @@ -37,7 +51,7 @@ ifeq ($(filter $(ARC_TARGET_FILES_DIRS), $(dir $(UBOOT_FILE))),) ARC_TARGET_FILES_DIRS += $(dir $(UBOOT_FILE)) endif - # for default EMSD configuration we can use defaul em9d rt libs + # for default EMSD configuration we can use default em9d rt libs # for better performance runtime should be built for emsdp configuration PLATFORM_LDFLAGS += -Hlib=em9d_voice_audio diff --git a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc index 87d1b736807..db474a54b2d 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc @@ -1,19 +1,32 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Settings for not pre-defined ARC processors. # User need to specify ARC target with Tool Configuration File (*.tcf). # Path to this file must be passed through TCF_FILE variable. # Otherwise, default em7d_voice_audio configuration is used - ifeq ($(TARGET_ARCH), arc) -# Known target are specifyed with their own make configurations. -ifeq ($(filter $(TARGET), emsdp iotdk),) +# Known target are specified with their own make configurations. 
+ifeq ($(filter $(TARGET), arc_emsdp arc_iotdk),) ARC_TOOLCHAIN := mwdt ifneq ($(TCF_FILE), ) TARGET = $(basename $(notdir $(TCF_FILE))) else - $(warning TCF_FILE variable is not specifyed. Use default em7d_voice_audio configuration) + $(warning TCF_FILE variable is not specified. Use default em7d_voice_audio configuration) TARGET = em7d_voice_audio TCF_FILE = em7d_voice_audio endif diff --git a/tensorflow/lite/micro/tools/make/templates/arc/arc_app_makefile.tpl b/tensorflow/lite/micro/tools/make/templates/arc/arc_app_makefile.tpl index f79d04b26d1..a1a3ab71028 100644 --- a/tensorflow/lite/micro/tools/make/templates/arc/arc_app_makefile.tpl +++ b/tensorflow/lite/micro/tools/make/templates/arc/arc_app_makefile.tpl @@ -29,28 +29,6 @@ else DEV_NULL=/dev/null endif -# Note: Windows escaping rules is very combersome -# initially I tried to use Q=^, but this depends on the context and (looks like) on Win version. -# Also expecially ugly thing is that in quoted strings the quotes the same are remain. -# Batch has special parameter expansion syntax to remove quotes, -# but many tools themselves remove quotes (unless escaped with backslash) -# So finally we've found that in our use cases we may not escaping any symbols but prepend backslashes before quotes. 
- -quote=$(subst %,$(Q)%, \ - $(subst &,$(Q)&, \ - $(subst <,$(Q)<, \ - $(subst >,$(Q)>, \ - $(subst |,$(Q)|, \ - $(subst ',$(Q)', \ - $(subst $(COMMA),$(Q)$(COMMA), \ - $(subst =,$(Q)=, \ - $(subst $(OPEN_PAREN),$(Q)$(OPEN_PAREN), \ - $(subst $(CLOSE_PAREN),$(Q)$(CLOSE_PAREN), \ - $(subst !,$(Q)!, \ - $(subst ",$(BACKSLASH)", \ - $(subst $(Q),$(Q)$(Q), \ - $(1) ))))))))))))) - #============================================================= # Toolchain definitions #============================================================= From a7dcdb21f69ca8a5078ad855044e76fefa4f0199 Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Wed, 8 Apr 2020 15:11:41 +0300 Subject: [PATCH 22/45] Move out of function ARC EMSDP UART related constatnts --- tensorflow/lite/micro/arc_emsdp/debug_log.cc | 33 +++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/tensorflow/lite/micro/arc_emsdp/debug_log.cc b/tensorflow/lite/micro/arc_emsdp/debug_log.cc index 57eea6a5579..b3b25f88ac1 100644 --- a/tensorflow/lite/micro/arc_emsdp/debug_log.cc +++ b/tensorflow/lite/micro/arc_emsdp/debug_log.cc @@ -31,6 +31,24 @@ limitations under the License. // Memory size for symbols dump in EMSDP_LOG_TO_MEMORY destination #define EMSDP_LOG_TO_MEMORY_SIZE (2 * 1024) +// EMSDP Debug UART related defines (registers and bits) +#define EMSDP_DBG_UART_BASE (0xF0004000U) +#define DW_UART_CPR_FIFO_STAT (1 << 10) +#define DW_UART_USR_TFNF (0x02) +#define DW_UART_LSR_TXD_EMPTY (0x20) + +// EMSDP UART registers map (only necessairy fields) +typedef volatile struct dw_uart_reg { + uint32_t DATA; /* data in/out and DLL */ + uint32_t RES1[4]; + uint32_t LSR; /* Line Status Register */ + uint32_t RES2[25]; + uint32_t USR; /* UART status register */ + uint32_t RES3[29]; + uint32_t CPR; /* Component parameter register */ +} DW_UART_REG; + + // For simplicity we assume U-boot has already initialized debug console during // application loading (or on reset). 
Hence, we use only status and data registers @@ -39,21 +57,6 @@ limitations under the License. // (https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_osp) // TODO: Consider U-Boot API to do it in a less "hacky" way. void DbgUartSendStr(const char* s) { -#define EMSDP_DBG_UART_BASE (0xF0004000U) -#define DW_UART_CPR_FIFO_STAT (1<<10) -#define DW_UART_USR_TFNF (0x02) -#define DW_UART_LSR_TXD_EMPTY (0x20) - - typedef volatile struct dw_uart_reg { - uint32_t DATA; /*!< data in/out and DLL */ - uint32_t RES1[4]; - uint32_t LSR; /*!< Line Status Register */ - uint32_t RES2[25]; - uint32_t USR; /*!< UART status register */ - uint32_t RES3[29]; - uint32_t CPR; /*!< Component parameter register */ - } DW_UART_REG; - DW_UART_REG* uart_reg_ptr = (DW_UART_REG*)(EMSDP_DBG_UART_BASE); const char* src = s; while (*src) { From 105eac5030a346febc615202a4841330f2779c0b Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Wed, 8 Apr 2020 17:40:54 +0300 Subject: [PATCH 23/45] Include new parameters of generate_project for arc --- tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc b/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc index 50bb5c96799..67be50d4854 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc @@ -53,7 +53,7 @@ ifeq ($(TARGET_ARCH), arc) # ARC targets cannot work with non-ARC development tools. 
# Basic make project is updated to be applicable for general ARC platform define generate_microlite_projects -$(call generate_project,make,$(MAKE_PROJECT_FILES),$(1),$(MICROLITE_CC_SRCS) $(THIRD_PARTY_CC_SRCS) $(2),$(MICROLITE_CC_HDRS) $(THIRD_PARTY_CC_HDRS) $(MICROLITE_TEST_HDRS) $(3),$(LDFLAGS) $(MICROLITE_LIBS),$(CXXFLAGS) $(GENERATED_PROJECT_INCLUDES), $(CCFLAGS) $(GENERATED_PROJECT_INCLUDES)) +$(call generate_project,make,$(MAKE_PROJECT_FILES),$(1),$(MICROLITE_CC_SRCS) $(THIRD_PARTY_CC_SRCS) $(2),$(MICROLITE_CC_HDRS) $(THIRD_PARTY_CC_HDRS) $(MICROLITE_TEST_HDRS) $(3),$(LDFLAGS) $(MICROLITE_LIBS),$(CXXFLAGS) $(GENERATED_PROJECT_INCLUDES), $(CCFLAGS) $(GENERATED_PROJECT_INCLUDES),$(TARGET_TOOLCHAIN_ROOT),$(TARGET_TOOLCHAIN_PREFIX)) $(call generate_arc_project,make,$(MAKE_PROJECT_FILES),$(1),$(MICROLITE_CC_SRCS) $(THIRD_PARTY_CC_SRCS) $(2),$(MICROLITE_CC_HDRS) $(THIRD_PARTY_CC_HDRS) $(MICROLITE_TEST_HDRS) $(3),$(LDFLAGS) $(GENERATED_PROJECT_LIBS),$(CXXFLAGS) $(GENERATED_PROJECT_INCLUDES), $(CCFLAGS) $(GENERATED_PROJECT_INCLUDES)) endef From e85244f2c3833f63653a92081e75f3cb2412ccc3 Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Thu, 9 Apr 2020 15:12:31 +0300 Subject: [PATCH 24/45] Fix arc target list and build for built-in arc configurations --- tensorflow/lite/micro/tools/make/download_and_extract.sh | 8 ++++++-- tensorflow/lite/micro/tools/make/targets/arc_makefile.inc | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/micro/tools/make/download_and_extract.sh b/tensorflow/lite/micro/tools/make/download_and_extract.sh index 5b06e4e819a..3ab7c3ba7bd 100755 --- a/tensorflow/lite/micro/tools/make/download_and_extract.sh +++ b/tensorflow/lite/micro/tools/make/download_and_extract.sh @@ -170,8 +170,12 @@ download_and_extract() { elif [[ ${action} == "patch_cifar10_dataset" ]]; then patch_cifar10_dataset ${dir} elif [[ ${action} == "build_embarc_mli" ]]; then - cp ${action_param1} ${dir}/hw/arc.tcf - build_embarc_mli ${dir} 
../../hw/arc.tcf + if [[ "${action_param1}" == *.tcf ]]; then + cp ${action_param1} ${dir}/hw/arc.tcf + build_embarc_mli ${dir} ../../hw/arc.tcf + else + build_embarc_mli ${dir} ${action_param1} + fi elif [[ ${action} ]]; then echo "Unknown action '${action}'" exit 1 diff --git a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc index db474a54b2d..d379eea86f1 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc @@ -19,7 +19,7 @@ ifeq ($(TARGET_ARCH), arc) # Known target are specified with their own make configurations. -ifeq ($(filter $(TARGET), arc_emsdp arc_iotdk),) +ifeq ($(filter $(TARGET), arc_emsdp),) ARC_TOOLCHAIN := mwdt From 3006c316b64077a6bad64f42cb5e879351072b29 Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Mon, 13 Apr 2020 11:22:46 +0300 Subject: [PATCH 25/45] embARC MLI related code as an external library which might be turned-off --- .../micro/kernels/{arc => embarc_mli}/conv.cc | 8 +-- .../{arc => embarc_mli}/depthwise_conv.cc | 8 +-- .../{arc => embarc_mli}/fully_connected.cc | 9 ++- .../{arc => embarc_mli}/mli_slicers.cc | 0 .../kernels/{arc => embarc_mli}/mli_slicers.h | 0 .../{arc => embarc_mli}/mli_tf_utils.h | 0 .../kernels/{arc => embarc_mli}/pooling.cc | 8 +-- .../{arc => embarc_mli}/scratch_buf_mgr.cc | 4 +- .../{arc => embarc_mli}/scratch_buf_mgr.h | 0 .../{arc => embarc_mli}/scratch_buffers.cc | 2 +- .../{arc => embarc_mli}/scratch_buffers.h | 0 .../micro/tools/make/ext_libs/embarc_mli.inc | 67 +++++++++++++++++++ .../tools/make/targets/arc/arc_common.inc | 63 ----------------- 13 files changed, 86 insertions(+), 83 deletions(-) rename tensorflow/lite/micro/kernels/{arc => embarc_mli}/conv.cc (98%) rename tensorflow/lite/micro/kernels/{arc => embarc_mli}/depthwise_conv.cc (98%) rename tensorflow/lite/micro/kernels/{arc => embarc_mli}/fully_connected.cc (97%) rename 
tensorflow/lite/micro/kernels/{arc => embarc_mli}/mli_slicers.cc (100%) rename tensorflow/lite/micro/kernels/{arc => embarc_mli}/mli_slicers.h (100%) rename tensorflow/lite/micro/kernels/{arc => embarc_mli}/mli_tf_utils.h (100%) rename tensorflow/lite/micro/kernels/{arc => embarc_mli}/pooling.cc (98%) rename tensorflow/lite/micro/kernels/{arc => embarc_mli}/scratch_buf_mgr.cc (98%) rename tensorflow/lite/micro/kernels/{arc => embarc_mli}/scratch_buf_mgr.h (100%) rename tensorflow/lite/micro/kernels/{arc => embarc_mli}/scratch_buffers.cc (98%) rename tensorflow/lite/micro/kernels/{arc => embarc_mli}/scratch_buffers.h (100%) create mode 100644 tensorflow/lite/micro/tools/make/ext_libs/embarc_mli.inc diff --git a/tensorflow/lite/micro/kernels/arc/conv.cc b/tensorflow/lite/micro/kernels/embarc_mli/conv.cc similarity index 98% rename from tensorflow/lite/micro/kernels/arc/conv.cc rename to tensorflow/lite/micro/kernels/embarc_mli/conv.cc index 6cf26c7d6d9..b124b17f66d 100644 --- a/tensorflow/lite/micro/kernels/arc/conv.cc +++ b/tensorflow/lite/micro/kernels/embarc_mli/conv.cc @@ -24,10 +24,10 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" -#include "tensorflow/lite/micro/kernels/arc/mli_tf_utils.h" -#include "tensorflow/lite/micro/kernels/arc/scratch_buffers.h" -#include "tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h" -#include "tensorflow/lite/micro/kernels/arc/mli_slicers.h" +#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.h" +#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.h" +#include "tensorflow/lite/micro/kernels/embarc_mli/mli_slicers.h" +#include "tensorflow/lite/micro/kernels/embarc_mli/mli_tf_utils.h" #include "mli_api.h" diff --git a/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc b/tensorflow/lite/micro/kernels/embarc_mli/depthwise_conv.cc similarity index 98% rename from tensorflow/lite/micro/kernels/arc/depthwise_conv.cc rename to tensorflow/lite/micro/kernels/embarc_mli/depthwise_conv.cc index 74e48c8c064..0ad2a9fe6c6 100644 --- a/tensorflow/lite/micro/kernels/arc/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/embarc_mli/depthwise_conv.cc @@ -25,10 +25,10 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" -#include "tensorflow/lite/micro/kernels/arc/mli_tf_utils.h" -#include "tensorflow/lite/micro/kernels/arc/scratch_buffers.h" -#include "tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h" -#include "tensorflow/lite/micro/kernels/arc/mli_slicers.h" +#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.h" +#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.h" +#include "tensorflow/lite/micro/kernels/embarc_mli/mli_slicers.h" +#include "tensorflow/lite/micro/kernels/embarc_mli/mli_tf_utils.h" #include "mli_api.h" diff --git a/tensorflow/lite/micro/kernels/arc/fully_connected.cc b/tensorflow/lite/micro/kernels/embarc_mli/fully_connected.cc similarity index 97% rename from tensorflow/lite/micro/kernels/arc/fully_connected.cc rename to tensorflow/lite/micro/kernels/embarc_mli/fully_connected.cc index cc9b95c570a..8088634f8de 100644 --- a/tensorflow/lite/micro/kernels/arc/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/embarc_mli/fully_connected.cc @@ -23,14 +23,13 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/arc/mli_tf_utils.h" -#include "tensorflow/lite/micro/kernels/arc/scratch_buffers.h" -#include "tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h" -#include "tensorflow/lite/micro/kernels/arc/mli_slicers.h" +#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.h" +#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.h" +#include "tensorflow/lite/micro/kernels/embarc_mli/mli_tf_utils.h" +#include "tensorflow/lite/micro/kernels/embarc_mli/mli_slicers.h" #include "mli_api.h" - namespace tflite { namespace ops { namespace micro { diff --git a/tensorflow/lite/micro/kernels/arc/mli_slicers.cc b/tensorflow/lite/micro/kernels/embarc_mli/mli_slicers.cc similarity index 100% rename from tensorflow/lite/micro/kernels/arc/mli_slicers.cc rename to tensorflow/lite/micro/kernels/embarc_mli/mli_slicers.cc diff --git a/tensorflow/lite/micro/kernels/arc/mli_slicers.h b/tensorflow/lite/micro/kernels/embarc_mli/mli_slicers.h similarity index 100% rename from tensorflow/lite/micro/kernels/arc/mli_slicers.h rename to tensorflow/lite/micro/kernels/embarc_mli/mli_slicers.h diff --git a/tensorflow/lite/micro/kernels/arc/mli_tf_utils.h b/tensorflow/lite/micro/kernels/embarc_mli/mli_tf_utils.h similarity index 100% rename from tensorflow/lite/micro/kernels/arc/mli_tf_utils.h rename to tensorflow/lite/micro/kernels/embarc_mli/mli_tf_utils.h diff --git a/tensorflow/lite/micro/kernels/arc/pooling.cc b/tensorflow/lite/micro/kernels/embarc_mli/pooling.cc similarity index 98% rename from tensorflow/lite/micro/kernels/arc/pooling.cc rename to tensorflow/lite/micro/kernels/embarc_mli/pooling.cc index 7a26a10e23b..a147171a859 100644 --- a/tensorflow/lite/micro/kernels/arc/pooling.cc +++ b/tensorflow/lite/micro/kernels/embarc_mli/pooling.cc @@ 
-20,10 +20,10 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" -#include "tensorflow/lite/micro/kernels/arc/mli_tf_utils.h" -#include "tensorflow/lite/micro/kernels/arc/scratch_buffers.h" -#include "tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h" -#include "tensorflow/lite/micro/kernels/arc/mli_slicers.h" +#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.h" +#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.h" +#include "tensorflow/lite/micro/kernels/embarc_mli/mli_tf_utils.h" +#include "tensorflow/lite/micro/kernels/embarc_mli/mli_slicers.h" #include "mli_api.h" diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc b/tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.cc similarity index 98% rename from tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc rename to tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.cc index 5bd2d6aed22..8d00e28714c 100644 --- a/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc +++ b/tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.cc @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h" -#include "tensorflow/lite/micro/kernels/arc/scratch_buffers.h" +#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.h" +#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.h" #include #define MAX(A,B) (((A) > (B))? (A): (B)) #define MIN(A,B) (((A) > (B))? 
(B): (A)) diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h b/tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.h similarity index 100% rename from tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h rename to tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.h diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc b/tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.cc similarity index 98% rename from tensorflow/lite/micro/kernels/arc/scratch_buffers.cc rename to tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.cc index f36059f82d2..689c490569e 100644 --- a/tensorflow/lite/micro/kernels/arc/scratch_buffers.cc +++ b/tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/lite/micro/kernels/arc/scratch_buffers.h" +#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.h" #include #define MAX(A,B) (((A) > (B))? (A): (B)) #define MIN(A,B) (((A) > (B))? (B): (A)) diff --git a/tensorflow/lite/micro/kernels/arc/scratch_buffers.h b/tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.h similarity index 100% rename from tensorflow/lite/micro/kernels/arc/scratch_buffers.h rename to tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.h diff --git a/tensorflow/lite/micro/tools/make/ext_libs/embarc_mli.inc b/tensorflow/lite/micro/tools/make/ext_libs/embarc_mli.inc new file mode 100644 index 00000000000..851a5d43378 --- /dev/null +++ b/tensorflow/lite/micro/tools/make/ext_libs/embarc_mli.inc @@ -0,0 +1,67 @@ +ifeq ($(TARGET_ARCH), arc) + +# embarc_mli Library is used by default for ARC platform whenever it's possible. +# To use TFLM reference implementation it should be intentionally turned off +# by passing 'no_embarc_mli' tag (make -f TAGS=no_embarc_mli ...) 
+ifeq ($(filter no_embarc_mli,$(ALL_TAGS)),) + + +ALL_TAGS += embarc_mli + +ifeq ($(PRE_COMPILED_MLI),true) + # TODO: Replace with proper embarc_mli pre-builts. + $(eval $(call add_third_party_download,$(EMBARC_OSP_URL),$(EMBARC_OSP_MD5),embarc_osp,)) + + MLI_INCLUDE_FOLDER = embarc_osp/library/embarc_mli/include + MLI_LIB = third_party/embarc_osp/library/embarc_mli/lib/arcem9d/libmli_iotdk.a + + THIRD_PARTY_CC_HDRS += \ + third_party/embarc_osp/LICENSE +else + MLI_LIB_DIR = embarc_mli_$(basename $(TCF_FILE_NAME)) + + $(eval $(call add_third_party_download,$(EMBARC_MLI_URL),$(EMBARC_MLI_MD5),$(MLI_LIB_DIR),build_embarc_mli,$(TCF_FILE))) + + MLI_INCLUDE_FOLDER = $(MLI_LIB_DIR)/include + MLI_LIB = third_party/$(MLI_LIB_DIR)/bin/libmli.a + MICROLITE_LIBS += $(MAKEFILE_DIR)/downloads/$(MLI_LIB_DIR)/bin/libmli.a + + THIRD_PARTY_CC_HDRS += \ + third_party/$(MLI_LIB_DIR)/LICENSE +endif + + THIRD_PARTY_CC_HDRS += $(MLI_LIB) + GENERATED_PROJECT_LIBS += $(MLI_LIB) + + INCLUDES += \ + -I$(MAKEFILE_DIR)/downloads/$(MLI_INCLUDE_FOLDER) \ + -I$(MAKEFILE_DIR)/downloads/$(MLI_INCLUDE_FOLDER)/api + + GENERATED_PROJECT_INCLUDES += \ + -I. 
\ + -I./third_party/$(MLI_INCLUDE_FOLDER) \ + -I./third_party/$(MLI_INCLUDE_FOLDER)/api + + + THIRD_PARTY_CC_HDRS += \ + third_party/$(MLI_INCLUDE_FOLDER)/mli_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/mli_config.h \ + third_party/$(MLI_INCLUDE_FOLDER)/mli_types.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_helpers_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_kernels_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_avepool_spec_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_conv2d_spec_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_depthwise_conv2d_spec_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_maxpool_spec_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_mov_api.h + + MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.h + MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.cc + MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.h + MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.cc + MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/embarc_mli/mli_slicers.h + MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/embarc_mli/mli_slicers.cc + MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/embarc_mli/mli_tf_utils.h + +endif # no_embarc_mli +endif # TARGET_ARCH diff --git a/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc b/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc index 67be50d4854..4a9a5ccdfc3 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc @@ -129,70 +129,7 @@ endif LDFLAGS += $(PLATFORM_LDFLAGS) - # TODO: Move/organize embarc_mli usage in an implied way (see ext_libs/cmsis.inc for example - USE_EMBARC_MLI ?= true -ifeq ($(USE_EMBARC_MLI), true) - # TODO: To understand why it's done here. The same is performed in the higher level MakeFile. 
- ALL_TAGS += arc - -ifeq ($(PRE_COMPILED_MLI),true) - $(eval $(call add_third_party_download,$(EMBARC_OSP_URL),$(EMBARC_OSP_MD5),embarc_osp,)) - - MLI_INCLUDE_FOLDER = embarc_osp/library/embarc_mli/include - MLI_LIB = third_party/embarc_osp/library/embarc_mli/lib/arcem9d/libmli_iotdk.a - - THIRD_PARTY_CC_HDRS += \ - third_party/embarc_osp/LICENSE -else - MLI_LIB_DIR = embarc_mli_$(basename $(TCF_FILE_NAME)) - - $(eval $(call add_third_party_download,$(EMBARC_MLI_URL),$(EMBARC_MLI_MD5),$(MLI_LIB_DIR),build_embarc_mli,$(TCF_FILE))) - - MLI_INCLUDE_FOLDER = $(MLI_LIB_DIR)/include - MLI_LIB = third_party/$(MLI_LIB_DIR)/bin/libmli.a - MICROLITE_LIBS += $(MAKEFILE_DIR)/downloads/$(MLI_LIB_DIR)/bin/libmli.a - - THIRD_PARTY_CC_HDRS += \ - third_party/$(MLI_LIB_DIR)/LICENSE -endif - - THIRD_PARTY_CC_HDRS += $(MLI_LIB) - GENERATED_PROJECT_LIBS += $(MLI_LIB) - - INCLUDES += \ - -I$(MAKEFILE_DIR)/downloads/$(MLI_INCLUDE_FOLDER) \ - -I$(MAKEFILE_DIR)/downloads/$(MLI_INCLUDE_FOLDER)/api - - GENERATED_PROJECT_INCLUDES += \ - -I. 
\ - -I./third_party/$(MLI_INCLUDE_FOLDER) \ - -I./third_party/$(MLI_INCLUDE_FOLDER)/api - - - THIRD_PARTY_CC_HDRS += \ - third_party/$(MLI_INCLUDE_FOLDER)/mli_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/mli_config.h \ - third_party/$(MLI_INCLUDE_FOLDER)/mli_types.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_helpers_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_kernels_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_avepool_spec_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_conv2d_spec_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_depthwise_conv2d_spec_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_maxpool_spec_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_mov_api.h - - MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/scratch_buffers.h - MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/scratch_buffers.cc - MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.h - MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/scratch_buf_mgr.cc - MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/mli_slicers.h - MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc/mli_slicers.cc - MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc/mli_tf_utils.h - - MICROLITE_CC_HDRS += tensorflow/lite/micro/tools/make/targets/arc/memory.lcf - -endif # USE_EMBARC_MLI endif # ARC_TOOLCHAIN endif # TARGET_ARCH From 03bec25ed962226e59d9d4a8b23a55540ab33ca9 Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Mon, 13 Apr 2020 14:06:35 +0300 Subject: [PATCH 26/45] Additional tests for embARC MLI specific slicing (initial mock version) --- .../kernels/embarc_mli/conv_slicing_test.cc | 629 ++++++++++ .../embarc_mli/depthwise_conv_slicing_test.cc | 768 ++++++++++++ .../fully_connected_slicing_test.cc | 938 ++++++++++++++ .../embarc_mli/pooling_slicing_test.cc | 1116 +++++++++++++++++ .../micro/tools/make/ext_libs/embarc_mli.inc | 11 +- 5 files changed, 3461 insertions(+), 1 deletion(-) create mode 100644 
tensorflow/lite/micro/kernels/embarc_mli/conv_slicing_test.cc create mode 100644 tensorflow/lite/micro/kernels/embarc_mli/depthwise_conv_slicing_test.cc create mode 100644 tensorflow/lite/micro/kernels/embarc_mli/fully_connected_slicing_test.cc create mode 100644 tensorflow/lite/micro/kernels/embarc_mli/pooling_slicing_test.cc diff --git a/tensorflow/lite/micro/kernels/embarc_mli/conv_slicing_test.cc b/tensorflow/lite/micro/kernels/embarc_mli/conv_slicing_test.cc new file mode 100644 index 00000000000..a1f155ecc56 --- /dev/null +++ b/tensorflow/lite/micro/kernels/embarc_mli/conv_slicing_test.cc @@ -0,0 +1,629 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/micro/kernels/all_ops_resolver.h" +#include "tensorflow/lite/micro/micro_utils.h" +#include "tensorflow/lite/micro/testing/micro_test.h" +#include "tensorflow/lite/micro/testing/test_utils.h" + +namespace tflite { +namespace testing { +namespace { + +// Common inputs and outputs. 
+static const int kInputElements = 16; +static const int kInputShape[] = {4, 2, 2, 4, 1}; +static const float kInputData[] = {1, 1, 1, 1, 2, 2, 2, 2, + 1, 2, 3, 4, 1, 2, 3, 4}; +static const int kFilterElements = 12; +static const int kFilterShape[] = {4, 3, 2, 2, 1}; +static const float kFilterData[] = {1, 2, 3, 4, -1, 1, -1, 1, -1, -1, 1, 1}; +static const int kBiasElements = 3; +static const int kBiasShape[] = {1, 3}; +static const float kBiasData[] = {1, 2, 3}; +static const int kOutputElements = 12; +static const int kOutputShape[] = {4, 2, 1, 2, 3}; +static const float kGoldenData[] = {18, 2, 5, 18, 2, 5, 17, 4, 3, 37, 4, 3}; + +static TfLiteConvParams common_conv_params = { + kTfLitePaddingValid, // padding + 2, // stride_width + 2, // stride_height + kTfLiteActNone, // activation + 1, // dilation_width_factor + 1, // dilation_height_factor +}; + +template +TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, + const T* expected_output_data, T* output_data, + int output_length, + TfLiteConvParams* conv_params, + float tolerance = 1e-5) { + TfLiteContext context; + PopulateContext(tensors, tensors_size, micro_test::reporter, &context); + + ::tflite::ops::micro::AllOpsResolver resolver; + + const TfLiteRegistration* registration = + resolver.FindOp(tflite::BuiltinOperator_CONV_2D, 1); + + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + + const char* init_data = reinterpret_cast(conv_params); + size_t init_data_size = 0; + void* user_data = nullptr; + + if (registration->init) { + user_data = registration->init(&context, init_data, init_data_size); + } + + int inputs_array_data[] = {3, 0, 1, 2}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 3}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + int temporaries_array_data[] = {0}; + TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data); + + TfLiteNode node; + node.inputs = inputs_array; + 
node.outputs = outputs_array; + node.temporaries = temporaries_array; + node.user_data = user_data; + node.builtin_data = reinterpret_cast(conv_params); + node.custom_initial_data = nullptr; + node.custom_initial_data_size = 0; + node.delegate = nullptr; + + if (registration->prepare) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); + } + TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); + TfLiteStatus return_val = registration->invoke(&context, &node); + if (return_val != kTfLiteOk) { + return return_val; + } + + if (registration->free) { + registration->free(&context, user_data); + } + + for (int i = 0; i < output_length; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], + tolerance); + } + return kTfLiteOk; +} + +void TestConvFloat(const int* input_dims_data, const float* input_data, + const int* filter_dims_data, const float* filter_data, + const int* bias_dims_data, const float* bias_data, + const int* output_dims_data, + const float* expected_output_data, float* output_data, + TfLiteConvParams* conv_params) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + constexpr int inputs_size = 3; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateFloatTensor(input_data, input_dims, "input_tensor"), + CreateFloatTensor(filter_data, filter_dims, "filter_tensor"), + CreateFloatTensor(bias_data, bias_dims, "bias_tensor"), + CreateFloatTensor(output_data, output_dims, "output_tensor"), + }; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + ValidateConvGoldens(tensors, tensors_size, expected_output_data, + output_data, output_dims_count, conv_params)); +} 
+ +void TestConvQuantizedPerLayer( + const int* input_dims_data, const float* input_data, + uint8_t* input_quantized, float input_scale, const int* filter_dims_data, + const float* filter_data, uint8_t* filter_quantized, float filter_scale, + const int* bias_dims_data, const float* bias_data, int32_t* bias_quantized, + const int* output_dims_data, const float* expected_output_data, + uint8_t* expected_output_quantized, uint8_t* output_data, + float output_scale, TfLiteConvParams* conv_params) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + tflite::AsymmetricQuantize(expected_output_data, expected_output_quantized, + output_dims_count, output_scale, 128); + + constexpr int inputs_size = 3; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateQuantizedTensor(input_data, input_quantized, input_dims, + input_scale, 128, "input_tensor"), + CreateQuantizedTensor(filter_data, filter_quantized, filter_dims, + filter_scale, 128, "filter_tensor"), + CreateQuantizedBiasTensor(bias_data, bias_quantized, bias_dims, + input_scale, filter_scale, "bias_tensor"), + CreateQuantizedTensor(output_data, output_dims, output_scale, 128, + "output_tensor")}; + + // TODO(njeff): Affine Quantization Params should be set on tensor creation. 
+ float filter_scales[] = {1, filter_scale}; + int filter_zero_points[] = {1, 128}; + TfLiteAffineQuantization filter_quant = { + FloatArrayFromFloats(filter_scales), + IntArrayFromInts(filter_zero_points)}; + tensors[1].quantization = {kTfLiteAffineQuantization, &filter_quant}; + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + ValidateConvGoldens(tensors, tensors_size, expected_output_quantized, + output_data, output_dims_count, conv_params)); +} + +void TestConvQuantizedPerChannel( + const int* input_dims_data, const float* input_data, + int8_t* input_quantized, float input_scale, int input_zero_point, + const int* filter_dims_data, const float* filter_data, + int8_t* filter_data_quantized, const int* bias_dims_data, + const float* bias_data, int32_t* bias_data_quantized, float* bias_scales, + int* bias_zero_points, const int* output_dims_data, + const float* expected_output_data, int8_t* expected_output_data_quantized, + int8_t* output_data, float output_scale, int output_zero_point, + TfLiteConvParams* conv_params) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + int filter_zero_points[5]; + float filter_scales[5]; + TfLiteAffineQuantization filter_quant; + TfLiteAffineQuantization bias_quant; + TfLiteTensor input_tensor = + CreateQuantizedTensor(input_data, input_quantized, input_dims, + input_scale, input_zero_point, "input_tensor"); + TfLiteTensor filter_tensor = CreateSymmetricPerChannelQuantizedTensor( + filter_data, filter_data_quantized, filter_dims, filter_scales, + filter_zero_points, &filter_quant, 0 /* quantized dimension */, + "filter_tensor"); + TfLiteTensor bias_tensor = CreatePerChannelQuantizedBiasTensor( + bias_data, bias_data_quantized, bias_dims, input_scale, 
&filter_scales[1], + bias_scales, bias_zero_points, &bias_quant, 0 /* quantized dimension */, + "bias_tensor"); + TfLiteTensor output_tensor = + CreateQuantizedTensor(output_data, output_dims, output_scale, + output_zero_point, "output_tensor"); + + // TODO(njeff): Affine Quantization Params should be set on tensor creation. + float input_scales[] = {1, input_scale}; + int input_zero_points[] = {1, input_zero_point}; + TfLiteAffineQuantization input_quant = {FloatArrayFromFloats(input_scales), + IntArrayFromInts(input_zero_points)}; + input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; + + float output_scales[] = {1, output_scale}; + int output_zero_points[] = {1, output_zero_point}; + TfLiteAffineQuantization output_quant = { + FloatArrayFromFloats(output_scales), + IntArrayFromInts(output_zero_points)}; + output_tensor.quantization = {kTfLiteAffineQuantization, &output_quant}; + + constexpr int inputs_size = 3; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + input_tensor, + filter_tensor, + bias_tensor, + output_tensor, + }; + + tflite::AsymmetricQuantize(expected_output_data, + expected_output_data_quantized, output_dims_count, + output_scale, output_zero_point); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + ValidateConvGoldens(tensors, tensors_size, expected_output_data_quantized, + output_data, output_dims_count, conv_params, + 1.0 /* tolerance */)); +} + +} // namespace +} // namespace testing +} // namespace tflite + +TF_LITE_MICRO_TESTS_BEGIN + +TF_LITE_MICRO_TEST(SimpleTestFloat) { + float output_data[tflite::testing::kOutputElements]; + + tflite::testing::TestConvFloat( + tflite::testing::kInputShape, tflite::testing::kInputData, + tflite::testing::kFilterShape, tflite::testing::kFilterData, + tflite::testing::kBiasShape, tflite::testing::kBiasData, + tflite::testing::kOutputShape, tflite::testing::kGoldenData, output_data, + 
&tflite::testing::common_conv_params); +} + +TF_LITE_MICRO_TEST(InputAndFilterSameWidthHeight) { + const int output_dims_count = 2; + float output_data[output_dims_count]; + + const int kFilterShape[] = {4, 1, 2, 4, 1}; + const float filter_values[] = {1, 2, 3, 4, -1, -1, 1, 1}; + const int kBiasShape[] = {1, 1}; + const float bias_values[] = {0}; + const int kOutputShape[] = {4, 2, 1, 1, 1}; + const float expected_output[] = {10, 34}; + + tflite::testing::TestConvFloat( + tflite::testing::kInputShape, tflite::testing::kInputData, kFilterShape, + filter_values, kBiasShape, bias_values, kOutputShape, expected_output, + output_data, &tflite::testing::common_conv_params); +} + +TF_LITE_MICRO_TEST(SimpleTestQuantized) { + const int output_dims_count = 12; + uint8_t output_data[output_dims_count]; + + const float input_scale = 0.5f; + const float filter_scale = 0.5f; + const float output_scale = 1.0f; + + uint8_t input_quantized[tflite::testing::kInputElements]; + uint8_t filter_quantized[tflite::testing::kFilterElements]; + int32_t bias_quantized[tflite::testing::kBiasElements]; + uint8_t golden_quantized[tflite::testing::kOutputElements]; + + tflite::testing::TestConvQuantizedPerLayer( + tflite::testing::kInputShape, tflite::testing::kInputData, + input_quantized, input_scale, tflite::testing::kFilterShape, + tflite::testing::kFilterData, filter_quantized, filter_scale, + tflite::testing::kBiasShape, tflite::testing::kBiasData, bias_quantized, + tflite::testing::kOutputShape, tflite::testing::kGoldenData, + golden_quantized, output_data, output_scale, + &tflite::testing::common_conv_params); +} + +TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannel) { + const int output_dims_count = 12; + int8_t output_data[output_dims_count]; + + const float input_scale = 0.5f; + const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + + int8_t input_quantized[tflite::testing::kInputElements]; + int8_t 
filter_quantized[tflite::testing::kFilterElements]; + int32_t bias_quantized[tflite::testing::kBiasElements]; + int8_t golden_quantized[tflite::testing::kOutputElements]; + int zero_points[tflite::testing::kBiasElements + 1]; + float scales[tflite::testing::kBiasElements + 1]; + + tflite::testing::TestConvQuantizedPerChannel( + tflite::testing::kInputShape, tflite::testing::kInputData, + input_quantized, input_scale, input_zero_point, + tflite::testing::kFilterShape, tflite::testing::kFilterData, + filter_quantized, tflite::testing::kBiasShape, tflite::testing::kBiasData, + bias_quantized, scales, zero_points, tflite::testing::kOutputShape, + tflite::testing::kGoldenData, golden_quantized, output_data, output_scale, + output_zero_point, &tflite::testing::common_conv_params); +} + +TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannelRelu6) { + // conv params: + // padding, stride_, dilation_, activation + TfLiteConvParams conv_params = {kTfLitePaddingValid, 1, 1, kTfLiteActRelu6}; + const int output_dims_count = 12; + int8_t output_data[output_dims_count]; + + const float bias_values[] = {1, 2, -3}; + const float golden_data[] = {6, 2, 0, 6, 2, 0, 6, 4, 0, 6, 4, 0}; + + const float input_scale = 0.023529f; + const float output_scale = 0.023529f; + const int input_zero_point = -128; + const int output_zero_point = -128; + + int8_t input_quantized[tflite::testing::kInputElements]; + int8_t filter_quantized[tflite::testing::kFilterElements]; + int32_t bias_quantized[tflite::testing::kBiasElements]; + int8_t golden_quantized[tflite::testing::kOutputElements]; + int zero_points[tflite::testing::kBiasElements + 1]; + float scales[tflite::testing::kBiasElements + 1]; + + tflite::testing::TestConvQuantizedPerChannel( + tflite::testing::kInputShape, tflite::testing::kInputData, + input_quantized, input_scale, input_zero_point, + tflite::testing::kFilterShape, tflite::testing::kFilterData, + filter_quantized, tflite::testing::kBiasShape, bias_values, + bias_quantized, scales, 
zero_points, tflite::testing::kOutputShape, + golden_data, golden_quantized, output_data, output_scale, + output_zero_point, &tflite::testing::common_conv_params); +} + +TF_LITE_MICRO_TEST(Kernel1x1QuantizedPerChannel) { + // conv params: + // padding, stride_, activation, dilation_ + TfLiteConvParams conv_params = {kTfLitePaddingValid, 1, 1, + kTfLiteActNone, 1, 1}; + const int kInputShape[] = {4, 1, 2, 2, 4}; // [len,N,H,W,C] + const int kInputElements = + kInputShape[1] * kInputShape[2] * kInputShape[3] * kInputShape[4]; + float kInputData[/* kInputElements */] = {1, 1, 1, 1, 2, 2, 2, 2, + 1, 2, 3, 4, 1, 2, 3, 4}; + const int kFilterShape[] = {4, 3, 1, 1, 4}; + const int kFilterElements = + kFilterShape[1] * kFilterShape[2] * kFilterShape[3] * kFilterShape[4]; + float kFilterData[/* kFilterElements */] = {1, 2, 3, 4, -1, 1, + -1, 1, -1, -1, 1, 1}; + const int kBiasElements = kFilterShape[1]; + const int kBiasShape[] = {1, kBiasElements}; + float kBiasData[/* kBiasElements */] = {1, 2, 3}; + const int kOutputShape[] = {4, 1, 2, 2, kBiasElements}; + const int kOutputElements = 4 * 3; + int8_t output_data[kOutputElements]; + const float kGoldenData[/* kOutputElements */] = {11, 2, 3, 21, 2, 3, + 31, 4, 7, 31, 4, 7}; + + const float input_scale = 0.5f; + const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + + int8_t input_quantized[kInputElements]; + int8_t filter_quantized[kFilterElements]; + int32_t bias_quantized[kBiasElements]; + int8_t golden_quantized[kOutputElements]; + int zero_points[kBiasElements + 1]; + float scales[kBiasElements + 1]; + + tflite::testing::TestConvQuantizedPerChannel( + kInputShape, kInputData, input_quantized, input_scale, input_zero_point, + kFilterShape, kFilterData, filter_quantized, kBiasShape, kBiasData, + bias_quantized, scales, zero_points, kOutputShape, kGoldenData, + golden_quantized, output_data, output_scale, output_zero_point, + &conv_params); +} + 
+TF_LITE_MICRO_TEST(Kernel1x1QuantizedPerChannelRelu6) { + // conv params: + // padding, stride_, dilation_, activation + TfLiteConvParams conv_params = {kTfLitePaddingValid, 1, 1, kTfLiteActRelu6}; + const int kInputShape[] = {4, 1, 2, 2, 4}; // [len,N,H,W,C] + const int kInputElements = + kInputShape[1] * kInputShape[2] * kInputShape[3] * kInputShape[4]; + float kInputData[/* kInputElements */] = {1, 1, 1, 1, 2, 2, 2, 2, + 1, 2, 3, 4, 1, 2, 3, 4}; + const int kFilterShape[] = {4, 3, 1, 1, 4}; + const int kFilterElements = + kFilterShape[1] * kFilterShape[2] * kFilterShape[3] * kFilterShape[4]; + float kFilterData[/* kFilterElements */] = {1, 2, 3, 4, -1, 1, + -1, 1, -1, -1, 1, 1}; + const int kBiasElements = kFilterShape[1]; + const int kBiasShape[] = {1, kBiasElements}; + float kBiasData[/* kBiasElements */] = {1, 2, -3}; + const int kOutputShape[] = {4, 1, 2, 2, kBiasElements}; + const int kOutputElements = 4 * 3; + int8_t output_data[kOutputElements]; + const float kGoldenData[/* kOutputElements */] = {6, 2, 0, 6, 2, 0, + 6, 4, 1, 6, 4, 1}; + + const float input_scale = 0.023529f; + const float output_scale = 0.023529f; + const int input_zero_point = -128; + const int output_zero_point = -128; + + int8_t input_quantized[kInputElements]; + int8_t filter_quantized[kFilterElements]; + int32_t bias_quantized[kBiasElements]; + int8_t golden_quantized[kOutputElements]; + int zero_points[kBiasElements + 1]; + float scales[kBiasElements + 1]; + + tflite::testing::TestConvQuantizedPerChannel( + kInputShape, kInputData, input_quantized, input_scale, input_zero_point, + kFilterShape, kFilterData, filter_quantized, kBiasShape, kBiasData, + bias_quantized, scales, zero_points, kOutputShape, kGoldenData, + golden_quantized, output_data, output_scale, output_zero_point, + &conv_params); +} + +TF_LITE_MICRO_TEST(FilterDimsNotMatchingAffineQuantization) { + const int output_dims_count = 12; + int8_t output_data[output_dims_count]; + + const float input_scale = 0.5f; + const 
float output_scale = 1.0f; + + int8_t input_quantized[tflite::testing::kInputElements]; + int8_t filter_quantized[tflite::testing::kFilterElements]; + int32_t bias_quantized[tflite::testing::kBiasElements]; + int8_t golden_quantized[tflite::testing::kOutputElements]; + int zero_points[tflite::testing::kBiasElements + 1]; + float scales[tflite::testing::kBiasElements + 1]; + + TfLiteIntArray* input_dims = + tflite::testing::IntArrayFromInts(tflite::testing::kInputShape); + TfLiteIntArray* filter_dims = + tflite::testing::IntArrayFromInts(tflite::testing::kFilterShape); + TfLiteIntArray* bias_dims = + tflite::testing::IntArrayFromInts(tflite::testing::kBiasShape); + TfLiteIntArray* output_dims = + tflite::testing::IntArrayFromInts(tflite::testing::kOutputShape); + + int filter_zero_points[5]; + float filter_scales[5]; + TfLiteAffineQuantization filter_quant; + TfLiteAffineQuantization bias_quant; + TfLiteTensor input_tensor = tflite::testing::CreateQuantizedTensor( + tflite::testing::kInputData, input_quantized, input_dims, input_scale, 0, + "input_tensor"); + TfLiteTensor filter_tensor = + tflite::testing::CreateSymmetricPerChannelQuantizedTensor( + tflite::testing::kFilterData, filter_quantized, filter_dims, + filter_scales, filter_zero_points, &filter_quant, + 0 /* quantized dimension */, "filter_tensor"); + TfLiteTensor bias_tensor = + tflite::testing::CreatePerChannelQuantizedBiasTensor( + tflite::testing::kBiasData, bias_quantized, bias_dims, input_scale, + &filter_scales[1], scales, zero_points, &bias_quant, 0, + "bias_tensor"); + TfLiteTensor output_tensor = tflite::testing::CreateQuantizedTensor( + output_data, output_dims, output_scale, 0 /* quantized dimension */, + "output_tensor"); + + float input_scales[] = {1, input_scale}; + int input_zero_points[] = {1, 128}; + TfLiteAffineQuantization input_quant = { + tflite::testing::FloatArrayFromFloats(input_scales), + tflite::testing::IntArrayFromInts(input_zero_points)}; + input_tensor.quantization = 
{kTfLiteAffineQuantization, &input_quant}; + + constexpr int inputs_size = 3; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + input_tensor, + filter_tensor, + bias_tensor, + output_tensor, + }; + + tflite::AsymmetricQuantize(tflite::testing::kGoldenData, golden_quantized, + output_dims_count, output_scale, 0); + + // Set filter quant to mismatched dimension. + TfLiteAffineQuantization* quant = reinterpret_cast( + filter_tensor.quantization.params); + + // Choose arbitrary incorrect scale and zero point sizes which are neither 1 + // (for broadcast case) nor the quantized dimension size. + quant->scale->size = 2; + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteError, + tflite::testing::ValidateConvGoldens( + tensors, tensors_size, golden_quantized, output_data, + output_dims_count, &tflite::testing::common_conv_params)); + + // Set scale back to correct dimension, and make zero point array too short. + quant->scale->size = tflite::testing::kFilterShape[0]; + quant->zero_point->size = 2; + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteError, + tflite::testing::ValidateConvGoldens( + tensors, tensors_size, golden_quantized, output_data, + output_dims_count, &tflite::testing::common_conv_params)); +} + +TF_LITE_MICRO_TEST(BroadcastPerLayerQuantizationToPerChannelShouldMatchGolden) { + const int output_dims_count = 12; + int8_t output_data[output_dims_count]; + + const float input_scale = 1.0f; + const float filter_scale = 1.0f; + const float output_scale = 1.0f; + + int8_t input_quantized[tflite::testing::kInputElements]; + int8_t filter_quantized[tflite::testing::kFilterElements]; + int32_t bias_quantized[tflite::testing::kBiasElements]; + int8_t golden_quantized[tflite::testing::kOutputElements]; + + TfLiteIntArray* input_dims = + tflite::testing::IntArrayFromInts(tflite::testing::kInputShape); + TfLiteIntArray* filter_dims = + tflite::testing::IntArrayFromInts(tflite::testing::kFilterShape); + 
TfLiteIntArray* bias_dims = + tflite::testing::IntArrayFromInts(tflite::testing::kBiasShape); + TfLiteIntArray* output_dims = + tflite::testing::IntArrayFromInts(tflite::testing::kOutputShape); + + // Create per-layer quantized int8 input tensor. + TfLiteTensor input_tensor = tflite::testing::CreateQuantizedTensor( + tflite::testing::kInputData, input_quantized, input_dims, input_scale, 0, + "input_tensor"); + int input_zero_points[2] = {1, 0}; + float input_scales[2] = {1, input_scale}; + TfLiteAffineQuantization input_quant = { + tflite::testing::FloatArrayFromFloats(input_scales), + tflite::testing::IntArrayFromInts(input_zero_points)}; + input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; + + // Create per-layer quantized int8 filter tensor. + TfLiteTensor filter_tensor = tflite::testing::CreateQuantizedTensor( + tflite::testing::kFilterData, filter_quantized, filter_dims, filter_scale, + 0, "filter_tensor"); + int filter_zero_points[2] = {1, 0}; + float filter_scales[2] = {1, filter_scale}; + TfLiteAffineQuantization filter_quant = { + tflite::testing::FloatArrayFromFloats(filter_scales), + tflite::testing::IntArrayFromInts(filter_zero_points)}; + filter_tensor.quantization = {kTfLiteAffineQuantization, &filter_quant}; + + // Create per-layer quantized int32 bias tensor. + tflite::SymmetricQuantize(tflite::testing::kBiasData, bias_quantized, + tflite::testing::kBiasElements, + input_scale * output_scale); + TfLiteTensor bias_tensor = tflite::testing::CreateInt32Tensor( + bias_quantized, bias_dims, "bias_tensor"); + + int bias_zero_points[2] = {1, 0}; + float bias_scales[2] = {1, input_scale * filter_scale}; + TfLiteAffineQuantization bias_quant = { + tflite::testing::FloatArrayFromFloats(bias_scales), + tflite::testing::IntArrayFromInts(bias_zero_points)}; + bias_tensor.quantization = {kTfLiteAffineQuantization, &bias_quant}; + + // Create per-layer quantized int8 output tensor. 
+ TfLiteTensor output_tensor = tflite::testing::CreateQuantizedTensor( + output_data, output_dims, output_scale, 0 /* quantized dimension */, + "output_tensor"); + int output_zero_points[2] = {1, 0}; + float output_scales[2] = {1, output_scale}; + TfLiteAffineQuantization output_quant = { + tflite::testing::FloatArrayFromFloats(output_scales), + tflite::testing::IntArrayFromInts(output_zero_points)}; + output_tensor.quantization = {kTfLiteAffineQuantization, &output_quant}; + + constexpr int inputs_size = 3; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + input_tensor, + filter_tensor, + bias_tensor, + output_tensor, + }; + + tflite::AsymmetricQuantize(tflite::testing::kGoldenData, golden_quantized, + output_dims_count, output_scale, 0); + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, tflite::testing::ValidateConvGoldens( + tensors, tensors_size, golden_quantized, output_data, + output_dims_count, &tflite::testing::common_conv_params)); +} + +TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/kernels/embarc_mli/depthwise_conv_slicing_test.cc b/tensorflow/lite/micro/kernels/embarc_mli/depthwise_conv_slicing_test.cc new file mode 100644 index 00000000000..8b79885a8a8 --- /dev/null +++ b/tensorflow/lite/micro/kernels/embarc_mli/depthwise_conv_slicing_test.cc @@ -0,0 +1,768 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "tensorflow/lite/micro/kernels/all_ops_resolver.h" +#include "tensorflow/lite/micro/testing/micro_test.h" +#include "tensorflow/lite/micro/testing/test_utils.h" + +namespace tflite { +namespace testing { +namespace { + +constexpr int kMaxFilterChannels = 64; +constexpr int kMaxBiasChannels = 64; + +// Index of the output tensor in context->tensors, specific to +// DepthwiseConv. +constexpr int kOutputTensorIndex = 3; + +// Creates a DepthwiseConv opeerator, calls it with the provided input tensors +// and some defaults parameters, and compares the output with +// expected_output_data. +// +// The tensors parameter contains both the input tensors as well as a +// preallocated output tensor into which the output is stored. +template +TfLiteStatus ValidateDepthwiseConvGoldens(const T* expected_output_data, + int output_length, + TfLiteFusedActivation activation, + float tolerance, int tensors_size, + TfLiteTensor* tensors) { + TfLiteContext context; + PopulateContext(tensors, tensors_size, micro_test::reporter, &context); + + ::tflite::ops::micro::AllOpsResolver resolver; + const TfLiteRegistration* registration = + resolver.FindOp(tflite::BuiltinOperator_DEPTHWISE_CONV_2D, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + + int input_depth = tensors[0].dims->data[3]; + int output_depth = tensors[1].dims->data[3]; + int depth_mul = output_depth / input_depth; + TfLiteDepthwiseConvParams builtin_data; + builtin_data.padding = kTfLitePaddingValid; + builtin_data.activation = activation; + builtin_data.stride_height = 1; + builtin_data.stride_width = 1; + builtin_data.dilation_height_factor = 1; + builtin_data.dilation_width_factor = 1; + builtin_data.depth_multiplier = depth_mul; + + const char* init_data = 
reinterpret_cast(&builtin_data); + size_t init_data_size = 0; + void* user_data = nullptr; + if (registration->init) { + user_data = registration->init(&context, init_data, init_data_size); + } + int inputs_array_data[] = {3, 0, 1, 2}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 3}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + int temporaries_array_data[] = {0}; + TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data); + + TfLiteNode node; + node.inputs = inputs_array; + node.outputs = outputs_array; + node.temporaries = temporaries_array; + node.user_data = user_data; + node.builtin_data = reinterpret_cast(&builtin_data); + node.custom_initial_data = nullptr; + node.custom_initial_data_size = 0; + node.delegate = nullptr; + if (registration->prepare) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); + } + TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); + TfLiteStatus invoke_status = registration->invoke(&context, &node); + if (invoke_status != kTfLiteOk) { + return invoke_status; + } + + if (registration->free) { + registration->free(&context, user_data); + } + + const T* output_data = tflite::GetTensorData(&tensors[kOutputTensorIndex]); + for (int i = 0; i < output_length; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], + tolerance); + } + return kTfLiteOk; +} + +void TestDepthwiseConvFloat(const int* input_dims_data, const float* input_data, + const int* filter_dims_data, + const float* filter_data, const int* bias_dims_data, + const float* bias_data, + const float* expected_output_data, + const int* output_dims_data, + TfLiteFusedActivation activation, + float* output_data) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); + TfLiteIntArray* 
output_dims = IntArrayFromInts(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + constexpr int inputs_size = 3; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateFloatTensor(input_data, input_dims, "input_tensor"), + CreateFloatTensor(filter_data, filter_dims, "filter_tensor"), + CreateFloatTensor(bias_data, bias_dims, "bias_tensor"), + CreateFloatTensor(output_data, output_dims, "output_tensor"), + }; + + ValidateDepthwiseConvGoldens(expected_output_data, output_dims_count, + activation, 1e-5, tensors_size, tensors); +} + +void TestDepthwiseConvQuantizedPerLayer( + const int* input_dims_data, const float* input_data, + uint8_t* input_quantized, float input_scale, int input_zero_point, + const int* filter_dims_data, const float* filter_data, + uint8_t* filter_quantized, float filter_scale, int filter_zero_point, + const int* bias_dims_data, const float* bias_data, int32_t* bias_quantized, + const float* golden, uint8_t* golden_quantized, const int* output_dims_data, + uint8_t* output_data, float output_scale, int output_zero_point, + TfLiteFusedActivation activation) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + constexpr int inputs_size = 3; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + tflite::testing::CreateQuantizedTensor(input_data, input_quantized, + input_dims, input_scale, + input_zero_point, "input_tensor"), + tflite::testing::CreateQuantizedTensor( + filter_data, filter_quantized, filter_dims, filter_scale, + filter_zero_point, "filter_tensor"), + 
tflite::testing::CreateQuantizedBiasTensor(bias_data, bias_quantized, + bias_dims, input_scale, + filter_scale, "bias_tensor"), + tflite::testing::CreateQuantizedTensor(output_data, output_dims, + output_scale, output_zero_point, + "output_tensor"), + }; + + // TODO(njeff): Affine Quantization Params should be set on tensor creation. + float filter_scales[] = {1, filter_scale}; + int filter_zero_points[] = {1, 128}; + TfLiteAffineQuantization filter_quant = { + FloatArrayFromFloats(filter_scales), + IntArrayFromInts(filter_zero_points)}; + tensors[1].quantization = {kTfLiteAffineQuantization, &filter_quant}; + + float bias_scales[] = {1, filter_scale * input_scale}; + int bias_zero_points[] = {1, 128}; + TfLiteAffineQuantization bias_quant = {FloatArrayFromFloats(bias_scales), + IntArrayFromInts(bias_zero_points)}; + tensors[2].quantization = {kTfLiteAffineQuantization, &bias_quant}; + + AsymmetricQuantize(golden, golden_quantized, output_dims_count, output_scale, + output_zero_point); + ValidateDepthwiseConvGoldens(golden_quantized, output_dims_count, activation, + 1.0, tensors_size, tensors); +} + +void TestDepthwiseConvQuantizedPerChannel( + const int* input_dims_data, const float* input_data, + int8_t* input_quantized, float input_scale, int input_zero_point, + const int* filter_dims_data, const float* filter_data, + int8_t* filter_data_quantized, const int* bias_dims_data, + const float* bias_data, int32_t* bias_data_quantized, + const int* output_dims_data, const float* expected_output_data, + int8_t* expected_output_data_quantized, int8_t* output_data, + float output_scale, int output_zero_point, + TfLiteFusedActivation activation) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + 
int filter_zero_points[kMaxFilterChannels]; + float filter_scales[kMaxFilterChannels]; + int bias_zero_points[kMaxBiasChannels]; + float bias_scales[kMaxBiasChannels]; + TfLiteAffineQuantization filter_quant; + TfLiteAffineQuantization bias_quant; + TfLiteTensor input_tensor = + CreateQuantizedTensor(input_data, input_quantized, input_dims, + input_scale, input_zero_point, "input_tensor"); + TfLiteTensor filter_tensor = CreateSymmetricPerChannelQuantizedTensor( + filter_data, filter_data_quantized, filter_dims, filter_scales, + filter_zero_points, &filter_quant, 3 /* quantized dimension */, + "filter_tensor"); + TfLiteTensor bias_tensor = CreatePerChannelQuantizedBiasTensor( + bias_data, bias_data_quantized, bias_dims, input_scale, &filter_scales[1], + bias_scales, bias_zero_points, &bias_quant, 3 /* quantized dimension */, + "bias_tensor"); + TfLiteTensor output_tensor = + CreateQuantizedTensor(output_data, output_dims, output_scale, + input_zero_point, "output_tensor"); + + // TODO(njeff): Affine Quantization Params should be set on tensor creation. 
+ float input_scales[] = {1, input_scale}; + int input_zero_points[] = {1, input_zero_point}; + TfLiteAffineQuantization input_quant = {FloatArrayFromFloats(input_scales), + IntArrayFromInts(input_zero_points)}; + input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; + + float output_scales[] = {1, output_scale}; + int output_zero_points[] = {1, output_zero_point}; + TfLiteAffineQuantization output_quant = { + FloatArrayFromFloats(output_scales), + IntArrayFromInts(output_zero_points)}; + output_tensor.quantization = {kTfLiteAffineQuantization, &output_quant}; + + constexpr int inputs_size = 3; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + input_tensor, + filter_tensor, + bias_tensor, + output_tensor, + }; + + AsymmetricQuantize(expected_output_data, expected_output_data_quantized, + output_dims_count, output_scale, output_zero_point); + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, ValidateDepthwiseConvGoldens(expected_output_data_quantized, + output_dims_count, activation, + 1.0, tensors_size, tensors)); +} + +} // namespace +} // namespace testing +} // namespace tflite + +TF_LITE_MICRO_TESTS_BEGIN + +TF_LITE_MICRO_TEST(SimpleTest) { + const int input_elements = 12; + const int input_shape[] = {4, 1, 3, 2, 2}; + const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; + const int filter_elements = 16; + const int filter_shape[] = {4, 1, 2, 2, 4}; + const float filter_values[] = {1, 2, 3, 4, -9, 10, -11, 12, + 5, 6, 7, 8, 13, -14, 15, -16}; + const int bias_elements = 4; + const int bias_shape[] = {4, 1, 1, 1, 4}; + const float bias_values[] = {1, 2, 3, 4}; + const float golden[] = { + 71, -34, 99, -20, 91, -26, 127, -4, + }; + const int output_shape[] = {4, 1, 2, 1, 4}; + const int output_dims_count = 8; + float output_data[output_dims_count]; + tflite::testing::TestDepthwiseConvFloat( + input_shape, input_values, filter_shape, filter_values, 
bias_shape, + bias_values, golden, output_shape, kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTestQuantized) { + const int input_elements = 12; + const int input_shape[] = {4, 1, 3, 2, 2}; + const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; + const int filter_elements = 16; + const int filter_shape[] = {4, 1, 2, 2, 4}; + const float filter_values[] = {1, 2, 3, 4, -9, 10, -11, 12, + 5, 6, 7, 8, 13, -14, 15, -16}; + const int bias_elements = 4; + const int bias_shape[] = {4, 1, 1, 1, 4}; + const int output_elements = 8; + const float bias_values[] = {1, 2, 3, 4}; + const float golden[] = { + 71, -34, 99, -20, 91, -26, 127, -4, + }; + const int output_shape[] = {4, 1, 2, 1, 4}; + + const float input_scale = 0.5f; + const int input_zero_point = 128; + const float filter_scale = 0.5f; + const int filter_zero_point = 128; + const float output_scale = 1.0f; + const int output_zero_point = 128; + + uint8_t input_quantized[input_elements]; + uint8_t filter_quantized[filter_elements]; + int32_t bias_quantized[bias_elements]; + uint8_t golden_quantized[output_elements]; + uint8_t output_data[output_elements]; + + tflite::testing::TestDepthwiseConvQuantizedPerLayer( + input_shape, input_values, input_quantized, input_scale, input_zero_point, + filter_shape, filter_values, filter_quantized, filter_scale, + filter_zero_point, bias_shape, bias_values, bias_quantized, golden, + golden_quantized, output_shape, output_data, output_scale, + output_zero_point, kTfLiteActNone); +} + +TF_LITE_MICRO_TEST(SimpleTestRelu) { + const int input_elements = 12; + const int input_shape[] = {4, 1, 3, 2, 2}; + const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; + const int filter_elements = 16; + const int filter_shape[] = {4, 1, 2, 2, 4}; + const float filter_values[] = {1, 2, 3, 4, -9, 10, -11, 12, + 5, 6, 7, 8, 13, -14, 15, -16}; + const int bias_elements = 4; + const int bias_shape[] = {4, 1, 1, 1, 4}; + const int output_elements = 8; + 
const float bias_values[] = {1, 2, 3, 4}; + const int output_shape[] = {4, 1, 2, 1, 4}; + const int output_dims_count = 8; + const float golden_relu[] = {71, 0, 99, 0, 91, 0, 127, 0}; + float output_data[output_dims_count]; + + tflite::testing::TestDepthwiseConvFloat( + input_shape, input_values, filter_shape, filter_values, bias_shape, + bias_values, golden_relu, output_shape, kTfLiteActRelu, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTestReluQuantized) { + const int input_elements = 12; + const int input_shape[] = {4, 1, 3, 2, 2}; + const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; + const int filter_elements = 16; + const int filter_shape[] = {4, 1, 2, 2, 4}; + const float filter_values[] = {1, 2, 3, 4, -9, 10, -11, 12, + 5, 6, 7, 8, 13, -14, 15, -16}; + const int bias_elements = 4; + const int bias_shape[] = {4, 1, 1, 1, 4}; + const int output_elements = 8; + const float bias_values[] = {1, 2, 3, 4}; + const int output_shape[] = {4, 1, 2, 1, 4}; + const int output_dims_count = 8; + const float golden_relu[] = {71, 0, 99, 0, 91, 0, 127, 0}; + + const float input_scale = 0.5f; + const int input_zero_point = 128; + const float filter_scale = 0.5f; + const int filter_zero_point = 128; + const float output_scale = 1.0f; + const int output_zero_point = 128; + + uint8_t input_quantized[input_elements]; + uint8_t filter_quantized[filter_elements]; + int32_t bias_quantized[bias_elements]; + uint8_t golden_quantized[output_elements]; + uint8_t output_data[output_elements]; + + tflite::testing::TestDepthwiseConvQuantizedPerLayer( + input_shape, input_values, input_quantized, input_scale, input_zero_point, + filter_shape, filter_values, filter_quantized, filter_scale, + filter_zero_point, bias_shape, bias_values, bias_quantized, golden_relu, + golden_quantized, output_shape, output_data, output_scale, + output_zero_point, kTfLiteActRelu); +} + +TF_LITE_MICRO_TEST(SimpleTestOptimizedFilterWidth) { + const int input_elements = 12; + const float 
input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; + const int filter_elements = 16; + const float filter_values[] = {1, 2, 3, 4, -9, 10, -11, 12, + 5, 6, 7, 8, 13, -14, 15, -16}; + const int bias_elements = 4; + const float bias_values[] = {1, 2, 3, 4}; + const int output_dims_count = 9; + const int input_shape[] = {4, 1, 1, 9, 1}; + const int filter_shape[] = {4, 2, 1, 8, 1}; + const int bias_shape[] = {1, 1}; + const float goldens[] = { + 92, 56, 12, 22, 33, 72, 44, 20, 5, + }; + const int output_shape[] = {4, 1, 1, 9, 1}; + + const float input_scale = 1.0f; + const int input_zero_point = 128; + const float filter_scale = 0.5f; + const int filter_zero_point = 128; + const float output_scale = 1.0f; + const int output_zero_point = 128; + + uint8_t input_quantized[input_elements]; + uint8_t filter_quantized[filter_elements]; + int32_t bias_quantized[bias_elements]; + uint8_t golden_quantized[output_dims_count]; + uint8_t output_data[output_dims_count]; + + tflite::testing::TestDepthwiseConvQuantizedPerLayer( + input_shape, input_values, input_quantized, input_scale, input_zero_point, + filter_shape, filter_values, filter_quantized, filter_scale, + filter_zero_point, bias_shape, bias_values, bias_quantized, goldens, + golden_quantized, output_shape, output_data, output_scale, + output_zero_point, kTfLiteActNone); +} + +TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannel) { + const int input_elements = 12; + const int input_shape[] = {4, 1, 3, 2, 2}; + const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; + const int filter_elements = 16; + const int filter_shape[] = {4, 1, 2, 2, 4}; + const float filter_values[] = {1, 2, 3, 4, -9, 10, -11, 12, + 5, 6, 7, 8, 13, -14, 15, -16}; + const int bias_elements = 4; + const int bias_shape[] = {4, 1, 1, 1, 4}; + const int output_elements = 8; + const float bias_values[] = {1, 2, 3, 4}; + const float golden[] = { + 71, -34, 99, -20, 91, -26, 127, -4, + }; + const int output_shape[] = {4, 1, 2, 1, 4}; + 
const int output_dims_count = 8; + int8_t output_data[output_dims_count]; + + const float input_scale = 0.5; + const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + + int8_t input_quantized[input_elements]; + int8_t filter_quantized[filter_elements]; + int32_t bias_quantized[bias_elements]; + int8_t golden_quantized[output_elements]; + int zero_points[bias_elements + 1]; + float scales[bias_elements + 1]; + + tflite::testing::TestDepthwiseConvQuantizedPerChannel( + input_shape, input_values, input_quantized, input_scale, input_zero_point, + filter_shape, filter_values, filter_quantized, bias_shape, bias_values, + bias_quantized, output_shape, golden, golden_quantized, output_data, + output_scale, output_zero_point, kTfLiteActNone); +} + +TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannelDepthMultiplier1) { + const int input_elements = 12; + const int input_shape[] = {4, 1, 3, 2, 2}; + const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; + const int filter_elements = 8; + const int filter_shape[] = {4, 1, 2, 2, 2}; + const float filter_values[] = {1, 2, 3, 4, -9, 10, -11, 12}; + const int bias_elements = 2; + const int bias_shape[] = {4, 1, 1, 1, 2}; + const int output_elements = 4; + const float bias_values[] = {1, 2}; + const float golden[] = { + -103, + 127, + -128, + 127, + }; + const int output_shape[] = {4, 1, 2, 1, 2}; + const int output_dims_count = 4; + int8_t output_data[output_dims_count]; + + const float input_scale = 1.0f; + const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + + int8_t input_quantized[input_elements]; + int8_t filter_quantized[filter_elements]; + int32_t bias_quantized[bias_elements]; + int8_t golden_quantized[output_elements]; + int zero_points[bias_elements + 1]; + float scales[bias_elements + 1]; + + tflite::testing::TestDepthwiseConvQuantizedPerChannel( + input_shape, input_values, input_quantized, input_scale, 
input_zero_point, + filter_shape, filter_values, filter_quantized, bias_shape, bias_values, + bias_quantized, output_shape, golden, golden_quantized, output_data, + output_scale, output_zero_point, kTfLiteActNone); +} + +TF_LITE_MICRO_TEST(TestQuantizedPerChannelDepthMultiplier1Relu6) { + const int input_elements = 24; + const int input_shape[] = {4, 1, 3, 2, 4}; + const float input_values[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + const int filter_elements = 16; + const int filter_shape[] = {4, 1, 2, 2, 4}; + const float filter_values[] = {0, 1, 8, -2, -1, 2, -10, 0, + -1, 3, -18, 0, 0, 4, 20, -3}; + const int bias_elements = 4; + const int bias_shape[] = {4, 1, 1, 1, 4}; + const int output_elements = 8; + const float bias_values[] = {1, 2, 3, 4}; + const float golden[] = { + 0, 6, 3, 0, 0, 6, 3, 0, + }; + const int output_shape[] = {4, 1, 2, 1, 4}; + int8_t output_data[output_elements]; + float output_float[output_elements]; + + const float input_scale = 0.023529f; + const float output_scale = 0.023529f; + const int input_zero_point = -128; + const int output_zero_point = -128; + + int8_t input_quantized[input_elements]; + int8_t filter_quantized[filter_elements]; + int32_t bias_quantized[bias_elements]; + int8_t golden_quantized[output_elements]; + int zero_points[bias_elements + 1]; + float scales[bias_elements + 1]; + + tflite::testing::TestDepthwiseConvFloat( + input_shape, input_values, filter_shape, filter_values, bias_shape, + bias_values, golden, output_shape, kTfLiteActRelu6, output_float); + + tflite::testing::TestDepthwiseConvQuantizedPerChannel( + input_shape, input_values, input_quantized, input_scale, input_zero_point, + filter_shape, filter_values, filter_quantized, bias_shape, bias_values, + bias_quantized, output_shape, golden, golden_quantized, output_data, + output_scale, output_zero_point, kTfLiteActRelu6); +} + +TF_LITE_MICRO_TEST(TestQuantizedPerChannelCompareWithFloat) { + const int input_dims[] = {4, 
1, 2, 3, 2}; + const float input_data[] = {3, 2, 1, -1, -2, -3, 4, 3, 2, -2, -3, -4}; + const int filter_dims[] = {4, 1, 2, 2, 4}; + const float filter_data[] = {1, 2, 3, 4, 3, 4, 5, 6, 7, 8, 5, 6, 3, 4, 1, 2}; + const int bias_dims[] = {4, 1, 1, 1, 4}; + const float bias_data[] = {3, -2, 4, 6}; + const int output_dims[] = {4, 1, 1, 2, 4}; + const float golden[] = {43, 48, 18, 22, 3, -4, -28, -36}; + + const int input_size = 12; + const int filter_size = 16; + const int output_size = 8; + const int bias_size = 4; + int8_t input_quantized[input_size]; + int8_t filter_quantized[filter_size]; + int32_t bias_quantized[bias_size]; + int8_t golden_quantized[output_size]; + int zero_points[bias_size + 1]; + float scales[bias_size + 1]; + int8_t output_data[output_size]; + float output_float[output_size]; + + const float input_scale = 0.5; + const float output_scale = 1.0; + const int input_zero_point = 0; + const int output_zero_point = 0; + + tflite::testing::TestDepthwiseConvQuantizedPerChannel( + input_dims, input_data, input_quantized, input_scale, input_zero_point, + filter_dims, filter_data, filter_quantized, bias_dims, bias_data, + bias_quantized, output_dims, golden, golden_quantized, output_data, + output_scale, output_zero_point, kTfLiteActNone); + + tflite::testing::TestDepthwiseConvFloat( + input_dims, input_data, filter_dims, filter_data, bias_dims, bias_data, + golden, output_dims, kTfLiteActNone, output_float); +} + +TF_LITE_MICRO_TEST(FilterDimsNotMatchingAffineQuantization) { + const int input_shape[] = {4, 1, 2, 3, 2}; + const float input_data[] = {3, 2, 1, -1, -2, -3, 4, 3, 2, -2, -3, -4}; + const int filter_shape[] = {4, 1, 2, 2, 4}; + const float filter_data[] = {1, 2, 3, 4, 3, 4, 5, 6, 7, 8, 5, 6, 3, 4, 1, 2}; + const int bias_shape[] = {4, 1, 1, 1, 4}; + const float bias_data[] = {3, -2, 4, 6}; + const int output_shape[] = {4, 1, 1, 2, 4}; + const float golden[] = {43, 48, 18, 22, 3, -4, -28, -36}; + + const int input_size = 12; + const int 
filter_size = 16; + const int output_size = 8; + const int bias_size = 4; + int8_t input_quantized[input_size]; + int8_t filter_quantized[filter_size]; + int32_t bias_quantized[bias_size]; + int8_t golden_quantized[output_size]; + int zero_points[bias_size + 1]; + float scales[bias_size + 1]; + int8_t output_data[output_size]; + float output_float[output_size]; + + const float input_scale = 0.5; + const float output_scale = 1.0; + const int input_zero_point = 0; + const int output_zero_point = 0; + + TfLiteIntArray* input_dims = tflite::testing::IntArrayFromInts(input_shape); + TfLiteIntArray* filter_dims = tflite::testing::IntArrayFromInts(filter_shape); + TfLiteIntArray* bias_dims = tflite::testing::IntArrayFromInts(bias_shape); + TfLiteIntArray* output_dims = tflite::testing::IntArrayFromInts(output_shape); + + int filter_zero_points[5]; + float filter_scales[5]; + TfLiteAffineQuantization filter_quant; + TfLiteAffineQuantization bias_quant; + TfLiteTensor input_tensor = tflite::testing::CreateQuantizedTensor( + input_data, input_quantized, input_dims, input_scale, input_zero_point, + "input_tensor"); + TfLiteTensor filter_tensor = + tflite::testing::CreateSymmetricPerChannelQuantizedTensor( + filter_data, filter_quantized, filter_dims, filter_scales, + filter_zero_points, &filter_quant, 0 /* quantized dimension */, + "filter_tensor"); + TfLiteTensor bias_tensor = + tflite::testing::CreatePerChannelQuantizedBiasTensor( + bias_data, bias_quantized, bias_dims, input_scale, &filter_scales[1], + scales, zero_points, &bias_quant, 0, "bias_tensor"); + TfLiteTensor output_tensor = tflite::testing::CreateQuantizedTensor( + output_data, output_dims, output_scale, output_zero_point, + "output_tensor"); + + float input_scales[] = {1, input_scale}; + int input_zero_points[] = {1, input_zero_point}; + TfLiteAffineQuantization input_quant = { + tflite::testing::FloatArrayFromFloats(input_scales), + tflite::testing::IntArrayFromInts(input_zero_points)}; + 
input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; + + constexpr int inputs_size = 3; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + input_tensor, + filter_tensor, + bias_tensor, + output_tensor, + }; + + // Set filter quant to mismatched dimension. + TfLiteAffineQuantization* quant = reinterpret_cast( + filter_tensor.quantization.params); + quant->scale->size = 2; + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteError, tflite::testing::ValidateDepthwiseConvGoldens( + golden_quantized, output_size, kTfLiteActNone, 1e-5, + tensors_size, tensors)); + + // Set scale back to correct dimension, and make zero point array too short. + quant->scale->size = filter_shape[0]; + quant->zero_point->size = 2; + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteError, tflite::testing::ValidateDepthwiseConvGoldens( + golden_quantized, output_size, kTfLiteActNone, 1e-5, + tensors_size, tensors)); +} + +TF_LITE_MICRO_TEST(PerChannelBroadcastQuantizationParams) { + const float input_scale = 1.0f; + const float filter_scale = 1.0f; + const float output_scale = 1.0f; + + const int input_elements = 12; + const int input_shape[] = {4, 1, 3, 2, 2}; + const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; + const int filter_elements = 16; + const int filter_shape[] = {4, 1, 2, 2, 4}; + const float filter_values[] = {1, 2, 3, 4, -9, 10, -11, 12, + 5, 6, 7, 8, 13, -14, 15, -16}; + const int bias_elements = 4; + const int bias_shape[] = {4, 1, 1, 1, 4}; + const int output_elements = 8; + const float bias_values[] = {1, 2, 3, 4}; + const float golden[] = { + 71, -34, 99, -20, 91, -26, 127, -4, + }; + const int output_shape[] = {4, 1, 2, 1, 4}; + const int output_dims_count = 8; + int8_t output_data[output_dims_count]; + + int8_t input_quantized[input_elements]; + int8_t filter_quantized[filter_elements]; + int32_t bias_quantized[bias_elements]; + int8_t golden_quantized[output_elements]; + + 
TfLiteIntArray* input_dims = tflite::testing::IntArrayFromInts(input_shape); + TfLiteIntArray* filter_dims = tflite::testing::IntArrayFromInts(filter_shape); + TfLiteIntArray* bias_dims = tflite::testing::IntArrayFromInts(bias_shape); + TfLiteIntArray* output_dims = tflite::testing::IntArrayFromInts(output_shape); + + // Create per-layer quantized int8 input tensor. + TfLiteTensor input_tensor = tflite::testing::CreateQuantizedTensor( + input_values, input_quantized, input_dims, input_scale, 0, + "input_tensor"); + int input_zero_points[2] = {1, 0}; + float input_scales[2] = {1, input_scale}; + TfLiteAffineQuantization input_quant = { + tflite::testing::FloatArrayFromFloats(input_scales), + tflite::testing::IntArrayFromInts(input_zero_points)}; + input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; + + // Create per-layer quantized int8 filter tensor. + TfLiteTensor filter_tensor = tflite::testing::CreateQuantizedTensor( + filter_values, filter_quantized, filter_dims, filter_scale, 0, + "filter_tensor"); + int filter_zero_points[2] = {1, 0}; + float filter_scales[2] = {1, filter_scale}; + TfLiteAffineQuantization filter_quant = { + tflite::testing::FloatArrayFromFloats(filter_scales), + tflite::testing::IntArrayFromInts(filter_zero_points)}; + filter_tensor.quantization = {kTfLiteAffineQuantization, &filter_quant}; + + // Create per-layer quantized int32 bias tensor. + tflite::SymmetricQuantize(bias_values, bias_quantized, bias_elements, + input_scale * output_scale); + TfLiteTensor bias_tensor = tflite::testing::CreateInt32Tensor( + bias_quantized, bias_dims, "bias_tensor"); + + int bias_zero_points[2] = {1, 0}; + float bias_scales[2] = {1, input_scale * filter_scale}; + TfLiteAffineQuantization bias_quant = { + tflite::testing::FloatArrayFromFloats(bias_scales), + tflite::testing::IntArrayFromInts(bias_zero_points)}; + bias_tensor.quantization = {kTfLiteAffineQuantization, &bias_quant}; + + // Create per-layer quantized int8 output tensor. 
+ TfLiteTensor output_tensor = tflite::testing::CreateQuantizedTensor( + output_data, output_dims, output_scale, 0, "output_tensor"); + int output_zero_points[2] = {1, 0}; + float output_scales[2] = {1, output_scale}; + TfLiteAffineQuantization output_quant = { + tflite::testing::FloatArrayFromFloats(output_scales), + tflite::testing::IntArrayFromInts(output_zero_points)}; + output_tensor.quantization = {kTfLiteAffineQuantization, &output_quant}; + + constexpr int inputs_size = 3; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + input_tensor, + filter_tensor, + bias_tensor, + output_tensor, + }; + + tflite::AsymmetricQuantize(golden, golden_quantized, output_dims_count, + output_scale, 0); + + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, tflite::testing::ValidateDepthwiseConvGoldens( + golden_quantized, output_dims_count, kTfLiteActNone, 1e-5, + tensors_size, tensors)); +} + +TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/kernels/embarc_mli/fully_connected_slicing_test.cc b/tensorflow/lite/micro/kernels/embarc_mli/fully_connected_slicing_test.cc new file mode 100644 index 00000000000..539c7ecc3a4 --- /dev/null +++ b/tensorflow/lite/micro/kernels/embarc_mli/fully_connected_slicing_test.cc @@ -0,0 +1,938 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include + +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/micro/kernels/all_ops_resolver.h" +#include "tensorflow/lite/micro/testing/micro_test.h" +#include "tensorflow/lite/micro/testing/test_utils.h" + +namespace tflite { +namespace testing { +namespace { + +void TestFullyConnectedFloat( + const int* input_dims_data, const float* input_data, + const int* weights_dims_data, const float* weights_data, + const int* bias_dims_data, const float* bias_data, + const float* expected_output_data, const int* output_dims_data, + TfLiteFusedActivation activation, float* output_data) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* weights_dims = IntArrayFromInts(weights_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + constexpr int inputs_size = 3; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateFloatTensor(input_data, input_dims, "input_tensor"), + CreateFloatTensor(weights_data, weights_dims, "weights_tensor"), + CreateFloatTensor(bias_data, bias_dims, "bias_tensor"), + CreateFloatTensor(output_data, output_dims, "output_tensor"), + }; + + TfLiteContext context; + PopulateContext(tensors, tensors_size, micro_test::reporter, &context); + ::tflite::ops::micro::AllOpsResolver resolver; + const TfLiteRegistration* registration = + resolver.FindOp(tflite::BuiltinOperator_FULLY_CONNECTED, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + + TfLiteFullyConnectedParams builtin_data = { + activation, + kTfLiteFullyConnectedWeightsFormatDefault, + }; + const char* init_data = reinterpret_cast(&builtin_data); + size_t init_data_size = 0; + void* 
user_data = nullptr; + if (registration->init) { + user_data = registration->init(&context, init_data, init_data_size); + } + int inputs_array_data[] = {3, 0, 1, 2}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 3}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + int temporaries_array_data[] = {0}; + TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data); + + TfLiteNode node; + node.inputs = inputs_array; + node.outputs = outputs_array; + node.temporaries = temporaries_array; + node.user_data = user_data; + node.builtin_data = reinterpret_cast(&builtin_data); + node.custom_initial_data = nullptr; + node.custom_initial_data_size = 0; + node.delegate = nullptr; + if (registration->prepare) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); + } + TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + if (registration->free) { + registration->free(&context, user_data); + } + for (int i = 0; i < output_dims_count; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], 1e-5f); + } +} + +template +void TestFullyConnectedQuantized( + const int* input_dims_data, const T* input_data, const float input_min, + const float input_max, const int* weights_dims_data, const T* weights_data, + const float weights_min, const float weights_max, const int* bias_dims_data, + const int32_t* bias_data, const float bias_scale, + const T* expected_output_data, const int* output_dims_data, + const float output_min, const float output_max, + TfLiteFusedActivation activation, T* output_data) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* weights_dims = IntArrayFromInts(weights_dims_data); + TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); + const 
int output_dims_count = ElementCount(*output_dims); + + constexpr int inputs_size = 3; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateQuantizedTensor(input_data, input_dims, "input_tensor", input_min, + input_max), + CreateQuantizedTensor(weights_data, weights_dims, "weights_tensor", + weights_min, weights_max), + CreateQuantized32Tensor(bias_data, bias_dims, "bias_tensor", bias_scale), + CreateQuantizedTensor(output_data, output_dims, "output_tensor", + output_min, output_max), + }; + + TfLiteContext context; + PopulateContext(tensors, tensors_size, micro_test::reporter, &context); + + ::tflite::ops::micro::AllOpsResolver resolver; + const TfLiteRegistration* registration = + resolver.FindOp(tflite::BuiltinOperator_FULLY_CONNECTED, 4); + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + + TfLiteFullyConnectedParams builtin_data = { + activation, + kTfLiteFullyConnectedWeightsFormatDefault, + }; + const char* init_data = reinterpret_cast(&builtin_data); + size_t init_data_size = 0; + void* user_data = nullptr; + if (registration->init) { + user_data = registration->init(&context, init_data, init_data_size); + } + + int inputs_array_data[] = {3, 0, 1, 2}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 3}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + int temporaries_array_data[] = {0}; + TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data); + + TfLiteNode node; + node.inputs = inputs_array; + node.outputs = outputs_array; + node.temporaries = temporaries_array; + node.user_data = user_data; + node.builtin_data = reinterpret_cast(&builtin_data); + node.custom_initial_data = nullptr; + node.custom_initial_data_size = 0; + node.delegate = nullptr; + + if (registration->prepare) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); + } + 
TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + if (registration->free) { + registration->free(&context, user_data); + } + for (int i = 0; i < output_dims_count; ++i) { + TF_LITE_MICRO_EXPECT_EQ(expected_output_data[i], output_data[i]); + } +} + +} // namespace +} // namespace testing +} // namespace tflite + +TF_LITE_MICRO_TESTS_BEGIN + +TF_LITE_MICRO_TEST(SimpleTest) { + const int input_dims_data[] = {2, 2, 10}; + const float input_data[] = { + 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 + 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 + }; + const int weights_dims_data[] = {2, 3, 10}; + const float weights_data[] = { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 + }; + const int bias_dims_data[] = {1, 3}; + const float bias_data[] = {1, 2, 3}; + const float expected_output_data[] = { + 24, 25, 26, 58, 59, 60, + }; + const int output_dims_data[] = {2, 2, 3}; + + const int output_dims_count = 6; + float output_data[output_dims_count]; + tflite::testing::TestFullyConnectedFloat( + input_dims_data, input_data, weights_dims_data, weights_data, + bias_dims_data, bias_data, expected_output_data, output_dims_data, + kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTest2) { + const int input_dims_data[] = {2, 2, 2}; + const float input_data[] = { + 1, 2, // b = 0 + 2, 1, // b = 1 + }; + const int weights_dims_data[] = {2, 1, 2}; + const float weights_data[] = { + 2, 4, // u = 0 + }; + const int bias_dims_data[] = {1, 1}; + const float bias_data[] = {1}; + const float expected_output_data[] = { + 11, + 9, + }; + const int output_dims_data[] = {2, 2, 1}; + + const int output_dims_count = 6; + float output_data[output_dims_count]; + tflite::testing::TestFullyConnectedFloat( + input_dims_data, input_data, weights_dims_data, weights_data, + bias_dims_data, bias_data, expected_output_data, output_dims_data, 
+ kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTestRelu) { + const int input_dims_data[] = {2, 2, 10}; + const float input_data[] = { + 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 + 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 + }; + const int weights_dims_data[] = {2, 3, 10}; + const float weights_data[] = { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 + -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, // u = 1 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 + }; + const int bias_dims_data[] = {1, 3}; + const float bias_data[] = {1, -2, 3}; + const float expected_output_data[] = { + 24, 0, 26, 58, 0, 60, + }; + const int output_dims_data[] = {2, 2, 3}; + + const int output_dims_count = 6; + float output_data[output_dims_count]; + tflite::testing::TestFullyConnectedFloat( + input_dims_data, input_data, weights_dims_data, weights_data, + bias_dims_data, bias_data, expected_output_data, output_dims_data, + kTfLiteActRelu, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTestQuantizedUInt8) { + using tflite::testing::F2Q; + using tflite::testing::F2Q32; + + const float input_min = -63.5f; + const float input_max = 64.0f; + const float weights_min = -63.5f; + const float weights_max = 64.0f; + const float bias_scale = 0.25f; + const float output_min = -127.0f; + const float output_max = 128.0f; + + const int input_dims_data[] = {2, 2, 10}; + const uint8_t input_data[] = { + F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), + F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), + F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), + F2Q(7, input_min, input_max), F2Q(8, input_min, input_max), + F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max), + F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), + F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), + F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), + F2Q(7, input_min, input_max), F2Q(-8, input_min, input_max), + F2Q(9, input_min, input_max), F2Q(-10, input_min, 
input_max), + }; + const int weights_dims_data[] = {2, 3, 10}; + const uint8_t weights_data[] = { + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + }; + const int bias_dims_data[] = {1, 3}; + const int32_t bias_data[] = { + F2Q32(1, bias_scale), + F2Q32(2, bias_scale), + F2Q32(3, bias_scale), + }; + const uint8_t expected_output_data[] = { + F2Q(24, output_min, output_max), F2Q(25, output_min, output_max), + F2Q(26, output_min, output_max), F2Q(58, output_min, output_max), + F2Q(59, output_min, output_max), F2Q(60, output_min, output_max), + }; + const int output_dims_data[] = {2, 2, 3}; + + const int output_dims_count = 6; + uint8_t output_data[output_dims_count]; + tflite::testing::TestFullyConnectedQuantized( + input_dims_data, input_data, input_min, input_max, weights_dims_data, + weights_data, weights_min, weights_max, bias_dims_data, bias_data, + bias_scale, expected_output_data, output_dims_data, output_min, + output_max, kTfLiteActNone, output_data); +} + +// 
TODO(b/138811455): Fix code duplication in micro tests +TF_LITE_MICRO_TEST(SimpleTestQuantizedInt8) { + using tflite::testing::F2Q32; + using tflite::testing::F2QS; + + const float input_min = -63.5f; + const float input_max = 64.0f; + const float weights_min = -64.0f; + const float weights_max = 63.5f; + const float bias_scale = 0.25f; + const float output_min = -127.0f; + const float output_max = 128.0f; + + const int input_dims_data[] = {2, 2, 10}; + const int8_t input_data[] = { + F2QS(1, input_min, input_max), F2QS(2, input_min, input_max), + F2QS(3, input_min, input_max), F2QS(4, input_min, input_max), + F2QS(5, input_min, input_max), F2QS(6, input_min, input_max), + F2QS(7, input_min, input_max), F2QS(8, input_min, input_max), + F2QS(-9, input_min, input_max), F2QS(-10, input_min, input_max), + F2QS(1, input_min, input_max), F2QS(2, input_min, input_max), + F2QS(3, input_min, input_max), F2QS(4, input_min, input_max), + F2QS(5, input_min, input_max), F2QS(6, input_min, input_max), + F2QS(7, input_min, input_max), F2QS(-8, input_min, input_max), + F2QS(9, input_min, input_max), F2QS(-10, input_min, input_max), + }; + const int weights_dims_data[] = {2, 3, 10}; + const int8_t weights_data[] = { + F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), + F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), + F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), + F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), + F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), + F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), + F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), + F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), + F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), + F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), + F2QS(1, weights_min, weights_max), F2QS(2, weights_min, 
weights_max), + F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), + F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), + F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), + F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), + }; + const int bias_dims_data[] = {1, 3}; + const int32_t bias_data[] = { + F2Q32(1, bias_scale), + F2Q32(2, bias_scale), + F2Q32(3, bias_scale), + }; + const int8_t expected_output_data[] = { + F2QS(24, output_min, output_max), F2QS(25, output_min, output_max), + F2QS(26, output_min, output_max), F2QS(58, output_min, output_max), + F2QS(59, output_min, output_max), F2QS(60, output_min, output_max), + }; + const int output_dims_data[] = {2, 2, 3}; + + const int output_dims_count = 6; + int8_t output_data[output_dims_count]; + tflite::testing::TestFullyConnectedQuantized( + input_dims_data, input_data, input_min, input_max, weights_dims_data, + weights_data, weights_min, weights_max, bias_dims_data, bias_data, + bias_scale, expected_output_data, output_dims_data, output_min, + output_max, kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTestQuantizedUInt8Relu) { + using tflite::testing::F2Q; + using tflite::testing::F2Q32; + + const float input_min = -63.5f; + const float input_max = 64.0f; + const float weights_min = -63.5f; + const float weights_max = 64.0f; + const float bias_scale = 0.25f; + const float output_min = -127.0f; + const float output_max = 128.0f; + + const int input_dims_data[] = {2, 2, 10}; + const uint8_t input_data[] = { + F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), + F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), + F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), + F2Q(7, input_min, input_max), F2Q(8, input_min, input_max), + F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max), + F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), + F2Q(3, input_min, input_max), F2Q(4, 
input_min, input_max), + F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), + F2Q(7, input_min, input_max), F2Q(-8, input_min, input_max), + F2Q(9, input_min, input_max), F2Q(-10, input_min, input_max), + }; + const int weights_dims_data[] = {2, 3, 10}; + const uint8_t weights_data[] = { + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + F2Q(-1, weights_min, weights_max), F2Q(-2, weights_min, weights_max), + F2Q(-3, weights_min, weights_max), F2Q(-4, weights_min, weights_max), + F2Q(-5, weights_min, weights_max), F2Q(-6, weights_min, weights_max), + F2Q(-7, weights_min, weights_max), F2Q(-8, weights_min, weights_max), + F2Q(-9, weights_min, weights_max), F2Q(-10, weights_min, weights_max), + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + }; + const int bias_dims_data[] = {1, 3}; + const int32_t bias_data[] = { + F2Q32(1, bias_scale), + F2Q32(0, bias_scale), + F2Q32(3, bias_scale), + }; + const uint8_t expected_output_data[] = { + F2Q(24, output_min, output_max), F2Q(0, output_min, output_max), + F2Q(26, output_min, output_max), F2Q(58, output_min, output_max), + F2Q(0, output_min, output_max), F2Q(60, output_min, output_max), + }; + const int output_dims_data[] = {2, 2, 3}; + + const int output_dims_count = 6; + uint8_t output_data[output_dims_count]; + tflite::testing::TestFullyConnectedQuantized( + input_dims_data, input_data, input_min, input_max, 
weights_dims_data, + weights_data, weights_min, weights_max, bias_dims_data, bias_data, + bias_scale, expected_output_data, output_dims_data, output_min, + output_max, kTfLiteActRelu, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTestQuantizedInt8Relu) { + using tflite::testing::F2Q32; + using tflite::testing::F2QS; + + const float input_min = -63.5f; + const float input_max = 64.0f; + const float weights_min = -64.0f; + const float weights_max = 63.5f; + const float bias_scale = 0.25f; + const float output_min = -127.0f; + const float output_max = 128.0f; + + const int input_dims_data[] = {2, 2, 10}; + const int8_t input_data[] = { + F2QS(1, input_min, input_max), F2QS(2, input_min, input_max), + F2QS(3, input_min, input_max), F2QS(4, input_min, input_max), + F2QS(5, input_min, input_max), F2QS(6, input_min, input_max), + F2QS(7, input_min, input_max), F2QS(8, input_min, input_max), + F2QS(-9, input_min, input_max), F2QS(-10, input_min, input_max), + F2QS(1, input_min, input_max), F2QS(2, input_min, input_max), + F2QS(3, input_min, input_max), F2QS(4, input_min, input_max), + F2QS(5, input_min, input_max), F2QS(6, input_min, input_max), + F2QS(7, input_min, input_max), F2QS(-8, input_min, input_max), + F2QS(9, input_min, input_max), F2QS(-10, input_min, input_max), + }; + const int weights_dims_data[] = {2, 3, 10}; + const int8_t weights_data[] = { + F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), + F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), + F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), + F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), + F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), + F2QS(-1, weights_min, weights_max), F2QS(-2, weights_min, weights_max), + F2QS(-3, weights_min, weights_max), F2QS(-4, weights_min, weights_max), + F2QS(-5, weights_min, weights_max), F2QS(-6, weights_min, weights_max), + F2QS(-7, weights_min, weights_max), 
F2QS(-8, weights_min, weights_max), + F2QS(-9, weights_min, weights_max), F2QS(-10, weights_min, weights_max), + F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), + F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), + F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), + F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), + F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), + }; + const int bias_dims_data[] = {1, 3}; + const int32_t bias_data[] = { + F2Q32(1, bias_scale), + F2Q32(0, bias_scale), + F2Q32(3, bias_scale), + }; + const int8_t expected_output_data[] = { + F2QS(24, output_min, output_max), F2QS(0, output_min, output_max), + F2QS(26, output_min, output_max), F2QS(58, output_min, output_max), + F2QS(0, output_min, output_max), F2QS(60, output_min, output_max), + }; + const int output_dims_data[] = {2, 2, 3}; + + const int output_dims_count = 6; + int8_t output_data[output_dims_count]; + tflite::testing::TestFullyConnectedQuantized( + input_dims_data, input_data, input_min, input_max, weights_dims_data, + weights_data, weights_min, weights_max, bias_dims_data, bias_data, + bias_scale, expected_output_data, output_dims_data, output_min, + output_max, kTfLiteActRelu, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTestQuantizedUInt8OutputMultiplierGreaterThan1) { + using tflite::testing::F2Q; + using tflite::testing::F2Q32; + + const float input_min = -127.0f; + const float input_max = 128.0f; + const float weights_min = -127.0f; + const float weights_max = 128.0f; + const float bias_scale = 1.0f; + const float output_min = -63.5f; + const float output_max = 64.0f; + + const int input_dims_data[] = {2, 2, 10}; + const uint8_t input_data[] = { + F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), + F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), + F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), + F2Q(7, input_min, input_max), F2Q(8, 
input_min, input_max), + F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max), + F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), + F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), + F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), + F2Q(7, input_min, input_max), F2Q(-8, input_min, input_max), + F2Q(9, input_min, input_max), F2Q(-10, input_min, input_max), + }; + const int weights_dims_data[] = {2, 3, 10}; + const uint8_t weights_data[] = { + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + }; + const int bias_dims_data[] = {1, 3}; + const int32_t bias_data[] = { + F2Q32(1, bias_scale), + F2Q32(2, bias_scale), + F2Q32(3, bias_scale), + }; + const uint8_t expected_output_data[] = { + F2Q(24, output_min, output_max), F2Q(25, output_min, output_max), + F2Q(26, output_min, output_max), F2Q(58, output_min, output_max), + F2Q(59, output_min, output_max), F2Q(60, output_min, output_max), + }; + const int output_dims_data[] = {2, 2, 3}; + + const 
int output_dims_count = 6; + uint8_t output_data[output_dims_count]; + tflite::testing::TestFullyConnectedQuantized( + input_dims_data, input_data, input_min, input_max, weights_dims_data, + weights_data, weights_min, weights_max, bias_dims_data, bias_data, + bias_scale, expected_output_data, output_dims_data, output_min, + output_max, kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTestQuantizedInt8OutputMultiplierGreaterThan1) { + using tflite::testing::F2Q32; + using tflite::testing::F2QS; + + const float input_min = -127.0f; + const float input_max = 128.0f; + const float weights_min = -128.0f; + const float weights_max = 127.0f; + const float bias_scale = 1.0f; + const float output_min = -63.5f; + const float output_max = 64.0f; + + const int input_dims_data[] = {2, 2, 10}; + const int8_t input_data[] = { + F2QS(1, input_min, input_max), F2QS(2, input_min, input_max), + F2QS(3, input_min, input_max), F2QS(4, input_min, input_max), + F2QS(5, input_min, input_max), F2QS(6, input_min, input_max), + F2QS(7, input_min, input_max), F2QS(8, input_min, input_max), + F2QS(-9, input_min, input_max), F2QS(-10, input_min, input_max), + F2QS(1, input_min, input_max), F2QS(2, input_min, input_max), + F2QS(3, input_min, input_max), F2QS(4, input_min, input_max), + F2QS(5, input_min, input_max), F2QS(6, input_min, input_max), + F2QS(7, input_min, input_max), F2QS(-8, input_min, input_max), + F2QS(9, input_min, input_max), F2QS(-10, input_min, input_max), + }; + const int weights_dims_data[] = {2, 3, 10}; + const int8_t weights_data[] = { + F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), + F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), + F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), + F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), + F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), + F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), 
+ F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), + F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), + F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), + F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), + F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), + F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), + F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), + F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), + F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), + }; + const int bias_dims_data[] = {1, 3}; + const int32_t bias_data[] = { + F2Q32(1, bias_scale), + F2Q32(2, bias_scale), + F2Q32(3, bias_scale), + }; + const int8_t expected_output_data[] = { + F2QS(24, output_min, output_max), F2QS(25, output_min, output_max), + F2QS(26, output_min, output_max), F2QS(58, output_min, output_max), + F2QS(59, output_min, output_max), F2QS(60, output_min, output_max), + }; + const int output_dims_data[] = {2, 2, 3}; + + const int output_dims_count = 6; + int8_t output_data[output_dims_count]; + tflite::testing::TestFullyConnectedQuantized( + input_dims_data, input_data, input_min, input_max, weights_dims_data, + weights_data, weights_min, weights_max, bias_dims_data, bias_data, + bias_scale, expected_output_data, output_dims_data, output_min, + output_max, kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTest4DInput) { + const int input_dims_data[] = {4, 1, 1, 5, 1}; + const float input_data[] = { + 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 + 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 + }; + const int weights_dims_data[] = {2, 3, 10}; + const float weights_data[] = { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 + }; + const int bias_dims_data[] = {1, 3}; + const float bias_data[] = {1, 2, 3}; + const 
float expected_output_data[] = { + 24, 25, 26, 58, 59, 60, // Expected results. + }; + const int output_dims_data[] = {2, 2, 3}; + + const int output_dims_count = 6; + float output_data[output_dims_count]; + tflite::testing::TestFullyConnectedFloat( + input_dims_data, input_data, weights_dims_data, weights_data, + bias_dims_data, bias_data, expected_output_data, output_dims_data, + kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTest4DInputQuantizedUInt8) { + using tflite::testing::F2Q; + using tflite::testing::F2Q32; + + const float input_min = -63.5f; + const float input_max = 64.0f; + const float weights_min = -63.5f; + const float weights_max = 64.0f; + const float bias_scale = 0.25f; + const float output_min = -127.0f; + const float output_max = 128.0f; + + const int input_dims_data[] = {4, 1, 1, 5, 1}; + const uint8_t input_data[] = { + F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), + F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), + F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), + F2Q(7, input_min, input_max), F2Q(8, input_min, input_max), + F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max), + F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), + F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), + F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), + F2Q(7, input_min, input_max), F2Q(-8, input_min, input_max), + F2Q(9, input_min, input_max), F2Q(-10, input_min, input_max), + }; + const int weights_dims_data[] = {2, 3, 10}; + const uint8_t weights_data[] = { + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), 
+ F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + }; + const int bias_dims_data[] = {1, 3}; + const int32_t bias_data[] = { + F2Q32(1, bias_scale), + F2Q32(2, bias_scale), + F2Q32(3, bias_scale), + }; + const uint8_t expected_output_data[] = { + F2Q(24, output_min, output_max), F2Q(25, output_min, output_max), + F2Q(26, output_min, output_max), F2Q(58, output_min, output_max), + F2Q(59, output_min, output_max), F2Q(60, output_min, output_max), + }; + const int output_dims_data[] = {2, 2, 3}; + + const int output_dims_count = 6; + uint8_t output_data[output_dims_count]; + tflite::testing::TestFullyConnectedQuantized( + input_dims_data, input_data, input_min, input_max, weights_dims_data, + weights_data, weights_min, weights_max, bias_dims_data, bias_data, + bias_scale, expected_output_data, output_dims_data, output_min, + output_max, kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTest4DInputQuantizedInt8) { + using tflite::testing::F2Q32; + using tflite::testing::F2QS; + + const float input_min = -63.5f; + const float input_max = 64.0f; + const float weights_min = -64.0f; + const float weights_max = 63.5f; + const float bias_scale = 0.25f; + const float output_min = -127.0f; + const float output_max = 128.0f; + + const int input_dims_data[] = {4, 1, 1, 5, 1}; + const int8_t input_data[] = { + F2QS(1, input_min, input_max), F2QS(2, input_min, input_max), + F2QS(3, input_min, 
input_max), F2QS(4, input_min, input_max), + F2QS(5, input_min, input_max), F2QS(6, input_min, input_max), + F2QS(7, input_min, input_max), F2QS(8, input_min, input_max), + F2QS(-9, input_min, input_max), F2QS(-10, input_min, input_max), + F2QS(1, input_min, input_max), F2QS(2, input_min, input_max), + F2QS(3, input_min, input_max), F2QS(4, input_min, input_max), + F2QS(5, input_min, input_max), F2QS(6, input_min, input_max), + F2QS(7, input_min, input_max), F2QS(-8, input_min, input_max), + F2QS(9, input_min, input_max), F2QS(-10, input_min, input_max), + }; + const int weights_dims_data[] = {2, 3, 10}; + const int8_t weights_data[] = { + F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), + F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), + F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), + F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), + F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), + F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), + F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), + F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), + F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), + F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), + F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), + F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), + F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), + F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), + F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), + }; + const int bias_dims_data[] = {1, 3}; + const int32_t bias_data[] = { + F2Q32(1, bias_scale), + F2Q32(2, bias_scale), + F2Q32(3, bias_scale), + }; + const int8_t expected_output_data[] = { + F2QS(24, output_min, output_max), F2QS(25, output_min, output_max), + 
F2QS(26, output_min, output_max), F2QS(58, output_min, output_max), + F2QS(59, output_min, output_max), F2QS(60, output_min, output_max), + }; + const int output_dims_data[] = {2, 2, 3}; + + const int output_dims_count = 6; + int8_t output_data[output_dims_count]; + tflite::testing::TestFullyConnectedQuantized( + input_dims_data, input_data, input_min, input_max, weights_dims_data, + weights_data, weights_min, weights_max, bias_dims_data, bias_data, + bias_scale, expected_output_data, output_dims_data, output_min, + output_max, kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST( + SimpleTest4DInputQuantizedUInt8OutputMultiplierGreaterThan1) { + using tflite::testing::F2Q; + using tflite::testing::F2Q32; + + const float input_min = -127.0f; + const float input_max = 128.0f; + const float weights_min = -127.0f; + const float weights_max = 128.0f; + const float bias_scale = 1.0f; + const float output_min = -63.5f; + const float output_max = 64.0f; + + const int input_dims_data[] = {4, 1, 1, 5, 1}; + const uint8_t input_data[] = { + F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), + F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), + F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), + F2Q(7, input_min, input_max), F2Q(8, input_min, input_max), + F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max), + F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), + F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), + F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), + F2Q(7, input_min, input_max), F2Q(-8, input_min, input_max), + F2Q(9, input_min, input_max), F2Q(-10, input_min, input_max), + }; + const int weights_dims_data[] = {2, 3, 10}; + const uint8_t weights_data[] = { + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, 
weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), + F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), + F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), + F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), + F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), + }; + const int bias_dims_data[] = {1, 3}; + const int32_t bias_data[] = { + F2Q32(1, bias_scale), + F2Q32(2, bias_scale), + F2Q32(3, bias_scale), + }; + const uint8_t expected_output_data[] = { + F2Q(24, output_min, output_max), F2Q(25, output_min, output_max), + F2Q(26, output_min, output_max), F2Q(58, output_min, output_max), + F2Q(59, output_min, output_max), F2Q(60, output_min, output_max), + }; + const int output_dims_data[] = {2, 2, 3}; + + const int output_dims_count = 6; + uint8_t output_data[output_dims_count]; + tflite::testing::TestFullyConnectedQuantized( + input_dims_data, input_data, input_min, input_max, weights_dims_data, + weights_data, weights_min, weights_max, bias_dims_data, bias_data, + bias_scale, expected_output_data, output_dims_data, output_min, + output_max, kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(SimpleTest4DInputQuantizedInt8OutputMultiplierGreaterThan1) { + using tflite::testing::F2Q32; + using tflite::testing::F2QS; + + const float input_min = -127.0f; + const float input_max = 128.0f; + const float weights_min = -128.0f; + const float weights_max = 127.0f; + const float bias_scale = 1.0f; + const float output_min 
= -63.5f; + const float output_max = 64.0f; + + const int input_dims_data[] = {4, 1, 1, 5, 1}; + const int8_t input_data[] = { + F2QS(1, input_min, input_max), F2QS(2, input_min, input_max), + F2QS(3, input_min, input_max), F2QS(4, input_min, input_max), + F2QS(5, input_min, input_max), F2QS(6, input_min, input_max), + F2QS(7, input_min, input_max), F2QS(8, input_min, input_max), + F2QS(-9, input_min, input_max), F2QS(-10, input_min, input_max), + F2QS(1, input_min, input_max), F2QS(2, input_min, input_max), + F2QS(3, input_min, input_max), F2QS(4, input_min, input_max), + F2QS(5, input_min, input_max), F2QS(6, input_min, input_max), + F2QS(7, input_min, input_max), F2QS(-8, input_min, input_max), + F2QS(9, input_min, input_max), F2QS(-10, input_min, input_max), + }; + const int weights_dims_data[] = {2, 3, 10}; + const int8_t weights_data[] = { + F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), + F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), + F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), + F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), + F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), + F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), + F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), + F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), + F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), + F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), + F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), + F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), + F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), + F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), + F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), + }; + const int bias_dims_data[] = {1, 3}; + const 
int32_t bias_data[] = { + F2Q32(1, bias_scale), + F2Q32(2, bias_scale), + F2Q32(3, bias_scale), + }; + const int8_t expected_output_data[] = { + F2QS(24, output_min, output_max), F2QS(25, output_min, output_max), + F2QS(26, output_min, output_max), F2QS(58, output_min, output_max), + F2QS(59, output_min, output_max), F2QS(60, output_min, output_max), + }; + const int output_dims_data[] = {2, 2, 3}; + + const int output_dims_count = 6; + int8_t output_data[output_dims_count]; + tflite::testing::TestFullyConnectedQuantized( + input_dims_data, input_data, input_min, input_max, weights_dims_data, + weights_data, weights_min, weights_max, bias_dims_data, bias_data, + bias_scale, expected_output_data, output_dims_data, output_min, + output_max, kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/kernels/embarc_mli/pooling_slicing_test.cc b/tensorflow/lite/micro/kernels/embarc_mli/pooling_slicing_test.cc new file mode 100644 index 00000000000..8bfeb718a1b --- /dev/null +++ b/tensorflow/lite/micro/kernels/embarc_mli/pooling_slicing_test.cc @@ -0,0 +1,1116 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include + +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/micro/kernels/all_ops_resolver.h" +#include "tensorflow/lite/micro/testing/micro_test.h" +#include "tensorflow/lite/micro/testing/test_utils.h" + +namespace tflite { +namespace testing { +namespace { + +void TestAveragePoolingFloat(std::initializer_list input_dims_data, + std::initializer_list input_data, + const int filter_height, const int filter_width, + const int stride_height, const int stride_width, + std::initializer_list expected_output_data, + std::initializer_list output_dims_data, + TfLitePadding padding, + TfLiteFusedActivation activation, + float* output_data) { + TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + constexpr int inputs_size = 1; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateFloatTensor(input_data, input_dims, "input_tensor"), + CreateFloatTensor(output_data, output_dims, "output_tensor"), + }; + + TfLiteContext context; + PopulateContext(tensors, tensors_size, micro_test::reporter, &context); + + ::tflite::ops::micro::AllOpsResolver resolver; + const TfLiteRegistration* registration = + resolver.FindOp(tflite::BuiltinOperator_AVERAGE_POOL_2D, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + + TfLitePoolParams builtin_data = {padding, stride_width, stride_height, + filter_width, filter_height, activation}; + const char* init_data = reinterpret_cast(&builtin_data); + size_t init_data_size = 0; + void* user_data = nullptr; + if (registration->init) { + user_data = registration->init(&context, init_data, init_data_size); + } + int inputs_array_data[] = {1, 0}; + TfLiteIntArray* 
inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 1}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + int temporaries_array_data[] = {0}; + TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data); + + TfLiteNode node; + node.inputs = inputs_array; + node.outputs = outputs_array; + node.temporaries = temporaries_array; + node.user_data = user_data; + node.builtin_data = reinterpret_cast(&builtin_data); + node.custom_initial_data = nullptr; + node.custom_initial_data_size = 0; + node.delegate = nullptr; + + if (registration->prepare) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); + } + TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + if (registration->free) { + registration->free(&context, user_data); + } + + for (int i = 0; i < output_dims_count; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], + 1e-5f); + } +} + +template +void TestAveragePoolingQuantized( + std::initializer_list input_dims_data, + std::initializer_list input_data, const float input_min, + const float input_max, const int filter_height, const int filter_width, + const int stride_height, const int stride_width, + std::initializer_list expected_output_data, + std::initializer_list output_dims_data, float output_min, + float output_max, TfLitePadding padding, TfLiteFusedActivation activation, + T* output_data) { + static_assert(sizeof(T) == 1, "Only int8/uint8 data types allowed."); + + TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + constexpr int inputs_size = 1; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + 
CreateQuantizedTensor(input_data, input_dims, "input_tensor", input_min, + input_max), + CreateQuantizedTensor(output_data, output_dims, "output_tensor", + output_min, output_max), + }; + + TfLiteContext context; + PopulateContext(tensors, tensors_size, micro_test::reporter, &context); + + ::tflite::ops::micro::AllOpsResolver resolver; + const TfLiteRegistration* registration = + resolver.FindOp(tflite::BuiltinOperator_AVERAGE_POOL_2D, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + + TfLitePoolParams builtin_data = {padding, stride_width, stride_height, + filter_width, filter_height, activation}; + const char* init_data = reinterpret_cast(&builtin_data); + size_t init_data_size = 0; + void* user_data = nullptr; + if (registration->init) { + user_data = registration->init(&context, init_data, init_data_size); + } + int inputs_array_data[] = {1, 0}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 1}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + int temporaries_array_data[] = {0}; + TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data); + + TfLiteNode node; + node.inputs = inputs_array; + node.outputs = outputs_array; + node.temporaries = temporaries_array; + node.user_data = user_data; + node.builtin_data = reinterpret_cast(&builtin_data); + node.custom_initial_data = nullptr; + node.custom_initial_data_size = 0; + node.delegate = nullptr; + + if (registration->prepare) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); + } + TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + if (registration->free) { + registration->free(&context, user_data); + } + + for (int i = 0; i < output_dims_count; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], + 1e-5f); + } +} + +void TestMaxPoolFloat(std::initializer_list 
input_dims_data, + std::initializer_list input_data, int filter_width, + int filter_height, int stride_width, int stride_height, + std::initializer_list expected_output_data, + std::initializer_list output_dims_data, + TfLitePadding padding, TfLiteFusedActivation activation, + float* output_data) { + TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + constexpr int inputs_size = 1; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateFloatTensor(input_data, input_dims, "input_tensor"), + CreateFloatTensor(output_data, output_dims, "output_tensor"), + }; + + TfLiteContext context; + PopulateContext(tensors, tensors_size, micro_test::reporter, &context); + + ::tflite::ops::micro::AllOpsResolver resolver; + const TfLiteRegistration* registration = + resolver.FindOp(tflite::BuiltinOperator_MAX_POOL_2D, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + + TfLitePoolParams builtin_data = { + padding, stride_width, stride_height, + filter_width, filter_height, activation, + }; + + const char* init_data = reinterpret_cast(&builtin_data); + size_t init_data_size = 0; + void* user_data = nullptr; + if (registration->init) { + user_data = registration->init(&context, init_data, init_data_size); + } + + int inputs_array_data[] = {1, 0}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 1}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + int temporaries_array_data[] = {0}; + TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data); + + TfLiteNode node; + node.inputs = inputs_array; + node.outputs = outputs_array; + node.temporaries = temporaries_array; + node.user_data = user_data; + node.builtin_data = 
reinterpret_cast(&builtin_data); + node.custom_initial_data = nullptr; + node.custom_initial_data_size = 0; + node.delegate = nullptr; + if (registration->prepare) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); + } + TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + if (registration->free) { + registration->free(&context, user_data); + } + for (int i = 0; i < output_dims_count; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], + 1e-5f); + } +} + +template +void TestMaxPoolQuantized(std::initializer_list input_dims_data, + std::initializer_list input_data, float input_min, + float input_max, int filter_width, int filter_height, + int stride_width, int stride_height, + std::initializer_list expected_output_data, + float output_min, float output_max, + std::initializer_list output_dims_data, + TfLitePadding padding, + TfLiteFusedActivation activation, T* output_data) { + static_assert(sizeof(T) == 1, "Only int8/uint8 data types allowed."); + + TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + constexpr int inputs_size = 1; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateQuantizedTensor(input_data, input_dims, "input_tensor", input_min, + input_max), + CreateQuantizedTensor(output_data, output_dims, "output_tensor", + output_min, output_max), + }; + + TfLiteContext context; + PopulateContext(tensors, tensors_size, micro_test::reporter, &context); + + ::tflite::ops::micro::AllOpsResolver resolver; + const TfLiteRegistration* registration = + resolver.FindOp(tflite::BuiltinOperator_MAX_POOL_2D, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + + TfLitePoolParams 
builtin_data = { + padding, stride_width, stride_height, + filter_width, filter_height, activation, + }; + + const char* init_data = reinterpret_cast(&builtin_data); + size_t init_data_size = 0; + void* user_data = nullptr; + if (registration->init) { + user_data = registration->init(&context, init_data, init_data_size); + } + + int inputs_array_data[] = {1, 0}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 1}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + int temporaries_array_data[] = {0}; + TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data); + + TfLiteNode node; + node.inputs = inputs_array; + node.outputs = outputs_array; + node.temporaries = temporaries_array; + node.user_data = user_data; + node.builtin_data = reinterpret_cast(&builtin_data); + node.custom_initial_data = nullptr; + node.custom_initial_data_size = 0; + node.delegate = nullptr; + if (registration->prepare) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); + } + TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + if (registration->free) { + registration->free(&context, user_data); + } + for (int i = 0; i < output_dims_count; ++i) { + TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]); + } +} + +} // namespace + +} // namespace testing +} // namespace tflite + +TF_LITE_MICRO_TESTS_BEGIN + +TF_LITE_MICRO_TEST(SimpleAveragePoolTestFloat) { + float output_data[2]; + tflite::testing::TestAveragePoolingFloat({4, 1, 2, 4, 1}, // Input shape + { // Input values + 0., 6., 2., 4., 3., 2., 10., 7.}, + 2, 2, // filter width, filter height + 2, 2, // stride width, stride height + { + // Output values + 2.75, + 5.75, + }, + {4, 1, 1, 2, 1}, // Output shape + kTfLitePaddingValid, kTfLiteActNone, + output_data); +} + +TF_LITE_MICRO_TEST(SimpleAveragePoolTestUint8) { + 
using tflite::testing::F2Q; + + const float input_min = -15.9375; + const float input_max = 15.9375; + const float output_min = -15.9375; + const float output_max = 15.9375; + uint8_t output_data[2]; + tflite::testing::TestAveragePoolingQuantized( + {4, 1, 2, 4, 1}, // Input shape + { + // Input values + F2Q(0., input_min, input_max), + F2Q(-6., input_min, input_max), + F2Q(2., input_min, input_max), + F2Q(4., input_min, input_max), + F2Q(3., input_min, input_max), + F2Q(2., input_min, input_max), + F2Q(-10., input_min, input_max), + F2Q(7., input_min, input_max), + }, + input_min, input_max, // input quantization range + 2, 2, // filter width, filter height + 2, 2, // stride width, stride height + { + // Output values + F2Q(0., output_min, output_max), + F2Q(0.75, output_min, output_max), + }, + {4, 1, 1, 2, 1}, // Output shape + output_min, output_max, // output quantization range + kTfLitePaddingValid, kTfLiteActRelu, output_data); +} + +TF_LITE_MICRO_TEST(SimpleAveragePoolTestInt8PaddingValidStride2ActNone) { + using tflite::testing::F2QS; + + const float input_min = -15.9375; + const float input_max = 15.8130; + const float output_min = -15.9375; + const float output_max = 15.8130; + int8_t output_data[2]; + tflite::testing::TestAveragePoolingQuantized( + {4, 1, 2, 4, 1}, // Input shape + { // Input values + F2QS(0., input_min, input_max), F2QS(-6., input_min, input_max), + F2QS(2., input_min, input_max), F2QS(4., input_min, input_max), + F2QS(3., input_min, input_max), F2QS(2., input_min, input_max), + F2QS(-10., input_min, input_max), F2QS(7., input_min, input_max)}, + input_min, input_max, // input quantization range + 2, 2, // filter height, filter width + 2, 2, // stride height, stride width + { // Output values + F2QS(-0.25, output_min, output_max), F2QS(0.75, output_min, output_max)}, + {4, 1, 1, 2, 1}, // Output shape + output_min, output_max, // output quantization range + kTfLitePaddingValid, kTfLiteActNone, output_data); +} + 
+TF_LITE_MICRO_TEST(SimpleAveragePoolTestInt8PaddingValidStride1Stride2Relu) { + using tflite::testing::F2QS; + + const float input_min = -15.9375; + const float input_max = 15.8130; + const float output_min = -15.9375; + const float output_max = 15.8130; + int8_t output_data[3]; + tflite::testing::TestAveragePoolingQuantized( + {4, 1, 2, 4, 1}, // Input shape + { // Input values + F2QS(0., input_min, input_max), F2QS(-6., input_min, input_max), + F2QS(2., input_min, input_max), F2QS(4., input_min, input_max), + F2QS(3., input_min, input_max), F2QS(2., input_min, input_max), + F2QS(-10., input_min, input_max), F2QS(7., input_min, input_max)}, + input_min, input_max, // input quantization range + 2, 2, // filter height, filter width + 2, 1, // stride height, stride width + { // Output values + F2QS(0., output_min, output_max), F2QS(0., output_min, output_max), + F2QS(0.75, output_min, output_max)}, + {4, 1, 1, 3, 1}, // Output shape + output_min, output_max, // output quantization range + kTfLitePaddingValid, kTfLiteActRelu, output_data); +} + +TF_LITE_MICRO_TEST(SimpleAveragePoolTestInt8PaddingValidStride2Stride1Relu1) { + using tflite::testing::F2QS; + + const float input_min = -15.9375; + const float input_max = 15.8130; + const float output_min = -15.9375; + const float output_max = 15.8130; + int8_t output_data[2]; + tflite::testing::TestAveragePoolingQuantized( + {4, 1, 2, 4, 1}, // Input shape + { // Input values + F2QS(0., input_min, input_max), F2QS(-6., input_min, input_max), + F2QS(2., input_min, input_max), F2QS(4., input_min, input_max), + F2QS(3., input_min, input_max), F2QS(2., input_min, input_max), + F2QS(-10., input_min, input_max), F2QS(7., input_min, input_max)}, + input_min, input_max, // input quantization range + 2, 2, // filter height, filter width + 1, 2, // stride height, stride width + { // Output values + F2QS(-0.25, output_min, output_max), F2QS(0.75, output_min, output_max)}, + {4, 1, 1, 2, 1}, // Output shape + output_min, output_max, 
// output quantization range + kTfLitePaddingValid, kTfLiteActRelu1, output_data); +} + +TF_LITE_MICRO_TEST(SimpleAveragePoolTestInt8PaddingValidStride2Relu6) { + using tflite::testing::F2QS; + + const float input_min = -15.9375; + const float input_max = 15.8130; + const float output_min = -15.9375; + const float output_max = 15.8130; + int8_t output_data[2]; + tflite::testing::TestAveragePoolingQuantized( + {4, 1, 2, 4, 1}, // Input shape + { // Input values + F2QS(3., input_min, input_max), F2QS(-6., input_min, input_max), + F2QS(8., input_min, input_max), F2QS(4., input_min, input_max), + F2QS(3., input_min, input_max), F2QS(2., input_min, input_max), + F2QS(10., input_min, input_max), F2QS(7., input_min, input_max)}, + input_min, input_max, // input quantization range + 2, 2, // filter height, filter width + 2, 2, // stride height, stride width + { // Output values + F2QS(0.5, output_min, output_max), F2QS(6., output_min, output_max)}, + {4, 1, 1, 2, 1}, // Output shape + output_min, output_max, // output quantization range + kTfLitePaddingValid, kTfLiteActRelu6, output_data); +} + +TF_LITE_MICRO_TEST(SimpleAveragePoolTestInt8PaddingSameStride1ActNone) { + using tflite::testing::F2QS; + + const float input_min = -15.9375; + const float input_max = 15.8130; + const float output_min = -15.9375; + const float output_max = 15.8130; + int8_t output_data[8]; + tflite::testing::TestAveragePoolingQuantized( + {4, 1, 2, 4, 1}, // Input shape + { // Input values + F2QS(3., input_min, input_max), F2QS(-6., input_min, input_max), + F2QS(8., input_min, input_max), F2QS(4., input_min, input_max), + F2QS(3., input_min, input_max), F2QS(2., input_min, input_max), + F2QS(10., input_min, input_max), F2QS(7., input_min, input_max)}, + input_min, input_max, // input quantization range + 2, 2, // filter height, filter width + 1, 1, // stride height, stride width + { // Output values + F2QS(0.5, output_min, output_max), F2QS(3.5, output_min, output_max), + F2QS(7.25, output_min, 
output_max), F2QS(5.5, output_min, output_max), + F2QS(2.5, output_min, output_max), F2QS(6., output_min, output_max), + F2QS(8.5, output_min, output_max), F2QS(7., output_min, output_max)}, + {4, 1, 2, 4, 1}, // Output shape + output_min, output_max, // output quantization range + kTfLitePaddingValid, kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(SimpleMaxPoolTestFloat) { + float output_data[2]; + tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape + { // Input values + 0, 6, 2, 4, 3, 2, 10, 7}, + 2, 2, // filter width, filter height + 2, 2, // stride width, stride height + { + // Output values + 6, + 10, + }, + {4, 1, 1, 2, 1}, // Output shape + kTfLitePaddingValid, kTfLiteActNone, + output_data); +} + +TF_LITE_MICRO_TEST(SimpleMaxPoolTestFloatRelu) { + float output_data[2]; + tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape + { + // Input values + -1, -6, 2, 4, // + -3, -2, 10.5, 7, // + }, + 2, 2, // filter width, filter height + 2, 2, // stride width, stride height + { + // Output values + 0.0, + 10.5, + }, + {4, 1, 1, 2, 1}, // Output shape + kTfLitePaddingValid, kTfLiteActRelu, + output_data); +} + +TF_LITE_MICRO_TEST(SimpleMaxPoolTestFloatRelu1) { + float output_data[2]; + tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape + { + // Input values + -2.75, -6, 0.2, 0.4, // + -3, -2, -0.3, 0.7, // + }, + 2, 2, // filter width, filter height + 2, 2, // stride width, stride height + { + // Output values + -1.0, + 0.7, + }, + {4, 1, 1, 2, 1}, // Output shape + kTfLitePaddingValid, kTfLiteActRelu1, + output_data); + + tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape + { + // Input values + -2.75, -6, -2, -4, // + -3, -2, 10, -7, // + }, + 2, 2, // filter width, filter height + 2, 2, // stride width, stride height + { + // Output values + -1.0, + 1.0, + }, + {4, 1, 1, 2, 1}, // Output shape + kTfLitePaddingValid, kTfLiteActRelu1, + output_data); +} + 
+TF_LITE_MICRO_TEST(SimpleMaxPoolTestFloatRelu6) { + float output_data[2]; + tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape + { + // Input values + -1.5, -6, 12, 4, // + -3, -2, 10, 7, // + }, + 2, 2, // filter width, filter height + 2, 2, // stride width, stride height + { + // Output values + 0.0, + 6.0, + }, + {4, 1, 1, 2, 1}, // Output shape + kTfLitePaddingValid, kTfLiteActRelu6, + output_data); + + tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape + { + // Input values + 0, 4.5, 12, 4, // + 3, 2, 10, 7, // + }, + 2, 2, // filter width, filter height + 2, 2, // stride width, stride height + { + // Output values + 4.5, + 6.0, + }, + {4, 1, 1, 2, 1}, // Output shape + kTfLitePaddingValid, kTfLiteActRelu6, + output_data); +} + +TF_LITE_MICRO_TEST(SimpleMaxPoolTestPaddingSameStride1) { + float output_data[8]; + tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape + { + // Input values + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }, + 2, 2, // filter width, filter height + 1, 1, // stride width, stride height + { + // Output values + 6, 10, 10, 7, // + 3, 10, 10, 7, // + }, + {4, 1, 2, 4, 1}, // Output shape + kTfLitePaddingSame, kTfLiteActNone, + output_data); +} + +TF_LITE_MICRO_TEST(SimpleMaxPoolTestPaddingValidStride1) { + float output_data[3]; + tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape + { + // Input values + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }, + 2, 2, // filter width, filter height + 1, 1, // stride width, stride height + { + // Output values + 6, + 10, + 10, + }, + {4, 1, 1, 3, 1}, // Output shape + kTfLitePaddingValid, kTfLiteActNone, + output_data); +} + +TF_LITE_MICRO_TEST(SimpleMaxPoolTestUInt8ActNone) { + using tflite::testing::F2Q; + + uint8_t output_data[2]; + float input_min = 0; + float input_max = 15.9375; + float output_min = 0; + float output_max = 15.9375; + int filter_width = 2; + int filter_height = 2; + int stride_width = 2; + int stride_height = 2; + 
tflite::testing::TestMaxPoolQuantized( + {4, 1, 2, 4, 1}, // Input shape + { + // Input values + F2Q(0, input_min, input_max), + F2Q(6, input_min, input_max), + F2Q(2, input_min, input_max), + F2Q(4, input_min, input_max), + F2Q(3, input_min, input_max), + F2Q(2, input_min, input_max), + F2Q(10, input_min, input_max), + F2Q(7, input_min, input_max), + }, + input_min, input_max, filter_width, filter_height, stride_width, + stride_height, + {// Output values + F2Q(6, output_min, output_max), F2Q(10, output_min, output_max)}, + output_min, output_max, {4, 1, 1, 2, 1}, // Output shape + kTfLitePaddingValid, kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(MaxPoolTestUInt8ActRelu) { + using tflite::testing::F2Q; + + uint8_t output_data[2]; + float input_min = -15.9375; + float input_max = 15.9375; + float output_min = -15.9375; + float output_max = 15.9375; + int filter_width = 2; + int filter_height = 2; + int stride_width = 2; + int stride_height = 2; + tflite::testing::TestMaxPoolQuantized( + {4, 1, 2, 4, 1}, // Input shape + { + // Input values + F2Q(-1.5, input_min, input_max), + F2Q(-6, input_min, input_max), + F2Q(2, input_min, input_max), + F2Q(4, input_min, input_max), + F2Q(-3, input_min, input_max), + F2Q(-2, input_min, input_max), + F2Q(10, input_min, input_max), + F2Q(7, input_min, input_max), + }, + input_min, input_max, filter_width, filter_height, stride_width, + stride_height, + {// Output values + F2Q(0, output_min, output_max), F2Q(10, output_min, output_max)}, + output_min, output_max, {4, 1, 1, 2, 1}, // Output shape + kTfLitePaddingValid, kTfLiteActRelu, output_data); +} + +TF_LITE_MICRO_TEST(MaxPoolTestUInt8ActRelu1) { + using tflite::testing::F2Q; + + uint8_t output_data[2]; + float input_min = -15.9375; + float input_max = 15.9375; + float output_min = -15.9375; + float output_max = 15.9375; + int filter_width = 2; + int filter_height = 2; + int stride_width = 2; + int stride_height = 2; + tflite::testing::TestMaxPoolQuantized( + {4, 1, 2, 
4, 1}, // Input shape + { + // Input values + F2Q(-1.7, input_min, input_max), + F2Q(-6, input_min, input_max), + F2Q(2, input_min, input_max), + F2Q(4, input_min, input_max), + F2Q(-3, input_min, input_max), + F2Q(-2, input_min, input_max), + F2Q(-10, input_min, input_max), + F2Q(7, input_min, input_max), + }, + input_min, input_max, filter_width, filter_height, stride_width, + stride_height, + {// Output values + F2Q(-1.0, output_min, output_max), F2Q(1.0, output_min, output_max)}, + output_min, output_max, {4, 1, 1, 2, 1}, // Output shape + kTfLitePaddingValid, kTfLiteActRelu1, output_data); +} + +TF_LITE_MICRO_TEST(MaxPoolTestUInt8ActRelu6) { + using tflite::testing::F2Q; + + uint8_t output_data[8]; + float input_min = -15.9375; + float input_max = 15.9375; + float output_min = -15.9375; + float output_max = 15.9375; + int filter_width = 2; + int filter_height = 2; + int stride_width = 2; + int stride_height = 2; + tflite::testing::TestMaxPoolQuantized( + {4, 1, 2, 4, 1}, // Input shape + { + // Input values + F2Q(0, input_min, input_max), + F2Q(-6, input_min, input_max), + F2Q(12, input_min, input_max), + F2Q(4, input_min, input_max), + F2Q(-3, input_min, input_max), + F2Q(-2, input_min, input_max), + F2Q(10, input_min, input_max), + F2Q(7, input_min, input_max), + }, + input_min, input_max, filter_width, filter_height, stride_width, + stride_height, + {// Output values + F2Q(0.0, output_min, output_max), F2Q(6.0, output_min, output_max)}, + output_min, output_max, {4, 1, 1, 2, 1}, // Output shape + kTfLitePaddingValid, kTfLiteActRelu6, output_data); + + tflite::testing::TestMaxPoolQuantized( + {4, 1, 2, 4, 1}, // Input shape + { + // Input values + F2Q(0, input_min, input_max), + F2Q(4.5, input_min, input_max), + F2Q(12, input_min, input_max), + F2Q(4, input_min, input_max), + F2Q(3, input_min, input_max), + F2Q(2, input_min, input_max), + F2Q(10, input_min, input_max), + F2Q(7, input_min, input_max), + }, + input_min, input_max, filter_width, filter_height, 
stride_width, + stride_height, + {// Output values + F2Q(4.5, output_min, output_max), F2Q(6.0, output_min, output_max)}, + output_min, output_max, {4, 1, 1, 2, 1}, // Output shape + kTfLitePaddingValid, kTfLiteActRelu6, output_data); +} + +TF_LITE_MICRO_TEST(MaxPoolTestUInt8PaddingSameStride1) { + using tflite::testing::F2Q; + + uint8_t output_data[8]; + float input_min = 0; + float input_max = 15.9375; + float output_min = 0; + float output_max = 15.9375; + int filter_width = 2; + int filter_height = 2; + int stride_width = 1; + int stride_height = 1; + tflite::testing::TestMaxPoolQuantized( + {4, 1, 2, 4, 1}, // Input shape + { + // Input values + F2Q(0, input_min, input_max), + F2Q(6, input_min, input_max), + F2Q(2, input_min, input_max), + F2Q(4, input_min, input_max), + F2Q(3, input_min, input_max), + F2Q(2, input_min, input_max), + F2Q(10, input_min, input_max), + F2Q(7, input_min, input_max), + }, + input_min, input_max, filter_width, filter_height, stride_width, + stride_height, + { + // Output values + F2Q(6, output_min, output_max), + F2Q(10, output_min, output_max), + F2Q(10, output_min, output_max), + F2Q(7, output_min, output_max), + F2Q(3, output_min, output_max), + F2Q(10, output_min, output_max), + F2Q(10, output_min, output_max), + F2Q(7, output_min, output_max), + }, + output_min, output_max, {4, 1, 2, 4, 1}, // Output shape + kTfLitePaddingSame, kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(MaxPoolTestUInt8PaddingValidStride1) { + using tflite::testing::F2Q; + + uint8_t output_data[3]; + float input_min = 0; + float input_max = 15.9375; + float output_min = 0; + float output_max = 15.9375; + int filter_width = 2; + int filter_height = 2; + int stride_width = 1; + int stride_height = 1; + tflite::testing::TestMaxPoolQuantized( + {4, 1, 2, 4, 1}, // Input shape + { + // Input values + F2Q(0, input_min, input_max), + F2Q(6, input_min, input_max), + F2Q(2, input_min, input_max), + F2Q(4, input_min, input_max), + F2Q(3, input_min, 
input_max), + F2Q(2, input_min, input_max), + F2Q(10, input_min, input_max), + F2Q(7, input_min, input_max), + }, + input_min, input_max, filter_width, filter_height, stride_width, + stride_height, + { + // Output values + F2Q(6, output_min, output_max), + F2Q(10, output_min, output_max), + F2Q(10, output_min, output_max), + }, + output_min, output_max, {4, 1, 1, 3, 1}, // Output shape + kTfLitePaddingValid, kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(SimpleMaxPoolTestInt8ActNone) { + using tflite::testing::F2QS; + + int8_t output_data[2]; + float input_min = 0; + float input_max = 15.9375; + float output_min = 0; + float output_max = 15.9375; + int filter_width = 2; + int filter_height = 2; + int stride_width = 2; + int stride_height = 2; + tflite::testing::TestMaxPoolQuantized( + {4, 1, 2, 4, 1}, // Input shape + { + // Input values + F2QS(0, input_min, input_max), + F2QS(6, input_min, input_max), + F2QS(2, input_min, input_max), + F2QS(4, input_min, input_max), + F2QS(3, input_min, input_max), + F2QS(2, input_min, input_max), + F2QS(10, input_min, input_max), + F2QS(7, input_min, input_max), + }, + input_min, input_max, filter_width, filter_height, stride_width, + stride_height, + {// Output values + F2QS(6, output_min, output_max), F2QS(10, output_min, output_max)}, + output_min, output_max, {4, 1, 1, 2, 1}, // Output shape + kTfLitePaddingValid, kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(MaxPoolTestUInt8ActRelu) { + using tflite::testing::F2QS; + + int8_t output_data[2]; + float input_min = -15.9375; + float input_max = 15.9375; + float output_min = -15.9375; + float output_max = 15.9375; + int filter_width = 2; + int filter_height = 2; + int stride_width = 2; + int stride_height = 2; + tflite::testing::TestMaxPoolQuantized( + {4, 1, 2, 4, 1}, // Input shape + { + // Input values + F2QS(-1.5, input_min, input_max), + F2QS(-6, input_min, input_max), + F2QS(2, input_min, input_max), + F2QS(4, input_min, input_max), + F2QS(-3, input_min, 
input_max), + F2QS(-2, input_min, input_max), + F2QS(10, input_min, input_max), + F2QS(7, input_min, input_max), + }, + input_min, input_max, filter_width, filter_height, stride_width, + stride_height, + {// Output values + F2QS(0, output_min, output_max), F2QS(10, output_min, output_max)}, + output_min, output_max, {4, 1, 1, 2, 1}, // Output shape + kTfLitePaddingValid, kTfLiteActRelu, output_data); +} + +TF_LITE_MICRO_TEST(MaxPoolTestUInt8ActRelu1) { + using tflite::testing::F2QS; + + int8_t output_data[2]; + float input_min = -15.9375; + float input_max = 15.9375; + float output_min = -15.9375; + float output_max = 15.9375; + int filter_width = 2; + int filter_height = 2; + int stride_width = 2; + int stride_height = 2; + tflite::testing::TestMaxPoolQuantized( + {4, 1, 2, 4, 1}, // Input shape + { + // Input values + F2QS(-1.7, input_min, input_max), + F2QS(-6, input_min, input_max), + F2QS(2, input_min, input_max), + F2QS(4, input_min, input_max), + F2QS(-3, input_min, input_max), + F2QS(-2, input_min, input_max), + F2QS(-10, input_min, input_max), + F2QS(7, input_min, input_max), + }, + input_min, input_max, filter_width, filter_height, stride_width, + stride_height, + {// Output values + F2QS(-1.0, output_min, output_max), F2QS(1.0, output_min, output_max)}, + output_min, output_max, {4, 1, 1, 2, 1}, // Output shape + kTfLitePaddingValid, kTfLiteActRelu1, output_data); +} + +TF_LITE_MICRO_TEST(MaxPoolTestUInt8ActRelu6) { + using tflite::testing::F2QS; + + int8_t output_data[8]; + float input_min = -15.9375; + float input_max = 15.9375; + float output_min = -15.9375; + float output_max = 15.9375; + int filter_width = 2; + int filter_height = 2; + int stride_width = 2; + int stride_height = 2; + tflite::testing::TestMaxPoolQuantized( + {4, 1, 2, 4, 1}, // Input shape + { + // Input values + F2QS(0, input_min, input_max), + F2QS(-6, input_min, input_max), + F2QS(12, input_min, input_max), + F2QS(4, input_min, input_max), + F2QS(-3, input_min, input_max), + 
F2QS(-2, input_min, input_max), + F2QS(10, input_min, input_max), + F2QS(7, input_min, input_max), + }, + input_min, input_max, filter_width, filter_height, stride_width, + stride_height, + {// Output values + F2QS(0.0, output_min, output_max), F2QS(6.0, output_min, output_max)}, + output_min, output_max, {4, 1, 1, 2, 1}, // Output shape + kTfLitePaddingValid, kTfLiteActRelu6, output_data); + + tflite::testing::TestMaxPoolQuantized( + {4, 1, 2, 4, 1}, // Input shape + { + // Input values + F2QS(0, input_min, input_max), + F2QS(4.5, input_min, input_max), + F2QS(12, input_min, input_max), + F2QS(4, input_min, input_max), + F2QS(3, input_min, input_max), + F2QS(2, input_min, input_max), + F2QS(10, input_min, input_max), + F2QS(7, input_min, input_max), + }, + input_min, input_max, filter_width, filter_height, stride_width, + stride_height, + {// Output values + F2QS(4.5, output_min, output_max), F2QS(6.0, output_min, output_max)}, + output_min, output_max, {4, 1, 1, 2, 1}, // Output shape + kTfLitePaddingValid, kTfLiteActRelu6, output_data); +} + +TF_LITE_MICRO_TEST(MaxPoolTestUInt8PaddingSameStride1) { + using tflite::testing::F2QS; + + int8_t output_data[8]; + float input_min = 0; + float input_max = 15.9375; + float output_min = 0; + float output_max = 15.9375; + int filter_width = 2; + int filter_height = 2; + int stride_width = 1; + int stride_height = 1; + tflite::testing::TestMaxPoolQuantized( + {4, 1, 2, 4, 1}, // Input shape + { + // Input values + F2QS(0, input_min, input_max), + F2QS(6, input_min, input_max), + F2QS(2, input_min, input_max), + F2QS(4, input_min, input_max), + F2QS(3, input_min, input_max), + F2QS(2, input_min, input_max), + F2QS(10, input_min, input_max), + F2QS(7, input_min, input_max), + }, + input_min, input_max, filter_width, filter_height, stride_width, + stride_height, + { + // Output values + F2QS(6, output_min, output_max), + F2QS(10, output_min, output_max), + F2QS(10, output_min, output_max), + F2QS(7, output_min, output_max), + 
F2QS(3, output_min, output_max), + F2QS(10, output_min, output_max), + F2QS(10, output_min, output_max), + F2QS(7, output_min, output_max), + }, + output_min, output_max, {4, 1, 2, 4, 1}, // Output shape + kTfLitePaddingSame, kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(MaxPoolTestUInt8PaddingValidStride1) { + using tflite::testing::F2QS; + + int8_t output_data[3]; + float input_min = 0; + float input_max = 15.9375; + float output_min = 0; + float output_max = 15.9375; + int filter_width = 2; + int filter_height = 2; + int stride_width = 1; + int stride_height = 1; + tflite::testing::TestMaxPoolQuantized( + {4, 1, 2, 4, 1}, // Input shape + { + // Input values + F2QS(0, input_min, input_max), + F2QS(6, input_min, input_max), + F2QS(2, input_min, input_max), + F2QS(4, input_min, input_max), + F2QS(3, input_min, input_max), + F2QS(2, input_min, input_max), + F2QS(10, input_min, input_max), + F2QS(7, input_min, input_max), + }, + input_min, input_max, filter_width, filter_height, stride_width, + stride_height, + { + // Output values + F2QS(6, output_min, output_max), + F2QS(10, output_min, output_max), + F2QS(10, output_min, output_max), + }, + output_min, output_max, {4, 1, 1, 3, 1}, // Output shape + kTfLitePaddingValid, kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/tools/make/ext_libs/embarc_mli.inc b/tensorflow/lite/micro/tools/make/ext_libs/embarc_mli.inc index 851a5d43378..0cba07d9d27 100644 --- a/tensorflow/lite/micro/tools/make/ext_libs/embarc_mli.inc +++ b/tensorflow/lite/micro/tools/make/ext_libs/embarc_mli.inc @@ -1,6 +1,6 @@ ifeq ($(TARGET_ARCH), arc) -# embarc_mli Library is used by default for ARC platform whenever it's possible. +# embarc_mli Library is used by default for ARC platform whenever it is possible. # To use TFLM reference implementation it should be intentionally turned off # by passing 'no_embarc_mli' tag (make -f TAGS=no_embarc_mli ...) 
ifeq ($(filter no_embarc_mli,$(ALL_TAGS)),) @@ -63,5 +63,14 @@ endif MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/embarc_mli/mli_slicers.cc MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/embarc_mli/mli_tf_utils.h + + MICROLITE_TEST_SRCS += $(wildcard tensorflow/lite/micro/kernels/embarc_mli/*test.cc) + + EMBARC_MLI_TESTS := conv depthwise_conv pooling fully_connected + EMBARC_MLI_TESTS += $(foreach TEST,$(EMBARC_MLI_TESTS), $(TEST)_slicing) + +generate_embarc_mli_test_projects: $(foreach TEST,$(EMBARC_MLI_TESTS), generate_kernel_$(TEST)_test_make_project) + + endif # no_embarc_mli endif # TARGET_ARCH From fc83b7fedb4f8727ac63c9e8b4c3bc7e8e75643c Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Wed, 15 Apr 2020 13:26:08 +0300 Subject: [PATCH 27/45] embARC MLI related code is present in arc_mli --- .../kernels/{embarc_mli => arc_mli}/conv.cc | 8 +- .../conv_slicing_test.cc | 0 .../{embarc_mli => arc_mli}/depthwise_conv.cc | 8 +- .../depthwise_conv_slicing_test.cc | 0 .../fully_connected.cc | 8 +- .../fully_connected_slicing_test.cc | 0 .../{embarc_mli => arc_mli}/mli_slicers.cc | 0 .../{embarc_mli => arc_mli}/mli_slicers.h | 0 .../{embarc_mli => arc_mli}/mli_tf_utils.h | 0 .../{embarc_mli => arc_mli}/pooling.cc | 8 +- .../pooling_slicing_test.cc | 0 .../scratch_buf_mgr.cc | 4 +- .../{embarc_mli => arc_mli}/scratch_buf_mgr.h | 0 .../scratch_buffers.cc | 2 +- .../{embarc_mli => arc_mli}/scratch_buffers.h | 0 .../micro/tools/make/ext_libs/arc_mli.inc | 92 +++++++++++++++++++ .../micro/tools/make/ext_libs/embarc_mli.inc | 76 --------------- 17 files changed, 111 insertions(+), 95 deletions(-) rename tensorflow/lite/micro/kernels/{embarc_mli => arc_mli}/conv.cc (98%) rename tensorflow/lite/micro/kernels/{embarc_mli => arc_mli}/conv_slicing_test.cc (100%) rename tensorflow/lite/micro/kernels/{embarc_mli => arc_mli}/depthwise_conv.cc (98%) rename tensorflow/lite/micro/kernels/{embarc_mli => arc_mli}/depthwise_conv_slicing_test.cc (100%) rename 
tensorflow/lite/micro/kernels/{embarc_mli => arc_mli}/fully_connected.cc (98%) rename tensorflow/lite/micro/kernels/{embarc_mli => arc_mli}/fully_connected_slicing_test.cc (100%) rename tensorflow/lite/micro/kernels/{embarc_mli => arc_mli}/mli_slicers.cc (100%) rename tensorflow/lite/micro/kernels/{embarc_mli => arc_mli}/mli_slicers.h (100%) rename tensorflow/lite/micro/kernels/{embarc_mli => arc_mli}/mli_tf_utils.h (100%) rename tensorflow/lite/micro/kernels/{embarc_mli => arc_mli}/pooling.cc (98%) rename tensorflow/lite/micro/kernels/{embarc_mli => arc_mli}/pooling_slicing_test.cc (100%) rename tensorflow/lite/micro/kernels/{embarc_mli => arc_mli}/scratch_buf_mgr.cc (98%) rename tensorflow/lite/micro/kernels/{embarc_mli => arc_mli}/scratch_buf_mgr.h (100%) rename tensorflow/lite/micro/kernels/{embarc_mli => arc_mli}/scratch_buffers.cc (98%) rename tensorflow/lite/micro/kernels/{embarc_mli => arc_mli}/scratch_buffers.h (100%) create mode 100644 tensorflow/lite/micro/tools/make/ext_libs/arc_mli.inc delete mode 100644 tensorflow/lite/micro/tools/make/ext_libs/embarc_mli.inc diff --git a/tensorflow/lite/micro/kernels/embarc_mli/conv.cc b/tensorflow/lite/micro/kernels/arc_mli/conv.cc similarity index 98% rename from tensorflow/lite/micro/kernels/embarc_mli/conv.cc rename to tensorflow/lite/micro/kernels/arc_mli/conv.cc index b124b17f66d..d02f081434f 100644 --- a/tensorflow/lite/micro/kernels/embarc_mli/conv.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/conv.cc @@ -24,10 +24,10 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" -#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.h" -#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.h" -#include "tensorflow/lite/micro/kernels/embarc_mli/mli_slicers.h" -#include "tensorflow/lite/micro/kernels/embarc_mli/mli_tf_utils.h" +#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h" +#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.h" +#include "tensorflow/lite/micro/kernels/arc_mli/mli_slicers.h" +#include "tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h" #include "mli_api.h" diff --git a/tensorflow/lite/micro/kernels/embarc_mli/conv_slicing_test.cc b/tensorflow/lite/micro/kernels/arc_mli/conv_slicing_test.cc similarity index 100% rename from tensorflow/lite/micro/kernels/embarc_mli/conv_slicing_test.cc rename to tensorflow/lite/micro/kernels/arc_mli/conv_slicing_test.cc diff --git a/tensorflow/lite/micro/kernels/embarc_mli/depthwise_conv.cc b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc similarity index 98% rename from tensorflow/lite/micro/kernels/embarc_mli/depthwise_conv.cc rename to tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc index 0ad2a9fe6c6..049347cc7a1 100644 --- a/tensorflow/lite/micro/kernels/embarc_mli/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc @@ -25,10 +25,10 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" -#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.h" -#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.h" -#include "tensorflow/lite/micro/kernels/embarc_mli/mli_slicers.h" -#include "tensorflow/lite/micro/kernels/embarc_mli/mli_tf_utils.h" +#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h" +#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.h" +#include "tensorflow/lite/micro/kernels/arc_mli/mli_slicers.h" +#include "tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h" #include "mli_api.h" diff --git a/tensorflow/lite/micro/kernels/embarc_mli/depthwise_conv_slicing_test.cc b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv_slicing_test.cc similarity index 100% rename from tensorflow/lite/micro/kernels/embarc_mli/depthwise_conv_slicing_test.cc rename to tensorflow/lite/micro/kernels/arc_mli/depthwise_conv_slicing_test.cc diff --git a/tensorflow/lite/micro/kernels/embarc_mli/fully_connected.cc b/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc similarity index 98% rename from tensorflow/lite/micro/kernels/embarc_mli/fully_connected.cc rename to tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc index 8088634f8de..61fa0ff397f 100644 --- a/tensorflow/lite/micro/kernels/embarc_mli/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc @@ -23,10 +23,10 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.h" -#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.h" -#include "tensorflow/lite/micro/kernels/embarc_mli/mli_tf_utils.h" -#include "tensorflow/lite/micro/kernels/embarc_mli/mli_slicers.h" +#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h" +#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.h" +#include "tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h" +#include "tensorflow/lite/micro/kernels/arc_mli/mli_slicers.h" #include "mli_api.h" diff --git a/tensorflow/lite/micro/kernels/embarc_mli/fully_connected_slicing_test.cc b/tensorflow/lite/micro/kernels/arc_mli/fully_connected_slicing_test.cc similarity index 100% rename from tensorflow/lite/micro/kernels/embarc_mli/fully_connected_slicing_test.cc rename to tensorflow/lite/micro/kernels/arc_mli/fully_connected_slicing_test.cc diff --git a/tensorflow/lite/micro/kernels/embarc_mli/mli_slicers.cc b/tensorflow/lite/micro/kernels/arc_mli/mli_slicers.cc similarity index 100% rename from tensorflow/lite/micro/kernels/embarc_mli/mli_slicers.cc rename to tensorflow/lite/micro/kernels/arc_mli/mli_slicers.cc diff --git a/tensorflow/lite/micro/kernels/embarc_mli/mli_slicers.h b/tensorflow/lite/micro/kernels/arc_mli/mli_slicers.h similarity index 100% rename from tensorflow/lite/micro/kernels/embarc_mli/mli_slicers.h rename to tensorflow/lite/micro/kernels/arc_mli/mli_slicers.h diff --git a/tensorflow/lite/micro/kernels/embarc_mli/mli_tf_utils.h b/tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h similarity index 100% rename from tensorflow/lite/micro/kernels/embarc_mli/mli_tf_utils.h rename to tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h diff --git a/tensorflow/lite/micro/kernels/embarc_mli/pooling.cc 
b/tensorflow/lite/micro/kernels/arc_mli/pooling.cc similarity index 98% rename from tensorflow/lite/micro/kernels/embarc_mli/pooling.cc rename to tensorflow/lite/micro/kernels/arc_mli/pooling.cc index a147171a859..ced5c4a21b8 100644 --- a/tensorflow/lite/micro/kernels/embarc_mli/pooling.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/pooling.cc @@ -20,10 +20,10 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" -#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.h" -#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.h" -#include "tensorflow/lite/micro/kernels/embarc_mli/mli_tf_utils.h" -#include "tensorflow/lite/micro/kernels/embarc_mli/mli_slicers.h" +#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h" +#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.h" +#include "tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h" +#include "tensorflow/lite/micro/kernels/arc_mli/mli_slicers.h" #include "mli_api.h" diff --git a/tensorflow/lite/micro/kernels/embarc_mli/pooling_slicing_test.cc b/tensorflow/lite/micro/kernels/arc_mli/pooling_slicing_test.cc similarity index 100% rename from tensorflow/lite/micro/kernels/embarc_mli/pooling_slicing_test.cc rename to tensorflow/lite/micro/kernels/arc_mli/pooling_slicing_test.cc diff --git a/tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.cc b/tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.cc similarity index 98% rename from tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.cc rename to tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.cc index 8d00e28714c..d030d04170c 100644 --- a/tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.cc @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the 
License. ==============================================================================*/ -#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.h" -#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.h" +#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.h" +#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h" #include #define MAX(A,B) (((A) > (B))? (A): (B)) #define MIN(A,B) (((A) > (B))? (B): (A)) diff --git a/tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.h b/tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.h similarity index 100% rename from tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.h rename to tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.h diff --git a/tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.cc b/tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.cc similarity index 98% rename from tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.cc rename to tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.cc index 689c490569e..a770e4ccd66 100644 --- a/tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.h" +#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h" #include #define MAX(A,B) (((A) > (B))? (A): (B)) #define MIN(A,B) (((A) > (B))? 
(B): (A)) diff --git a/tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.h b/tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h similarity index 100% rename from tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.h rename to tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h diff --git a/tensorflow/lite/micro/tools/make/ext_libs/arc_mli.inc b/tensorflow/lite/micro/tools/make/ext_libs/arc_mli.inc new file mode 100644 index 00000000000..3b8fa04d536 --- /dev/null +++ b/tensorflow/lite/micro/tools/make/ext_libs/arc_mli.inc @@ -0,0 +1,92 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Settings for embARC MLI library for ARC platform. + +ifeq ($(TARGET_ARCH), arc) + +# MLI Library is used by default for ARC platform whenever it is possible. +# To use TFLM reference implementation MLI should be intentionally turned off +# by passing 'no_arc_mli' tag (make -f TAGS=no_arc_mli ...) +ifeq ($(filter no_arc_mli,$(ALL_TAGS)),) + + +ALL_TAGS += arc_mli + +ifeq ($(PRE_COMPILED_MLI),true) + # TODO: Replace with proper arc_mli pre-builts. 
+ $(eval $(call add_third_party_download,$(EMBARC_OSP_URL),$(EMBARC_OSP_MD5),embarc_osp,)) + + MLI_INCLUDE_FOLDER = embarc_osp/library/embarc_mli/include + MLI_LIB = third_party/embarc_osp/library/embarc_mli/lib/arcem9d/libmli_iotdk.a + + THIRD_PARTY_CC_HDRS += \ + third_party/embarc_osp/LICENSE +else + MLI_LIB_DIR = arc_mli_$(basename $(TCF_FILE_NAME)) + + $(eval $(call add_third_party_download,$(EMBARC_MLI_URL),$(EMBARC_MLI_MD5),$(MLI_LIB_DIR),build_embarc_mli,$(TCF_FILE))) + + MLI_INCLUDE_FOLDER = $(MLI_LIB_DIR)/include + MLI_LIB = third_party/$(MLI_LIB_DIR)/bin/libmli.a + MICROLITE_LIBS += $(MAKEFILE_DIR)/downloads/$(MLI_LIB_DIR)/bin/libmli.a + + THIRD_PARTY_CC_HDRS += \ + third_party/$(MLI_LIB_DIR)/LICENSE +endif + + THIRD_PARTY_CC_HDRS += $(MLI_LIB) + GENERATED_PROJECT_LIBS += $(MLI_LIB) + + INCLUDES += \ + -I$(MAKEFILE_DIR)/downloads/$(MLI_INCLUDE_FOLDER) \ + -I$(MAKEFILE_DIR)/downloads/$(MLI_INCLUDE_FOLDER)/api + + GENERATED_PROJECT_INCLUDES += \ + -I. \ + -I./third_party/$(MLI_INCLUDE_FOLDER) \ + -I./third_party/$(MLI_INCLUDE_FOLDER)/api + + + THIRD_PARTY_CC_HDRS += \ + third_party/$(MLI_INCLUDE_FOLDER)/mli_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/mli_config.h \ + third_party/$(MLI_INCLUDE_FOLDER)/mli_types.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_helpers_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_kernels_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_avepool_spec_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_conv2d_spec_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_depthwise_conv2d_spec_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_maxpool_spec_api.h \ + third_party/$(MLI_INCLUDE_FOLDER)/api/mli_mov_api.h + + MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h + MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.cc + MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.h + MICROLITE_CC_SRCS += 
tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.cc + MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc_mli/mli_slicers.h + MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/arc_mli/mli_slicers.cc + MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h + + + MICROLITE_TEST_SRCS += $(wildcard tensorflow/lite/micro/kernels/arc_mli/*test.cc) + + ARC_MLI_TESTS := conv depthwise_conv pooling fully_connected + ARC_MLI_TESTS += $(foreach TEST,$(ARC_MLI_TESTS), $(TEST)_slicing) + +generate_arc_mli_test_projects: $(foreach TEST,$(ARC_MLI_TESTS), generate_kernel_$(TEST)_test_make_project) + + +endif # no_embarc_mli +endif # TARGET_ARCH diff --git a/tensorflow/lite/micro/tools/make/ext_libs/embarc_mli.inc b/tensorflow/lite/micro/tools/make/ext_libs/embarc_mli.inc deleted file mode 100644 index 0cba07d9d27..00000000000 --- a/tensorflow/lite/micro/tools/make/ext_libs/embarc_mli.inc +++ /dev/null @@ -1,76 +0,0 @@ -ifeq ($(TARGET_ARCH), arc) - -# embarc_mli Library is used by default for ARC platform whenever it is possible. -# To use TFLM reference implementation it should be intentionally turned off -# by passing 'no_embarc_mli' tag (make -f TAGS=no_embarc_mli ...) -ifeq ($(filter no_embarc_mli,$(ALL_TAGS)),) - - -ALL_TAGS += embarc_mli - -ifeq ($(PRE_COMPILED_MLI),true) - # TODO: Replace with proper embarc_mli pre-builts. 
- $(eval $(call add_third_party_download,$(EMBARC_OSP_URL),$(EMBARC_OSP_MD5),embarc_osp,)) - - MLI_INCLUDE_FOLDER = embarc_osp/library/embarc_mli/include - MLI_LIB = third_party/embarc_osp/library/embarc_mli/lib/arcem9d/libmli_iotdk.a - - THIRD_PARTY_CC_HDRS += \ - third_party/embarc_osp/LICENSE -else - MLI_LIB_DIR = embarc_mli_$(basename $(TCF_FILE_NAME)) - - $(eval $(call add_third_party_download,$(EMBARC_MLI_URL),$(EMBARC_MLI_MD5),$(MLI_LIB_DIR),build_embarc_mli,$(TCF_FILE))) - - MLI_INCLUDE_FOLDER = $(MLI_LIB_DIR)/include - MLI_LIB = third_party/$(MLI_LIB_DIR)/bin/libmli.a - MICROLITE_LIBS += $(MAKEFILE_DIR)/downloads/$(MLI_LIB_DIR)/bin/libmli.a - - THIRD_PARTY_CC_HDRS += \ - third_party/$(MLI_LIB_DIR)/LICENSE -endif - - THIRD_PARTY_CC_HDRS += $(MLI_LIB) - GENERATED_PROJECT_LIBS += $(MLI_LIB) - - INCLUDES += \ - -I$(MAKEFILE_DIR)/downloads/$(MLI_INCLUDE_FOLDER) \ - -I$(MAKEFILE_DIR)/downloads/$(MLI_INCLUDE_FOLDER)/api - - GENERATED_PROJECT_INCLUDES += \ - -I. \ - -I./third_party/$(MLI_INCLUDE_FOLDER) \ - -I./third_party/$(MLI_INCLUDE_FOLDER)/api - - - THIRD_PARTY_CC_HDRS += \ - third_party/$(MLI_INCLUDE_FOLDER)/mli_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/mli_config.h \ - third_party/$(MLI_INCLUDE_FOLDER)/mli_types.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_helpers_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_kernels_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_avepool_spec_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_conv2d_spec_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_depthwise_conv2d_spec_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_krn_maxpool_spec_api.h \ - third_party/$(MLI_INCLUDE_FOLDER)/api/mli_mov_api.h - - MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.h - MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/embarc_mli/scratch_buffers.cc - MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.h - MICROLITE_CC_SRCS += 
tensorflow/lite/micro/kernels/embarc_mli/scratch_buf_mgr.cc - MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/embarc_mli/mli_slicers.h - MICROLITE_CC_SRCS += tensorflow/lite/micro/kernels/embarc_mli/mli_slicers.cc - MICROLITE_CC_HDRS += tensorflow/lite/micro/kernels/embarc_mli/mli_tf_utils.h - - - MICROLITE_TEST_SRCS += $(wildcard tensorflow/lite/micro/kernels/embarc_mli/*test.cc) - - EMBARC_MLI_TESTS := conv depthwise_conv pooling fully_connected - EMBARC_MLI_TESTS += $(foreach TEST,$(EMBARC_MLI_TESTS), $(TEST)_slicing) - -generate_embarc_mli_test_projects: $(foreach TEST,$(EMBARC_MLI_TESTS), generate_kernel_$(TEST)_test_make_project) - - -endif # no_embarc_mli -endif # TARGET_ARCH From 1196bed72bcedb8abc72a3da70c7ba58af03395f Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Thu, 16 Apr 2020 12:15:40 +0300 Subject: [PATCH 28/45] Merge latest updates from reference kernelse inside wrappers of arc_mli + fix minor bugs in kernel tests --- tensorflow/lite/micro/kernels/arc_mli/conv.cc | 180 +++++--- .../micro/kernels/arc_mli/depthwise_conv.cc | 389 ++++++++++-------- .../micro/kernels/arc_mli/fully_connected.cc | 49 ++- tensorflow/lite/micro/kernels/conv_test.cc | 4 +- tensorflow/lite/micro/kernels/pooling_test.cc | 2 +- 5 files changed, 361 insertions(+), 263 deletions(-) diff --git a/tensorflow/lite/micro/kernels/arc_mli/conv.cc b/tensorflow/lite/micro/kernels/arc_mli/conv.cc index d02f081434f..b9be93ceb11 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/conv.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/conv.cc @@ -15,7 +15,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/reference/conv.h" -#include "mli_api.h" // NOLINT +#include "mli_api.h" #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/common.h" @@ -24,12 +24,10 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" -#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h" -#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.h" #include "tensorflow/lite/micro/kernels/arc_mli/mli_slicers.h" #include "tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h" - -#include "mli_api.h" +#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.h" +#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h" namespace tflite { namespace ops { @@ -42,9 +40,11 @@ constexpr int kBiasTensor = 2; constexpr int kOutputTensor = 0; constexpr int kMaxChannels = 256; -// This file has 2 implementation of Conv. +// Conv is quantized along dimension 0: +// https://www.tensorflow.org/lite/performance/quantization_spec +constexpr int kConvQuantizedDimension = 0; -const int kTensorNotAllocated = -1; +// This file has 2 implementation of Conv. 
struct OpData { TfLitePaddingValues padding; @@ -101,13 +101,15 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + int output_channels = filter->dims->data[kConvQuantizedDimension]; TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams( context, input, filter, bias, output, params->activation, &data->output_multiplier, &data->output_shift, &data->output_activation_min, &data->output_activation_max, data->per_channel_output_multiplier, - reinterpret_cast(data->per_channel_output_shift))); + reinterpret_cast(data->per_channel_output_shift), + output_channels)); } return kTfLiteOk; } @@ -144,12 +146,10 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, GetTensorData(im2col), nullptr); } -TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, - TfLiteConvParams* params, OpData* data, - const TfLiteTensor* input, - const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output, - TfLiteTensor* im2col) { +TfLiteStatus EvalMliQuantizedPerChannel( + TfLiteContext* context, TfLiteNode* node, TfLiteConvParams* params, + OpData* data, const TfLiteTensor* input, const TfLiteTensor* filter, + const TfLiteTensor* bias, TfLiteTensor* output) { // Run Conv MLI kernel // MLI optimized version only supports int8 dataype and dilation factor of 1 if ((input->type == kTfLiteInt8) && (params->dilation_width_factor == 1) && @@ -204,24 +204,36 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, const int height_dimension = 1; int in_slice_height = 0; int out_slice_height = 0; - const int kernel_height = static_cast(mli_weights.shape[KRNL_H_DIM_HWC]); + const int kernel_height = + static_cast(mli_weights.shape[KRNL_H_DIM_HWC]); const int overlap = kernel_height - cfg.stride_height; // for weight slicing (on output channels) - const 
int weight_out_ch_dimension = 0; // NHWC layout for weigths, output channel dimension is the first dimension. - int slice_channels = static_cast(mli_weights.shape[weight_out_ch_dimension]); - const int out_tensor_ch_dimension = 3; // Batch-Height-Width-Channel layout means last dimension is output channels. + const int weight_out_ch_dimension = + 0; // NHWC layout for weigths, output channel dimension is the first + // dimension. + int slice_channels = + static_cast(mli_weights.shape[weight_out_ch_dimension]); + const int out_tensor_ch_dimension = + 3; // Batch-Height-Width-Channel layout means last dimension is output + // channels. - // Tensors for data in fast (local) memory and config to copy data from external to local memory + // Tensors for data in fast (local) memory and config to copy data from + // external to local memory mli_tensor weights_local = mli_weights; mli_tensor bias_local = mli_bias; mli_tensor in_local = mli_in; mli_tensor out_local = mli_out; mli_mov_cfg_t copy_config; mli_mov_cfg_for_copy(©_config); - TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_conv_tensors(context, &in_local, &weights_local, &bias_local, &out_local)); - TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io(&in_local, &out_local, kernel_height, cfg.stride_height, cfg.padding_top, cfg.padding_bottom, &in_slice_height, &out_slice_height)); - TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_weights(&weights_local, &bias_local, weight_out_ch_dimension, &slice_channels)); + TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_conv_tensors( + context, &in_local, &weights_local, &bias_local, &out_local)); + TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io( + &in_local, &out_local, kernel_height, cfg.stride_height, + cfg.padding_top, cfg.padding_bottom, &in_slice_height, + &out_slice_height)); + TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_weights( + &weights_local, &bias_local, weight_out_ch_dimension, &slice_channels)); /* is_local indicates 
that the tensor is already in local memory, so in that case the original tensor can be used, @@ -233,33 +245,40 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, TensorSlicer w_slice(&mli_weights, weight_out_ch_dimension, slice_channels); TensorSlicer b_slice(&mli_bias, weight_out_ch_dimension, slice_channels); - TensorSlicer out_ch_slice(&mli_out, out_tensor_ch_dimension, slice_channels, 0, 0, 0, true); + TensorSlicer out_ch_slice(&mli_out, out_tensor_ch_dimension, slice_channels, + 0, 0, 0, true); - mli_tensor *w_ptr = w_is_local ? w_slice.Sub() : &weights_local; - mli_tensor *b_ptr = b_is_local ? b_slice.Sub() : &bias_local; + mli_tensor* w_ptr = w_is_local ? w_slice.Sub() : &weights_local; + mli_tensor* b_ptr = b_is_local ? b_slice.Sub() : &bias_local; - void *input_buffer_ptr = NULL; + void* input_buffer_ptr = NULL; int input_buffer_size = 0; - while (!w_slice.Done()){ + while (!w_slice.Done()) { mli_mov_tensor_sync(w_slice.Sub(), ©_config, w_ptr); mli_mov_tensor_sync(b_slice.Sub(), ©_config, b_ptr); - /* mli_in tensor contains batches of HWC tensors. so it is a 4 dimensional tensor. - because the mli kernel will process one HWC tensor at a time, the 4 dimensional tensor needs to be sliced into nBatch 3 dimensional tensors. - on top of that there could be a need to also slice in the Height dimension. for that the sliceHeight has been calculated. - The tensor slicer is configured that it will completely slice the nBatch dimension (0) and slice the height dimension (1) - in chunks of 'sliceHeight' */ - TensorSlicer in_slice(&mli_in, height_dimension, in_slice_height, cfg.padding_top, cfg.padding_bottom, overlap); + /* mli_in tensor contains batches of HWC tensors. so it is a 4 dimensional + tensor. because the mli kernel will process one HWC tensor at a time, the + 4 dimensional tensor needs to be sliced into nBatch 3 dimensional tensors. + on top of that there could be a need to also slice in the Height + dimension. 
for that the sliceHeight has been calculated. The tensor slicer + is configured that it will completely slice the nBatch dimension (0) and + slice the height dimension (1) in chunks of 'sliceHeight' */ + TensorSlicer in_slice(&mli_in, height_dimension, in_slice_height, + cfg.padding_top, cfg.padding_bottom, overlap); - /* output tensor is alreade sliced in the output channel dimension. out_ch_slice.Sub() is the tensor for the amount of - output channels of this itteration of the weight slice loop. This tensor needs to be further sliced over the batch and - height dimension. */ - TensorSlicer out_slice(out_ch_slice.Sub(), height_dimension, out_slice_height); + /* output tensor is alreade sliced in the output channel dimension. + out_ch_slice.Sub() is the tensor for the amount of output channels of this + itteration of the weight slice loop. This tensor needs to be further + sliced over the batch and height dimension. */ + TensorSlicer out_slice(out_ch_slice.Sub(), height_dimension, + out_slice_height); - /* setup the pointers to the local or remote tensor to make the code inside the loop easier. */ - mli_tensor *in_ptr = in_is_local ? in_slice.Sub() : &in_local; - mli_tensor *out_ptr = out_is_local ? out_slice.Sub() : &out_local; + /* setup the pointers to the local or remote tensor to make the code + * inside the loop easier. */ + mli_tensor* in_ptr = in_is_local ? in_slice.Sub() : &in_local; + mli_tensor* out_ptr = out_is_local ? 
out_slice.Sub() : &out_local; while (!out_slice.Done()) { TF_LITE_ENSURE(context, !in_slice.Done()); @@ -267,7 +286,8 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, cfg.padding_bottom = in_slice.GetPaddingPost(); // if same input copy as previous iteration, skip the copy of input - if ((in_slice.Sub()->data != input_buffer_ptr) || (mli_hlp_count_elem_num(in_slice.Sub(), 0) != input_buffer_size)) { + if ((in_slice.Sub()->data != input_buffer_ptr) || + (mli_hlp_count_elem_num(in_slice.Sub(), 0) != input_buffer_size)) { mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); input_buffer_ptr = in_slice.Sub()->data; input_buffer_size = mli_hlp_count_elem_num(in_slice.Sub(), 0); @@ -283,26 +303,37 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, out_ch_slice.Next(); TF_LITE_ENSURE(context, in_slice.Done()); } - - } else { - ConvParams op_params; - op_params.input_offset = -input->params.zero_point; - op_params.output_offset = output->params.zero_point; - op_params.stride_height = params->stride_height; - op_params.stride_width = params->stride_width; - op_params.dilation_height_factor = params->dilation_height_factor; - op_params.dilation_width_factor = params->dilation_width_factor; - op_params.padding_values.height = data->padding.height; - op_params.padding_values.width = data->padding.width; - - reference_integer_ops::ConvPerChannel( - op_params, data->per_channel_output_multiplier, - data->per_channel_output_shift, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); } + + return kTfLiteOk; +} + +TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, + TfLiteConvParams* params, OpData* data, + const TfLiteTensor* input, + const TfLiteTensor* filter, + const TfLiteTensor* bias, + TfLiteTensor* output) { + ConvParams op_params; + 
op_params.input_offset = -input->params.zero_point; + op_params.output_offset = output->params.zero_point; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.dilation_height_factor = params->dilation_height_factor; + op_params.dilation_width_factor = params->dilation_width_factor; + op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data->padding.width; + op_params.quantized_activation_min = data->output_activation_min; + op_params.quantized_activation_max = data->output_activation_max; + + reference_integer_ops::ConvPerChannel( + op_params, data->per_channel_output_multiplier, + data->per_channel_output_shift, GetTensorShape(input), + GetTensorData(input), GetTensorShape(filter), + GetTensorData(filter), GetTensorShape(bias), + GetTensorData(bias), GetTensorShape(output), + GetTensorData(output)); + return kTfLiteOk; } @@ -352,6 +383,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { OpData data; // All per-channel quantized tensors need valid zero point and scale arrays. 
+ bool mli_is_applicable = false; if (input->type == kTfLiteInt8) { TF_LITE_ENSURE_EQ(context, filter->quantization.type, kTfLiteAffineQuantization); @@ -362,26 +394,38 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE(context, affine_quantization); TF_LITE_ENSURE(context, affine_quantization->scale); TF_LITE_ENSURE(context, affine_quantization->zero_point); - // Conv is quantized along dimension 0: - // https://www.tensorflow.org/lite/performance/quantization_spec - TF_LITE_ENSURE_EQ(context, filter->dims->data[0], - affine_quantization->scale->size); - TF_LITE_ENSURE_EQ(context, filter->dims->data[0], + + TF_LITE_ENSURE(context, + affine_quantization->scale->size == 1 || + affine_quantization->scale->size == + filter->dims->data[kConvQuantizedDimension]); + TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, affine_quantization->zero_point->size); + mli_is_applicable = + ((filter->type == kTfLiteInt8) && (bias->type == kTfLiteInt32) && + (params->dilation_width_factor == 1) && + (params->dilation_height_factor == 1) && + (affine_quantization->scale->size == + filter->dims->data[kConvQuantizedDimension])); } TF_LITE_ENSURE_STATUS(CalculateOpData( context, node, params, input_width, input_height, filter_width, filter_height, output_width, output_height, input->type, &data)); - switch (input->type) { // Already know in/out types are same. 
case kTfLiteFloat32: EvalFloat(context, node, params, &data, input, filter, bias, nullptr, nullptr, output); break; case kTfLiteInt8: - return EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias, - output, nullptr); + if (mli_is_applicable) { + return EvalMliQuantizedPerChannel(context, node, params, &data, input, + filter, bias, output); + + } else { + return EvalQuantizedPerChannel(context, node, params, &data, input, + filter, bias, output); + } break; case kTfLiteUInt8: EvalQuantized(context, node, params, &data, input, filter, bias, nullptr, diff --git a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc index 049347cc7a1..9860235b2fb 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc @@ -15,7 +15,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h" -#include "mli_api.h" // NOLINT +#include "mli_api.h" #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/common.h" @@ -30,8 +30,6 @@ limitations under the License. 
#include "tensorflow/lite/micro/kernels/arc_mli/mli_slicers.h" #include "tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h" -#include "mli_api.h" - namespace tflite { namespace ops { namespace micro { @@ -44,6 +42,10 @@ constexpr int kBiasTensor = 2; constexpr int kOutputTensor = 0; constexpr int kMaxChannels = 256; +// Depthwise conv is quantized along dimension 3: +// https://www.tensorflow.org/lite/performance/quantization_spec +constexpr int kDepthwiseConvQuantizedDimension = 3; + struct OpData { TfLitePaddingValues padding; // The scaling factor from input to output (aka the 'real multiplier') can @@ -85,6 +87,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; // Ensure filter and bias channel count does not exceed space reserved for // quantization metadata. 
@@ -101,7 +104,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, &data->output_multiplier, &data->output_shift, &data->output_activation_min, &data->output_activation_max, data->per_channel_output_multiplier, - reinterpret_cast(data->per_channel_output_shift))); + reinterpret_cast(data->per_channel_output_shift), num_channels)); } return kTfLiteOk; } @@ -136,187 +139,201 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, GetTensorData(output)); } -TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, +TfLiteStatus EvalMliQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, TfLiteDepthwiseConvParams* params, OpData* data, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output) { // Run Depthwise Conv MLI kernel // MLI optimized version only supports int8 dataype and dilation factor of 1 - if ((input->type == kTfLiteInt8) && (params->dilation_width_factor == 1) && - (params->dilation_height_factor == 1)) { - mli_tensor mli_in = {0}; - mli_tensor mli_weights = {0}; - mli_tensor mli_bias = {0}; - mli_tensor mli_out = {0}; - mli_conv2d_cfg cfg = {}; + mli_tensor mli_in = {0}; + mli_tensor mli_weights = {0}; + mli_tensor mli_bias = {0}; + mli_tensor mli_out = {0}; + mli_conv2d_cfg cfg = {}; - // reuse space allocated for OpData parameters - mli_weights.el_params.asym.scale.pi16 = - (int16_t*)data->per_channel_output_multiplier; - mli_bias.el_params.asym.scale.pi16 = - (int16_t*)data->per_channel_output_shift; + // reuse space allocated for OpData parameters + mli_weights.el_params.asym.scale.pi16 = + (int16_t*)data->per_channel_output_multiplier; + mli_bias.el_params.asym.scale.pi16 = + (int16_t*)data->per_channel_output_shift; - int16_t filter_zero_point = 0; - int16_t bias_zero_point = 0; - mli_weights.el_params.asym.zero_point.pi16 = &filter_zero_point; - mli_bias.el_params.asym.zero_point.pi16 = &bias_zero_point; + int16_t filter_zero_point = 0; + 
int16_t bias_zero_point = 0; + mli_weights.el_params.asym.zero_point.pi16 = &filter_zero_point; + mli_bias.el_params.asym.zero_point.pi16 = &bias_zero_point; - ConvertToMliTensor(input, &mli_in); - ConvertToMliTensorPerChannel(filter, &mli_weights); - ConvertToMliTensorPerChannel(bias, &mli_bias); - ConvertToMliTensor(output, &mli_out); - - if (params->activation == kTfLiteActRelu) { - cfg.relu.type = MLI_RELU_GEN; - } else if (params->activation == kTfLiteActRelu6) { - cfg.relu.type = MLI_RELU_6; - } else if (params->activation == kTfLiteActRelu1) { - cfg.relu.type = MLI_RELU_1; - } else { - cfg.relu.type = MLI_RELU_NONE; - } - - cfg.stride_width = params->stride_width; - cfg.stride_height = params->stride_height; - if (params->padding == kTfLitePaddingValid) { - cfg.padding_left = 0; - cfg.padding_right = 0; - cfg.padding_top = 0; - cfg.padding_bottom = 0; - } else { - cfg.padding_left = data->padding.width; - cfg.padding_right = data->padding.width + data->padding.width_offset; - cfg.padding_top = data->padding.height; - cfg.padding_bottom = data->padding.height + data->padding.height_offset; - } - - // for height slicing - const int heightDimension = 1; - int inSliceHeight = 0; - int outSliceHeight = 0; - const int kernelHeight = static_cast(mli_weights.shape[KRNL_DW_H_DIM_HWC]); - const int overlap = kernelHeight - cfg.stride_height; - - // for weight slicing (on output channels) - const int weight_out_ch_dimension = 3; // HWCN layout for weigths, output channel dimension is the first dimension. - const int bias_out_ch_dimension = 0; // bias has only 1 dimension - const int out_tensor_ch_dimension = 3; // Batch-Height-Width-Channel layout means last dimension is output channels. 
- const int32_t in_channels = mli_in.shape[out_tensor_ch_dimension]; - const int32_t out_channels = mli_out.shape[out_tensor_ch_dimension]; - int slice_channels = static_cast(mli_weights.shape[weight_out_ch_dimension]); - - // Tensors for data in fast (local) memory and config to copy data from external to local memory - mli_tensor weights_local = mli_weights; - mli_tensor bias_local = mli_bias; - mli_tensor in_local = mli_in; - mli_tensor out_local = mli_out; // this assumes that output shape is already filled in the tensor struct. - mli_mov_cfg_t copy_config; - mli_mov_cfg_for_copy(©_config); - - TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_conv_tensors(context, &in_local, &weights_local, &bias_local, &out_local)); - /* is_local indicates that the tensor is already in local memory, - so in that case the original tensor can be used, - and there is no need to copy it to the local tensor*/ - const bool in_is_local = in_local.data == mli_in.data; - const bool out_is_local = out_local.data == mli_out.data; - const bool w_is_local = weights_local.data == mli_weights.data; - const bool b_is_local = bias_local.data == mli_bias.data; - - TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io(&in_local, &out_local, kernelHeight, cfg.stride_height, cfg.padding_top, cfg.padding_bottom, &inSliceHeight, &outSliceHeight)); - TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_weights(&weights_local, &bias_local, weight_out_ch_dimension, &slice_channels)); - - /* if input channels is not equal to output channels, a channel multiplier is used. 
- in this case the slice channels needs to be rounded down to a multiple of the input channels */ - if (in_channels != out_channels) { - slice_channels = (slice_channels / in_channels) * in_channels; - } - - TensorSlicer w_slice(&mli_weights, weight_out_ch_dimension, slice_channels, 0, 0, 0, true); - TensorSlicer b_slice(&mli_bias, bias_out_ch_dimension, slice_channels); - TensorSlicer out_ch_slice(&mli_out, out_tensor_ch_dimension, slice_channels, 0, 0, 0, true); - TensorSlicer in_ch_slice(&mli_in, out_tensor_ch_dimension, slice_channels, 0, 0, 0, true); - - mli_tensor *w_ptr = w_is_local ? w_slice.Sub() : &weights_local; - mli_tensor *b_ptr = b_is_local ? b_slice.Sub() : &bias_local; - - void *input_buffer_ptr = NULL; - int input_buffer_size = 0; - int padding_top = cfg.padding_top; - int padding_bottom = cfg.padding_bottom; - - while (!w_slice.Done()){ - mli_mov_tensor_sync(w_slice.Sub(), ©_config, w_ptr); - mli_mov_tensor_sync(b_slice.Sub(), ©_config, b_ptr); - - /* input tensor is alreade sliced in the channel dimension. out_ch_slice.Sub() is the tensor for the amount of - channels of this itteration of the weight slice loop. This tensor needs to be further sliced over the batch and - height dimension. - in_ch_slice.Sub() tensor contains batches of HWC tensors. so it is a 4 dimensional tensor. - because the mli kernel will process one HWC tensor at a time, the 4 dimensional tensor needs to be sliced into nBatch 3 dimensional tensors. - on top of that there could be a need to also slice in the Height dimension. for that the sliceHeight has been calculated. - The tensor slicer is configured that it will completely slice the nBatch dimension (0) and slice the height dimension (1) - in chunks of 'sliceHeight' */ - TensorSlicer in_slice(in_ch_slice.Sub(), heightDimension, inSliceHeight, padding_top, padding_bottom, overlap); - - /* output tensor is alreade sliced in the output channel dimension. 
out_ch_slice.Sub() is the tensor for the amount of - output channels of this itteration of the weight slice loop. This tensor needs to be further sliced over the batch and - height dimension. */ - TensorSlicer out_slice(out_ch_slice.Sub(), heightDimension, outSliceHeight); - - /* setup the pointers to the local or remote tensor to make the code inside the loop easier. */ - mli_tensor *in_ptr = in_is_local ? in_slice.Sub() : &in_local; - mli_tensor *out_ptr = out_is_local ? out_slice.Sub() : &out_local; - - while (!out_slice.Done()) { - TF_LITE_ENSURE(context, !in_slice.Done()); - cfg.padding_top = in_slice.GetPaddingPre(); - cfg.padding_bottom = in_slice.GetPaddingPost(); - - // if same input copy as previous iteration, skip the copy of input - if ((in_slice.Sub()->data != input_buffer_ptr) || (mli_hlp_count_elem_num(in_slice.Sub(), 0) != input_buffer_size)) { - mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); - input_buffer_ptr = in_slice.Sub()->data; - input_buffer_size = mli_hlp_count_elem_num(in_slice.Sub(), 0); - } - mli_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr, &cfg, out_ptr); - mli_mov_tensor_sync(out_ptr, ©_config, out_slice.Sub()); - - in_slice.Next(); - out_slice.Next(); - } - w_slice.Next(); - b_slice.Next(); - out_ch_slice.Next(); - in_ch_slice.Next(); - TF_LITE_ENSURE(context, in_slice.Done()); - } + ConvertToMliTensor(input, &mli_in); + ConvertToMliTensorPerChannel(filter, &mli_weights); + ConvertToMliTensorPerChannel(bias, &mli_bias); + ConvertToMliTensor(output, &mli_out); + if (params->activation == kTfLiteActRelu) { + cfg.relu.type = MLI_RELU_GEN; + } else if (params->activation == kTfLiteActRelu6) { + cfg.relu.type = MLI_RELU_6; + } else if (params->activation == kTfLiteActRelu1) { + cfg.relu.type = MLI_RELU_1; } else { - DepthwiseParams op_params; - op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = data->padding.width; - op_params.padding_values.height = data->padding.height; - 
op_params.stride_width = params->stride_width; - op_params.stride_height = params->stride_height; - op_params.dilation_width_factor = params->dilation_width_factor; - op_params.dilation_height_factor = params->dilation_height_factor; - op_params.depth_multiplier = params->depth_multiplier; - op_params.input_offset = -input->params.zero_point; - op_params.weights_offset = 0; - op_params.output_offset = output->params.zero_point; - // TODO(b/130439627): Use calculated value for clamping. - op_params.quantized_activation_min = std::numeric_limits::min(); - op_params.quantized_activation_max = std::numeric_limits::max(); - - reference_integer_ops::DepthwiseConvPerChannel( - op_params, data->per_channel_output_multiplier, - data->per_channel_output_shift, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); + cfg.relu.type = MLI_RELU_NONE; } + + cfg.stride_width = params->stride_width; + cfg.stride_height = params->stride_height; + if (params->padding == kTfLitePaddingValid) { + cfg.padding_left = 0; + cfg.padding_right = 0; + cfg.padding_top = 0; + cfg.padding_bottom = 0; + } else { + cfg.padding_left = data->padding.width; + cfg.padding_right = data->padding.width + data->padding.width_offset; + cfg.padding_top = data->padding.height; + cfg.padding_bottom = data->padding.height + data->padding.height_offset; + } + + // for height slicing + const int heightDimension = 1; + int inSliceHeight = 0; + int outSliceHeight = 0; + const int kernelHeight = static_cast(mli_weights.shape[KRNL_DW_H_DIM_HWC]); + const int overlap = kernelHeight - cfg.stride_height; + + // for weight slicing (on output channels) + const int weight_out_ch_dimension = 3; // HWCN layout for weigths, output channel dimension is the first dimension. 
+ const int bias_out_ch_dimension = 0; // bias has only 1 dimension + const int out_tensor_ch_dimension = 3; // Batch-Height-Width-Channel layout means last dimension is output channels. + const int32_t in_channels = mli_in.shape[out_tensor_ch_dimension]; + const int32_t out_channels = mli_out.shape[out_tensor_ch_dimension]; + int slice_channels = static_cast(mli_weights.shape[weight_out_ch_dimension]); + + // Tensors for data in fast (local) memory and config to copy data from external to local memory + mli_tensor weights_local = mli_weights; + mli_tensor bias_local = mli_bias; + mli_tensor in_local = mli_in; + mli_tensor out_local = mli_out; // this assumes that output shape is already filled in the tensor struct. + mli_mov_cfg_t copy_config; + mli_mov_cfg_for_copy(©_config); + + TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_conv_tensors( + context, &in_local, &weights_local, &bias_local, &out_local)); + /* is_local indicates that the tensor is already in local memory, + so in that case the original tensor can be used, + and there is no need to copy it to the local tensor*/ + const bool in_is_local = in_local.data == mli_in.data; + const bool out_is_local = out_local.data == mli_out.data; + const bool w_is_local = weights_local.data == mli_weights.data; + const bool b_is_local = bias_local.data == mli_bias.data; + + TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io( + &in_local, &out_local, kernelHeight, cfg.stride_height, cfg.padding_top, + cfg.padding_bottom, &inSliceHeight, &outSliceHeight)); + TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_weights( + &weights_local, &bias_local, weight_out_ch_dimension, &slice_channels)); + + /* if input channels is not equal to output channels, a channel multiplier + is used. 
in this case the slice channels needs to be rounded down to a + multiple of the input channels */ + if (in_channels != out_channels) { + slice_channels = (slice_channels / in_channels) * in_channels; + } + + TensorSlicer w_slice(&mli_weights, weight_out_ch_dimension, slice_channels, 0, 0, 0, true); + TensorSlicer b_slice(&mli_bias, bias_out_ch_dimension, slice_channels); + TensorSlicer out_ch_slice(&mli_out, out_tensor_ch_dimension, slice_channels, 0, 0, 0, true); + TensorSlicer in_ch_slice(&mli_in, out_tensor_ch_dimension, slice_channels, 0, 0, 0, true); + + mli_tensor *w_ptr = w_is_local ? w_slice.Sub() : &weights_local; + mli_tensor *b_ptr = b_is_local ? b_slice.Sub() : &bias_local; + + void *input_buffer_ptr = NULL; + int input_buffer_size = 0; + int padding_top = cfg.padding_top; + int padding_bottom = cfg.padding_bottom; + + while (!w_slice.Done()){ + mli_mov_tensor_sync(w_slice.Sub(), ©_config, w_ptr); + mli_mov_tensor_sync(b_slice.Sub(), ©_config, b_ptr); + + /* input tensor is alreade sliced in the channel dimension. + out_ch_slice.Sub() is the tensor for the amount of channels of this + itteration of the weight slice loop. This tensor needs to be further + sliced over the batch and height dimension. in_ch_slice.Sub() tensor + contains batches of HWC tensors. so it is a 4 dimensional tensor. because + the mli kernel will process one HWC tensor at a time, the 4 dimensional + tensor needs to be sliced into nBatch 3 dimensional tensors. on top of + that there could be a need to also slice in the Height dimension. for that + the sliceHeight has been calculated. The tensor slicer is configured that + it will completely slice the nBatch dimension (0) and slice the height + dimension (1) in chunks of 'sliceHeight' */ + TensorSlicer in_slice(in_ch_slice.Sub(), heightDimension, inSliceHeight, padding_top, padding_bottom, overlap); + + /* output tensor is alreade sliced in the output channel dimension. 
+ out_ch_slice.Sub() is the tensor for the amount of output channels of this + itteration of the weight slice loop. This tensor needs to be further + sliced over the batch and height dimension. */ + TensorSlicer out_slice(out_ch_slice.Sub(), heightDimension, outSliceHeight); + + /* setup the pointers to the local or remote tensor to make the code + * inside the loop easier. */ + mli_tensor *in_ptr = in_is_local ? in_slice.Sub() : &in_local; + mli_tensor *out_ptr = out_is_local ? out_slice.Sub() : &out_local; + + while (!out_slice.Done()) { + TF_LITE_ENSURE(context, !in_slice.Done()); + cfg.padding_top = in_slice.GetPaddingPre(); + cfg.padding_bottom = in_slice.GetPaddingPost(); + + // if same input copy as previous iteration, skip the copy of input + if ((in_slice.Sub()->data != input_buffer_ptr) || + (mli_hlp_count_elem_num(in_slice.Sub(), 0) != input_buffer_size)) { + mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); + input_buffer_ptr = in_slice.Sub()->data; + input_buffer_size = mli_hlp_count_elem_num(in_slice.Sub(), 0); + } + mli_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr, &cfg, out_ptr); + mli_mov_tensor_sync(out_ptr, ©_config, out_slice.Sub()); + + in_slice.Next(); + out_slice.Next(); + } + w_slice.Next(); + b_slice.Next(); + out_ch_slice.Next(); + in_ch_slice.Next(); + TF_LITE_ENSURE(context, in_slice.Done()); + } + return kTfLiteOk; +} + +TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, + TfLiteDepthwiseConvParams* params, + OpData* data, const TfLiteTensor* input, + const TfLiteTensor* filter, + const TfLiteTensor* bias, + TfLiteTensor* output) { + DepthwiseParams op_params; + op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = data->padding.width; + op_params.padding_values.height = data->padding.height; + op_params.stride_width = params->stride_width; + op_params.stride_height = params->stride_height; + op_params.dilation_width_factor = params->dilation_width_factor; + 
op_params.dilation_height_factor = params->dilation_height_factor; + op_params.depth_multiplier = params->depth_multiplier; + op_params.input_offset = -input->params.zero_point; + op_params.weights_offset = 0; + op_params.output_offset = output->params.zero_point; + op_params.quantized_activation_min = data->output_activation_min; + op_params.quantized_activation_max = data->output_activation_max; + + reference_integer_ops::DepthwiseConvPerChannel( + op_params, data->per_channel_output_multiplier, + data->per_channel_output_shift, GetTensorShape(input), + GetTensorData(input), GetTensorShape(filter), + GetTensorData(filter), GetTensorShape(bias), + GetTensorData(bias), GetTensorShape(output), + GetTensorData(output)); return kTfLiteOk; } @@ -373,6 +390,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { OpData data; // All per-channel quantized tensors need valid zero point and scale arrays. + bool mli_is_applicable = false; if (input->type == kTfLiteInt8) { TF_LITE_ENSURE_EQ(context, filter->quantization.type, kTfLiteAffineQuantization); @@ -383,12 +401,18 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE(context, affine_quantization); TF_LITE_ENSURE(context, affine_quantization->scale); TF_LITE_ENSURE(context, affine_quantization->zero_point); - // Depthwise conv is quantized along dimension 3: - // https://www.tensorflow.org/lite/performance/quantization_spec - TF_LITE_ENSURE_EQ(context, filter->dims->data[3], - affine_quantization->scale->size); - TF_LITE_ENSURE_EQ(context, filter->dims->data[3], + TF_LITE_ENSURE( + context, affine_quantization->scale->size == 1 || + affine_quantization->scale->size == + filter->dims->data[kDepthwiseConvQuantizedDimension]); + TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, affine_quantization->zero_point->size); + mli_is_applicable = + ((filter->type == kTfLiteInt8) && (bias->type == kTfLiteInt32) && + (params->dilation_width_factor == 1) && + 
(params->dilation_height_factor == 1) && + (affine_quantization->scale->size == + filter->dims->data[kDepthwiseConvQuantizedDimension])); } TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height, @@ -399,8 +423,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { EvalFloat(context, node, params, &data, input, filter, bias, output); break; case kTfLiteInt8: - return EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias, - output); + if (mli_is_applicable) { + return EvalMliQuantizedPerChannel(context, node, params, &data, input, + filter, bias, output); + } else { + return EvalQuantizedPerChannel(context, node, params, &data, input, + filter, bias, output); + } break; case kTfLiteUInt8: EvalQuantized(context, node, params, &data, input, filter, bias, output); diff --git a/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc b/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc index 61fa0ff397f..185217d0c6a 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc @@ -15,7 +15,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/reference/fully_connected.h" -#include "mli_api.h" // NOLINT +#include "mli_api.h" #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/common.h" @@ -28,8 +28,6 @@ limitations under the License. 
#include "tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h" #include "tensorflow/lite/micro/kernels/arc_mli/mli_slicers.h" -#include "mli_api.h" - namespace tflite { namespace ops { namespace micro { @@ -77,6 +75,37 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, } // namespace +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + OpData* data = nullptr; + TfLiteStatus status = context->AllocatePersistentBuffer( + context, sizeof(OpData), reinterpret_cast(&data)); + if (status != kTfLiteOk || data == nullptr) { + return nullptr; + } + return data; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + OpData* data = reinterpret_cast(node->user_data); + auto* params = + reinterpret_cast(node->builtin_data); + + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor); + const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + TF_LITE_ENSURE_EQ(context, input->type, output->type); + TF_LITE_ENSURE_MSG(context, input->type == filter->type, + "Hybrid models are not supported on TFLite Micro."); + + TfLiteType data_type = input->type; + TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, data_type, input, + filter, bias, output, data)); + + return kTfLiteOk; +} + TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, TfLiteFullyConnectedParams* params, OpData* data, const TfLiteTensor* input, @@ -263,13 +292,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - TfLiteType data_type = input->type; - OpData local_data_object; - OpData* data = &local_data_object; - TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, data_type, input, - filter, bias, output, data)); + 
OpData* data = reinterpret_cast(node->user_data); - switch (filter->type) { // Already know in/out types are same. + // Checks in Prepare ensure input, output and filter types are all the same. + switch (input->type) { case kTfLiteFloat32: return EvalFloat(context, node, params, data, input, filter, bias, output); @@ -292,15 +318,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace fully_connected TfLiteRegistration* Register_FULLY_CONNECTED() { - static TfLiteRegistration r = {/*init=*/nullptr, + static TfLiteRegistration r = {/*init=*/fully_connected::Init, /*free=*/nullptr, - /*prepare=*/nullptr, + /*prepare=*/fully_connected::Prepare, /*invoke=*/fully_connected::Eval, /*profiling_string=*/nullptr, /*builtin_code=*/0, /*custom_name=*/nullptr, /*version=*/0}; - return &r; } diff --git a/tensorflow/lite/micro/kernels/conv_test.cc b/tensorflow/lite/micro/kernels/conv_test.cc index 4cc2a80c3ea..8a3eb30630d 100644 --- a/tensorflow/lite/micro/kernels/conv_test.cc +++ b/tensorflow/lite/micro/kernels/conv_test.cc @@ -409,8 +409,8 @@ TF_LITE_MICRO_TEST(Kernel1x1QuantizedPerChannel) { TF_LITE_MICRO_TEST(Kernel1x1QuantizedPerChannelRelu6) { // conv params: - // padding, stride_, dilation_, activation - TfLiteConvParams conv_params = {kTfLitePaddingValid, 1, 1, kTfLiteActRelu6}; + // padding, stride_, activation, dilation_ + TfLiteConvParams conv_params = {kTfLitePaddingValid, 1, 1, kTfLiteActRelu6, 1, 1}; const int kInputShape[] = {4, 1, 2, 2, 4}; // [len,N,H,W,C] const int kInputElements = kInputShape[1] * kInputShape[2] * kInputShape[3] * kInputShape[4]; diff --git a/tensorflow/lite/micro/kernels/pooling_test.cc b/tensorflow/lite/micro/kernels/pooling_test.cc index 8bfeb718a1b..96dff421d53 100644 --- a/tensorflow/lite/micro/kernels/pooling_test.cc +++ b/tensorflow/lite/micro/kernels/pooling_test.cc @@ -496,7 +496,7 @@ TF_LITE_MICRO_TEST(SimpleAveragePoolTestInt8PaddingSameStride1ActNone) { F2QS(8.5, output_min, output_max), F2QS(7., 
output_min, output_max)}, {4, 1, 2, 4, 1}, // Output shape output_min, output_max, // output quantization range - kTfLitePaddingValid, kTfLiteActNone, output_data); + kTfLitePaddingSame, kTfLiteActNone, output_data); } TF_LITE_MICRO_TEST(SimpleMaxPoolTestFloat) { From 273948c6aaf8424e8adf33d6f3fcba6c9fa935e2 Mon Sep 17 00:00:00 2001 From: Daria Zhuravleva Date: Tue, 14 Apr 2020 12:10:11 +0300 Subject: [PATCH 29/45] Common wrapper for average and max pooling --- .../lite/micro/kernels/arc_mli/pooling.cc | 267 ++++++++++-------- 1 file changed, 145 insertions(+), 122 deletions(-) diff --git a/tensorflow/lite/micro/kernels/arc_mli/pooling.cc b/tensorflow/lite/micro/kernels/arc_mli/pooling.cc index ced5c4a21b8..7f87d4849ff 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/pooling.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/pooling.cc @@ -14,7 +14,6 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/lite/kernels/internal/reference/pooling.h" -#include "mli_api.h" // NOLINT #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" @@ -41,6 +40,8 @@ struct OpData { TfLitePaddingValues padding; }; +typedef enum MliPoolingType { AveragePooling = 0, MaxPooling = 1 } MliPoolingType; + TfLiteStatus CalculateOpData(const TfLiteContext* context, const TfLitePoolParams* params, const TfLiteTensor* input, @@ -81,110 +82,111 @@ void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node, GetTensorShape(output), GetTensorData(output)); } -void AverageEvalUint8(TfLiteContext* context, const TfLiteNode* node, - const TfLitePoolParams* params, const OpData* data, - const TfLiteTensor* input, TfLiteTensor* output) { - int32_t activation_min, activation_max; - (void)CalculateActivationRangeQuantized(context, params->activation, output, - &activation_min, 
&activation_max); +//Prepare MLI tensors and run Average or Max Pooling +TfLiteStatus EvalMli(TfLiteContext* context, const TfLitePoolParams* params, + const OpData* data, const TfLiteTensor* input, + TfLiteTensor* output, const MliPoolingType pooling_type) { + mli_tensor mli_in = {0}; + mli_tensor mli_out = {0}; + mli_pool_cfg cfg = {0}; - PoolParams op_params; - op_params.stride_height = params->stride_height; - op_params.stride_width = params->stride_width; - op_params.filter_height = params->filter_height; - op_params.filter_width = params->filter_width; - op_params.padding_values.height = data->padding.height; - op_params.padding_values.width = data->padding.width; - op_params.quantized_activation_min = activation_min; - op_params.quantized_activation_max = activation_max; - reference_ops::AveragePool( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + ConvertToMliTensor(input, &mli_in); + ConvertToMliTensor(output, &mli_out); + + cfg.kernel_width = params->filter_width; + cfg.kernel_height = params->filter_height; + cfg.stride_width = params->stride_width; + cfg.stride_height = params->stride_height; + + if (params->padding == kTfLitePaddingValid) { + cfg.padding_left = 0; + cfg.padding_right = 0; + cfg.padding_top = 0; + cfg.padding_bottom = 0; + } else { + cfg.padding_left = data->padding.width; + cfg.padding_right = data->padding.width + data->padding.width_offset; + cfg.padding_top = data->padding.height; + cfg.padding_bottom = data->padding.height + data->padding.height_offset; + } + + mli_point_to_subtsr_cfg subtsr_cfg_in = { + {0, 0}, 2, static_cast(mli_in.shape[1])}; + mli_point_to_subtsr_cfg subtsr_cfg_out = { + {0, 0}, 2, static_cast(mli_out.shape[1])}; + mli_tensor sub_mli_in = {0}; + mli_tensor sub_mli_out = {0}; + mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); + mli_hlp_point_to_subtensor(&mli_out, &subtsr_cfg_out, &sub_mli_out); + + const int height_dimension = 1; + 
int in_slice_height = 0; + int out_slice_height = 0; + const int overlap = cfg.kernel_height - cfg.stride_height; + + // Tensors for data in fast (local) memory and config to copy data from + // external to local memory + mli_tensor in_local = sub_mli_in; + mli_tensor out_local = sub_mli_out; + mli_mov_cfg_t copy_config; + mli_mov_cfg_for_copy(©_config); + TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_pooling_tensors( + context, &in_local, &out_local)); + bool in_is_local = in_local.data == sub_mli_in.data; + bool out_is_local = out_local.data == sub_mli_out.data; + TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io( + &in_local, &out_local, cfg.kernel_height, cfg.stride_height, + cfg.padding_top, cfg.padding_bottom, &in_slice_height, + &out_slice_height)); + + /* mli_in tensor contains batches of HWC tensors. so it is a 4 dimensional + tensor. because the mli kernel will process one HWC tensor at a time, the 4 + dimensional tensor needs to be sliced into nBatch 3 dimensional tensors. on + top of that there could be a need to also slice in the Height dimension. + for that the sliceHeight has been calculated. The tensor slicer is + configured that it will completely slice the nBatch dimension (0) and slice + the height dimension (1) in chunks of 'sliceHeight' */ + TensorSlicer in_slice(&mli_in, height_dimension, in_slice_height, + cfg.padding_top, cfg.padding_bottom, overlap); + TensorSlicer out_slice(&mli_out, height_dimension, out_slice_height); + + /* is_local indicates that the tensor is already in local memory, + so in that case the original tensor can be used, + and there is no need to copy it to the local tensor*/ + mli_tensor* in_ptr = in_is_local ? in_slice.Sub() : &in_local; + mli_tensor* out_ptr = out_is_local ? 
out_slice.Sub() : &out_local; + + while (!out_slice.Done()) { + cfg.padding_top = in_slice.GetPaddingPre(); + cfg.padding_bottom = in_slice.GetPaddingPost(); + + mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); + if (pooling_type == AveragePooling) + mli_krn_avepool_hwc_sa8(in_ptr, &cfg, out_ptr); + else if (pooling_type == MaxPooling) + mli_krn_maxpool_hwc_sa8(in_ptr, &cfg, out_ptr); + mli_mov_tensor_sync(out_ptr, ©_config, out_slice.Sub()); + + in_slice.Next(); + out_slice.Next(); + } + return kTfLiteOk; } -TfLiteStatus AverageEvalInt8(TfLiteContext* context, const TfLiteNode* node, - const TfLitePoolParams* params, const OpData* data, - const TfLiteTensor* input, TfLiteTensor* output) { +void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node, + const TfLitePoolParams* params, const OpData* data, + const TfLiteTensor* input, TfLiteTensor* output) { + TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8); // Run Average Pooling MLI kernel // MLI optimized version only supports int8 dataype and no fused Relu // TODO: subject to add mli_saturate kernel if (input->type == kTfLiteInt8 && params->activation == kTfLiteActNone) { - mli_tensor mli_in = {0}; - mli_tensor mli_out = {0}; - mli_pool_cfg cfg = {0}; - - ConvertToMliTensor(input, &mli_in); - ConvertToMliTensor(output, &mli_out); - - cfg.kernel_width = params->filter_width; - cfg.kernel_height = params->filter_height; - cfg.stride_width = params->stride_width; - cfg.stride_height = params->stride_height; - - if (params->padding == kTfLitePaddingValid) { - cfg.padding_left = 0; - cfg.padding_right = 0; - cfg.padding_top = 0; - cfg.padding_bottom = 0; - } else { - cfg.padding_left = data->padding.width; - cfg.padding_right = data->padding.width + data->padding.width_offset; - cfg.padding_top = data->padding.height; - cfg.padding_bottom = data->padding.height + data->padding.height_offset; - } - - mli_point_to_subtsr_cfg subtsr_cfg_in = {{0,0}, 2, 
static_cast(mli_in.shape[1])}; - mli_point_to_subtsr_cfg subtsr_cfg_out = {{0,0}, 2, static_cast(mli_out.shape[1])}; - mli_tensor sub_mli_in = {0}; - mli_tensor sub_mli_out = {0}; - mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); - mli_hlp_point_to_subtensor(&mli_out, &subtsr_cfg_out, &sub_mli_out); - - const int height_dimension = 1; - int in_slice_height = 0; - int out_slice_height = 0; - const int overlap = cfg.kernel_height - cfg.stride_height; - - // Tensors for data in fast (local) memory and config to copy data from external to local memory - mli_tensor in_local = sub_mli_in; - mli_tensor out_local = sub_mli_out; - mli_mov_cfg_t copy_config; - mli_mov_cfg_for_copy(©_config); - TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_pooling_tensors(context, &in_local, &out_local)); - bool in_is_local = in_local.data == sub_mli_in.data; - bool out_is_local = out_local.data == sub_mli_out.data; - TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io(&in_local, &out_local, cfg.kernel_height, cfg.stride_height, cfg.padding_top, cfg.padding_bottom, &in_slice_height, &out_slice_height)); - - /* mli_in tensor contains batches of HWC tensors. so it is a 4 dimensional tensor. - because the mli kernel will process one HWC tensor at a time, the 4 dimensional tensor needs to be sliced into nBatch 3 dimensional tensors. - on top of that there could be a need to also slice in the Height dimension. for that the sliceHeight has been calculated. 
- The tensor slicer is configured that it will completely slice the nBatch dimension (0) and slice the height dimension (1) - in chunks of 'sliceHeight' */ - TensorSlicer in_slice(&mli_in, height_dimension, in_slice_height, cfg.padding_top, cfg.padding_bottom, overlap); - TensorSlicer out_slice(&mli_out, height_dimension, out_slice_height); - - /* is_local indicates that the tensor is already in local memory, - so in that case the original tensor can be used, - and there is no need to copy it to the local tensor*/ - mli_tensor *in_ptr = in_is_local ? in_slice.Sub() : &in_local; - mli_tensor *out_ptr = out_is_local ? out_slice.Sub() : &out_local; - - while (!out_slice.Done()) { - cfg.padding_top = in_slice.GetPaddingPre(); - cfg.padding_bottom = in_slice.GetPaddingPost(); - - mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); - mli_krn_avepool_hwc_sa8(in_ptr, &cfg, out_ptr); - mli_mov_tensor_sync(out_ptr, ©_config, out_slice.Sub()); - - in_slice.Next(); - out_slice.Next(); - } - + EvalMli(context, params, data, input, output, AveragePooling); } else { int32_t activation_min, activation_max; (void)CalculateActivationRangeQuantized(context, params->activation, output, &activation_min, &activation_max); + PoolParams op_params; op_params.stride_height = params->stride_height; op_params.stride_width = params->stride_width; @@ -194,11 +196,17 @@ TfLiteStatus AverageEvalInt8(TfLiteContext* context, const TfLiteNode* node, op_params.padding_values.width = data->padding.width; op_params.quantized_activation_min = activation_min; op_params.quantized_activation_max = activation_max; - reference_integer_ops::AveragePool( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + + if (input->type == kTfLiteUInt8) { + reference_ops::AveragePool( + op_params, GetTensorShape(input), GetTensorData(input), + GetTensorShape(output), GetTensorData(output)); + } else { + reference_integer_ops::AveragePool( + op_params, 
GetTensorShape(input), GetTensorData(input), + GetTensorShape(output), GetTensorData(output)); + } } - return kTfLiteOk; } void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node, @@ -222,29 +230,45 @@ void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node, GetTensorData(output)); } -void MaxEvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node, - TfLitePoolParams* params, OpData* data, - const TfLiteTensor* input, TfLiteTensor* output) { - int32_t activation_min, activation_max; - (void)CalculateActivationRangeQuantized(context, params->activation, output, - &activation_min, &activation_max); +void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node, + TfLitePoolParams* params, OpData* data, + const TfLiteTensor* input, TfLiteTensor* output) { + TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8); + + // Run Max Pooling MLI kernel + // MLI optimized version only supports int8 dataype and no fused Relu + // TODO: subject to add mli_saturate kernel + if (input->type == kTfLiteInt8 && params->activation == kTfLiteActNone) { + EvalMli(context, params, data, input, output, MaxPooling); + } else { + int32_t activation_min, activation_max; + (void)CalculateActivationRangeQuantized(context, params->activation, output, + &activation_min, &activation_max); - tflite::PoolParams op_params; - op_params.stride_height = params->stride_height; - op_params.stride_width = params->stride_width; - op_params.filter_height = params->filter_height; - op_params.filter_width = params->filter_width; - op_params.padding_values.height = data->padding.height; - op_params.padding_values.width = data->padding.width; - op_params.quantized_activation_min = activation_min; - op_params.quantized_activation_max = activation_max; - reference_ops::MaxPool(op_params, GetTensorShape(input), - GetTensorData(input), GetTensorShape(output), - GetTensorData(output)); + tflite::PoolParams op_params; + op_params.stride_height = params->stride_height; + 
op_params.stride_width = params->stride_width; + op_params.filter_height = params->filter_height; + op_params.filter_width = params->filter_width; + op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data->padding.width; + op_params.quantized_activation_min = activation_min; + op_params.quantized_activation_max = activation_max; + + if (input->type == kTfLiteUInt8) { + reference_ops::MaxPool( + op_params, GetTensorShape(input), GetTensorData(input), + GetTensorShape(output), GetTensorData(output)); + } else { + reference_integer_ops::MaxPool( + op_params, GetTensorShape(input), GetTensorData(input), + GetTensorShape(output), GetTensorData(output)); + } + } } - } // namespace + TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); OpData data; @@ -254,16 +278,14 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data)); - // Inputs and outputs share the same type, guarenteed by the converter. + // Inputs and outputs share the same type, guaranteed by the converter. 
switch (input->type) { case kTfLiteFloat32: AverageEvalFloat(context, node, params, &data, input, output); break; case kTfLiteUInt8: - AverageEvalUint8(context, node, params, &data, input, output); - break; case kTfLiteInt8: - return AverageEvalInt8(context, node, params, &data, input, output); + AverageEvalQuantized(context, node, params, &data, input, output); break; default: TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported", @@ -287,7 +309,8 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) { MaxEvalFloat(context, node, params, &data, input, output); break; case kTfLiteUInt8: - MaxEvalQuantizedUInt8(context, node, params, &data, input, output); + case kTfLiteInt8: + MaxEvalQuantized(context, node, params, &data, input, output); break; default: TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.", From 8ed89130aa4c3da812790a73dae465881428863f Mon Sep 17 00:00:00 2001 From: Daria Zhuravleva Date: Wed, 15 Apr 2020 15:10:52 +0300 Subject: [PATCH 30/45] Refactoring --- tensorflow/lite/micro/kernels/arc_mli/pooling.cc | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tensorflow/lite/micro/kernels/arc_mli/pooling.cc b/tensorflow/lite/micro/kernels/arc_mli/pooling.cc index 7f87d4849ff..7b68e314277 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/pooling.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/pooling.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/lite/kernels/internal/reference/pooling.h" +#include "mli_api.h" #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" @@ -24,7 +25,6 @@ limitations under the License. 
#include "tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h" #include "tensorflow/lite/micro/kernels/arc_mli/mli_slicers.h" -#include "mli_api.h" namespace tflite { namespace ops { @@ -40,7 +40,7 @@ struct OpData { TfLitePaddingValues padding; }; -typedef enum MliPoolingType { AveragePooling = 0, MaxPooling = 1 } MliPoolingType; +enum MliPoolingType { AveragePooling = 0, MaxPooling = 1 }; TfLiteStatus CalculateOpData(const TfLiteContext* context, const TfLitePoolParams* params, @@ -111,9 +111,15 @@ TfLiteStatus EvalMli(TfLiteContext* context, const TfLitePoolParams* params, } mli_point_to_subtsr_cfg subtsr_cfg_in = { - {0, 0}, 2, static_cast(mli_in.shape[1])}; + .start_coord = {0, 0}, + .coord_num = 2, + .first_out_dim_size = static_cast(mli_in.shape[1]), + }; mli_point_to_subtsr_cfg subtsr_cfg_out = { - {0, 0}, 2, static_cast(mli_out.shape[1])}; + .start_coord = {0, 0}, + .coord_num = 2, + .first_out_dim_size = static_cast(mli_out.shape[1]), + }; mli_tensor sub_mli_in = {0}; mli_tensor sub_mli_out = {0}; mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); From 51522a108d0ee14a665752f3f65e534235925a41 Mon Sep 17 00:00:00 2001 From: Daria Zhuravleva Date: Wed, 15 Apr 2020 21:46:00 +0300 Subject: [PATCH 31/45] Removed sub_tensors --- .../lite/micro/kernels/arc_mli/pooling.cc | 25 ++++--------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/tensorflow/lite/micro/kernels/arc_mli/pooling.cc b/tensorflow/lite/micro/kernels/arc_mli/pooling.cc index 7b68e314277..2c3875b58eb 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/pooling.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/pooling.cc @@ -109,22 +109,7 @@ TfLiteStatus EvalMli(TfLiteContext* context, const TfLitePoolParams* params, cfg.padding_top = data->padding.height; cfg.padding_bottom = data->padding.height + data->padding.height_offset; } - - mli_point_to_subtsr_cfg subtsr_cfg_in = { - .start_coord = {0, 0}, - .coord_num = 2, - .first_out_dim_size = 
static_cast(mli_in.shape[1]), - }; - mli_point_to_subtsr_cfg subtsr_cfg_out = { - .start_coord = {0, 0}, - .coord_num = 2, - .first_out_dim_size = static_cast(mli_out.shape[1]), - }; - mli_tensor sub_mli_in = {0}; - mli_tensor sub_mli_out = {0}; - mli_hlp_point_to_subtensor(&mli_in, &subtsr_cfg_in, &sub_mli_in); - mli_hlp_point_to_subtensor(&mli_out, &subtsr_cfg_out, &sub_mli_out); - + const int height_dimension = 1; int in_slice_height = 0; int out_slice_height = 0; @@ -132,14 +117,14 @@ TfLiteStatus EvalMli(TfLiteContext* context, const TfLitePoolParams* params, // Tensors for data in fast (local) memory and config to copy data from // external to local memory - mli_tensor in_local = sub_mli_in; - mli_tensor out_local = sub_mli_out; + mli_tensor in_local = mli_in; + mli_tensor out_local = mli_out; mli_mov_cfg_t copy_config; mli_mov_cfg_for_copy(©_config); TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_pooling_tensors( context, &in_local, &out_local)); - bool in_is_local = in_local.data == sub_mli_in.data; - bool out_is_local = out_local.data == sub_mli_out.data; + bool in_is_local = in_local.data == mli_in.data; + bool out_is_local = out_local.data == mli_out.data; TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io( &in_local, &out_local, cfg.kernel_height, cfg.stride_height, cfg.padding_top, cfg.padding_bottom, &in_slice_height, From 99d489c7efa85b121b99393a53c3c07ac356c641 Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Mon, 20 Apr 2020 17:09:56 +0300 Subject: [PATCH 32/45] Option to remove kernels implementation beside ARC MLI --- tensorflow/lite/micro/kernels/arc_mli/conv.cc | 104 ++++--- .../micro/kernels/arc_mli/depthwise_conv.cc | 108 +++++-- .../micro/kernels/arc_mli/fully_connected.cc | 290 ++++++++++-------- .../lite/micro/kernels/arc_mli/pooling.cc | 172 +++++++---- .../micro/tools/make/ext_libs/arc_mli.inc | 8 + 5 files changed, 427 insertions(+), 255 deletions(-) diff --git a/tensorflow/lite/micro/kernels/arc_mli/conv.cc 
b/tensorflow/lite/micro/kernels/arc_mli/conv.cc index b9be93ceb11..4a2676821d9 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/conv.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/conv.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2019-2020 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -44,8 +44,6 @@ constexpr int kMaxChannels = 256; // https://www.tensorflow.org/lite/performance/quantization_spec constexpr int kConvQuantizedDimension = 0; -// This file has 2 implementation of Conv. - struct OpData { TfLitePaddingValues padding; // The scaling factor from input to output (aka the 'real multiplier') can @@ -76,11 +74,31 @@ inline PaddingType RuntimePaddingType(TfLitePadding padding) { } } + +bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input, + const TfLiteTensor* filter, const TfLiteTensor* bias, + const TfLiteConvParams* params) { + const auto* affine_quantization = + reinterpret_cast(filter->quantization.params); + // MLI optimized version only supports int8 dataype, dilation factor of 1 and + // per-axis quantization of weights (no broadcasting/per-tensor) + bool ret_val = (filter->type == kTfLiteInt8) && + (input->type == kTfLiteInt8) && + (bias->type == kTfLiteInt32) && + (params->dilation_width_factor == 1) && + (params->dilation_height_factor == 1) && + (affine_quantization->scale->size == + filter->dims->data[kConvQuantizedDimension]) && + affine_quantization->scale->size <= (kMaxChannels * 2); + return ret_val; +} + + TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, TfLiteConvParams* params, int width, int height, int filter_width, int filter_height, int out_width, int out_height, const TfLiteType data_type, - OpData* data) { + bool mli_is_applicable, OpData* data) { bool has_bias = node->inputs->size == 3; // Check number of 
inputs/outputs TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2); @@ -95,7 +113,8 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, // Note that quantized inference requires that all tensors have their // parameters set. This is usually done during quantized training. - if (data_type != kTfLiteFloat32) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) + if (data_type != kTfLiteFloat32 && !mli_is_applicable) { const TfLiteTensor* input = GetInput(context, node, kInputTensor); const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); const TfLiteTensor* bias = @@ -111,14 +130,16 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, reinterpret_cast(data->per_channel_output_shift), output_channels)); } +#endif return kTfLiteOk; } -void EvalQuantized(TfLiteContext* context, TfLiteNode* node, - TfLiteConvParams* params, OpData* data, - const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* im2col, - TfLiteTensor* hwcn_weights, TfLiteTensor* output) { +TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteConvParams* params, OpData* data, + const TfLiteTensor* input, const TfLiteTensor* filter, + const TfLiteTensor* bias, TfLiteTensor* im2col, + TfLiteTensor* hwcn_weights, TfLiteTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) const int32_t input_offset = -input->params.zero_point; const int32_t filter_offset = -filter->params.zero_point; const int32_t output_offset = output->params.zero_point; @@ -144,6 +165,12 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, GetTensorData(bias), GetTensorShape(output), GetTensorData(output), GetTensorShape(im2col), GetTensorData(im2col), nullptr); + return kTfLiteOk; +#else + TF_LITE_KERNEL_LOG(context, "Type %s (%d) is not supported by ARC MLI Library.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif } TfLiteStatus EvalMliQuantizedPerChannel( @@ 
-209,14 +236,13 @@ TfLiteStatus EvalMliQuantizedPerChannel( const int overlap = kernel_height - cfg.stride_height; // for weight slicing (on output channels) - const int weight_out_ch_dimension = - 0; // NHWC layout for weigths, output channel dimension is the first - // dimension. + // NHWC layout for weigths, output channel dimension is the first dimension. + const int weight_out_ch_dimension = 0; int slice_channels = static_cast(mli_weights.shape[weight_out_ch_dimension]); - const int out_tensor_ch_dimension = - 3; // Batch-Height-Width-Channel layout means last dimension is output - // channels. + // Batch-Height-Width-Channel layout means last dimension is output channels. + const int out_tensor_ch_dimension = 3; + // Tensors for data in fast (local) memory and config to copy data from // external to local memory @@ -304,7 +330,6 @@ TfLiteStatus EvalMliQuantizedPerChannel( TF_LITE_ENSURE(context, in_slice.Done()); } } - return kTfLiteOk; } @@ -314,6 +339,7 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) ConvParams op_params; op_params.input_offset = -input->params.zero_point; op_params.output_offset = output->params.zero_point; @@ -333,15 +359,20 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, GetTensorData(filter), GetTensorShape(bias), GetTensorData(bias), GetTensorShape(output), GetTensorData(output)); - return kTfLiteOk; +#else + TF_LITE_KERNEL_LOG(context, + "Node configuration is not supported by ARC MLI Library."); + return kTfLiteError; +#endif } -void EvalFloat(TfLiteContext* context, TfLiteNode* node, - TfLiteConvParams* params, OpData* data, - const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* im2col, - TfLiteTensor* hwcn_weights, TfLiteTensor* output) { +TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* 
node, + TfLiteConvParams* params, OpData* data, + const TfLiteTensor* input, const TfLiteTensor* filter, + const TfLiteTensor* bias, TfLiteTensor* im2col, + TfLiteTensor* hwcn_weights, TfLiteTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) float output_activation_min, output_activation_max; CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); @@ -363,6 +394,12 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, GetTensorData(bias), GetTensorShape(output), GetTensorData(output), GetTensorShape(im2col), GetTensorData(im2col)); + return kTfLiteOk; +#else + TF_LITE_KERNEL_LOG(context, "Type %s (%d) is not supported by ARC MLI Library.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { @@ -383,7 +420,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { OpData data; // All per-channel quantized tensors need valid zero point and scale arrays. 
- bool mli_is_applicable = false; if (input->type == kTfLiteInt8) { TF_LITE_ENSURE_EQ(context, filter->quantization.type, kTfLiteAffineQuantization); @@ -401,26 +437,22 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { filter->dims->data[kConvQuantizedDimension]); TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, affine_quantization->zero_point->size); - mli_is_applicable = - ((filter->type == kTfLiteInt8) && (bias->type == kTfLiteInt32) && - (params->dilation_width_factor == 1) && - (params->dilation_height_factor == 1) && - (affine_quantization->scale->size == - filter->dims->data[kConvQuantizedDimension])); } + bool mli_is_applicable = IsMliApplicable(context, input, filter, bias, params); + TF_LITE_ENSURE_STATUS( + CalculateOpData(context, node, params, input_width, input_height, + filter_width, filter_height, output_width, output_height, + input->type, mli_is_applicable, &data)); - TF_LITE_ENSURE_STATUS(CalculateOpData( - context, node, params, input_width, input_height, filter_width, - filter_height, output_width, output_height, input->type, &data)); switch (input->type) { // Already know in/out types are same. 
case kTfLiteFloat32: - EvalFloat(context, node, params, &data, input, filter, bias, nullptr, + return EvalFloat(context, node, params, &data, input, filter, bias, nullptr, nullptr, output); break; case kTfLiteInt8: if (mli_is_applicable) { return EvalMliQuantizedPerChannel(context, node, params, &data, input, - filter, bias, output); + filter, bias, output); } else { return EvalQuantizedPerChannel(context, node, params, &data, input, @@ -428,7 +460,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } break; case kTfLiteUInt8: - EvalQuantized(context, node, params, &data, input, filter, bias, nullptr, + return EvalQuantized(context, node, params, &data, input, filter, bias, nullptr, nullptr, output); break; default: diff --git a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc index 9860235b2fb..081a40b23b5 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2017-2020 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -64,10 +64,30 @@ struct OpData { int32_t output_activation_max; }; +bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input, + const TfLiteTensor* filter, const TfLiteTensor* bias, + const TfLiteDepthwiseConvParams* params) { + const auto* affine_quantization = + reinterpret_cast(filter->quantization.params); + // MLI optimized version only supports int8 dataype, dilation factor of 1 and + // per-axis quantization of weights (no broadcasting/per-tensor) + bool ret_val = (filter->type == kTfLiteInt8) && + (input->type == kTfLiteInt8) && + (bias->type == kTfLiteInt32) && + (params->dilation_width_factor == 1) && + (params->dilation_height_factor == 1) && + (affine_quantization->scale->size == + filter->dims->data[kDepthwiseConvQuantizedDimension]) && + affine_quantization->scale->size <= (kMaxChannels * 2); + return ret_val; +} + + TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, TfLiteDepthwiseConvParams* params, int width, int height, int filter_width, int filter_height, - const TfLiteType data_type, OpData* data) { + const TfLiteType data_type, bool mli_is_applicable, + OpData* data) { bool has_bias = node->inputs->size == 3; // Check number of inputs/outputs TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2); @@ -81,7 +101,8 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, // Note that quantized inference requires that all tensors have their // parameters set. This is usually done during quantized training. 
- if (data_type != kTfLiteFloat32) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) + if (data_type != kTfLiteFloat32 && !mli_is_applicable) { const TfLiteTensor* input = GetInput(context, node, kInputTensor); const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); const TfLiteTensor* bias = @@ -106,15 +127,17 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, data->per_channel_output_multiplier, reinterpret_cast(data->per_channel_output_shift), num_channels)); } +#endif return kTfLiteOk; } } // namespace -void EvalFloat(TfLiteContext* context, TfLiteNode* node, - TfLiteDepthwiseConvParams* params, OpData* data, - const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output) { +TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLiteDepthwiseConvParams* params, OpData* data, + const TfLiteTensor* input, const TfLiteTensor* filter, + const TfLiteTensor* bias, TfLiteTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) float output_activation_min, output_activation_max; CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); @@ -137,6 +160,12 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, GetTensorShape(filter), GetTensorData(filter), GetTensorShape(bias), GetTensorData(bias), GetTensorShape(output), GetTensorData(output)); + return kTfLiteOk; +#else + TF_LITE_KERNEL_LOG(context, "Type %s (%d) is not supported by ARC MLI Library.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif } TfLiteStatus EvalMliQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, @@ -145,7 +174,6 @@ TfLiteStatus EvalMliQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output) { // Run Depthwise Conv MLI kernel - // MLI optimized version only supports int8 dataype and dilation factor of 1 mli_tensor mli_in = {0}; mli_tensor 
mli_weights = {0}; mli_tensor mli_bias = {0}; @@ -200,18 +228,23 @@ TfLiteStatus EvalMliQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node const int overlap = kernelHeight - cfg.stride_height; // for weight slicing (on output channels) - const int weight_out_ch_dimension = 3; // HWCN layout for weigths, output channel dimension is the first dimension. - const int bias_out_ch_dimension = 0; // bias has only 1 dimension - const int out_tensor_ch_dimension = 3; // Batch-Height-Width-Channel layout means last dimension is output channels. + // HWCN layout for weigths, output channel dimension is the first dimension. + const int weight_out_ch_dimension = 3; + // bias has only 1 dimension + const int bias_out_ch_dimension = 0; + // Batch-Height-Width-Channel layout means last dimension is output channels. + const int out_tensor_ch_dimension = 3; const int32_t in_channels = mli_in.shape[out_tensor_ch_dimension]; const int32_t out_channels = mli_out.shape[out_tensor_ch_dimension]; int slice_channels = static_cast(mli_weights.shape[weight_out_ch_dimension]); - // Tensors for data in fast (local) memory and config to copy data from external to local memory + // Tensors for data in fast (local) memory + // and config to copy data from external to local memory mli_tensor weights_local = mli_weights; mli_tensor bias_local = mli_bias; mli_tensor in_local = mli_in; - mli_tensor out_local = mli_out; // this assumes that output shape is already filled in the tensor struct. + mli_tensor out_local = mli_out; // this assumes that output shape + // is already filled in the tensor struct. 
mli_mov_cfg_t copy_config; mli_mov_cfg_for_copy(©_config); @@ -238,10 +271,13 @@ TfLiteStatus EvalMliQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node slice_channels = (slice_channels / in_channels) * in_channels; } - TensorSlicer w_slice(&mli_weights, weight_out_ch_dimension, slice_channels, 0, 0, 0, true); TensorSlicer b_slice(&mli_bias, bias_out_ch_dimension, slice_channels); - TensorSlicer out_ch_slice(&mli_out, out_tensor_ch_dimension, slice_channels, 0, 0, 0, true); - TensorSlicer in_ch_slice(&mli_in, out_tensor_ch_dimension, slice_channels, 0, 0, 0, true); + TensorSlicer w_slice(&mli_weights, weight_out_ch_dimension, slice_channels, + 0, 0, 0, true); + TensorSlicer out_ch_slice(&mli_out, out_tensor_ch_dimension, slice_channels, + 0, 0, 0, true); + TensorSlicer in_ch_slice(&mli_in, out_tensor_ch_dimension, slice_channels, + 0, 0, 0, true); mli_tensor *w_ptr = w_is_local ? w_slice.Sub() : &weights_local; mli_tensor *b_ptr = b_is_local ? b_slice.Sub() : &bias_local; @@ -266,7 +302,8 @@ TfLiteStatus EvalMliQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node the sliceHeight has been calculated. The tensor slicer is configured that it will completely slice the nBatch dimension (0) and slice the height dimension (1) in chunks of 'sliceHeight' */ - TensorSlicer in_slice(in_ch_slice.Sub(), heightDimension, inSliceHeight, padding_top, padding_bottom, overlap); + TensorSlicer in_slice(in_ch_slice.Sub(), heightDimension, inSliceHeight, + padding_top, padding_bottom, overlap); /* output tensor is alreade sliced in the output channel dimension. 
out_ch_slice.Sub() is the tensor for the amount of output channels of this @@ -312,6 +349,7 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) DepthwiseParams op_params; op_params.padding_type = PaddingType::kSame; op_params.padding_values.width = data->padding.width; @@ -335,12 +373,18 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, GetTensorData(bias), GetTensorShape(output), GetTensorData(output)); return kTfLiteOk; +#else + TF_LITE_KERNEL_LOG(context, + "Node configuration is not supported by ARC MLI Library."); + return kTfLiteError; +#endif } -void EvalQuantized(TfLiteContext* context, TfLiteNode* node, - TfLiteDepthwiseConvParams* params, OpData* data, - const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output) { +TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteDepthwiseConvParams* params, OpData* data, + const TfLiteTensor* input, const TfLiteTensor* filter, + const TfLiteTensor* bias, TfLiteTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) const int32_t input_offset = -input->params.zero_point; const int32_t filter_offset = -filter->params.zero_point; const int32_t output_offset = output->params.zero_point; @@ -369,6 +413,12 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, GetTensorShape(filter), GetTensorData(filter), GetTensorShape(bias), GetTensorData(bias), GetTensorShape(output), GetTensorData(output)); + return kTfLiteOk; +#else + TF_LITE_KERNEL_LOG(context, "Type %s (%d) is not supported by ARC MLI Library.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { @@ -390,7 +440,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { OpData data; // All per-channel 
quantized tensors need valid zero point and scale arrays. - bool mli_is_applicable = false; if (input->type == kTfLiteInt8) { TF_LITE_ENSURE_EQ(context, filter->quantization.type, kTfLiteAffineQuantization); @@ -407,20 +456,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { filter->dims->data[kDepthwiseConvQuantizedDimension]); TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, affine_quantization->zero_point->size); - mli_is_applicable = - ((filter->type == kTfLiteInt8) && (bias->type == kTfLiteInt32) && - (params->dilation_width_factor == 1) && - (params->dilation_height_factor == 1) && - (affine_quantization->scale->size == - filter->dims->data[kDepthwiseConvQuantizedDimension])); } + bool mli_is_applicable = IsMliApplicable(context, input, filter, bias, params); TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height, filter_width, filter_height, data_type, - &data)); + mli_is_applicable, &data)); switch (input->type) { // Already know in/out types are same. case kTfLiteFloat32: - EvalFloat(context, node, params, &data, input, filter, bias, output); + return EvalFloat(context, node, params, &data, input, filter, bias, + output); break; case kTfLiteInt8: if (mli_is_applicable) { @@ -432,7 +477,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } break; case kTfLiteUInt8: - EvalQuantized(context, node, params, &data, input, filter, bias, output); + return EvalQuantized(context, node, params, &data, input, filter, bias, + output); break; default: TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", diff --git a/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc b/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc index 185217d0c6a..70d1fda4c2b 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+/* Copyright 2017-2020 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/reference/fully_connected.h" -#include "mli_api.h" +#include "mli_api.h" #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/common.h" @@ -23,10 +23,10 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h" -#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.h" -#include "tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h" #include "tensorflow/lite/micro/kernels/arc_mli/mli_slicers.h" +#include "tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h" +#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.h" +#include "tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h" namespace tflite { namespace ops { @@ -52,6 +52,18 @@ constexpr int kWeightsTensor = 1; constexpr int kBiasTensor = 2; constexpr int kOutputTensor = 0; +bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input, + const TfLiteTensor* filter, const TfLiteTensor* bias, + const TfLiteFullyConnectedParams* params) { + // MLI optimized version only supports int8 dataype and no fused Relu and + // symmetric per-tensor quantization of weights (not per-axis) + bool ret_val = (filter->type == kTfLiteInt8) && + (input->type == kTfLiteInt8) && (bias->type == kTfLiteInt32) && + (params->activation == kTfLiteActNone) && + (filter->params.zero_point == 0); + return ret_val; +} + TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteFullyConnectedParams* params, TfLiteType 
data_type, const TfLiteTensor* input, @@ -59,7 +71,9 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, const TfLiteTensor* bias, TfLiteTensor* output, OpData* data) { TfLiteStatus status = kTfLiteOk; - if (data_type != kTfLiteFloat32) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) + if (data_type != kTfLiteFloat32 && + !IsMliApplicable(context, input, filter, bias, params)) { double real_multiplier = 0.0; TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler( context, input, filter, bias, output, &real_multiplier)); @@ -70,6 +84,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, context, params->activation, output, &data->output_activation_min, &data->output_activation_max)); } +#endif return status; } @@ -95,6 +110,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE(context, data != nullptr); TF_LITE_ENSURE_EQ(context, input->type, output->type); TF_LITE_ENSURE_MSG(context, input->type == filter->type, "Hybrid models are not supported on TFLite Micro."); @@ -106,122 +122,135 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } +TfLiteStatus EvalMliQuantizedInt8(TfLiteContext* context, TfLiteNode* node, + TfLiteFullyConnectedParams* params, + OpData* data, const TfLiteTensor* input, + const TfLiteTensor* filter, + const TfLiteTensor* bias, + TfLiteTensor* output) { + mli_tensor mli_in = {0}; + mli_tensor mli_weights = {0}; + mli_tensor mli_bias = {0}; + mli_tensor mli_out = {0}; + + ConvertToMliTensor(input, &mli_in); + ConvertToMliTensor(filter, &mli_weights); + ConvertToMliTensor(bias, &mli_bias); + ConvertToMliTensor(output, &mli_out); + + /* The input tensor can have more than 2 dimensions. for the compute this + doesn't make any difference because all the inputs or a batch entry will + be used anyway. 
because the MLI kernel doesn't recognize the multiple + dimensions, the tensor shape is casted to a {batchnum, inputsize} shape. */ + mli_in.shape[0] = mli_out.shape[0]; + mli_in.shape[1] = mli_weights.shape[1]; + mli_in.shape[2] = 0; + mli_in.shape[3] = 0; + mli_in.rank = 2; + + // Tensors for data in fast (local) memory and config to copy data from + // external to local memory + mli_tensor weights_local = mli_weights; + mli_tensor bias_local = mli_bias; + mli_tensor in_local = mli_in; + mli_tensor out_local = mli_out; + mli_mov_cfg_t copy_config; + mli_mov_cfg_for_copy(©_config); + const int weight_out_dimension = 0; + const int out_tensor_dimension = 1; + const int batch_dimension = 0; + int slice_size = mli_weights.shape[weight_out_dimension]; + + /* allocate the local buffers, and compute the slice size */ + TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_fully_connect_tensors( + context, &in_local, &weights_local, &bias_local, &out_local)); + TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_weights( + &weights_local, &bias_local, weight_out_dimension, &slice_size)); + int max_out_slice_size = + out_local.capacity / mli_hlp_tensor_element_size(&out_local); + if (slice_size > max_out_slice_size) slice_size = max_out_slice_size; + + /* is_local indicates that the tensor is already in local memory, + so in that case the original tensor can be used, + and there is no need to copy it to the local tensor*/ + const bool in_is_local = in_local.data == mli_in.data; + const bool out_is_local = out_local.data == mli_out.data; + const bool w_is_local = weights_local.data == mli_weights.data; + const bool b_is_local = bias_local.data == mli_bias.data; + + TensorSlicer w_slice(&mli_weights, weight_out_dimension, slice_size); + TensorSlicer b_slice(&mli_bias, weight_out_dimension, slice_size); + TensorSlicer out_ch_slice(&mli_out, out_tensor_dimension, slice_size, 0, 0, 0, + true); + + mli_tensor* w_ptr = w_is_local ? 
w_slice.Sub() : &weights_local; + mli_tensor* b_ptr = b_is_local ? b_slice.Sub() : &bias_local; + + void* input_buffer_ptr = NULL; + + while (!w_slice.Done()) { + mli_mov_tensor_sync(w_slice.Sub(), ©_config, w_ptr); + mli_mov_tensor_sync(b_slice.Sub(), ©_config, b_ptr); + + TensorSlicer in_slice(&mli_in, batch_dimension, 1); + + /* output tensor is alreade sliced in the output size dimension. + out_ch_slice.Sub() is the tensor for the amount of output size of this + itteration of the weight slice loop. This tensor needs to be further + sliced over the batch */ + TensorSlicer out_slice(out_ch_slice.Sub(), batch_dimension, 1); + + /* setup the pointers to the local or remote tensor to make the code + * inside the loop easier. */ + mli_tensor* in_ptr = in_is_local ? in_slice.Sub() : &in_local; + mli_tensor* out_ptr = out_is_local ? out_slice.Sub() : &out_local; + + while (!out_slice.Done()) { + // if same input copy as previous iteration, skip the copy of input + if (in_slice.Sub()->data != input_buffer_ptr) { + mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); + input_buffer_ptr = in_slice.Sub()->data; + } + mli_krn_fully_connected_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr, out_ptr); + mli_mov_tensor_sync(out_ptr, ©_config, out_slice.Sub()); + + in_slice.Next(); + out_slice.Next(); + } + w_slice.Next(); + b_slice.Next(); + out_ch_slice.Next(); + } + return kTfLiteOk; +} + TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, TfLiteFullyConnectedParams* params, OpData* data, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output) { - // Run Fully Connected MLI kernel - // MLI optimized version only supports int8 dataype and no fused Relu - // TODO: subject to add mli_saturate kernel - // work around for issue #35318, mli fully connect kernel only supports - // zeropoint == 0 for weights. this check can be removed once issue #35318 is - // resolved. 
- if ((filter->params.zero_point == 0) && - (input->type == kTfLiteInt8 && params->activation == kTfLiteActNone)) { - mli_tensor mli_in = {0}; - mli_tensor mli_weights = {0}; - mli_tensor mli_bias = {0}; - mli_tensor mli_out = {0}; +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) + FullyConnectedParams op_params; + op_params.input_offset = -input->params.zero_point; + op_params.weights_offset = -filter->params.zero_point; + op_params.output_offset = output->params.zero_point; + op_params.output_multiplier = data->output_multiplier; + // TODO(b/138810107): Figure out whether output shift should be inverted + op_params.output_shift = -data->output_shift; + op_params.quantized_activation_min = data->output_activation_min; + op_params.quantized_activation_max = data->output_activation_max; - ConvertToMliTensor(input, &mli_in); - ConvertToMliTensor(filter, &mli_weights); - ConvertToMliTensor(bias, &mli_bias); - ConvertToMliTensor(output, &mli_out); - - /* The input tensor can have more than 2 dimensions. for the compute this doesn't make any difference - because all the inputs or a batch entry will be used anyway. because the MLI kernel doesn't recognize - the multiple dimensions, the tensor shape is casted to a {batchnum, inputsize} shape. 
*/ - mli_in.shape[0] = mli_out.shape[0]; - mli_in.shape[1] = mli_weights.shape[1]; - mli_in.shape[2] = 0; - mli_in.shape[3] = 0; - mli_in.rank = 2; - - // Tensors for data in fast (local) memory and config to copy data from external to local memory - mli_tensor weights_local = mli_weights; - mli_tensor bias_local = mli_bias; - mli_tensor in_local = mli_in; - mli_tensor out_local = mli_out; - mli_mov_cfg_t copy_config; - mli_mov_cfg_for_copy(©_config); - const int weight_out_dimension = 0; - const int out_tensor_dimension = 1; - const int batch_dimension = 0; - int slice_size = mli_weights.shape[weight_out_dimension]; - - /* allocate the local buffers, and compute the slice size */ - TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_fully_connect_tensors(context, &in_local, &weights_local, &bias_local, &out_local)); - TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_weights(&weights_local, &bias_local, weight_out_dimension, &slice_size)); - int max_out_slice_size = out_local.capacity / mli_hlp_tensor_element_size(&out_local); - if (slice_size > max_out_slice_size) slice_size = max_out_slice_size; - - /* is_local indicates that the tensor is already in local memory, - so in that case the original tensor can be used, - and there is no need to copy it to the local tensor*/ - const bool in_is_local = in_local.data == mli_in.data; - const bool out_is_local = out_local.data == mli_out.data; - const bool w_is_local = weights_local.data == mli_weights.data; - const bool b_is_local = bias_local.data == mli_bias.data; - - TensorSlicer w_slice(&mli_weights, weight_out_dimension, slice_size); - TensorSlicer b_slice(&mli_bias, weight_out_dimension, slice_size); - TensorSlicer out_ch_slice(&mli_out, out_tensor_dimension, slice_size, 0, 0, 0, true); - - mli_tensor *w_ptr = w_is_local ? w_slice.Sub() : &weights_local; - mli_tensor *b_ptr = b_is_local ? 
b_slice.Sub() : &bias_local; - - void *input_buffer_ptr = NULL; - - while (!w_slice.Done()){ - mli_mov_tensor_sync(w_slice.Sub(), ©_config, w_ptr); - mli_mov_tensor_sync(b_slice.Sub(), ©_config, b_ptr); - - TensorSlicer in_slice(&mli_in, batch_dimension, 1); - - /* output tensor is alreade sliced in the output size dimension. out_ch_slice.Sub() is the tensor for the amount of - output size of this itteration of the weight slice loop. This tensor needs to be further sliced over the batch */ - TensorSlicer out_slice(out_ch_slice.Sub(), batch_dimension, 1); - - /* setup the pointers to the local or remote tensor to make the code inside the loop easier. */ - mli_tensor *in_ptr = in_is_local ? in_slice.Sub() : &in_local; - mli_tensor *out_ptr = out_is_local ? out_slice.Sub() : &out_local; - - while (!out_slice.Done()) { - - // if same input copy as previous iteration, skip the copy of input - if (in_slice.Sub()->data != input_buffer_ptr) { - mli_mov_tensor_sync(in_slice.Sub(), ©_config, in_ptr); - input_buffer_ptr = in_slice.Sub()->data; - } - mli_krn_fully_connected_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr, out_ptr); - mli_mov_tensor_sync(out_ptr, ©_config, out_slice.Sub()); - - in_slice.Next(); - out_slice.Next(); - } - w_slice.Next(); - b_slice.Next(); - out_ch_slice.Next(); - } - } else { - FullyConnectedParams op_params; - op_params.input_offset = -input->params.zero_point; - op_params.weights_offset = -filter->params.zero_point; - op_params.output_offset = output->params.zero_point; - op_params.output_multiplier = data->output_multiplier; - // TODO(b/138810107): Figure out whether output shift should be inverted - op_params.output_shift = -data->output_shift; - op_params.quantized_activation_min = data->output_activation_min; - op_params.quantized_activation_max = data->output_activation_max; - - reference_integer_ops::FullyConnected( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(filter), GetTensorData(filter), - GetTensorShape(bias), 
GetTensorData(bias), - GetTensorShape(output), GetTensorData(output)); - } + reference_integer_ops::FullyConnected( + op_params, GetTensorShape(input), GetTensorData(input), + GetTensorShape(filter), GetTensorData(filter), + GetTensorShape(bias), GetTensorData(bias), + GetTensorShape(output), GetTensorData(output)); return kTfLiteOk; +#else + TF_LITE_KERNEL_LOG(context, + "Node configuration is not supported by ARC MLI Library."); + return kTfLiteError; +#endif } TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, @@ -229,6 +258,7 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) const int32_t input_offset = -input->params.zero_point; const int32_t filter_offset = -filter->params.zero_point; const int32_t output_offset = output->params.zero_point; @@ -261,14 +291,20 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, TfLiteTypeGetName(output->type), output->type); return kTfLiteError; } - return kTfLiteOk; +#else + TF_LITE_KERNEL_LOG(context, + "Type %s (%d) is not supported by ARC MLI Library.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif } TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, TfLiteFullyConnectedParams* params, OpData* data, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) float output_activation_min, output_activation_max; CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); @@ -281,6 +317,12 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, GetTensorShape(bias), GetTensorData(bias), GetTensorShape(output), GetTensorData(output)); return kTfLiteOk; +#else + TF_LITE_KERNEL_LOG(context, + "Type %s (%d) is not supported by ARC MLI Library.", + 
TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { @@ -293,6 +335,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* output = GetOutput(context, node, kOutputTensor); OpData* data = reinterpret_cast(node->user_data); + TF_LITE_ENSURE(context, data != nullptr); // Checks in Prepare ensure input, output and filter types are all the same. switch (input->type) { @@ -300,12 +343,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return EvalFloat(context, node, params, data, input, filter, bias, output); case kTfLiteInt8: - return EvalQuantizedInt8(context, node, params, data, input, filter, bias, - output); + if (IsMliApplicable(context, input, filter, bias, params)) { + return EvalMliQuantizedInt8(context, node, params, data, input, filter, + bias, output); + } else { + return EvalQuantizedInt8(context, node, params, data, input, filter, + bias, output); + } - case kTfLiteUInt8: - return EvalQuantized(context, node, params, data, input, filter, bias, - output); + case kTfLiteUInt8: + return EvalQuantized(context, node, params, data, input, filter, bias, + output); default: TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", diff --git a/tensorflow/lite/micro/kernels/arc_mli/pooling.cc b/tensorflow/lite/micro/kernels/arc_mli/pooling.cc index 2c3875b58eb..79deacc23d9 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/pooling.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/pooling.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2019-2020 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -42,6 +42,15 @@ struct OpData { enum MliPoolingType { AveragePooling = 0, MaxPooling = 1 }; + +bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input, + const TfLitePoolParams* params) { + // MLI optimized version only supports int8 dataype and no fused Relu + // TODO: subject to add mli_saturate kernel + return (input->type == kTfLiteInt8 && params->activation == kTfLiteActNone); +} + + TfLiteStatus CalculateOpData(const TfLiteContext* context, const TfLitePoolParams* params, const TfLiteTensor* input, @@ -61,9 +70,11 @@ TfLiteStatus CalculateOpData(const TfLiteContext* context, return kTfLiteOk; } -void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node, - const TfLitePoolParams* params, const OpData* data, - const TfLiteTensor* input, TfLiteTensor* output) { +TfLiteStatus AverageEvalFloat(TfLiteContext* context, + const TfLiteNode* node, + const TfLitePoolParams* params, const OpData* data, + const TfLiteTensor* input, TfLiteTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) float activation_min, activation_max; CalculateActivationRange(params->activation, &activation_min, &activation_max); @@ -80,6 +91,13 @@ void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node, reference_ops::AveragePool( op_params, GetTensorShape(input), GetTensorData(input), GetTensorShape(output), GetTensorData(output)); + return kTfLiteOk; +#else + TF_LITE_KERNEL_LOG(context, + "Type %s (%d) is not supported by ARC MLI Library.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif } //Prepare MLI tensors and run Average or Max Pooling @@ -164,45 +182,49 @@ TfLiteStatus EvalMli(TfLiteContext* context, const TfLitePoolParams* params, return kTfLiteOk; } -void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node, - const TfLitePoolParams* params, const OpData* data, - const TfLiteTensor* input, TfLiteTensor* output) { +TfLiteStatus AverageEvalQuantized(TfLiteContext* context, + 
const TfLiteNode* node, + const TfLitePoolParams* params, + const OpData* data, const TfLiteTensor* input, + TfLiteTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8); - // Run Average Pooling MLI kernel - // MLI optimized version only supports int8 dataype and no fused Relu - // TODO: subject to add mli_saturate kernel - if (input->type == kTfLiteInt8 && params->activation == kTfLiteActNone) { - EvalMli(context, params, data, input, output, AveragePooling); + int32_t activation_min, activation_max; + (void)CalculateActivationRangeQuantized(context, params->activation, output, + &activation_min, &activation_max); + PoolParams op_params; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.filter_height = params->filter_height; + op_params.filter_width = params->filter_width; + op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data->padding.width; + op_params.quantized_activation_min = activation_min; + op_params.quantized_activation_max = activation_max; + + if (input->type == kTfLiteUInt8) { + reference_ops::AveragePool( + op_params, GetTensorShape(input), GetTensorData(input), + GetTensorShape(output), GetTensorData(output)); } else { - int32_t activation_min, activation_max; - (void)CalculateActivationRangeQuantized(context, params->activation, output, - &activation_min, &activation_max); - - PoolParams op_params; - op_params.stride_height = params->stride_height; - op_params.stride_width = params->stride_width; - op_params.filter_height = params->filter_height; - op_params.filter_width = params->filter_width; - op_params.padding_values.height = data->padding.height; - op_params.padding_values.width = data->padding.width; - op_params.quantized_activation_min = activation_min; - op_params.quantized_activation_max = activation_max; - - if (input->type == kTfLiteUInt8) { - 
reference_ops::AveragePool( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); - } else { - reference_integer_ops::AveragePool( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); - } + reference_integer_ops::AveragePool( + op_params, GetTensorShape(input), GetTensorData(input), + GetTensorShape(output), GetTensorData(output)); } + return kTfLiteOk; +#else + TF_LITE_KERNEL_LOG( + context, + "Node configuration or type %s (%d) is not supported by ARC MLI Library.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif } -void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node, - TfLitePoolParams* params, OpData* data, - const TfLiteTensor* input, TfLiteTensor* output) { +TfLiteStatus MaxEvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLitePoolParams* params, OpData* data, + const TfLiteTensor* input, TfLiteTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) float activation_min, activation_max; CalculateActivationRange(params->activation, &activation_min, &activation_max); @@ -219,43 +241,50 @@ void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node, reference_ops::MaxPool(op_params, GetTensorShape(input), GetTensorData(input), GetTensorShape(output), GetTensorData(output)); + return kTfLiteOk; +#else + TF_LITE_KERNEL_LOG(context, + "Type %s (%d) is not supported by ARC MLI Library.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif } -void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node, - TfLitePoolParams* params, OpData* data, - const TfLiteTensor* input, TfLiteTensor* output) { +TfLiteStatus MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node, + TfLitePoolParams* params, OpData* data, + const TfLiteTensor* input, TfLiteTensor* output) { +#if !defined(TF_LITE_STRIP_REFERENCE_IMPL) TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8); - - // 
Run Max Pooling MLI kernel - // MLI optimized version only supports int8 dataype and no fused Relu - // TODO: subject to add mli_saturate kernel - if (input->type == kTfLiteInt8 && params->activation == kTfLiteActNone) { - EvalMli(context, params, data, input, output, MaxPooling); - } else { - int32_t activation_min, activation_max; - (void)CalculateActivationRangeQuantized(context, params->activation, output, - &activation_min, &activation_max); + int32_t activation_min, activation_max; + (void)CalculateActivationRangeQuantized(context, params->activation, output, + &activation_min, &activation_max); - tflite::PoolParams op_params; - op_params.stride_height = params->stride_height; - op_params.stride_width = params->stride_width; - op_params.filter_height = params->filter_height; - op_params.filter_width = params->filter_width; - op_params.padding_values.height = data->padding.height; - op_params.padding_values.width = data->padding.width; - op_params.quantized_activation_min = activation_min; - op_params.quantized_activation_max = activation_max; + tflite::PoolParams op_params; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.filter_height = params->filter_height; + op_params.filter_width = params->filter_width; + op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data->padding.width; + op_params.quantized_activation_min = activation_min; + op_params.quantized_activation_max = activation_max; - if (input->type == kTfLiteUInt8) { + if (input->type == kTfLiteUInt8) { reference_ops::MaxPool( op_params, GetTensorShape(input), GetTensorData(input), GetTensorShape(output), GetTensorData(output)); - } else { + } else { reference_integer_ops::MaxPool( op_params, GetTensorShape(input), GetTensorData(input), GetTensorShape(output), GetTensorData(output)); - } } + return kTfLiteOk; +#else + TF_LITE_KERNEL_LOG(context, + "Node configuration or type %s (%d) is not supported 
by ARC MLI Library.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; +#endif } } // namespace @@ -272,11 +301,16 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) { // Inputs and outputs share the same type, guaranteed by the converter. switch (input->type) { case kTfLiteFloat32: - AverageEvalFloat(context, node, params, &data, input, output); + return AverageEvalFloat(context, node, params, &data, input, output); break; case kTfLiteUInt8: case kTfLiteInt8: - AverageEvalQuantized(context, node, params, &data, input, output); + if (IsMliApplicable(context, input, params)) { + return EvalMli(context, params, &data, input, output, AveragePooling); + } else { + return AverageEvalQuantized(context, node, params, &data, input, + output); + } break; default: TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported", @@ -297,11 +331,15 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) { switch (input->type) { case kTfLiteFloat32: - MaxEvalFloat(context, node, params, &data, input, output); + return MaxEvalFloat(context, node, params, &data, input, output); break; case kTfLiteUInt8: case kTfLiteInt8: - MaxEvalQuantized(context, node, params, &data, input, output); + if (IsMliApplicable(context, input, params)) { + return EvalMli(context, params, &data, input, output, MaxPooling); + } else { + return MaxEvalQuantized(context, node, params, &data, input, output); + } break; default: TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.", diff --git a/tensorflow/lite/micro/tools/make/ext_libs/arc_mli.inc b/tensorflow/lite/micro/tools/make/ext_libs/arc_mli.inc index 3b8fa04d536..ee3cc8113c1 100644 --- a/tensorflow/lite/micro/tools/make/ext_libs/arc_mli.inc +++ b/tensorflow/lite/micro/tools/make/ext_libs/arc_mli.inc @@ -86,6 +86,14 @@ endif ARC_MLI_TESTS += $(foreach TEST,$(ARC_MLI_TESTS), $(TEST)_slicing) generate_arc_mli_test_projects: $(foreach TEST,$(ARC_MLI_TESTS), 
generate_kernel_$(TEST)_test_make_project) + + ARC_EXTRA_APP_SETTINGS += \ + \nMLI_ONLY ?= false\n\ + \nifeq \($(DLR)\(MLI_ONLY\), true\)\ + \nCCFLAGS += -DTF_LITE_STRIP_REFERENCE_IMPL\ + \nCXXFLAGS += -DTF_LITE_STRIP_REFERENCE_IMPL\ + \nendif\n + endif # no_embarc_mli From 2621bf4ee40a7d14db48b63ead3fca2589552670 Mon Sep 17 00:00:00 2001 From: naumkin Date: Sun, 26 Apr 2020 23:49:42 -0700 Subject: [PATCH 33/45] Data movement tests added --- .../kernels/arc_mli/conv_slicing_test.cc | 784 +++++------- .../arc_mli/depthwise_conv_slicing_test.cc | 836 +++++------- .../arc_mli/fully_connected_slicing_test.cc | 1074 ++++------------ .../kernels/arc_mli/pooling_slicing_test.cc | 1140 ++++------------- 4 files changed, 1167 insertions(+), 2667 deletions(-) diff --git a/tensorflow/lite/micro/kernels/arc_mli/conv_slicing_test.cc b/tensorflow/lite/micro/kernels/arc_mli/conv_slicing_test.cc index a1f155ecc56..27e30856f6c 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/conv_slicing_test.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/conv_slicing_test.cc @@ -24,25 +24,114 @@ namespace tflite { namespace testing { namespace { -// Common inputs and outputs. -static const int kInputElements = 16; -static const int kInputShape[] = {4, 2, 2, 4, 1}; -static const float kInputData[] = {1, 1, 1, 1, 2, 2, 2, 2, - 1, 2, 3, 4, 1, 2, 3, 4}; -static const int kFilterElements = 12; -static const int kFilterShape[] = {4, 3, 2, 2, 1}; -static const float kFilterData[] = {1, 2, 3, 4, -1, 1, -1, 1, -1, -1, 1, 1}; -static const int kBiasElements = 3; -static const int kBiasShape[] = {1, 3}; -static const float kBiasData[] = {1, 2, 3}; -static const int kOutputElements = 12; -static const int kOutputShape[] = {4, 2, 1, 2, 3}; -static const float kGoldenData[] = {18, 2, 5, 18, 2, 5, 17, 4, 3, 37, 4, 3}; +// Common inputs and outputs 1. 
+static const int kInput1Elements = 20; +static const int kInput1Shape[] = {4, 1, 5, 2, 2}; +static const float kInput1Data[] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}; +static const int kFilter1Elements = 36; +static const int kFilter1Shape[] = {4, 2, 3, 3, 2}; +static const float kFilter1Data[] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2}; +static const int kBias1Elements = 2; +static const int kBias1Shape[] = {1, 2}; +static const float kBias1Data[] = {2, 2}; +static const int kOutput1Elements = 20; +static const int kOutput1Shape[] = {4, 1, 5, 2, 2}; +static const float kGolden1Data[] = {34, 34, 34, 34, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 34, 34, 34, 34}; + +// Common inputs and outputs 2. +static const int kInput2Elements = 80; +static const int kInput2Shape[] = {4, 1, 20, 2, 2}; +static const float kInput2Data[] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}; +static const int kFilter2Elements = 36; +static const int kFilter2Shape[] = {4, 2, 3, 3, 2}; +static const float kFilter2Data[] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2}; +static const int kBias2Elements = 2; +static const int kBias2Shape[] = {1, 2}; +static const float kBias2Data[] = {2, 2}; +static const int kOutput2Elements = 80; +static const int kOutput2Shape[] = {4, 1, 20, 2, 2}; +static const float kGolden2Data[] = {34, 34, 34, 34, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 
50, 50, 50, 50, 50, 34, 34, 34, 34}; + +// Common inputs and outputs 3. +static const int kInput3Elements = 40; +static const int kInput3Shape[] = {4, 1, 2, 2, 10}; +static const float kInput3Data[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; +static const int kFilter3Elements = 90; +static const int kFilter3Shape[] = {4, 1, 3, 3, 10}; // 1 3 3 10 +static const float kFilter3Data[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; +static const int kBias3Elements = 1; +static const int kBias3Shape[] = {1, 1}; +static const float kBias3Data[] = {1}; +static const int kOutput3Elements = 4; +static const int kOutput3Shape[] = {4, 1, 2, 2, 1}; // 2 2 1 +static const float kGolden3Data[] = {41, 41, 41, 41}; + +// Common inputs and outputs 4. 
+static const int kInput4Elements = 80; +static const int kInput4Shape[] = {4, 1, 4, 2, 10}; +static const float kInput4Data[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; +static const int kFilter4Elements = 90; +static const int kFilter4Shape[] = {4, 1, 3, 3, 10}; +static const float kFilter4Data[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; +static const int kBias4Elements = 1; +static const int kBias4Shape[] = {1, 1}; +static const float kBias4Data[] = {1}; +static const int kOutput4Elements = 8; +static const int kOutput4Shape[] = {4, 1, 4, 2, 1}; +static const float kGolden4Data[] = {41, 41, 61, 61, 61, 61, 41, 41}; static TfLiteConvParams common_conv_params = { - kTfLitePaddingValid, // padding - 2, // stride_width - 2, // stride_height + kTfLitePaddingSame, // padding + 1, // stride_width + 1, // stride_height kTfLiteActNone, // activation 1, // dilation_width_factor 1, // dilation_height_factor @@ -109,77 +198,6 @@ TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, return kTfLiteOk; } -void TestConvFloat(const int* input_dims_data, const float* input_data, - const int* filter_dims_data, const float* filter_data, - const int* bias_dims_data, const float* bias_data, - const int* output_dims_data, - const float* expected_output_data, float* output_data, - TfLiteConvParams* conv_params) { - TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); - TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); - TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); - 
TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); - const int output_dims_count = ElementCount(*output_dims); - constexpr int inputs_size = 3; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + outputs_size; - TfLiteTensor tensors[tensors_size] = { - CreateFloatTensor(input_data, input_dims, "input_tensor"), - CreateFloatTensor(filter_data, filter_dims, "filter_tensor"), - CreateFloatTensor(bias_data, bias_dims, "bias_tensor"), - CreateFloatTensor(output_data, output_dims, "output_tensor"), - }; - - TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, - ValidateConvGoldens(tensors, tensors_size, expected_output_data, - output_data, output_dims_count, conv_params)); -} - -void TestConvQuantizedPerLayer( - const int* input_dims_data, const float* input_data, - uint8_t* input_quantized, float input_scale, const int* filter_dims_data, - const float* filter_data, uint8_t* filter_quantized, float filter_scale, - const int* bias_dims_data, const float* bias_data, int32_t* bias_quantized, - const int* output_dims_data, const float* expected_output_data, - uint8_t* expected_output_quantized, uint8_t* output_data, - float output_scale, TfLiteConvParams* conv_params) { - TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); - TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); - TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); - const int output_dims_count = ElementCount(*output_dims); - - tflite::AsymmetricQuantize(expected_output_data, expected_output_quantized, - output_dims_count, output_scale, 128); - - constexpr int inputs_size = 3; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + outputs_size; - TfLiteTensor tensors[tensors_size] = { - CreateQuantizedTensor(input_data, input_quantized, input_dims, - input_scale, 128, "input_tensor"), - CreateQuantizedTensor(filter_data, filter_quantized, filter_dims, - 
filter_scale, 128, "filter_tensor"), - CreateQuantizedBiasTensor(bias_data, bias_quantized, bias_dims, - input_scale, filter_scale, "bias_tensor"), - CreateQuantizedTensor(output_data, output_dims, output_scale, 128, - "output_tensor")}; - - // TODO(njeff): Affine Quantization Params should be set on tensor creation. - float filter_scales[] = {1, filter_scale}; - int filter_zero_points[] = {1, 128}; - TfLiteAffineQuantization filter_quant = { - FloatArrayFromFloats(filter_scales), - IntArrayFromInts(filter_zero_points)}; - tensors[1].quantization = {kTfLiteAffineQuantization, &filter_quant}; - - TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, - ValidateConvGoldens(tensors, tensors_size, expected_output_quantized, - output_data, output_dims_count, conv_params)); -} - void TestConvQuantizedPerChannel( const int* input_dims_data, const float* input_data, int8_t* input_quantized, float input_scale, int input_zero_point, @@ -207,6 +225,20 @@ void TestConvQuantizedPerChannel( filter_data, filter_data_quantized, filter_dims, filter_scales, filter_zero_points, &filter_quant, 0 /* quantized dimension */, "filter_tensor"); + + // DN: to replace scales and quantized data to avoid second quantization + int channel_count = filter_dims->data[0]; + float true_filter_scales[5] = {1.0, 1.0, 1.0, 1.0, 1.0}; + true_filter_scales[0] = static_cast(channel_count); + TfLiteAffineQuantization *to_change = (TfLiteAffineQuantization *)filter_tensor.quantization.params; + to_change->scale = FloatArrayFromFloats(true_filter_scales); + + int filter_size = filter_tensor.bytes; + for(int i = 0; i < filter_size; ++i) { + filter_tensor.data.int8[i] = filter_data[i]; + } + + TfLiteTensor bias_tensor = CreatePerChannelQuantizedBiasTensor( bias_data, bias_data_quantized, bias_dims, input_scale, &filter_scales[1], bias_scales, bias_zero_points, &bias_quant, 0 /* quantized dimension */, @@ -255,375 +287,223 @@ void TestConvQuantizedPerChannel( TF_LITE_MICRO_TESTS_BEGIN -TF_LITE_MICRO_TEST(SimpleTestFloat) { - 
float output_data[tflite::testing::kOutputElements]; - - tflite::testing::TestConvFloat( - tflite::testing::kInputShape, tflite::testing::kInputData, - tflite::testing::kFilterShape, tflite::testing::kFilterData, - tflite::testing::kBiasShape, tflite::testing::kBiasData, - tflite::testing::kOutputShape, tflite::testing::kGoldenData, output_data, - &tflite::testing::common_conv_params); -} - -TF_LITE_MICRO_TEST(InputAndFilterSameWidthHeight) { - const int output_dims_count = 2; - float output_data[output_dims_count]; - - const int kFilterShape[] = {4, 1, 2, 4, 1}; - const float filter_values[] = {1, 2, 3, 4, -1, -1, 1, 1}; - const int kBiasShape[] = {1, 1}; - const float bias_values[] = {0}; - const int kOutputShape[] = {4, 2, 1, 1, 1}; - const float expected_output[] = {10, 34}; - - tflite::testing::TestConvFloat( - tflite::testing::kInputShape, tflite::testing::kInputData, kFilterShape, - filter_values, kBiasShape, bias_values, kOutputShape, expected_output, - output_data, &tflite::testing::common_conv_params); -} - -TF_LITE_MICRO_TEST(SimpleTestQuantized) { - const int output_dims_count = 12; - uint8_t output_data[output_dims_count]; - - const float input_scale = 0.5f; - const float filter_scale = 0.5f; - const float output_scale = 1.0f; - - uint8_t input_quantized[tflite::testing::kInputElements]; - uint8_t filter_quantized[tflite::testing::kFilterElements]; - int32_t bias_quantized[tflite::testing::kBiasElements]; - uint8_t golden_quantized[tflite::testing::kOutputElements]; - - tflite::testing::TestConvQuantizedPerLayer( - tflite::testing::kInputShape, tflite::testing::kInputData, - input_quantized, input_scale, tflite::testing::kFilterShape, - tflite::testing::kFilterData, filter_quantized, filter_scale, - tflite::testing::kBiasShape, tflite::testing::kBiasData, bias_quantized, - tflite::testing::kOutputShape, tflite::testing::kGoldenData, - golden_quantized, output_data, output_scale, - &tflite::testing::common_conv_params); -} - 
-TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannel) { - const int output_dims_count = 12; - int8_t output_data[output_dims_count]; - - const float input_scale = 0.5f; - const float output_scale = 1.0f; - const int input_zero_point = 0; - const int output_zero_point = 0; - - int8_t input_quantized[tflite::testing::kInputElements]; - int8_t filter_quantized[tflite::testing::kFilterElements]; - int32_t bias_quantized[tflite::testing::kBiasElements]; - int8_t golden_quantized[tflite::testing::kOutputElements]; - int zero_points[tflite::testing::kBiasElements + 1]; - float scales[tflite::testing::kBiasElements + 1]; - - tflite::testing::TestConvQuantizedPerChannel( - tflite::testing::kInputShape, tflite::testing::kInputData, - input_quantized, input_scale, input_zero_point, - tflite::testing::kFilterShape, tflite::testing::kFilterData, - filter_quantized, tflite::testing::kBiasShape, tflite::testing::kBiasData, - bias_quantized, scales, zero_points, tflite::testing::kOutputShape, - tflite::testing::kGoldenData, golden_quantized, output_data, output_scale, - output_zero_point, &tflite::testing::common_conv_params); -} - -TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannelRelu6) { - // conv params: - // padding, stride_, dilation_, activation - TfLiteConvParams conv_params = {kTfLitePaddingValid, 1, 1, kTfLiteActRelu6}; - const int output_dims_count = 12; - int8_t output_data[output_dims_count]; - - const float bias_values[] = {1, 2, -3}; - const float golden_data[] = {6, 2, 0, 6, 2, 0, 6, 4, 0, 6, 4, 0}; - - const float input_scale = 0.023529f; - const float output_scale = 0.023529f; - const int input_zero_point = -128; - const int output_zero_point = -128; - - int8_t input_quantized[tflite::testing::kInputElements]; - int8_t filter_quantized[tflite::testing::kFilterElements]; - int32_t bias_quantized[tflite::testing::kBiasElements]; - int8_t golden_quantized[tflite::testing::kOutputElements]; - int zero_points[tflite::testing::kBiasElements + 1]; - float 
scales[tflite::testing::kBiasElements + 1]; - - tflite::testing::TestConvQuantizedPerChannel( - tflite::testing::kInputShape, tflite::testing::kInputData, - input_quantized, input_scale, input_zero_point, - tflite::testing::kFilterShape, tflite::testing::kFilterData, - filter_quantized, tflite::testing::kBiasShape, bias_values, - bias_quantized, scales, zero_points, tflite::testing::kOutputShape, - golden_data, golden_quantized, output_data, output_scale, - output_zero_point, &tflite::testing::common_conv_params); -} - -TF_LITE_MICRO_TEST(Kernel1x1QuantizedPerChannel) { - // conv params: - // padding, stride_, activation, dilation_ - TfLiteConvParams conv_params = {kTfLitePaddingValid, 1, 1, - kTfLiteActNone, 1, 1}; - const int kInputShape[] = {4, 1, 2, 2, 4}; // [len,N,H,W,C] - const int kInputElements = - kInputShape[1] * kInputShape[2] * kInputShape[3] * kInputShape[4]; - float kInputData[/* kInputElements */] = {1, 1, 1, 1, 2, 2, 2, 2, - 1, 2, 3, 4, 1, 2, 3, 4}; - const int kFilterShape[] = {4, 3, 1, 1, 4}; - const int kFilterElements = - kFilterShape[1] * kFilterShape[2] * kFilterShape[3] * kFilterShape[4]; - float kFilterData[/* kFilterElements */] = {1, 2, 3, 4, -1, 1, - -1, 1, -1, -1, 1, 1}; - const int kBiasElements = kFilterShape[1]; - const int kBiasShape[] = {1, kBiasElements}; - float kBiasData[/* kBiasElements */] = {1, 2, 3}; - const int kOutputShape[] = {4, 1, 2, 2, kBiasElements}; - const int kOutputElements = 4 * 3; - int8_t output_data[kOutputElements]; - const float kGoldenData[/* kOutputElements */] = {11, 2, 3, 21, 2, 3, - 31, 4, 7, 31, 4, 7}; - - const float input_scale = 0.5f; - const float output_scale = 1.0f; - const int input_zero_point = 0; - const int output_zero_point = 0; - - int8_t input_quantized[kInputElements]; - int8_t filter_quantized[kFilterElements]; - int32_t bias_quantized[kBiasElements]; - int8_t golden_quantized[kOutputElements]; - int zero_points[kBiasElements + 1]; - float scales[kBiasElements + 1]; - - 
tflite::testing::TestConvQuantizedPerChannel( - kInputShape, kInputData, input_quantized, input_scale, input_zero_point, - kFilterShape, kFilterData, filter_quantized, kBiasShape, kBiasData, - bias_quantized, scales, zero_points, kOutputShape, kGoldenData, - golden_quantized, output_data, output_scale, output_zero_point, - &conv_params); -} - -TF_LITE_MICRO_TEST(Kernel1x1QuantizedPerChannelRelu6) { - // conv params: - // padding, stride_, dilation_, activation - TfLiteConvParams conv_params = {kTfLitePaddingValid, 1, 1, kTfLiteActRelu6}; - const int kInputShape[] = {4, 1, 2, 2, 4}; // [len,N,H,W,C] - const int kInputElements = - kInputShape[1] * kInputShape[2] * kInputShape[3] * kInputShape[4]; - float kInputData[/* kInputElements */] = {1, 1, 1, 1, 2, 2, 2, 2, - 1, 2, 3, 4, 1, 2, 3, 4}; - const int kFilterShape[] = {4, 3, 1, 1, 4}; - const int kFilterElements = - kFilterShape[1] * kFilterShape[2] * kFilterShape[3] * kFilterShape[4]; - float kFilterData[/* kFilterElements */] = {1, 2, 3, 4, -1, 1, - -1, 1, -1, -1, 1, 1}; - const int kBiasElements = kFilterShape[1]; - const int kBiasShape[] = {1, kBiasElements}; - float kBiasData[/* kBiasElements */] = {1, 2, -3}; - const int kOutputShape[] = {4, 1, 2, 2, kBiasElements}; - const int kOutputElements = 4 * 3; - int8_t output_data[kOutputElements]; - const float kGoldenData[/* kOutputElements */] = {6, 2, 0, 6, 2, 0, - 6, 4, 1, 6, 4, 1}; - - const float input_scale = 0.023529f; - const float output_scale = 0.023529f; - const int input_zero_point = -128; - const int output_zero_point = -128; - - int8_t input_quantized[kInputElements]; - int8_t filter_quantized[kFilterElements]; - int32_t bias_quantized[kBiasElements]; - int8_t golden_quantized[kOutputElements]; - int zero_points[kBiasElements + 1]; - float scales[kBiasElements + 1]; - - tflite::testing::TestConvQuantizedPerChannel( - kInputShape, kInputData, input_quantized, input_scale, input_zero_point, - kFilterShape, kFilterData, filter_quantized, kBiasShape, 
kBiasData, - bias_quantized, scales, zero_points, kOutputShape, kGoldenData, - golden_quantized, output_data, output_scale, output_zero_point, - &conv_params); -} - -TF_LITE_MICRO_TEST(FilterDimsNotMatchingAffineQuantization) { - const int output_dims_count = 12; - int8_t output_data[output_dims_count]; - - const float input_scale = 0.5f; - const float output_scale = 1.0f; - - int8_t input_quantized[tflite::testing::kInputElements]; - int8_t filter_quantized[tflite::testing::kFilterElements]; - int32_t bias_quantized[tflite::testing::kBiasElements]; - int8_t golden_quantized[tflite::testing::kOutputElements]; - int zero_points[tflite::testing::kBiasElements + 1]; - float scales[tflite::testing::kBiasElements + 1]; - - TfLiteIntArray* input_dims = - tflite::testing::IntArrayFromInts(tflite::testing::kInputShape); - TfLiteIntArray* filter_dims = - tflite::testing::IntArrayFromInts(tflite::testing::kFilterShape); - TfLiteIntArray* bias_dims = - tflite::testing::IntArrayFromInts(tflite::testing::kBiasShape); - TfLiteIntArray* output_dims = - tflite::testing::IntArrayFromInts(tflite::testing::kOutputShape); - - int filter_zero_points[5]; - float filter_scales[5]; - TfLiteAffineQuantization filter_quant; - TfLiteAffineQuantization bias_quant; - TfLiteTensor input_tensor = tflite::testing::CreateQuantizedTensor( - tflite::testing::kInputData, input_quantized, input_dims, input_scale, 0, - "input_tensor"); - TfLiteTensor filter_tensor = - tflite::testing::CreateSymmetricPerChannelQuantizedTensor( - tflite::testing::kFilterData, filter_quantized, filter_dims, - filter_scales, filter_zero_points, &filter_quant, - 0 /* quantized dimension */, "filter_tensor"); - TfLiteTensor bias_tensor = - tflite::testing::CreatePerChannelQuantizedBiasTensor( - tflite::testing::kBiasData, bias_quantized, bias_dims, input_scale, - &filter_scales[1], scales, zero_points, &bias_quant, 0, - "bias_tensor"); - TfLiteTensor output_tensor = tflite::testing::CreateQuantizedTensor( - output_data, 
output_dims, output_scale, 0 /* quantized dimension */, - "output_tensor"); - - float input_scales[] = {1, input_scale}; - int input_zero_points[] = {1, 128}; - TfLiteAffineQuantization input_quant = { - tflite::testing::FloatArrayFromFloats(input_scales), - tflite::testing::IntArrayFromInts(input_zero_points)}; - input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; - - constexpr int inputs_size = 3; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + outputs_size; - TfLiteTensor tensors[tensors_size] = { - input_tensor, - filter_tensor, - bias_tensor, - output_tensor, - }; - - tflite::AsymmetricQuantize(tflite::testing::kGoldenData, golden_quantized, - output_dims_count, output_scale, 0); - - // Set filter quant to mismatched dimension. - TfLiteAffineQuantization* quant = reinterpret_cast( - filter_tensor.quantization.params); - - // Choose arbitrary incorrect scale and zero point sizes which are neither 1 - // (for broadcast case) nor the quantized dimension size. - quant->scale->size = 2; - TF_LITE_MICRO_EXPECT_EQ( - kTfLiteError, - tflite::testing::ValidateConvGoldens( - tensors, tensors_size, golden_quantized, output_data, - output_dims_count, &tflite::testing::common_conv_params)); - - // Set scale back to correct dimension, and make zero point array too short. 
- quant->scale->size = tflite::testing::kFilterShape[0]; - quant->zero_point->size = 2; - TF_LITE_MICRO_EXPECT_EQ( - kTfLiteError, - tflite::testing::ValidateConvGoldens( - tensors, tensors_size, golden_quantized, output_data, - output_dims_count, &tflite::testing::common_conv_params)); -} - -TF_LITE_MICRO_TEST(BroadcastPerLayerQuantizationToPerChannelShouldMatchGolden) { - const int output_dims_count = 12; - int8_t output_data[output_dims_count]; - +// Test group 1 +TF_LITE_MICRO_TEST(SystemTestQuantizedPerChannel1) { + const int output_dims_count = 20; const float input_scale = 1.0f; - const float filter_scale = 1.0f; const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; - int8_t input_quantized[tflite::testing::kInputElements]; - int8_t filter_quantized[tflite::testing::kFilterElements]; - int32_t bias_quantized[tflite::testing::kBiasElements]; - int8_t golden_quantized[tflite::testing::kOutputElements]; + int8_t input_quantized[tflite::testing::kInput1Elements]; + int8_t filter_quantized[tflite::testing::kFilter1Elements]; + int32_t bias_quantized[tflite::testing::kBias1Elements]; + int8_t golden_quantized[tflite::testing::kOutput1Elements]; + int8_t output_data[output_dims_count]; - TfLiteIntArray* input_dims = - tflite::testing::IntArrayFromInts(tflite::testing::kInputShape); - TfLiteIntArray* filter_dims = - tflite::testing::IntArrayFromInts(tflite::testing::kFilterShape); - TfLiteIntArray* bias_dims = - tflite::testing::IntArrayFromInts(tflite::testing::kBiasShape); - TfLiteIntArray* output_dims = - tflite::testing::IntArrayFromInts(tflite::testing::kOutputShape); + int zero_points[tflite::testing::kBias1Elements + 1]; + float scales[tflite::testing::kBias1Elements + 1]; - // Create per-layer quantized int8 input tensor. 
- TfLiteTensor input_tensor = tflite::testing::CreateQuantizedTensor( - tflite::testing::kInputData, input_quantized, input_dims, input_scale, 0, - "input_tensor"); - int input_zero_points[2] = {1, 0}; - float input_scales[2] = {1, input_scale}; - TfLiteAffineQuantization input_quant = { - tflite::testing::FloatArrayFromFloats(input_scales), - tflite::testing::IntArrayFromInts(input_zero_points)}; - input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; - - // Create per-layer quantized int8 filter tensor. - TfLiteTensor filter_tensor = tflite::testing::CreateQuantizedTensor( - tflite::testing::kFilterData, filter_quantized, filter_dims, filter_scale, - 0, "filter_tensor"); - int filter_zero_points[2] = {1, 0}; - float filter_scales[2] = {1, filter_scale}; - TfLiteAffineQuantization filter_quant = { - tflite::testing::FloatArrayFromFloats(filter_scales), - tflite::testing::IntArrayFromInts(filter_zero_points)}; - filter_tensor.quantization = {kTfLiteAffineQuantization, &filter_quant}; - - // Create per-layer quantized int32 bias tensor. - tflite::SymmetricQuantize(tflite::testing::kBiasData, bias_quantized, - tflite::testing::kBiasElements, - input_scale * output_scale); - TfLiteTensor bias_tensor = tflite::testing::CreateInt32Tensor( - bias_quantized, bias_dims, "bias_tensor"); - - int bias_zero_points[2] = {1, 0}; - float bias_scales[2] = {1, input_scale * filter_scale}; - TfLiteAffineQuantization bias_quant = { - tflite::testing::FloatArrayFromFloats(bias_scales), - tflite::testing::IntArrayFromInts(bias_zero_points)}; - bias_tensor.quantization = {kTfLiteAffineQuantization, &bias_quant}; - - // Create per-layer quantized int8 output tensor. 
- TfLiteTensor output_tensor = tflite::testing::CreateQuantizedTensor( - output_data, output_dims, output_scale, 0 /* quantized dimension */, - "output_tensor"); - int output_zero_points[2] = {1, 0}; - float output_scales[2] = {1, output_scale}; - TfLiteAffineQuantization output_quant = { - tflite::testing::FloatArrayFromFloats(output_scales), - tflite::testing::IntArrayFromInts(output_zero_points)}; - output_tensor.quantization = {kTfLiteAffineQuantization, &output_quant}; - - constexpr int inputs_size = 3; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + outputs_size; - TfLiteTensor tensors[tensors_size] = { - input_tensor, - filter_tensor, - bias_tensor, - output_tensor, - }; - - tflite::AsymmetricQuantize(tflite::testing::kGoldenData, golden_quantized, - output_dims_count, output_scale, 0); - - TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, tflite::testing::ValidateConvGoldens( - tensors, tensors_size, golden_quantized, output_data, - output_dims_count, &tflite::testing::common_conv_params)); + tflite::testing::TestConvQuantizedPerChannel( + tflite::testing::kInput1Shape, tflite::testing::kInput1Data, + input_quantized, input_scale, input_zero_point, + tflite::testing::kFilter1Shape, tflite::testing::kFilter1Data, + filter_quantized, tflite::testing::kBias1Shape, tflite::testing::kBias1Data, + bias_quantized, scales, zero_points, tflite::testing::kOutput1Shape, + tflite::testing::kGolden1Data, golden_quantized, output_data, output_scale, + output_zero_point, &tflite::testing::common_conv_params); } +TF_LITE_MICRO_TEST(LocalTestQuantizedPerChannel1) { + const int output_dims_count = 20; + const float input_scale = 1.0f; + const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + +#pragma Bss(".Xdata") + static int8_t input_quantized[tflite::testing::kInput1Elements]; + static int8_t filter_quantized[tflite::testing::kFilter1Elements]; + static int32_t 
bias_quantized[tflite::testing::kBias1Elements]; + static int8_t output_data[output_dims_count]; +#pragma Bss() + + int8_t golden_quantized[tflite::testing::kOutput1Elements]; + int zero_points[tflite::testing::kBias1Elements + 1]; + float scales[tflite::testing::kBias1Elements + 1]; + + tflite::testing::TestConvQuantizedPerChannel( + tflite::testing::kInput1Shape, tflite::testing::kInput1Data, + input_quantized, input_scale, input_zero_point, + tflite::testing::kFilter1Shape, tflite::testing::kFilter1Data, + filter_quantized, tflite::testing::kBias1Shape, tflite::testing::kBias1Data, + bias_quantized, scales, zero_points, tflite::testing::kOutput1Shape, + tflite::testing::kGolden1Data, golden_quantized, output_data, output_scale, + output_zero_point, &tflite::testing::common_conv_params); +} + +// Test group 2 +TF_LITE_MICRO_TEST(SystemTestQuantizedPerChannel2) { + const int output_dims_count = 80; + const float input_scale = 1.0f; + const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + + int8_t input_quantized[tflite::testing::kInput2Elements]; + int8_t filter_quantized[tflite::testing::kFilter2Elements]; + int32_t bias_quantized[tflite::testing::kBias2Elements]; + int8_t golden_quantized[tflite::testing::kOutput2Elements]; + int8_t output_data[output_dims_count]; + + int zero_points[tflite::testing::kBias2Elements + 1]; + float scales[tflite::testing::kBias2Elements + 1]; + + tflite::testing::TestConvQuantizedPerChannel( + tflite::testing::kInput2Shape, tflite::testing::kInput2Data, + input_quantized, input_scale, input_zero_point, + tflite::testing::kFilter2Shape, tflite::testing::kFilter2Data, + filter_quantized, tflite::testing::kBias2Shape, tflite::testing::kBias2Data, + bias_quantized, scales, zero_points, tflite::testing::kOutput2Shape, + tflite::testing::kGolden2Data, golden_quantized, output_data, output_scale, + output_zero_point, &tflite::testing::common_conv_params); +} + 
+TF_LITE_MICRO_TEST(LocalTestQuantizedPerChannel2) { + const int output_dims_count = 80; + const float input_scale = 1.0f; + const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + +#pragma Bss(".Xdata") + static int8_t input_quantized[tflite::testing::kInput2Elements]; + static int8_t filter_quantized[tflite::testing::kFilter2Elements]; + static int32_t bias_quantized[tflite::testing::kBias2Elements]; + static int8_t output_data[output_dims_count]; +#pragma Bss() + + int8_t golden_quantized[tflite::testing::kOutput2Elements]; + int zero_points[tflite::testing::kBias2Elements + 1]; + float scales[tflite::testing::kBias2Elements + 1]; + + tflite::testing::TestConvQuantizedPerChannel( + tflite::testing::kInput2Shape, tflite::testing::kInput2Data, + input_quantized, input_scale, input_zero_point, + tflite::testing::kFilter2Shape, tflite::testing::kFilter2Data, + filter_quantized, tflite::testing::kBias2Shape, tflite::testing::kBias2Data, + bias_quantized, scales, zero_points, tflite::testing::kOutput2Shape, + tflite::testing::kGolden2Data, golden_quantized, output_data, output_scale, + output_zero_point, &tflite::testing::common_conv_params); +} + +// Test group 3 +TF_LITE_MICRO_TEST(SystemTestQuantizedPerChannel3) { + const int output_dims_count = 4; + const float input_scale = 1.0f; + const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + + int8_t input_quantized[tflite::testing::kInput3Elements]; + int8_t filter_quantized[tflite::testing::kFilter3Elements]; + int32_t bias_quantized[tflite::testing::kBias3Elements]; + int8_t golden_quantized[tflite::testing::kOutput3Elements]; + int8_t output_data[output_dims_count]; + + int zero_points[tflite::testing::kBias3Elements + 1]; + float scales[tflite::testing::kBias3Elements + 1]; + + tflite::testing::TestConvQuantizedPerChannel( + tflite::testing::kInput3Shape, tflite::testing::kInput3Data, + input_quantized, input_scale, 
input_zero_point, + tflite::testing::kFilter3Shape, tflite::testing::kFilter3Data, + filter_quantized, tflite::testing::kBias3Shape, tflite::testing::kBias3Data, + bias_quantized, scales, zero_points, tflite::testing::kOutput3Shape, + tflite::testing::kGolden3Data, golden_quantized, output_data, output_scale, + output_zero_point, &tflite::testing::common_conv_params); +} + +TF_LITE_MICRO_TEST(LocalTestQuantizedPerChannel3) { + const int output_dims_count = 4; + const float input_scale = 1.0f; + const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + +#pragma Bss(".Xdata") + static int8_t input_quantized[tflite::testing::kInput3Elements]; + static int8_t filter_quantized[tflite::testing::kFilter3Elements]; + static int32_t bias_quantized[tflite::testing::kBias3Elements]; + static int8_t output_data[output_dims_count]; +#pragma Bss() + + int8_t golden_quantized[tflite::testing::kOutput3Elements]; + int zero_points[tflite::testing::kBias3Elements + 1]; + float scales[tflite::testing::kBias3Elements + 1]; + + tflite::testing::TestConvQuantizedPerChannel( + tflite::testing::kInput3Shape, tflite::testing::kInput3Data, + input_quantized, input_scale, input_zero_point, + tflite::testing::kFilter3Shape, tflite::testing::kFilter3Data, + filter_quantized, tflite::testing::kBias3Shape, tflite::testing::kBias3Data, + bias_quantized, scales, zero_points, tflite::testing::kOutput3Shape, + tflite::testing::kGolden3Data, golden_quantized, output_data, output_scale, + output_zero_point, &tflite::testing::common_conv_params); +} + +// Test group 4 +TF_LITE_MICRO_TEST(SystemTestQuantizedPerChannel4) { + const int output_dims_count = 8; + const float input_scale = 1.0f; + const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + + int8_t input_quantized[tflite::testing::kInput4Elements]; + int8_t filter_quantized[tflite::testing::kFilter4Elements]; + int32_t 
bias_quantized[tflite::testing::kBias4Elements]; + int8_t golden_quantized[tflite::testing::kOutput4Elements]; + int8_t output_data[output_dims_count]; + + int zero_points[tflite::testing::kBias4Elements + 1]; + float scales[tflite::testing::kBias4Elements + 1]; + + tflite::testing::TestConvQuantizedPerChannel( + tflite::testing::kInput4Shape, tflite::testing::kInput4Data, + input_quantized, input_scale, input_zero_point, + tflite::testing::kFilter4Shape, tflite::testing::kFilter4Data, + filter_quantized, tflite::testing::kBias4Shape, tflite::testing::kBias4Data, + bias_quantized, scales, zero_points, tflite::testing::kOutput4Shape, + tflite::testing::kGolden4Data, golden_quantized, output_data, output_scale, + output_zero_point, &tflite::testing::common_conv_params); +} + +TF_LITE_MICRO_TEST(LocalTestQuantizedPerChannel4) { + const int output_dims_count = 8; + const float input_scale = 1.0f; + const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + +#pragma Bss(".Xdata") + static int8_t input_quantized[tflite::testing::kInput4Elements]; + static int8_t filter_quantized[tflite::testing::kFilter4Elements]; + static int32_t bias_quantized[tflite::testing::kBias4Elements]; + static int8_t output_data[output_dims_count]; +#pragma Bss() + + int8_t golden_quantized[tflite::testing::kOutput4Elements]; + int zero_points[tflite::testing::kBias4Elements + 1]; + float scales[tflite::testing::kBias4Elements + 1]; + + tflite::testing::TestConvQuantizedPerChannel( + tflite::testing::kInput4Shape, tflite::testing::kInput4Data, + input_quantized, input_scale, input_zero_point, + tflite::testing::kFilter4Shape, tflite::testing::kFilter4Data, + filter_quantized, tflite::testing::kBias4Shape, tflite::testing::kBias4Data, + bias_quantized, scales, zero_points, tflite::testing::kOutput4Shape, + tflite::testing::kGolden4Data, golden_quantized, output_data, output_scale, + output_zero_point, &tflite::testing::common_conv_params); +} 
TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv_slicing_test.cc b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv_slicing_test.cc index 8b79885a8a8..fb9dd46c1e4 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv_slicing_test.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv_slicing_test.cc @@ -106,87 +106,6 @@ TfLiteStatus ValidateDepthwiseConvGoldens(const T* expected_output_data, return kTfLiteOk; } -void TestDepthwiseConvFloat(const int* input_dims_data, const float* input_data, - const int* filter_dims_data, - const float* filter_data, const int* bias_dims_data, - const float* bias_data, - const float* expected_output_data, - const int* output_dims_data, - TfLiteFusedActivation activation, - float* output_data) { - TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); - TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); - TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); - const int output_dims_count = ElementCount(*output_dims); - - constexpr int inputs_size = 3; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + outputs_size; - TfLiteTensor tensors[tensors_size] = { - CreateFloatTensor(input_data, input_dims, "input_tensor"), - CreateFloatTensor(filter_data, filter_dims, "filter_tensor"), - CreateFloatTensor(bias_data, bias_dims, "bias_tensor"), - CreateFloatTensor(output_data, output_dims, "output_tensor"), - }; - - ValidateDepthwiseConvGoldens(expected_output_data, output_dims_count, - activation, 1e-5, tensors_size, tensors); -} - -void TestDepthwiseConvQuantizedPerLayer( - const int* input_dims_data, const float* input_data, - uint8_t* input_quantized, float input_scale, int input_zero_point, - const int* filter_dims_data, const float* filter_data, - uint8_t* filter_quantized, float filter_scale, int filter_zero_point, - const int* bias_dims_data, 
const float* bias_data, int32_t* bias_quantized, - const float* golden, uint8_t* golden_quantized, const int* output_dims_data, - uint8_t* output_data, float output_scale, int output_zero_point, - TfLiteFusedActivation activation) { - TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); - TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data); - TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); - const int output_dims_count = ElementCount(*output_dims); - - constexpr int inputs_size = 3; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + outputs_size; - TfLiteTensor tensors[tensors_size] = { - tflite::testing::CreateQuantizedTensor(input_data, input_quantized, - input_dims, input_scale, - input_zero_point, "input_tensor"), - tflite::testing::CreateQuantizedTensor( - filter_data, filter_quantized, filter_dims, filter_scale, - filter_zero_point, "filter_tensor"), - tflite::testing::CreateQuantizedBiasTensor(bias_data, bias_quantized, - bias_dims, input_scale, - filter_scale, "bias_tensor"), - tflite::testing::CreateQuantizedTensor(output_data, output_dims, - output_scale, output_zero_point, - "output_tensor"), - }; - - // TODO(njeff): Affine Quantization Params should be set on tensor creation. 
- float filter_scales[] = {1, filter_scale}; - int filter_zero_points[] = {1, 128}; - TfLiteAffineQuantization filter_quant = { - FloatArrayFromFloats(filter_scales), - IntArrayFromInts(filter_zero_points)}; - tensors[1].quantization = {kTfLiteAffineQuantization, &filter_quant}; - - float bias_scales[] = {1, filter_scale * input_scale}; - int bias_zero_points[] = {1, 128}; - TfLiteAffineQuantization bias_quant = {FloatArrayFromFloats(bias_scales), - IntArrayFromInts(bias_zero_points)}; - tensors[2].quantization = {kTfLiteAffineQuantization, &bias_quant}; - - AsymmetricQuantize(golden, golden_quantized, output_dims_count, output_scale, - output_zero_point); - ValidateDepthwiseConvGoldens(golden_quantized, output_dims_count, activation, - 1.0, tensors_size, tensors); -} - void TestDepthwiseConvQuantizedPerChannel( const int* input_dims_data, const float* input_data, int8_t* input_quantized, float input_scale, int input_zero_point, @@ -263,183 +182,29 @@ void TestDepthwiseConvQuantizedPerChannel( TF_LITE_MICRO_TESTS_BEGIN -TF_LITE_MICRO_TEST(SimpleTest) { - const int input_elements = 12; - const int input_shape[] = {4, 1, 3, 2, 2}; - const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; - const int filter_elements = 16; - const int filter_shape[] = {4, 1, 2, 2, 4}; - const float filter_values[] = {1, 2, 3, 4, -9, 10, -11, 12, - 5, 6, 7, 8, 13, -14, 15, -16}; - const int bias_elements = 4; - const int bias_shape[] = {4, 1, 1, 1, 4}; - const float bias_values[] = {1, 2, 3, 4}; - const float golden[] = { - 71, -34, 99, -20, 91, -26, 127, -4, - }; - const int output_shape[] = {4, 1, 2, 1, 4}; - const int output_dims_count = 8; - float output_data[output_dims_count]; - tflite::testing::TestDepthwiseConvFloat( - input_shape, input_values, filter_shape, filter_values, bias_shape, - bias_values, golden, output_shape, kTfLiteActNone, output_data); -} - -TF_LITE_MICRO_TEST(SimpleTestQuantized) { - const int input_elements = 12; - const int input_shape[] = {4, 1, 
3, 2, 2}; - const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; - const int filter_elements = 16; - const int filter_shape[] = {4, 1, 2, 2, 4}; - const float filter_values[] = {1, 2, 3, 4, -9, 10, -11, 12, - 5, 6, 7, 8, 13, -14, 15, -16}; - const int bias_elements = 4; - const int bias_shape[] = {4, 1, 1, 1, 4}; - const int output_elements = 8; - const float bias_values[] = {1, 2, 3, 4}; - const float golden[] = { - 71, -34, 99, -20, 91, -26, 127, -4, - }; - const int output_shape[] = {4, 1, 2, 1, 4}; - - const float input_scale = 0.5f; - const int input_zero_point = 128; - const float filter_scale = 0.5f; - const int filter_zero_point = 128; - const float output_scale = 1.0f; - const int output_zero_point = 128; - - uint8_t input_quantized[input_elements]; - uint8_t filter_quantized[filter_elements]; - int32_t bias_quantized[bias_elements]; - uint8_t golden_quantized[output_elements]; - uint8_t output_data[output_elements]; - - tflite::testing::TestDepthwiseConvQuantizedPerLayer( - input_shape, input_values, input_quantized, input_scale, input_zero_point, - filter_shape, filter_values, filter_quantized, filter_scale, - filter_zero_point, bias_shape, bias_values, bias_quantized, golden, - golden_quantized, output_shape, output_data, output_scale, - output_zero_point, kTfLiteActNone); -} - -TF_LITE_MICRO_TEST(SimpleTestRelu) { - const int input_elements = 12; - const int input_shape[] = {4, 1, 3, 2, 2}; - const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; - const int filter_elements = 16; - const int filter_shape[] = {4, 1, 2, 2, 4}; - const float filter_values[] = {1, 2, 3, 4, -9, 10, -11, 12, - 5, 6, 7, 8, 13, -14, 15, -16}; - const int bias_elements = 4; - const int bias_shape[] = {4, 1, 1, 1, 4}; - const int output_elements = 8; - const float bias_values[] = {1, 2, 3, 4}; - const int output_shape[] = {4, 1, 2, 1, 4}; - const int output_dims_count = 8; - const float golden_relu[] = {71, 0, 99, 0, 91, 0, 127, 0}; - float 
output_data[output_dims_count]; - - tflite::testing::TestDepthwiseConvFloat( - input_shape, input_values, filter_shape, filter_values, bias_shape, - bias_values, golden_relu, output_shape, kTfLiteActRelu, output_data); -} - -TF_LITE_MICRO_TEST(SimpleTestReluQuantized) { - const int input_elements = 12; - const int input_shape[] = {4, 1, 3, 2, 2}; - const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; - const int filter_elements = 16; - const int filter_shape[] = {4, 1, 2, 2, 4}; - const float filter_values[] = {1, 2, 3, 4, -9, 10, -11, 12, - 5, 6, 7, 8, 13, -14, 15, -16}; - const int bias_elements = 4; - const int bias_shape[] = {4, 1, 1, 1, 4}; - const int output_elements = 8; - const float bias_values[] = {1, 2, 3, 4}; - const int output_shape[] = {4, 1, 2, 1, 4}; - const int output_dims_count = 8; - const float golden_relu[] = {71, 0, 99, 0, 91, 0, 127, 0}; - - const float input_scale = 0.5f; - const int input_zero_point = 128; - const float filter_scale = 0.5f; - const int filter_zero_point = 128; - const float output_scale = 1.0f; - const int output_zero_point = 128; - - uint8_t input_quantized[input_elements]; - uint8_t filter_quantized[filter_elements]; - int32_t bias_quantized[bias_elements]; - uint8_t golden_quantized[output_elements]; - uint8_t output_data[output_elements]; - - tflite::testing::TestDepthwiseConvQuantizedPerLayer( - input_shape, input_values, input_quantized, input_scale, input_zero_point, - filter_shape, filter_values, filter_quantized, filter_scale, - filter_zero_point, bias_shape, bias_values, bias_quantized, golden_relu, - golden_quantized, output_shape, output_data, output_scale, - output_zero_point, kTfLiteActRelu); -} - -TF_LITE_MICRO_TEST(SimpleTestOptimizedFilterWidth) { - const int input_elements = 12; - const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; - const int filter_elements = 16; - const float filter_values[] = {1, 2, 3, 4, -9, 10, -11, 12, - 5, 6, 7, 8, 13, -14, 15, -16}; - const int 
bias_elements = 4; - const float bias_values[] = {1, 2, 3, 4}; - const int output_dims_count = 9; - const int input_shape[] = {4, 1, 1, 9, 1}; - const int filter_shape[] = {4, 2, 1, 8, 1}; - const int bias_shape[] = {1, 1}; - const float goldens[] = { - 92, 56, 12, 22, 33, 72, 44, 20, 5, - }; - const int output_shape[] = {4, 1, 1, 9, 1}; - - const float input_scale = 1.0f; - const int input_zero_point = 128; - const float filter_scale = 0.5f; - const int filter_zero_point = 128; - const float output_scale = 1.0f; - const int output_zero_point = 128; - - uint8_t input_quantized[input_elements]; - uint8_t filter_quantized[filter_elements]; - int32_t bias_quantized[bias_elements]; - uint8_t golden_quantized[output_dims_count]; - uint8_t output_data[output_dims_count]; - - tflite::testing::TestDepthwiseConvQuantizedPerLayer( - input_shape, input_values, input_quantized, input_scale, input_zero_point, - filter_shape, filter_values, filter_quantized, filter_scale, - filter_zero_point, bias_shape, bias_values, bias_quantized, goldens, - golden_quantized, output_shape, output_data, output_scale, - output_zero_point, kTfLiteActNone); -} - -TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannel) { - const int input_elements = 12; - const int input_shape[] = {4, 1, 3, 2, 2}; - const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; - const int filter_elements = 16; - const int filter_shape[] = {4, 1, 2, 2, 4}; - const float filter_values[] = {1, 2, 3, 4, -9, 10, -11, 12, - 5, 6, 7, 8, 13, -14, 15, -16}; - const int bias_elements = 4; - const int bias_shape[] = {4, 1, 1, 1, 4}; - const int output_elements = 8; - const float bias_values[] = {1, 2, 3, 4}; - const float golden[] = { - 71, -34, 99, -20, 91, -26, 127, -4, - }; - const int output_shape[] = {4, 1, 2, 1, 4}; - const int output_dims_count = 8; +// Test group 1 +TF_LITE_MICRO_TEST(SystemTestQuantizedPerChannel1) { + const int input_elements = 20; + const int input_shape[] = {4, 1, 5, 2, 2}; + const float 
input_values[] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}; + const int filter_elements = 36; + const int filter_shape[] = {4, 2, 3, 3, 2}; + const float filter_values[] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2}; + const int bias_elements = 2; + const int bias_shape[] = {4, 1, 1, 1, 2}; + const int output_elements = 20; + const float bias_values[] = {2, 2}; + const float golden[] = {34, 34, 34, 34, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 34, 34, 34, 34}; + const int output_shape[] = {4, 1, 5, 2, 2}; + const int output_dims_count = 20; int8_t output_data[output_dims_count]; - const float input_scale = 0.5; + const float input_scale = 1.0; const float output_scale = 1.0f; const int input_zero_point = 0; const int output_zero_point = 0; @@ -458,28 +223,188 @@ TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannel) { output_scale, output_zero_point, kTfLiteActNone); } -TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannelDepthMultiplier1) { - const int input_elements = 12; - const int input_shape[] = {4, 1, 3, 2, 2}; - const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; - const int filter_elements = 8; - const int filter_shape[] = {4, 1, 2, 2, 2}; - const float filter_values[] = {1, 2, 3, 4, -9, 10, -11, 12}; +TF_LITE_MICRO_TEST(LocalTestQuantizedPerChannel1) { + const int input_elements = 20; + const int input_shape[] = {4, 1, 5, 2, 2}; + const int filter_elements = 36; + const int filter_shape[] = {4, 2, 3, 3, 2}; const int bias_elements = 2; const int bias_shape[] = {4, 1, 1, 1, 2}; + const int output_elements = 20; + const int output_shape[] = {4, 1, 5, 2, 2}; + const int output_dims_count = 20; + +#pragma Bss(".Zdata") + const float input_values[] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}; + const float filter_values[] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 
2}; + const float bias_values[] = {2, 2}; + int8_t output_data[output_dims_count]; +#pragma Bss() + + const float golden[] = {34, 34, 34, 34, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 34, 34, 34, 34}; + + const float input_scale = 1.0; + const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + + int8_t input_quantized[input_elements]; + int8_t filter_quantized[filter_elements]; + int32_t bias_quantized[bias_elements]; + int8_t golden_quantized[output_elements]; + int zero_points[bias_elements + 1]; + float scales[bias_elements + 1]; + + tflite::testing::TestDepthwiseConvQuantizedPerChannel( + input_shape, input_values, input_quantized, input_scale, input_zero_point, + filter_shape, filter_values, filter_quantized, bias_shape, bias_values, + bias_quantized, output_shape, golden, golden_quantized, output_data, + output_scale, output_zero_point, kTfLiteActNone); +} + +// Test group 2 +TF_LITE_MICRO_TEST(SystemTestQuantizedPerChannel2) { + const int input_elements = 80; + const int input_shape[] = {4, 1, 20, 2, 2}; + const float input_values[] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}; + const int filter_elements = 36; + const int filter_shape[] = {4, 2, 3, 3, 2}; + const float filter_values[] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2}; + const int bias_elements = 2; + const int bias_shape[] = {4, 1, 1, 1, 2}; + const int output_elements = 80; + const float bias_values[] = {2, 2}; + const float golden[] = {34, 34, 34, 34, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 
50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 34, 34, 34, 34}; + const int output_shape[] = {4, 1, 20, 2, 2}; + const int output_dims_count = 80; + int8_t output_data[output_dims_count]; + + const float input_scale = 1.0; + const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + + int8_t input_quantized[input_elements]; + int8_t filter_quantized[filter_elements]; + int32_t bias_quantized[bias_elements]; + int8_t golden_quantized[output_elements]; + int zero_points[bias_elements + 1]; + float scales[bias_elements + 1]; + + tflite::testing::TestDepthwiseConvQuantizedPerChannel( + input_shape, input_values, input_quantized, input_scale, input_zero_point, + filter_shape, filter_values, filter_quantized, bias_shape, bias_values, + bias_quantized, output_shape, golden, golden_quantized, output_data, + output_scale, output_zero_point, kTfLiteActNone); +} + +TF_LITE_MICRO_TEST(LocalTestQuantizedPerChannel2) { + const int input_elements = 80; + const int input_shape[] = {4, 1, 20, 2, 2}; + const int filter_elements = 36; + const int filter_shape[] = {4, 2, 3, 3, 2}; + const int bias_elements = 2; + const int bias_shape[] = {4, 1, 1, 1, 2}; + const int output_elements = 80; + const int output_shape[] = {4, 1, 20, 2, 2}; + const int output_dims_count = 80; + +#pragma Bss(".Zdata") + float input_values[] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}; + float filter_values[] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2}; + float bias_values[] = {2, 2}; + int8_t output_data[output_dims_count]; +#pragma Bss() + + const float golden[] = {34, 34, 34, 34, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 
50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 34, 34, 34, 34}; + + const float input_scale = 1.0; + const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + + int8_t input_quantized[input_elements]; + int8_t filter_quantized[filter_elements]; + int32_t bias_quantized[bias_elements]; + int8_t golden_quantized[output_elements]; + int zero_points[bias_elements + 1]; + float scales[bias_elements + 1]; + + tflite::testing::TestDepthwiseConvQuantizedPerChannel( + input_shape, input_values, input_quantized, input_scale, input_zero_point, + filter_shape, filter_values, filter_quantized, bias_shape, bias_values, + bias_quantized, output_shape, golden, golden_quantized, output_data, + output_scale, output_zero_point, kTfLiteActNone); +} + +// Test group 3 +TF_LITE_MICRO_TEST(SystemTestQuantizedPerChannel3) { + const int input_elements = 40; + const int input_shape[] = {4, 1, 2, 2, 10}; + const float input_values[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + const int filter_elements = 90; + const int filter_shape[] = {4, 1, 3, 3, 10}; + const float filter_values[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + const int bias_elements = 1; + const int bias_shape[] = {4, 1, 1, 1, 1}; const int output_elements = 4; - const float bias_values[] = {1, 2}; - const float golden[] = { - -103, - 127, - -128, - 127, - }; - const int output_shape[] = {4, 1, 2, 1, 2}; + const float bias_values[] = {1}; + const float golden[] 
= {41, 41, 41, 41}; + const int output_shape[] = {4, 1, 2, 2, 1}; const int output_dims_count = 4; int8_t output_data[output_dims_count]; - const float input_scale = 1.0f; + const float input_scale = 1.0; const float output_scale = 1.0f; const int input_zero_point = 0; const int output_zero_point = 0; @@ -498,30 +423,41 @@ TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannelDepthMultiplier1) { output_scale, output_zero_point, kTfLiteActNone); } -TF_LITE_MICRO_TEST(TestQuantizedPerChannelDepthMultiplier1Relu6) { - const int input_elements = 24; - const int input_shape[] = {4, 1, 3, 2, 4}; - const float input_values[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - const int filter_elements = 16; - const int filter_shape[] = {4, 1, 2, 2, 4}; - const float filter_values[] = {0, 1, 8, -2, -1, 2, -10, 0, - -1, 3, -18, 0, 0, 4, 20, -3}; - const int bias_elements = 4; - const int bias_shape[] = {4, 1, 1, 1, 4}; - const int output_elements = 8; - const float bias_values[] = {1, 2, 3, 4}; - const float golden[] = { - 0, 6, 3, 0, 0, 6, 3, 0, - }; - const int output_shape[] = {4, 1, 2, 1, 4}; - int8_t output_data[output_elements]; - float output_float[output_elements]; +TF_LITE_MICRO_TEST(LocalTestQuantizedPerChannel3) { + const int input_elements = 40; + const int input_shape[] = {4, 1, 2, 2, 10}; + const int filter_elements = 90; + const int filter_shape[] = {4, 1, 3, 3, 10}; + const int bias_elements = 1; + const int bias_shape[] = {4, 1, 1, 1, 1}; + const int output_elements = 4; + const int output_shape[] = {4, 1, 2, 2, 1}; + const int output_dims_count = 4; - const float input_scale = 0.023529f; - const float output_scale = 0.023529f; - const int input_zero_point = -128; - const int output_zero_point = -128; +#pragma Bss(".Zdata") + float input_values[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + float filter_values[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + float bias_values[] = {1}; + int8_t output_data[output_dims_count]; +#pragma Bss() + + const float golden[] = {41, 41, 41, 41}; + + const float input_scale = 1.0; + const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; int8_t input_quantized[input_elements]; int8_t filter_quantized[filter_elements]; @@ -530,239 +466,115 @@ TF_LITE_MICRO_TEST(TestQuantizedPerChannelDepthMultiplier1Relu6) { int zero_points[bias_elements + 1]; float scales[bias_elements + 1]; - tflite::testing::TestDepthwiseConvFloat( - input_shape, input_values, filter_shape, filter_values, bias_shape, - bias_values, golden, output_shape, kTfLiteActRelu6, output_float); - tflite::testing::TestDepthwiseConvQuantizedPerChannel( input_shape, input_values, input_quantized, input_scale, input_zero_point, filter_shape, filter_values, filter_quantized, bias_shape, bias_values, bias_quantized, output_shape, golden, golden_quantized, output_data, - output_scale, output_zero_point, kTfLiteActRelu6); -} - -TF_LITE_MICRO_TEST(TestQuantizedPerChannelCompareWithFloat) { - const int input_dims[] = {4, 1, 2, 3, 2}; - const float input_data[] = {3, 2, 1, -1, -2, -3, 4, 3, 2, -2, -3, -4}; - const int filter_dims[] = {4, 1, 2, 2, 4}; - const float filter_data[] = {1, 2, 3, 4, 3, 4, 5, 6, 7, 8, 5, 6, 3, 4, 1, 2}; - const int bias_dims[] = {4, 1, 1, 1, 4}; - const float bias_data[] = {3, -2, 4, 6}; - const int output_dims[] = {4, 1, 1, 2, 4}; - const float golden[] = {43, 48, 18, 22, 3, -4, -28, -36}; - - const int input_size = 12; - const int filter_size = 16; - const int output_size = 8; - const int bias_size = 4; - int8_t input_quantized[input_size]; - int8_t filter_quantized[filter_size]; - int32_t 
bias_quantized[bias_size]; - int8_t golden_quantized[output_size]; - int zero_points[bias_size + 1]; - float scales[bias_size + 1]; - int8_t output_data[output_size]; - float output_float[output_size]; - - const float input_scale = 0.5; - const float output_scale = 1.0; - const int input_zero_point = 0; - const int output_zero_point = 0; - - tflite::testing::TestDepthwiseConvQuantizedPerChannel( - input_dims, input_data, input_quantized, input_scale, input_zero_point, - filter_dims, filter_data, filter_quantized, bias_dims, bias_data, - bias_quantized, output_dims, golden, golden_quantized, output_data, output_scale, output_zero_point, kTfLiteActNone); - - tflite::testing::TestDepthwiseConvFloat( - input_dims, input_data, filter_dims, filter_data, bias_dims, bias_data, - golden, output_dims, kTfLiteActNone, output_float); } -TF_LITE_MICRO_TEST(FilterDimsNotMatchingAffineQuantization) { - const int input_shape[] = {4, 1, 2, 3, 2}; - const float input_data[] = {3, 2, 1, -1, -2, -3, 4, 3, 2, -2, -3, -4}; - const int filter_shape[] = {4, 1, 2, 2, 4}; - const float filter_data[] = {1, 2, 3, 4, 3, 4, 5, 6, 7, 8, 5, 6, 3, 4, 1, 2}; - const int bias_shape[] = {4, 1, 1, 1, 4}; - const float bias_data[] = {3, -2, 4, 6}; - const int output_shape[] = {4, 1, 1, 2, 4}; - const float golden[] = {43, 48, 18, 22, 3, -4, -28, -36}; - - const int input_size = 12; - const int filter_size = 16; - const int output_size = 8; - const int bias_size = 4; - int8_t input_quantized[input_size]; - int8_t filter_quantized[filter_size]; - int32_t bias_quantized[bias_size]; - int8_t golden_quantized[output_size]; - int zero_points[bias_size + 1]; - float scales[bias_size + 1]; - int8_t output_data[output_size]; - float output_float[output_size]; - - const float input_scale = 0.5; - const float output_scale = 1.0; - const int input_zero_point = 0; - const int output_zero_point = 0; - - TfLiteIntArray* input_dims = tflite::testing::IntArrayFromInts(input_shape); - TfLiteIntArray* filter_dims = 
tflite::testing::IntArrayFromInts(filter_shape); - TfLiteIntArray* bias_dims = tflite::testing::IntArrayFromInts(bias_shape); - TfLiteIntArray* output_dims = tflite::testing::IntArrayFromInts(output_shape); - - int filter_zero_points[5]; - float filter_scales[5]; - TfLiteAffineQuantization filter_quant; - TfLiteAffineQuantization bias_quant; - TfLiteTensor input_tensor = tflite::testing::CreateQuantizedTensor( - input_data, input_quantized, input_dims, input_scale, input_zero_point, - "input_tensor"); - TfLiteTensor filter_tensor = - tflite::testing::CreateSymmetricPerChannelQuantizedTensor( - filter_data, filter_quantized, filter_dims, filter_scales, - filter_zero_points, &filter_quant, 0 /* quantized dimension */, - "filter_tensor"); - TfLiteTensor bias_tensor = - tflite::testing::CreatePerChannelQuantizedBiasTensor( - bias_data, bias_quantized, bias_dims, input_scale, &filter_scales[1], - scales, zero_points, &bias_quant, 0, "bias_tensor"); - TfLiteTensor output_tensor = tflite::testing::CreateQuantizedTensor( - output_data, output_dims, output_scale, output_zero_point, - "output_tensor"); - - float input_scales[] = {1, input_scale}; - int input_zero_points[] = {1, input_zero_point}; - TfLiteAffineQuantization input_quant = { - tflite::testing::FloatArrayFromFloats(input_scales), - tflite::testing::IntArrayFromInts(input_zero_points)}; - input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; - - constexpr int inputs_size = 3; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + outputs_size; - TfLiteTensor tensors[tensors_size] = { - input_tensor, - filter_tensor, - bias_tensor, - output_tensor, - }; - - // Set filter quant to mismatched dimension. 
- TfLiteAffineQuantization* quant = reinterpret_cast( - filter_tensor.quantization.params); - quant->scale->size = 2; - TF_LITE_MICRO_EXPECT_EQ( - kTfLiteError, tflite::testing::ValidateDepthwiseConvGoldens( - golden_quantized, output_size, kTfLiteActNone, 1e-5, - tensors_size, tensors)); - - // Set scale back to correct dimension, and make zero point array too short. - quant->scale->size = filter_shape[0]; - quant->zero_point->size = 2; - TF_LITE_MICRO_EXPECT_EQ( - kTfLiteError, tflite::testing::ValidateDepthwiseConvGoldens( - golden_quantized, output_size, kTfLiteActNone, 1e-5, - tensors_size, tensors)); -} - -TF_LITE_MICRO_TEST(PerChannelBroadcastQuantizationParams) { - const float input_scale = 1.0f; - const float filter_scale = 1.0f; - const float output_scale = 1.0f; - - const int input_elements = 12; - const int input_shape[] = {4, 1, 3, 2, 2}; - const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; - const int filter_elements = 16; - const int filter_shape[] = {4, 1, 2, 2, 4}; - const float filter_values[] = {1, 2, 3, 4, -9, 10, -11, 12, - 5, 6, 7, 8, 13, -14, 15, -16}; - const int bias_elements = 4; - const int bias_shape[] = {4, 1, 1, 1, 4}; +// Test group 4 +TF_LITE_MICRO_TEST(SystemTestQuantizedPerChannel4) { + const int input_elements = 80; + const int input_shape[] = {4, 1, 4, 2, 10}; + const float input_values[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + const int filter_elements = 90; + const int filter_shape[] = {4, 1, 3, 3, 10}; + const float filter_values[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1}; + const int bias_elements = 1; + const int bias_shape[] = {4, 1, 1, 1, 1}; const int output_elements = 8; - const float bias_values[] = {1, 2, 3, 4}; - const float golden[] = { - 71, -34, 99, -20, 91, -26, 127, -4, - }; - const int output_shape[] = {4, 1, 2, 1, 4}; + const float bias_values[] = {1}; + const float golden[] = {41, 41, 61, 61, 61, 61, 41, 41}; + const int output_shape[] = {4, 1, 4, 2, 1}; const int output_dims_count = 8; int8_t output_data[output_dims_count]; + const float input_scale = 1.0; + const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + int8_t input_quantized[input_elements]; int8_t filter_quantized[filter_elements]; int32_t bias_quantized[bias_elements]; int8_t golden_quantized[output_elements]; + int zero_points[bias_elements + 1]; + float scales[bias_elements + 1]; - TfLiteIntArray* input_dims = tflite::testing::IntArrayFromInts(input_shape); - TfLiteIntArray* filter_dims = tflite::testing::IntArrayFromInts(filter_shape); - TfLiteIntArray* bias_dims = tflite::testing::IntArrayFromInts(bias_shape); - TfLiteIntArray* output_dims = tflite::testing::IntArrayFromInts(output_shape); - - // Create per-layer quantized int8 input tensor. - TfLiteTensor input_tensor = tflite::testing::CreateQuantizedTensor( - input_values, input_quantized, input_dims, input_scale, 0, - "input_tensor"); - int input_zero_points[2] = {1, 0}; - float input_scales[2] = {1, input_scale}; - TfLiteAffineQuantization input_quant = { - tflite::testing::FloatArrayFromFloats(input_scales), - tflite::testing::IntArrayFromInts(input_zero_points)}; - input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; - - // Create per-layer quantized int8 filter tensor. 
- TfLiteTensor filter_tensor = tflite::testing::CreateQuantizedTensor( - filter_values, filter_quantized, filter_dims, filter_scale, 0, - "filter_tensor"); - int filter_zero_points[2] = {1, 0}; - float filter_scales[2] = {1, filter_scale}; - TfLiteAffineQuantization filter_quant = { - tflite::testing::FloatArrayFromFloats(filter_scales), - tflite::testing::IntArrayFromInts(filter_zero_points)}; - filter_tensor.quantization = {kTfLiteAffineQuantization, &filter_quant}; - - // Create per-layer quantized int32 bias tensor. - tflite::SymmetricQuantize(bias_values, bias_quantized, bias_elements, - input_scale * output_scale); - TfLiteTensor bias_tensor = tflite::testing::CreateInt32Tensor( - bias_quantized, bias_dims, "bias_tensor"); - - int bias_zero_points[2] = {1, 0}; - float bias_scales[2] = {1, input_scale * filter_scale}; - TfLiteAffineQuantization bias_quant = { - tflite::testing::FloatArrayFromFloats(bias_scales), - tflite::testing::IntArrayFromInts(bias_zero_points)}; - bias_tensor.quantization = {kTfLiteAffineQuantization, &bias_quant}; - - // Create per-layer quantized int8 output tensor. 
- TfLiteTensor output_tensor = tflite::testing::CreateQuantizedTensor( - output_data, output_dims, output_scale, 0, "output_tensor"); - int output_zero_points[2] = {1, 0}; - float output_scales[2] = {1, output_scale}; - TfLiteAffineQuantization output_quant = { - tflite::testing::FloatArrayFromFloats(output_scales), - tflite::testing::IntArrayFromInts(output_zero_points)}; - output_tensor.quantization = {kTfLiteAffineQuantization, &output_quant}; - - constexpr int inputs_size = 3; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + outputs_size; - TfLiteTensor tensors[tensors_size] = { - input_tensor, - filter_tensor, - bias_tensor, - output_tensor, - }; - - tflite::AsymmetricQuantize(golden, golden_quantized, output_dims_count, - output_scale, 0); - - TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, tflite::testing::ValidateDepthwiseConvGoldens( - golden_quantized, output_dims_count, kTfLiteActNone, 1e-5, - tensors_size, tensors)); + tflite::testing::TestDepthwiseConvQuantizedPerChannel( + input_shape, input_values, input_quantized, input_scale, input_zero_point, + filter_shape, filter_values, filter_quantized, bias_shape, bias_values, + bias_quantized, output_shape, golden, golden_quantized, output_data, + output_scale, output_zero_point, kTfLiteActNone); } +TF_LITE_MICRO_TEST(LocalTestQuantizedPerChannel4) { + const int input_elements = 80; + const int input_shape[] = {4, 1, 4, 2, 10}; + const int filter_elements = 90; + const int filter_shape[] = {4, 1, 3, 3, 10}; + const int bias_elements = 1; + const int bias_shape[] = {4, 1, 1, 1, 1}; + const int output_elements = 8; + const int output_shape[] = {4, 1, 4, 2, 1}; + const int output_dims_count = 8; + +#pragma Bss(".Zdata") + float input_values[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1}; + float filter_values[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + float bias_values[] = {1}; + int8_t output_data[output_dims_count]; +#pragma Bss() + + const float golden[] = {41, 41, 61, 61, 61, 61, 41, 41}; + + const float input_scale = 1.0; + const float output_scale = 1.0f; + const int input_zero_point = 0; + const int output_zero_point = 0; + + int8_t input_quantized[input_elements]; + int8_t filter_quantized[filter_elements]; + int32_t bias_quantized[bias_elements]; + int8_t golden_quantized[output_elements]; + int zero_points[bias_elements + 1]; + float scales[bias_elements + 1]; + + tflite::testing::TestDepthwiseConvQuantizedPerChannel( + input_shape, input_values, input_quantized, input_scale, input_zero_point, + filter_shape, filter_values, filter_quantized, bias_shape, bias_values, + bias_quantized, output_shape, golden, golden_quantized, output_data, + output_scale, output_zero_point, kTfLiteActNone); +} TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/kernels/arc_mli/fully_connected_slicing_test.cc b/tensorflow/lite/micro/kernels/arc_mli/fully_connected_slicing_test.cc index 539c7ecc3a4..78cb2873c54 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/fully_connected_slicing_test.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/fully_connected_slicing_test.cc @@ -25,74 +25,6 @@ namespace tflite { namespace testing { namespace { -void TestFullyConnectedFloat( - const int* input_dims_data, const float* input_data, - const int* weights_dims_data, const float* weights_data, - const int* bias_dims_data, const float* bias_data, - const float* expected_output_data, const int* output_dims_data, - TfLiteFusedActivation activation, float* output_data) { - TfLiteIntArray* input_dims = 
IntArrayFromInts(input_dims_data); - TfLiteIntArray* weights_dims = IntArrayFromInts(weights_dims_data); - TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); - const int output_dims_count = ElementCount(*output_dims); - - constexpr int inputs_size = 3; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + outputs_size; - TfLiteTensor tensors[tensors_size] = { - CreateFloatTensor(input_data, input_dims, "input_tensor"), - CreateFloatTensor(weights_data, weights_dims, "weights_tensor"), - CreateFloatTensor(bias_data, bias_dims, "bias_tensor"), - CreateFloatTensor(output_data, output_dims, "output_tensor"), - }; - - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - ::tflite::ops::micro::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_FULLY_CONNECTED, 1); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - TfLiteFullyConnectedParams builtin_data = { - activation, - kTfLiteFullyConnectedWeightsFormatDefault, - }; - const char* init_data = reinterpret_cast(&builtin_data); - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } - int inputs_array_data[] = {3, 0, 1, 2}; - TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - int outputs_array_data[] = {1, 3}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - int temporaries_array_data[] = {0}; - TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data); - - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.temporaries = temporaries_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - node.delegate = 
nullptr; - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } - for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], 1e-5f); - } -} - template void TestFullyConnectedQuantized( const int* input_dims_data, const T* input_data, const float input_min, @@ -121,6 +53,10 @@ void TestFullyConnectedQuantized( output_min, output_max), }; + tensors[0].params.zero_point = 0; + tensors[1].params.zero_point = 0; + tensors[3].params.zero_point = 0; + TfLiteContext context; PopulateContext(tensors, tensors_size, micro_test::reporter, &context); @@ -176,466 +112,23 @@ void TestFullyConnectedQuantized( TF_LITE_MICRO_TESTS_BEGIN -TF_LITE_MICRO_TEST(SimpleTest) { - const int input_dims_data[] = {2, 2, 10}; - const float input_data[] = { - 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 - 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 - }; - const int weights_dims_data[] = {2, 3, 10}; - const float weights_data[] = { - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 - }; - const int bias_dims_data[] = {1, 3}; - const float bias_data[] = {1, 2, 3}; - const float expected_output_data[] = { - 24, 25, 26, 58, 59, 60, - }; - const int output_dims_data[] = {2, 2, 3}; - - const int output_dims_count = 6; - float output_data[output_dims_count]; - tflite::testing::TestFullyConnectedFloat( - input_dims_data, input_data, weights_dims_data, weights_data, - bias_dims_data, bias_data, expected_output_data, output_dims_data, - kTfLiteActNone, output_data); -} - -TF_LITE_MICRO_TEST(SimpleTest2) { - const int input_dims_data[] = {2, 2, 2}; - const float input_data[] = { - 1, 2, // b = 0 - 2, 1, // b = 1 - }; - 
const int weights_dims_data[] = {2, 1, 2}; - const float weights_data[] = { - 2, 4, // u = 0 - }; - const int bias_dims_data[] = {1, 1}; - const float bias_data[] = {1}; - const float expected_output_data[] = { - 11, - 9, - }; - const int output_dims_data[] = {2, 2, 1}; - - const int output_dims_count = 6; - float output_data[output_dims_count]; - tflite::testing::TestFullyConnectedFloat( - input_dims_data, input_data, weights_dims_data, weights_data, - bias_dims_data, bias_data, expected_output_data, output_dims_data, - kTfLiteActNone, output_data); -} - -TF_LITE_MICRO_TEST(SimpleTestRelu) { - const int input_dims_data[] = {2, 2, 10}; - const float input_data[] = { - 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 - 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 - }; - const int weights_dims_data[] = {2, 3, 10}; - const float weights_data[] = { - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 - -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, // u = 1 - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 - }; - const int bias_dims_data[] = {1, 3}; - const float bias_data[] = {1, -2, 3}; - const float expected_output_data[] = { - 24, 0, 26, 58, 0, 60, - }; - const int output_dims_data[] = {2, 2, 3}; - - const int output_dims_count = 6; - float output_data[output_dims_count]; - tflite::testing::TestFullyConnectedFloat( - input_dims_data, input_data, weights_dims_data, weights_data, - bias_dims_data, bias_data, expected_output_data, output_dims_data, - kTfLiteActRelu, output_data); -} - -TF_LITE_MICRO_TEST(SimpleTestQuantizedUInt8) { - using tflite::testing::F2Q; - using tflite::testing::F2Q32; - - const float input_min = -63.5f; - const float input_max = 64.0f; - const float weights_min = -63.5f; - const float weights_max = 64.0f; - const float bias_scale = 0.25f; - const float output_min = -127.0f; - const float output_max = 128.0f; - - const int input_dims_data[] = {2, 2, 10}; - const uint8_t input_data[] = { - F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), - F2Q(3, input_min, input_max), 
F2Q(4, input_min, input_max), - F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), - F2Q(7, input_min, input_max), F2Q(8, input_min, input_max), - F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max), - F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), - F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), - F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), - F2Q(7, input_min, input_max), F2Q(-8, input_min, input_max), - F2Q(9, input_min, input_max), F2Q(-10, input_min, input_max), - }; - const int weights_dims_data[] = {2, 3, 10}; - const uint8_t weights_data[] = { - F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), - F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), - F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), - F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), - F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), - F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), - F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), - F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), - F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), - F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), - F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), - F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), - F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), - F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), - F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), - }; - const int bias_dims_data[] = {1, 3}; - const int32_t bias_data[] = { - F2Q32(1, bias_scale), - F2Q32(2, bias_scale), - F2Q32(3, bias_scale), - }; - const uint8_t expected_output_data[] = { - F2Q(24, output_min, output_max), F2Q(25, output_min, output_max), - F2Q(26, output_min, output_max), F2Q(58, output_min, 
output_max), - F2Q(59, output_min, output_max), F2Q(60, output_min, output_max), - }; - const int output_dims_data[] = {2, 2, 3}; - - const int output_dims_count = 6; - uint8_t output_data[output_dims_count]; - tflite::testing::TestFullyConnectedQuantized( - input_dims_data, input_data, input_min, input_max, weights_dims_data, - weights_data, weights_min, weights_max, bias_dims_data, bias_data, - bias_scale, expected_output_data, output_dims_data, output_min, - output_max, kTfLiteActNone, output_data); -} - -// TODO(b/138811455): Fix code duplication in micro tests -TF_LITE_MICRO_TEST(SimpleTestQuantizedInt8) { - using tflite::testing::F2Q32; - using tflite::testing::F2QS; - - const float input_min = -63.5f; - const float input_max = 64.0f; - const float weights_min = -64.0f; - const float weights_max = 63.5f; - const float bias_scale = 0.25f; - const float output_min = -127.0f; - const float output_max = 128.0f; - - const int input_dims_data[] = {2, 2, 10}; - const int8_t input_data[] = { - F2QS(1, input_min, input_max), F2QS(2, input_min, input_max), - F2QS(3, input_min, input_max), F2QS(4, input_min, input_max), - F2QS(5, input_min, input_max), F2QS(6, input_min, input_max), - F2QS(7, input_min, input_max), F2QS(8, input_min, input_max), - F2QS(-9, input_min, input_max), F2QS(-10, input_min, input_max), - F2QS(1, input_min, input_max), F2QS(2, input_min, input_max), - F2QS(3, input_min, input_max), F2QS(4, input_min, input_max), - F2QS(5, input_min, input_max), F2QS(6, input_min, input_max), - F2QS(7, input_min, input_max), F2QS(-8, input_min, input_max), - F2QS(9, input_min, input_max), F2QS(-10, input_min, input_max), - }; - const int weights_dims_data[] = {2, 3, 10}; - const int8_t weights_data[] = { - F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), - F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), - F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), - F2QS(7, weights_min, weights_max), F2QS(8, 
weights_min, weights_max), - F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), - F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), - F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), - F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), - F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), - F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), - F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), - F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), - F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), - F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), - F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), - }; - const int bias_dims_data[] = {1, 3}; - const int32_t bias_data[] = { - F2Q32(1, bias_scale), - F2Q32(2, bias_scale), - F2Q32(3, bias_scale), - }; - const int8_t expected_output_data[] = { - F2QS(24, output_min, output_max), F2QS(25, output_min, output_max), - F2QS(26, output_min, output_max), F2QS(58, output_min, output_max), - F2QS(59, output_min, output_max), F2QS(60, output_min, output_max), - }; - const int output_dims_data[] = {2, 2, 3}; - - const int output_dims_count = 6; - int8_t output_data[output_dims_count]; - tflite::testing::TestFullyConnectedQuantized( - input_dims_data, input_data, input_min, input_max, weights_dims_data, - weights_data, weights_min, weights_max, bias_dims_data, bias_data, - bias_scale, expected_output_data, output_dims_data, output_min, - output_max, kTfLiteActNone, output_data); -} - -TF_LITE_MICRO_TEST(SimpleTestQuantizedUInt8Relu) { - using tflite::testing::F2Q; - using tflite::testing::F2Q32; - - const float input_min = -63.5f; - const float input_max = 64.0f; - const float weights_min = -63.5f; - const float weights_max = 64.0f; - const float bias_scale = 0.25f; - const float output_min = -127.0f; - const float 
output_max = 128.0f; - - const int input_dims_data[] = {2, 2, 10}; - const uint8_t input_data[] = { - F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), - F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), - F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), - F2Q(7, input_min, input_max), F2Q(8, input_min, input_max), - F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max), - F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), - F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), - F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), - F2Q(7, input_min, input_max), F2Q(-8, input_min, input_max), - F2Q(9, input_min, input_max), F2Q(-10, input_min, input_max), - }; - const int weights_dims_data[] = {2, 3, 10}; - const uint8_t weights_data[] = { - F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), - F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), - F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), - F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), - F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), - F2Q(-1, weights_min, weights_max), F2Q(-2, weights_min, weights_max), - F2Q(-3, weights_min, weights_max), F2Q(-4, weights_min, weights_max), - F2Q(-5, weights_min, weights_max), F2Q(-6, weights_min, weights_max), - F2Q(-7, weights_min, weights_max), F2Q(-8, weights_min, weights_max), - F2Q(-9, weights_min, weights_max), F2Q(-10, weights_min, weights_max), - F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), - F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), - F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), - F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), - F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), - }; - const int bias_dims_data[] = {1, 3}; - const int32_t bias_data[] = { - F2Q32(1, bias_scale), - F2Q32(0, bias_scale), 
- F2Q32(3, bias_scale), - }; - const uint8_t expected_output_data[] = { - F2Q(24, output_min, output_max), F2Q(0, output_min, output_max), - F2Q(26, output_min, output_max), F2Q(58, output_min, output_max), - F2Q(0, output_min, output_max), F2Q(60, output_min, output_max), - }; - const int output_dims_data[] = {2, 2, 3}; - - const int output_dims_count = 6; - uint8_t output_data[output_dims_count]; - tflite::testing::TestFullyConnectedQuantized( - input_dims_data, input_data, input_min, input_max, weights_dims_data, - weights_data, weights_min, weights_max, bias_dims_data, bias_data, - bias_scale, expected_output_data, output_dims_data, output_min, - output_max, kTfLiteActRelu, output_data); -} - -TF_LITE_MICRO_TEST(SimpleTestQuantizedInt8Relu) { - using tflite::testing::F2Q32; - using tflite::testing::F2QS; - - const float input_min = -63.5f; - const float input_max = 64.0f; - const float weights_min = -64.0f; - const float weights_max = 63.5f; - const float bias_scale = 0.25f; - const float output_min = -127.0f; - const float output_max = 128.0f; - - const int input_dims_data[] = {2, 2, 10}; - const int8_t input_data[] = { - F2QS(1, input_min, input_max), F2QS(2, input_min, input_max), - F2QS(3, input_min, input_max), F2QS(4, input_min, input_max), - F2QS(5, input_min, input_max), F2QS(6, input_min, input_max), - F2QS(7, input_min, input_max), F2QS(8, input_min, input_max), - F2QS(-9, input_min, input_max), F2QS(-10, input_min, input_max), - F2QS(1, input_min, input_max), F2QS(2, input_min, input_max), - F2QS(3, input_min, input_max), F2QS(4, input_min, input_max), - F2QS(5, input_min, input_max), F2QS(6, input_min, input_max), - F2QS(7, input_min, input_max), F2QS(-8, input_min, input_max), - F2QS(9, input_min, input_max), F2QS(-10, input_min, input_max), - }; - const int weights_dims_data[] = {2, 3, 10}; - const int8_t weights_data[] = { - F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), - F2QS(3, weights_min, weights_max), F2QS(4, 
weights_min, weights_max), - F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), - F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), - F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), - F2QS(-1, weights_min, weights_max), F2QS(-2, weights_min, weights_max), - F2QS(-3, weights_min, weights_max), F2QS(-4, weights_min, weights_max), - F2QS(-5, weights_min, weights_max), F2QS(-6, weights_min, weights_max), - F2QS(-7, weights_min, weights_max), F2QS(-8, weights_min, weights_max), - F2QS(-9, weights_min, weights_max), F2QS(-10, weights_min, weights_max), - F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), - F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), - F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), - F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), - F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), - }; - const int bias_dims_data[] = {1, 3}; - const int32_t bias_data[] = { - F2Q32(1, bias_scale), - F2Q32(0, bias_scale), - F2Q32(3, bias_scale), - }; - const int8_t expected_output_data[] = { - F2QS(24, output_min, output_max), F2QS(0, output_min, output_max), - F2QS(26, output_min, output_max), F2QS(58, output_min, output_max), - F2QS(0, output_min, output_max), F2QS(60, output_min, output_max), - }; - const int output_dims_data[] = {2, 2, 3}; - - const int output_dims_count = 6; - int8_t output_data[output_dims_count]; - tflite::testing::TestFullyConnectedQuantized( - input_dims_data, input_data, input_min, input_max, weights_dims_data, - weights_data, weights_min, weights_max, bias_dims_data, bias_data, - bias_scale, expected_output_data, output_dims_data, output_min, - output_max, kTfLiteActRelu, output_data); -} - -TF_LITE_MICRO_TEST(SimpleTestQuantizedUInt8OutputMultiplierGreaterThan1) { - using tflite::testing::F2Q; - using tflite::testing::F2Q32; - - const float input_min = -127.0f; - const float 
input_max = 128.0f; - const float weights_min = -127.0f; - const float weights_max = 128.0f; - const float bias_scale = 1.0f; - const float output_min = -63.5f; - const float output_max = 64.0f; - - const int input_dims_data[] = {2, 2, 10}; - const uint8_t input_data[] = { - F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), - F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), - F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), - F2Q(7, input_min, input_max), F2Q(8, input_min, input_max), - F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max), - F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), - F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), - F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), - F2Q(7, input_min, input_max), F2Q(-8, input_min, input_max), - F2Q(9, input_min, input_max), F2Q(-10, input_min, input_max), - }; - const int weights_dims_data[] = {2, 3, 10}; - const uint8_t weights_data[] = { - F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), - F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), - F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), - F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), - F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), - F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), - F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), - F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), - F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), - F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), - F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), - F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), - F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), - F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), - F2Q(9, weights_min, 
weights_max), F2Q(10, weights_min, weights_max), - }; - const int bias_dims_data[] = {1, 3}; - const int32_t bias_data[] = { - F2Q32(1, bias_scale), - F2Q32(2, bias_scale), - F2Q32(3, bias_scale), - }; - const uint8_t expected_output_data[] = { - F2Q(24, output_min, output_max), F2Q(25, output_min, output_max), - F2Q(26, output_min, output_max), F2Q(58, output_min, output_max), - F2Q(59, output_min, output_max), F2Q(60, output_min, output_max), - }; - const int output_dims_data[] = {2, 2, 3}; - - const int output_dims_count = 6; - uint8_t output_data[output_dims_count]; - tflite::testing::TestFullyConnectedQuantized( - input_dims_data, input_data, input_min, input_max, weights_dims_data, - weights_data, weights_min, weights_max, bias_dims_data, bias_data, - bias_scale, expected_output_data, output_dims_data, output_min, - output_max, kTfLiteActNone, output_data); -} - -TF_LITE_MICRO_TEST(SimpleTestQuantizedInt8OutputMultiplierGreaterThan1) { - using tflite::testing::F2Q32; - using tflite::testing::F2QS; - - const float input_min = -127.0f; - const float input_max = 128.0f; +// Test group 1 +TF_LITE_MICRO_TEST(SystemSimpleTestQuantized1) { + const float input_min = -128.0f; + const float input_max = 127.0f; const float weights_min = -128.0f; const float weights_max = 127.0f; const float bias_scale = 1.0f; - const float output_min = -63.5f; - const float output_max = 64.0f; + const float output_min = -128.0f; + const float output_max = 127.0f; const int input_dims_data[] = {2, 2, 10}; - const int8_t input_data[] = { - F2QS(1, input_min, input_max), F2QS(2, input_min, input_max), - F2QS(3, input_min, input_max), F2QS(4, input_min, input_max), - F2QS(5, input_min, input_max), F2QS(6, input_min, input_max), - F2QS(7, input_min, input_max), F2QS(8, input_min, input_max), - F2QS(-9, input_min, input_max), F2QS(-10, input_min, input_max), - F2QS(1, input_min, input_max), F2QS(2, input_min, input_max), - F2QS(3, input_min, input_max), F2QS(4, input_min, input_max), - 
F2QS(5, input_min, input_max), F2QS(6, input_min, input_max), - F2QS(7, input_min, input_max), F2QS(-8, input_min, input_max), - F2QS(9, input_min, input_max), F2QS(-10, input_min, input_max), - }; + const int8_t input_data[] = {2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2}; const int weights_dims_data[] = {2, 3, 10}; - const int8_t weights_data[] = { - F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), - F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), - F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), - F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), - F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), - F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), - F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), - F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), - F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), - F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), - F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), - F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), - F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), - F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), - F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), - }; + const int8_t weights_data[] = {2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2}; const int bias_dims_data[] = {1, 3}; - const int32_t bias_data[] = { - F2Q32(1, bias_scale), - F2Q32(2, bias_scale), - F2Q32(3, bias_scale), - }; - const int8_t expected_output_data[] = { - F2QS(24, output_min, output_max), F2QS(25, output_min, output_max), - F2QS(26, output_min, output_max), F2QS(58, output_min, output_max), - F2QS(59, output_min, output_max), F2QS(60, output_min, output_max), - }; + const int32_t bias_data[] = {1,1,1}; + const int8_t 
expected_output_data[] = {41,41,41,41,41,41}; const int output_dims_data[] = {2, 2, 3}; const int output_dims_count = 6; @@ -647,292 +140,273 @@ TF_LITE_MICRO_TEST(SimpleTestQuantizedInt8OutputMultiplierGreaterThan1) { output_max, kTfLiteActNone, output_data); } -TF_LITE_MICRO_TEST(SimpleTest4DInput) { - const int input_dims_data[] = {4, 1, 1, 5, 1}; - const float input_data[] = { - 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 - 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 - }; - const int weights_dims_data[] = {2, 3, 10}; - const float weights_data[] = { - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 - }; - const int bias_dims_data[] = {1, 3}; - const float bias_data[] = {1, 2, 3}; - const float expected_output_data[] = { - 24, 25, 26, 58, 59, 60, // Expected results. - }; - const int output_dims_data[] = {2, 2, 3}; - - const int output_dims_count = 6; - float output_data[output_dims_count]; - tflite::testing::TestFullyConnectedFloat( - input_dims_data, input_data, weights_dims_data, weights_data, - bias_dims_data, bias_data, expected_output_data, output_dims_data, - kTfLiteActNone, output_data); -} - -TF_LITE_MICRO_TEST(SimpleTest4DInputQuantizedUInt8) { - using tflite::testing::F2Q; - using tflite::testing::F2Q32; - - const float input_min = -63.5f; - const float input_max = 64.0f; - const float weights_min = -63.5f; - const float weights_max = 64.0f; - const float bias_scale = 0.25f; - const float output_min = -127.0f; - const float output_max = 128.0f; - - const int input_dims_data[] = {4, 1, 1, 5, 1}; - const uint8_t input_data[] = { - F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), - F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), - F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), - F2Q(7, input_min, input_max), F2Q(8, input_min, input_max), - F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max), - F2Q(1, input_min, input_max), F2Q(2, 
input_min, input_max), - F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), - F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), - F2Q(7, input_min, input_max), F2Q(-8, input_min, input_max), - F2Q(9, input_min, input_max), F2Q(-10, input_min, input_max), - }; - const int weights_dims_data[] = {2, 3, 10}; - const uint8_t weights_data[] = { - F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), - F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), - F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), - F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), - F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), - F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), - F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), - F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), - F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), - F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), - F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), - F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), - F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), - F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), - F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), - }; - const int bias_dims_data[] = {1, 3}; - const int32_t bias_data[] = { - F2Q32(1, bias_scale), - F2Q32(2, bias_scale), - F2Q32(3, bias_scale), - }; - const uint8_t expected_output_data[] = { - F2Q(24, output_min, output_max), F2Q(25, output_min, output_max), - F2Q(26, output_min, output_max), F2Q(58, output_min, output_max), - F2Q(59, output_min, output_max), F2Q(60, output_min, output_max), - }; - const int output_dims_data[] = {2, 2, 3}; - - const int output_dims_count = 6; - uint8_t output_data[output_dims_count]; - tflite::testing::TestFullyConnectedQuantized( - 
input_dims_data, input_data, input_min, input_max, weights_dims_data, - weights_data, weights_min, weights_max, bias_dims_data, bias_data, - bias_scale, expected_output_data, output_dims_data, output_min, - output_max, kTfLiteActNone, output_data); -} - -TF_LITE_MICRO_TEST(SimpleTest4DInputQuantizedInt8) { - using tflite::testing::F2Q32; - using tflite::testing::F2QS; - - const float input_min = -63.5f; - const float input_max = 64.0f; - const float weights_min = -64.0f; - const float weights_max = 63.5f; - const float bias_scale = 0.25f; - const float output_min = -127.0f; - const float output_max = 128.0f; - - const int input_dims_data[] = {4, 1, 1, 5, 1}; - const int8_t input_data[] = { - F2QS(1, input_min, input_max), F2QS(2, input_min, input_max), - F2QS(3, input_min, input_max), F2QS(4, input_min, input_max), - F2QS(5, input_min, input_max), F2QS(6, input_min, input_max), - F2QS(7, input_min, input_max), F2QS(8, input_min, input_max), - F2QS(-9, input_min, input_max), F2QS(-10, input_min, input_max), - F2QS(1, input_min, input_max), F2QS(2, input_min, input_max), - F2QS(3, input_min, input_max), F2QS(4, input_min, input_max), - F2QS(5, input_min, input_max), F2QS(6, input_min, input_max), - F2QS(7, input_min, input_max), F2QS(-8, input_min, input_max), - F2QS(9, input_min, input_max), F2QS(-10, input_min, input_max), - }; - const int weights_dims_data[] = {2, 3, 10}; - const int8_t weights_data[] = { - F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), - F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), - F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), - F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), - F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), - F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), - F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), - F2QS(5, weights_min, weights_max), F2QS(6, weights_min, 
weights_max), - F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), - F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), - F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), - F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), - F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), - F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), - F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), - }; - const int bias_dims_data[] = {1, 3}; - const int32_t bias_data[] = { - F2Q32(1, bias_scale), - F2Q32(2, bias_scale), - F2Q32(3, bias_scale), - }; - const int8_t expected_output_data[] = { - F2QS(24, output_min, output_max), F2QS(25, output_min, output_max), - F2QS(26, output_min, output_max), F2QS(58, output_min, output_max), - F2QS(59, output_min, output_max), F2QS(60, output_min, output_max), - }; - const int output_dims_data[] = {2, 2, 3}; - - const int output_dims_count = 6; - int8_t output_data[output_dims_count]; - tflite::testing::TestFullyConnectedQuantized( - input_dims_data, input_data, input_min, input_max, weights_dims_data, - weights_data, weights_min, weights_max, bias_dims_data, bias_data, - bias_scale, expected_output_data, output_dims_data, output_min, - output_max, kTfLiteActNone, output_data); -} - -TF_LITE_MICRO_TEST( - SimpleTest4DInputQuantizedUInt8OutputMultiplierGreaterThan1) { - using tflite::testing::F2Q; - using tflite::testing::F2Q32; - - const float input_min = -127.0f; - const float input_max = 128.0f; - const float weights_min = -127.0f; - const float weights_max = 128.0f; - const float bias_scale = 1.0f; - const float output_min = -63.5f; - const float output_max = 64.0f; - - const int input_dims_data[] = {4, 1, 1, 5, 1}; - const uint8_t input_data[] = { - F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), - F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), - F2Q(5, input_min, input_max), F2Q(6, 
input_min, input_max), - F2Q(7, input_min, input_max), F2Q(8, input_min, input_max), - F2Q(-9, input_min, input_max), F2Q(-10, input_min, input_max), - F2Q(1, input_min, input_max), F2Q(2, input_min, input_max), - F2Q(3, input_min, input_max), F2Q(4, input_min, input_max), - F2Q(5, input_min, input_max), F2Q(6, input_min, input_max), - F2Q(7, input_min, input_max), F2Q(-8, input_min, input_max), - F2Q(9, input_min, input_max), F2Q(-10, input_min, input_max), - }; - const int weights_dims_data[] = {2, 3, 10}; - const uint8_t weights_data[] = { - F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), - F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), - F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), - F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), - F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), - F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), - F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), - F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), - F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), - F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), - F2Q(1, weights_min, weights_max), F2Q(2, weights_min, weights_max), - F2Q(3, weights_min, weights_max), F2Q(4, weights_min, weights_max), - F2Q(5, weights_min, weights_max), F2Q(6, weights_min, weights_max), - F2Q(7, weights_min, weights_max), F2Q(8, weights_min, weights_max), - F2Q(9, weights_min, weights_max), F2Q(10, weights_min, weights_max), - }; - const int bias_dims_data[] = {1, 3}; - const int32_t bias_data[] = { - F2Q32(1, bias_scale), - F2Q32(2, bias_scale), - F2Q32(3, bias_scale), - }; - const uint8_t expected_output_data[] = { - F2Q(24, output_min, output_max), F2Q(25, output_min, output_max), - F2Q(26, output_min, output_max), F2Q(58, output_min, output_max), - F2Q(59, output_min, output_max), F2Q(60, output_min, 
output_max), - }; - const int output_dims_data[] = {2, 2, 3}; - - const int output_dims_count = 6; - uint8_t output_data[output_dims_count]; - tflite::testing::TestFullyConnectedQuantized( - input_dims_data, input_data, input_min, input_max, weights_dims_data, - weights_data, weights_min, weights_max, bias_dims_data, bias_data, - bias_scale, expected_output_data, output_dims_data, output_min, - output_max, kTfLiteActNone, output_data); -} - -TF_LITE_MICRO_TEST(SimpleTest4DInputQuantizedInt8OutputMultiplierGreaterThan1) { - using tflite::testing::F2Q32; - using tflite::testing::F2QS; - - const float input_min = -127.0f; - const float input_max = 128.0f; +TF_LITE_MICRO_TEST(LocalSimpleTestQuantized1) { + const float input_min = -128.0f; + const float input_max = 127.0f; const float weights_min = -128.0f; const float weights_max = 127.0f; const float bias_scale = 1.0f; - const float output_min = -63.5f; - const float output_max = 64.0f; + const float output_min = -128.0f; + const float output_max = 127.0f; - const int input_dims_data[] = {4, 1, 1, 5, 1}; - const int8_t input_data[] = { - F2QS(1, input_min, input_max), F2QS(2, input_min, input_max), - F2QS(3, input_min, input_max), F2QS(4, input_min, input_max), - F2QS(5, input_min, input_max), F2QS(6, input_min, input_max), - F2QS(7, input_min, input_max), F2QS(8, input_min, input_max), - F2QS(-9, input_min, input_max), F2QS(-10, input_min, input_max), - F2QS(1, input_min, input_max), F2QS(2, input_min, input_max), - F2QS(3, input_min, input_max), F2QS(4, input_min, input_max), - F2QS(5, input_min, input_max), F2QS(6, input_min, input_max), - F2QS(7, input_min, input_max), F2QS(-8, input_min, input_max), - F2QS(9, input_min, input_max), F2QS(-10, input_min, input_max), - }; - const int weights_dims_data[] = {2, 3, 10}; - const int8_t weights_data[] = { - F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), - F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), - F2QS(5, weights_min, 
weights_max), F2QS(6, weights_min, weights_max), - F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), - F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), - F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), - F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), - F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), - F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), - F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), - F2QS(1, weights_min, weights_max), F2QS(2, weights_min, weights_max), - F2QS(3, weights_min, weights_max), F2QS(4, weights_min, weights_max), - F2QS(5, weights_min, weights_max), F2QS(6, weights_min, weights_max), - F2QS(7, weights_min, weights_max), F2QS(8, weights_min, weights_max), - F2QS(9, weights_min, weights_max), F2QS(10, weights_min, weights_max), - }; - const int bias_dims_data[] = {1, 3}; - const int32_t bias_data[] = { - F2Q32(1, bias_scale), - F2Q32(2, bias_scale), - F2Q32(3, bias_scale), - }; - const int8_t expected_output_data[] = { - F2QS(24, output_min, output_max), F2QS(25, output_min, output_max), - F2QS(26, output_min, output_max), F2QS(58, output_min, output_max), - F2QS(59, output_min, output_max), F2QS(60, output_min, output_max), - }; - const int output_dims_data[] = {2, 2, 3}; + const int input_dims_data_local[] = {2, 2, 10}; + const int weights_dims_data_local[] = {2, 3, 10}; + const int bias_dims_data_local[] = {1, 3}; + const int output_dims_data_local[] = {2, 2, 3}; const int output_dims_count = 6; - int8_t output_data[output_dims_count]; + +#pragma Bss(".Zdata") + const int8_t input_data_local[] = {2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2}; + const int8_t weights_data_local[] = {2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2}; + const int32_t bias_data_local[] = {1,1,1}; + int8_t output_data_local[output_dims_count]; +#pragma Bss() + + const int8_t expected_output_data[] = 
{41,41,41,41,41,41}; + tflite::testing::TestFullyConnectedQuantized( - input_dims_data, input_data, input_min, input_max, weights_dims_data, - weights_data, weights_min, weights_max, bias_dims_data, bias_data, - bias_scale, expected_output_data, output_dims_data, output_min, - output_max, kTfLiteActNone, output_data); + input_dims_data_local, input_data_local, input_min, input_max, weights_dims_data_local, + weights_data_local, weights_min, weights_max, bias_dims_data_local, bias_data_local, + bias_scale, expected_output_data, output_dims_data_local, output_min, + output_max, kTfLiteActNone, output_data_local); +} + +// Test group 2 +TF_LITE_MICRO_TEST(SystemSimpleTestQuantized2) { + const float input_min = -128.0f; + const float input_max = 127.0f; + const float weights_min = -128.0f; + const float weights_max = 127.0f; + const float bias_scale = 1.0f; + const float output_min = -128.0f; + const float output_max = 127.0f; + + const int input_dims_data_2[] = {2, 10, 4}; + const int8_t input_data_2[] = {2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2}; + const int weights_dims_data_2[] = {2, 6, 4}; + const int8_t weights_data_2[] = {2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2, + 2,2,2,2}; + const int bias_dims_data_2[] = {1, 6}; + const int32_t bias_data_2[] = {1,1,1,1,1,1}; + const int8_t expected_output_data_2[] = {17,17,17,17,17,17,17,17,17,17, + 17,17,17,17,17,17,17,17,17,17, + 17,17,17,17,17,17,17,17,17,17, + 17,17,17,17,17,17,17,17,17,17, + 17,17,17,17,17,17,17,17,17,17, + 17,17,17,17,17,17,17,17,17,17}; + const int output_dims_data_2[] = {2, 10, 6}; + + const int output_dims_count_2 = 60; + int8_t output_data_2[output_dims_count_2]; + tflite::testing::TestFullyConnectedQuantized( + input_dims_data_2, input_data_2, input_min, input_max, weights_dims_data_2, + weights_data_2, weights_min, weights_max, bias_dims_data_2, bias_data_2, + bias_scale, expected_output_data_2, output_dims_data_2, output_min, + output_max, 
kTfLiteActNone, output_data_2); +} + +TF_LITE_MICRO_TEST(LocalSimpleTestQuantized2) { + const float input_min = -128.0f; + const float input_max = 127.0f; + const float weights_min = -128.0f; + const float weights_max = 127.0f; + const float bias_scale = 1.0f; + const float output_min = -128.0f; + const float output_max = 127.0f; + + const int input_dims_data_local_2[] = {2, 10, 4}; + const int weights_dims_data_local_2[] = {2, 6, 4}; + const int bias_dims_data_local_2[] = {1, 6}; + const int output_dims_data_local_2[] = {2, 10, 6}; + + const int output_dims_count_local_2 = 60; + +#pragma Bss(".Zdata") + const int8_t input_data_local_2[] = {2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2}; + const int8_t weights_data_local_2[] = {2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2, + 2,2,2,2}; + const int32_t bias_data_local_2[] = {1,1,1,1,1,1}; + int8_t output_data_local_2[output_dims_count_local_2]; +#pragma Bss() + + const int8_t expected_output_data_local_2[] = {41,41,41,41,41,41}; + + tflite::testing::TestFullyConnectedQuantized( + input_dims_data_local_2, input_data_local_2, input_min, input_max, weights_dims_data_local_2, + weights_data_local_2, weights_min, weights_max, bias_dims_data_local_2, bias_data_local_2, + bias_scale, expected_output_data_local_2, output_dims_data_local_2, output_min, + output_max, kTfLiteActNone, output_data_local_2); +} + +// Test group 3 +TF_LITE_MICRO_TEST(SystemSimpleTestQuantized3) { + const float input_min = -128.0f; + const float input_max = 127.0f; + const float weights_min = -128.0f; + const float weights_max = 127.0f; + const float bias_scale = 1.0f; + const float output_min = -128.0f; + const float output_max = 127.0f; + + const int input_dims_data_3[] = {2, 2, 5}; + const int8_t input_data_3[] = {2,2,2,2,2,2,2,2,2,2}; + const int weights_dims_data_3[] = {2, 10, 5}; + const int8_t weights_data_3[] = {2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2, 
+ 2,2,2,2,2,2,2,2,2,2}; + const int bias_dims_data_3[] = {1, 10}; + const int32_t bias_data_3[] = {1,1,1,1,1,1,1,1,1,1}; + const int8_t expected_output_data_3[] = {21,21,21,21,21,21,21,21,21,21, + 21,21,21,21,21,21,21,21,21,21}; + const int output_dims_data_3[] = {2, 2, 10}; + + const int output_dims_count_3 = 20; + int8_t output_data_3[output_dims_count_3]; + tflite::testing::TestFullyConnectedQuantized( + input_dims_data_3, input_data_3, input_min, input_max, weights_dims_data_3, + weights_data_3, weights_min, weights_max, bias_dims_data_3, bias_data_3, + bias_scale, expected_output_data_3, output_dims_data_3, output_min, + output_max, kTfLiteActNone, output_data_3); +} + +TF_LITE_MICRO_TEST(LocalSimpleTestQuantized3) { + const float input_min = -128.0f; + const float input_max = 127.0f; + const float weights_min = -128.0f; + const float weights_max = 127.0f; + const float bias_scale = 1.0f; + const float output_min = -128.0f; + const float output_max = 127.0f; + + const int input_dims_data_local_3[] = {2, 2, 5}; + const int weights_dims_data_local_3[] = {2, 10, 5}; + const int bias_dims_data_local_3[] = {1, 10}; + const int output_dims_data_local_3[] = {2, 2, 10}; + + const int output_dims_count_local_3 = 20; + +#pragma Bss(".Zdata") + static int8_t input_data_local_3[10]; + static int8_t weights_data_local_3[50]; + static int32_t bias_data_local_3[10]; + static int8_t output_data_local_3[output_dims_count_local_3]; +#pragma Bss() + + for(int i = 0; i < 10; ++i) { + input_data_local_3[i] = 2; + } + + for(int i = 0; i < 50; ++i) { + weights_data_local_3[i] = 2; + } + + for(int i = 0; i < 10; ++i) { + bias_data_local_3[i] = 1; + } + + for(int i = 0; i < 20; ++i) { + output_data_local_3[i] = 0; + } + + const int8_t expected_output_data_local_3[] = {21,21,21,21,21,21,21,21,21,21, + 21,21,21,21,21,21,21,21,21,21}; + + tflite::testing::TestFullyConnectedQuantized( + input_dims_data_local_3, input_data_local_3, input_min, input_max, weights_dims_data_local_3, + 
weights_data_local_3, weights_min, weights_max, bias_dims_data_local_3, bias_data_local_3, + bias_scale, expected_output_data_local_3, output_dims_data_local_3, output_min, + output_max, kTfLiteActNone, output_data_local_3); +} + +// Test group 4 +TF_LITE_MICRO_TEST(SystemSimpleTestQuantized4) { + const float input_min = -128.0f; + const float input_max = 127.0f; + const float weights_min = -128.0f; + const float weights_max = 127.0f; + const float bias_scale = 1.0f; + const float output_min = -128.0f; + const float output_max = 127.0f; + + const int input_dims_data_4[] = {2, 5, 10}; + const int8_t input_data_4[] = {2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2}; + const int weights_dims_data_4[] = {2, 5, 10}; + const int8_t weights_data_4[] = {2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2}; + const int bias_dims_data_4[] = {1, 5}; + const int32_t bias_data_4[] = {1,1,1,1,1}; + const int8_t expected_output_data_4[] = {41,41,41,41,41,41,41,41,41,41, + 41,41,41,41,41,41,41,41,41,41, + 41,41,41,41,41}; + const int output_dims_data_4[] = {2, 5, 5}; + + const int output_dims_count_4 = 25; + int8_t output_data_4[output_dims_count_4]; + tflite::testing::TestFullyConnectedQuantized( + input_dims_data_4, input_data_4, input_min, input_max, weights_dims_data_4, + weights_data_4, weights_min, weights_max, bias_dims_data_4, bias_data_4, + bias_scale, expected_output_data_4, output_dims_data_4, output_min, + output_max, kTfLiteActNone, output_data_4); +} + +TF_LITE_MICRO_TEST(LocalSimpleTestQuantized4) { + const float input_min = -128.0f; + const float input_max = 127.0f; + const float weights_min = -128.0f; + const float weights_max = 127.0f; + const float bias_scale = 1.0f; + const float output_min = -128.0f; + const float output_max = 127.0f; + + const int input_dims_data_local_4[] = {2, 5, 10}; + const int weights_dims_data_local_4[] = {2, 5, 
10}; + const int bias_dims_data_local_4[] = {1, 5}; + const int output_dims_data_local_4[] = {2, 5, 5}; + + const int output_dims_count_local_4 = 25; + +#pragma Bss(".Zdata") + const int8_t input_data_local_4[] = {2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2}; + const int8_t weights_data_local_4[] = {2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2}; + const int32_t bias_data_local_4[] = {1,1,1,1,1}; + int8_t output_data_local_4[output_dims_count_local_4]; +#pragma Bss() + + const int8_t expected_output_data_local_4[] = {41,41,41,41,41,41,41,41,41,41, + 41,41,41,41,41,41,41,41,41,41, + 41,41,41,41,41}; + + tflite::testing::TestFullyConnectedQuantized( + input_dims_data_local_4, input_data_local_4, input_min, input_max, weights_dims_data_local_4, + weights_data_local_4, weights_min, weights_max, bias_dims_data_local_4, bias_data_local_4, + bias_scale, expected_output_data_local_4, output_dims_data_local_4, output_min, + output_max, kTfLiteActNone, output_data_local_4); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/kernels/arc_mli/pooling_slicing_test.cc b/tensorflow/lite/micro/kernels/arc_mli/pooling_slicing_test.cc index 8bfeb718a1b..63737a41791 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/pooling_slicing_test.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/pooling_slicing_test.cc @@ -25,89 +25,20 @@ namespace tflite { namespace testing { namespace { -void TestAveragePoolingFloat(std::initializer_list input_dims_data, - std::initializer_list input_data, - const int filter_height, const int filter_width, - const int stride_height, const int stride_width, - std::initializer_list expected_output_data, - std::initializer_list output_dims_data, - TfLitePadding padding, - TfLiteFusedActivation activation, - float* output_data) { - TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); - TfLiteIntArray* 
output_dims = IntArrayFromInitializer(output_dims_data); - const int output_dims_count = ElementCount(*output_dims); - - constexpr int inputs_size = 1; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + outputs_size; - TfLiteTensor tensors[tensors_size] = { - CreateFloatTensor(input_data, input_dims, "input_tensor"), - CreateFloatTensor(output_data, output_dims, "output_tensor"), - }; - - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::ops::micro::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_AVERAGE_POOL_2D, 1); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - TfLitePoolParams builtin_data = {padding, stride_width, stride_height, - filter_width, filter_height, activation}; - const char* init_data = reinterpret_cast(&builtin_data); - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } - int inputs_array_data[] = {1, 0}; - TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - int outputs_array_data[] = {1, 1}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - int temporaries_array_data[] = {0}; - TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data); - - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.temporaries = temporaries_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - node.delegate = nullptr; - - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - 
registration->free(&context, user_data); - } - - for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], - 1e-5f); - } -} - template void TestAveragePoolingQuantized( - std::initializer_list input_dims_data, - std::initializer_list input_data, const float input_min, + const int* input_dims_data, + const T* input_data, const float input_min, const float input_max, const int filter_height, const int filter_width, const int stride_height, const int stride_width, - std::initializer_list expected_output_data, - std::initializer_list output_dims_data, float output_min, + const T* expected_output_data, + const int* output_dims_data, float output_min, float output_max, TfLitePadding padding, TfLiteFusedActivation activation, T* output_data) { static_assert(sizeof(T) == 1, "Only int8/uint8 data types allowed."); - TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); const int output_dims_count = ElementCount(*output_dims); constexpr int inputs_size = 1; @@ -163,94 +94,25 @@ void TestAveragePoolingQuantized( } for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], - 1e-5f); - } -} - -void TestMaxPoolFloat(std::initializer_list input_dims_data, - std::initializer_list input_data, int filter_width, - int filter_height, int stride_width, int stride_height, - std::initializer_list expected_output_data, - std::initializer_list output_dims_data, - TfLitePadding padding, TfLiteFusedActivation activation, - float* output_data) { - TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); - const int output_dims_count = 
ElementCount(*output_dims); - - constexpr int inputs_size = 1; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + outputs_size; - TfLiteTensor tensors[tensors_size] = { - CreateFloatTensor(input_data, input_dims, "input_tensor"), - CreateFloatTensor(output_data, output_dims, "output_tensor"), - }; - - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::ops::micro::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_MAX_POOL_2D, 1); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - TfLitePoolParams builtin_data = { - padding, stride_width, stride_height, - filter_width, filter_height, activation, - }; - - const char* init_data = reinterpret_cast(&builtin_data); - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } - - int inputs_array_data[] = {1, 0}; - TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - int outputs_array_data[] = {1, 1}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - int temporaries_array_data[] = {0}; - TfLiteIntArray* temporaries_array = IntArrayFromInts(temporaries_array_data); - - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.temporaries = temporaries_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - node.delegate = nullptr; - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } - for (int i = 0; i < output_dims_count; ++i) { - 
TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], + TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], 1e-5f); } } template -void TestMaxPoolQuantized(std::initializer_list input_dims_data, - std::initializer_list input_data, float input_min, +void TestMaxPoolQuantized(const int* input_dims_data, + const T* input_data, float input_min, float input_max, int filter_width, int filter_height, int stride_width, int stride_height, - std::initializer_list expected_output_data, + const T* expected_output_data, float output_min, float output_max, - std::initializer_list output_dims_data, + const int* output_dims_data, TfLitePadding padding, TfLiteFusedActivation activation, T* output_data) { static_assert(sizeof(T) == 1, "Only int8/uint8 data types allowed."); - TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); const int output_dims_count = ElementCount(*output_dims); constexpr int inputs_size = 1; @@ -308,7 +170,7 @@ void TestMaxPoolQuantized(std::initializer_list input_dims_data, registration->free(&context, user_data); } for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]); + TF_LITE_MICRO_EXPECT_EQ(expected_output_data[i], output_data[i]); } } @@ -319,797 +181,269 @@ void TestMaxPoolQuantized(std::initializer_list input_dims_data, TF_LITE_MICRO_TESTS_BEGIN -TF_LITE_MICRO_TEST(SimpleAveragePoolTestFloat) { - float output_data[2]; - tflite::testing::TestAveragePoolingFloat({4, 1, 2, 4, 1}, // Input shape - { // Input values - 0., 6., 2., 4., 3., 2., 10., 7.}, - 2, 2, // filter width, filter height - 2, 2, // stride width, stride height - { - // Output values - 2.75, - 5.75, - }, - {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, 
kTfLiteActNone, - output_data); -} - -TF_LITE_MICRO_TEST(SimpleAveragePoolTestUint8) { - using tflite::testing::F2Q; - - const float input_min = -15.9375; - const float input_max = 15.9375; - const float output_min = -15.9375; - const float output_max = 15.9375; - uint8_t output_data[2]; - tflite::testing::TestAveragePoolingQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2Q(0., input_min, input_max), - F2Q(-6., input_min, input_max), - F2Q(2., input_min, input_max), - F2Q(4., input_min, input_max), - F2Q(3., input_min, input_max), - F2Q(2., input_min, input_max), - F2Q(-10., input_min, input_max), - F2Q(7., input_min, input_max), - }, - input_min, input_max, // input quantization range - 2, 2, // filter width, filter height - 2, 2, // stride width, stride height - { - // Output values - F2Q(0., output_min, output_max), - F2Q(0.75, output_min, output_max), - }, - {4, 1, 1, 2, 1}, // Output shape - output_min, output_max, // output quantization range - kTfLitePaddingValid, kTfLiteActRelu, output_data); -} - -TF_LITE_MICRO_TEST(SimpleAveragePoolTestInt8PaddingValidStride2ActNone) { +TF_LITE_MICRO_TEST(SystemAveragePoolTestInt1) { using tflite::testing::F2QS; - const float input_min = -15.9375; - const float input_max = 15.8130; - const float output_min = -15.9375; - const float output_max = 15.8130; - int8_t output_data[2]; - tflite::testing::TestAveragePoolingQuantized( - {4, 1, 2, 4, 1}, // Input shape - { // Input values - F2QS(0., input_min, input_max), F2QS(-6., input_min, input_max), - F2QS(2., input_min, input_max), F2QS(4., input_min, input_max), - F2QS(3., input_min, input_max), F2QS(2., input_min, input_max), - F2QS(-10., input_min, input_max), F2QS(7., input_min, input_max)}, - input_min, input_max, // input quantization range - 2, 2, // filter height, filter width - 2, 2, // stride height, stride width - { // Output values - F2QS(-0.25, output_min, output_max), F2QS(0.75, output_min, output_max)}, - {4, 1, 1, 2, 1}, // Output shape - 
output_min, output_max, // output quantization range - kTfLitePaddingValid, kTfLiteActNone, output_data); -} - -TF_LITE_MICRO_TEST(SimpleAveragePoolTestInt8PaddingValidStride1Stride2Relu) { - using tflite::testing::F2QS; - - const float input_min = -15.9375; - const float input_max = 15.8130; - const float output_min = -15.9375; - const float output_max = 15.8130; + const float input_min = -128; + const float input_max = 127; + const float output_min = -128; + const float output_max = 127; int8_t output_data[3]; + + const int kInput1Shape[] = {4, 1, 2, 4, 1}; + const int8_t kInput1Data[] = {1, 1, 1, 1, 1, 1, 1, 1}; + const int kOutput1Shape[] = {4, 1, 1, 3, 1}; + const int8_t kGolden1Data[] = {1, 1, 1}; + tflite::testing::TestAveragePoolingQuantized( - {4, 1, 2, 4, 1}, // Input shape - { // Input values - F2QS(0., input_min, input_max), F2QS(-6., input_min, input_max), - F2QS(2., input_min, input_max), F2QS(4., input_min, input_max), - F2QS(3., input_min, input_max), F2QS(2., input_min, input_max), - F2QS(-10., input_min, input_max), F2QS(7., input_min, input_max)}, - input_min, input_max, // input quantization range - 2, 2, // filter height, filter width - 2, 1, // stride height, stride width - { // Output values - F2QS(0., output_min, output_max), F2QS(0., output_min, output_max), - F2QS(0.75, output_min, output_max)}, - {4, 1, 1, 3, 1}, // Output shape - output_min, output_max, // output quantization range - kTfLitePaddingValid, kTfLiteActRelu, output_data); -} - -TF_LITE_MICRO_TEST(SimpleAveragePoolTestInt8PaddingValidStride2Stride1Relu1) { - using tflite::testing::F2QS; - - const float input_min = -15.9375; - const float input_max = 15.8130; - const float output_min = -15.9375; - const float output_max = 15.8130; - int8_t output_data[2]; - tflite::testing::TestAveragePoolingQuantized( - {4, 1, 2, 4, 1}, // Input shape - { // Input values - F2QS(0., input_min, input_max), F2QS(-6., input_min, input_max), - F2QS(2., input_min, input_max), F2QS(4., input_min, 
input_max), - F2QS(3., input_min, input_max), F2QS(2., input_min, input_max), - F2QS(-10., input_min, input_max), F2QS(7., input_min, input_max)}, - input_min, input_max, // input quantization range - 2, 2, // filter height, filter width - 1, 2, // stride height, stride width - { // Output values - F2QS(-0.25, output_min, output_max), F2QS(0.75, output_min, output_max)}, - {4, 1, 1, 2, 1}, // Output shape - output_min, output_max, // output quantization range - kTfLitePaddingValid, kTfLiteActRelu1, output_data); -} - -TF_LITE_MICRO_TEST(SimpleAveragePoolTestInt8PaddingValidStride2Relu6) { - using tflite::testing::F2QS; - - const float input_min = -15.9375; - const float input_max = 15.8130; - const float output_min = -15.9375; - const float output_max = 15.8130; - int8_t output_data[2]; - tflite::testing::TestAveragePoolingQuantized( - {4, 1, 2, 4, 1}, // Input shape - { // Input values - F2QS(3., input_min, input_max), F2QS(-6., input_min, input_max), - F2QS(8., input_min, input_max), F2QS(4., input_min, input_max), - F2QS(3., input_min, input_max), F2QS(2., input_min, input_max), - F2QS(10., input_min, input_max), F2QS(7., input_min, input_max)}, - input_min, input_max, // input quantization range - 2, 2, // filter height, filter width - 2, 2, // stride height, stride width - { // Output values - F2QS(0.5, output_min, output_max), F2QS(6., output_min, output_max)}, - {4, 1, 1, 2, 1}, // Output shape - output_min, output_max, // output quantization range - kTfLitePaddingValid, kTfLiteActRelu6, output_data); -} - -TF_LITE_MICRO_TEST(SimpleAveragePoolTestInt8PaddingSameStride1ActNone) { - using tflite::testing::F2QS; - - const float input_min = -15.9375; - const float input_max = 15.8130; - const float output_min = -15.9375; - const float output_max = 15.8130; - int8_t output_data[8]; - tflite::testing::TestAveragePoolingQuantized( - {4, 1, 2, 4, 1}, // Input shape - { // Input values - F2QS(3., input_min, input_max), F2QS(-6., input_min, input_max), - F2QS(8., 
input_min, input_max), F2QS(4., input_min, input_max), - F2QS(3., input_min, input_max), F2QS(2., input_min, input_max), - F2QS(10., input_min, input_max), F2QS(7., input_min, input_max)}, + kInput1Shape, // Input shape + kInput1Data, input_min, input_max, // input quantization range 2, 2, // filter height, filter width 1, 1, // stride height, stride width - { // Output values - F2QS(0.5, output_min, output_max), F2QS(3.5, output_min, output_max), - F2QS(7.25, output_min, output_max), F2QS(5.5, output_min, output_max), - F2QS(2.5, output_min, output_max), F2QS(6., output_min, output_max), - F2QS(8.5, output_min, output_max), F2QS(7., output_min, output_max)}, - {4, 1, 2, 4, 1}, // Output shape + kGolden1Data, + kOutput1Shape, // Output shape output_min, output_max, // output quantization range kTfLitePaddingValid, kTfLiteActNone, output_data); } -TF_LITE_MICRO_TEST(SimpleMaxPoolTestFloat) { - float output_data[2]; - tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape - { // Input values - 0, 6, 2, 4, 3, 2, 10, 7}, - 2, 2, // filter width, filter height - 2, 2, // stride width, stride height - { - // Output values - 6, - 10, - }, - {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActNone, - output_data); -} -TF_LITE_MICRO_TEST(SimpleMaxPoolTestFloatRelu) { - float output_data[2]; - tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape - { - // Input values - -1, -6, 2, 4, // - -3, -2, 10.5, 7, // - }, - 2, 2, // filter width, filter height - 2, 2, // stride width, stride height - { - // Output values - 0.0, - 10.5, - }, - {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu, - output_data); -} +TF_LITE_MICRO_TEST(LocalAveragePoolTestInt1) { + using tflite::testing::F2QS; -TF_LITE_MICRO_TEST(SimpleMaxPoolTestFloatRelu1) { - float output_data[2]; - tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape - { - // Input values - -2.75, -6, 0.2, 0.4, // - -3, -2, -0.3, 0.7, // - }, - 2, 2, // filter 
width, filter height - 2, 2, // stride width, stride height - { - // Output values - -1.0, - 0.7, - }, - {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu1, - output_data); + const float input_min = -128; + const float input_max = 127; + const float output_min = -128; + const float output_max = 127; + int8_t output_data[3]; - tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape - { - // Input values - -2.75, -6, -2, -4, // - -3, -2, 10, -7, // - }, - 2, 2, // filter width, filter height - 2, 2, // stride width, stride height - { - // Output values - -1.0, - 1.0, - }, - {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu1, - output_data); -} +#pragma Bss(".Zdata") + const int kInput1Shape[] = {4, 1, 2, 4, 1}; + const int8_t kInput1Data[] = {1, 1, 1, 1, 1, 1, 1, 1}; + const int kOutput1Shape[] = {4, 1, 1, 3, 1}; + const int8_t kGolden1Data[] = {1, 1, 1}; +#pragma Bss() -TF_LITE_MICRO_TEST(SimpleMaxPoolTestFloatRelu6) { - float output_data[2]; - tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape - { - // Input values - -1.5, -6, 12, 4, // - -3, -2, 10, 7, // - }, - 2, 2, // filter width, filter height - 2, 2, // stride width, stride height - { - // Output values - 0.0, - 6.0, - }, - {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu6, - output_data); - - tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape - { - // Input values - 0, 4.5, 12, 4, // - 3, 2, 10, 7, // - }, - 2, 2, // filter width, filter height - 2, 2, // stride width, stride height - { - // Output values - 4.5, - 6.0, - }, - {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu6, - output_data); -} - -TF_LITE_MICRO_TEST(SimpleMaxPoolTestPaddingSameStride1) { - float output_data[8]; - tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape - { - // Input values - 0, 6, 2, 4, // - 3, 2, 10, 7, // - }, - 2, 2, // filter width, filter height - 1, 1, // stride width, stride height - 
{ - // Output values - 6, 10, 10, 7, // - 3, 10, 10, 7, // - }, - {4, 1, 2, 4, 1}, // Output shape - kTfLitePaddingSame, kTfLiteActNone, - output_data); -} - -TF_LITE_MICRO_TEST(SimpleMaxPoolTestPaddingValidStride1) { - float output_data[3]; - tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape - { - // Input values - 0, 6, 2, 4, // - 3, 2, 10, 7, // - }, - 2, 2, // filter width, filter height - 1, 1, // stride width, stride height - { - // Output values - 6, - 10, - 10, - }, - {4, 1, 1, 3, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActNone, - output_data); -} - -TF_LITE_MICRO_TEST(SimpleMaxPoolTestUInt8ActNone) { - using tflite::testing::F2Q; - - uint8_t output_data[2]; - float input_min = 0; - float input_max = 15.9375; - float output_min = 0; - float output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 2; - int stride_height = 2; - tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2Q(0, input_min, input_max), - F2Q(6, input_min, input_max), - F2Q(2, input_min, input_max), - F2Q(4, input_min, input_max), - F2Q(3, input_min, input_max), - F2Q(2, input_min, input_max), - F2Q(10, input_min, input_max), - F2Q(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - {// Output values - F2Q(6, output_min, output_max), F2Q(10, output_min, output_max)}, - output_min, output_max, {4, 1, 1, 2, 1}, // Output shape + tflite::testing::TestAveragePoolingQuantized( + kInput1Shape, // Input shape + kInput1Data, + input_min, input_max, // input quantization range + 2, 2, // filter height, filter width + 1, 1, // stride height, stride width + kGolden1Data, + kOutput1Shape, // Output shape + output_min, output_max, // output quantization range kTfLitePaddingValid, kTfLiteActNone, output_data); } -TF_LITE_MICRO_TEST(MaxPoolTestUInt8ActRelu) { - using tflite::testing::F2Q; +// Test group AVG 2 
+TF_LITE_MICRO_TEST(SystemAveragePoolTestInt2) { + using tflite::testing::F2QS; - uint8_t output_data[2]; - float input_min = -15.9375; - float input_max = 15.9375; - float output_min = -15.9375; - float output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 2; - int stride_height = 2; - tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2Q(-1.5, input_min, input_max), - F2Q(-6, input_min, input_max), - F2Q(2, input_min, input_max), - F2Q(4, input_min, input_max), - F2Q(-3, input_min, input_max), - F2Q(-2, input_min, input_max), - F2Q(10, input_min, input_max), - F2Q(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - {// Output values - F2Q(0, output_min, output_max), F2Q(10, output_min, output_max)}, - output_min, output_max, {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu, output_data); -} + const float input_min = -128; + const float input_max = 127; + const float output_min = -128; + const float output_max = 127; + int8_t output_data[45]; -TF_LITE_MICRO_TEST(MaxPoolTestUInt8ActRelu1) { - using tflite::testing::F2Q; + const int kInput2Shape[] = {4, 1, 6, 10, 1}; + const int8_t kInput2Data[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + const int kOutput2Shape[] = {4, 1, 5, 9, 1}; + const int8_t kGolden2Data[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1}; - uint8_t output_data[2]; - float input_min = -15.9375; - float input_max = 15.9375; - float output_min = -15.9375; - float output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 2; - int stride_height = 2; - tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 
4, 1}, // Input shape - { - // Input values - F2Q(-1.7, input_min, input_max), - F2Q(-6, input_min, input_max), - F2Q(2, input_min, input_max), - F2Q(4, input_min, input_max), - F2Q(-3, input_min, input_max), - F2Q(-2, input_min, input_max), - F2Q(-10, input_min, input_max), - F2Q(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - {// Output values - F2Q(-1.0, output_min, output_max), F2Q(1.0, output_min, output_max)}, - output_min, output_max, {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu1, output_data); -} -TF_LITE_MICRO_TEST(MaxPoolTestUInt8ActRelu6) { - using tflite::testing::F2Q; - - uint8_t output_data[8]; - float input_min = -15.9375; - float input_max = 15.9375; - float output_min = -15.9375; - float output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 2; - int stride_height = 2; - tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2Q(0, input_min, input_max), - F2Q(-6, input_min, input_max), - F2Q(12, input_min, input_max), - F2Q(4, input_min, input_max), - F2Q(-3, input_min, input_max), - F2Q(-2, input_min, input_max), - F2Q(10, input_min, input_max), - F2Q(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - {// Output values - F2Q(0.0, output_min, output_max), F2Q(6.0, output_min, output_max)}, - output_min, output_max, {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu6, output_data); - - tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2Q(0, input_min, input_max), - F2Q(4.5, input_min, input_max), - F2Q(12, input_min, input_max), - F2Q(4, input_min, input_max), - F2Q(3, input_min, input_max), - F2Q(2, input_min, input_max), - F2Q(10, input_min, input_max), - F2Q(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, 
stride_width, - stride_height, - {// Output values - F2Q(4.5, output_min, output_max), F2Q(6.0, output_min, output_max)}, - output_min, output_max, {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu6, output_data); -} - -TF_LITE_MICRO_TEST(MaxPoolTestUInt8PaddingSameStride1) { - using tflite::testing::F2Q; - - uint8_t output_data[8]; - float input_min = 0; - float input_max = 15.9375; - float output_min = 0; - float output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 1; - int stride_height = 1; - tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2Q(0, input_min, input_max), - F2Q(6, input_min, input_max), - F2Q(2, input_min, input_max), - F2Q(4, input_min, input_max), - F2Q(3, input_min, input_max), - F2Q(2, input_min, input_max), - F2Q(10, input_min, input_max), - F2Q(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - { - // Output values - F2Q(6, output_min, output_max), - F2Q(10, output_min, output_max), - F2Q(10, output_min, output_max), - F2Q(7, output_min, output_max), - F2Q(3, output_min, output_max), - F2Q(10, output_min, output_max), - F2Q(10, output_min, output_max), - F2Q(7, output_min, output_max), - }, - output_min, output_max, {4, 1, 2, 4, 1}, // Output shape - kTfLitePaddingSame, kTfLiteActNone, output_data); -} - -TF_LITE_MICRO_TEST(MaxPoolTestUInt8PaddingValidStride1) { - using tflite::testing::F2Q; - - uint8_t output_data[3]; - float input_min = 0; - float input_max = 15.9375; - float output_min = 0; - float output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 1; - int stride_height = 1; - tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2Q(0, input_min, input_max), - F2Q(6, input_min, input_max), - F2Q(2, input_min, input_max), - F2Q(4, input_min, input_max), - F2Q(3, input_min, 
input_max), - F2Q(2, input_min, input_max), - F2Q(10, input_min, input_max), - F2Q(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - { - // Output values - F2Q(6, output_min, output_max), - F2Q(10, output_min, output_max), - F2Q(10, output_min, output_max), - }, - output_min, output_max, {4, 1, 1, 3, 1}, // Output shape + tflite::testing::TestAveragePoolingQuantized( + kInput2Shape, // Input shape + kInput2Data, + input_min, input_max, // input quantization range + 2, 2, // filter height, filter width + 1, 1, // stride height, stride width + kGolden2Data, + kOutput2Shape, // Output shape + output_min, output_max, // output quantization range kTfLitePaddingValid, kTfLiteActNone, output_data); } -TF_LITE_MICRO_TEST(SimpleMaxPoolTestInt8ActNone) { +TF_LITE_MICRO_TEST(LocalAveragePoolTestInt2) { using tflite::testing::F2QS; - int8_t output_data[2]; - float input_min = 0; - float input_max = 15.9375; - float output_min = 0; - float output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 2; - int stride_height = 2; - tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2QS(0, input_min, input_max), - F2QS(6, input_min, input_max), - F2QS(2, input_min, input_max), - F2QS(4, input_min, input_max), - F2QS(3, input_min, input_max), - F2QS(2, input_min, input_max), - F2QS(10, input_min, input_max), - F2QS(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - {// Output values - F2QS(6, output_min, output_max), F2QS(10, output_min, output_max)}, - output_min, output_max, {4, 1, 1, 2, 1}, // Output shape + const float input_min = -128; + const float input_max = 127; + const float output_min = -128; + const float output_max = 127; + int8_t output_data[45]; + +#pragma Bss(".Zdata") + const int kInput2Shape[] = {4, 1, 6, 10, 1}; + const int8_t kInput2Data[] = {1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + const int kOutput2Shape[] = {4, 1, 5, 9, 1}; + const int8_t kGolden2Data[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1}; +#pragma Bss() + + tflite::testing::TestAveragePoolingQuantized( + kInput2Shape, // Input shape + kInput2Data, + input_min, input_max, // input quantization range + 2, 2, // filter height, filter width + 1, 1, // stride height, stride width + kGolden2Data, + kOutput2Shape, // Output shape + output_min, output_max, // output quantization range kTfLitePaddingValid, kTfLiteActNone, output_data); } -TF_LITE_MICRO_TEST(MaxPoolTestUInt8ActRelu) { - using tflite::testing::F2QS; - - int8_t output_data[2]; - float input_min = -15.9375; - float input_max = 15.9375; - float output_min = -15.9375; - float output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 2; - int stride_height = 2; - tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2QS(-1.5, input_min, input_max), - F2QS(-6, input_min, input_max), - F2QS(2, input_min, input_max), - F2QS(4, input_min, input_max), - F2QS(-3, input_min, input_max), - F2QS(-2, input_min, input_max), - F2QS(10, input_min, input_max), - F2QS(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - {// Output values - F2QS(0, output_min, output_max), F2QS(10, output_min, output_max)}, - output_min, output_max, {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu, output_data); -} - -TF_LITE_MICRO_TEST(MaxPoolTestUInt8ActRelu1) { - using tflite::testing::F2QS; - - int8_t output_data[2]; - float input_min = -15.9375; - float input_max = 15.9375; - float output_min = -15.9375; - float 
output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 2; - int stride_height = 2; - tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2QS(-1.7, input_min, input_max), - F2QS(-6, input_min, input_max), - F2QS(2, input_min, input_max), - F2QS(4, input_min, input_max), - F2QS(-3, input_min, input_max), - F2QS(-2, input_min, input_max), - F2QS(-10, input_min, input_max), - F2QS(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - {// Output values - F2QS(-1.0, output_min, output_max), F2QS(1.0, output_min, output_max)}, - output_min, output_max, {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu1, output_data); -} - -TF_LITE_MICRO_TEST(MaxPoolTestUInt8ActRelu6) { - using tflite::testing::F2QS; - - int8_t output_data[8]; - float input_min = -15.9375; - float input_max = 15.9375; - float output_min = -15.9375; - float output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 2; - int stride_height = 2; - tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2QS(0, input_min, input_max), - F2QS(-6, input_min, input_max), - F2QS(12, input_min, input_max), - F2QS(4, input_min, input_max), - F2QS(-3, input_min, input_max), - F2QS(-2, input_min, input_max), - F2QS(10, input_min, input_max), - F2QS(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - {// Output values - F2QS(0.0, output_min, output_max), F2QS(6.0, output_min, output_max)}, - output_min, output_max, {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu6, output_data); - - tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2QS(0, input_min, input_max), - F2QS(4.5, input_min, input_max), - F2QS(12, input_min, input_max), - F2QS(4, input_min, 
input_max), - F2QS(3, input_min, input_max), - F2QS(2, input_min, input_max), - F2QS(10, input_min, input_max), - F2QS(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - {// Output values - F2QS(4.5, output_min, output_max), F2QS(6.0, output_min, output_max)}, - output_min, output_max, {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu6, output_data); -} - -TF_LITE_MICRO_TEST(MaxPoolTestUInt8PaddingSameStride1) { - using tflite::testing::F2QS; - - int8_t output_data[8]; - float input_min = 0; - float input_max = 15.9375; - float output_min = 0; - float output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 1; - int stride_height = 1; - tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2QS(0, input_min, input_max), - F2QS(6, input_min, input_max), - F2QS(2, input_min, input_max), - F2QS(4, input_min, input_max), - F2QS(3, input_min, input_max), - F2QS(2, input_min, input_max), - F2QS(10, input_min, input_max), - F2QS(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - { - // Output values - F2QS(6, output_min, output_max), - F2QS(10, output_min, output_max), - F2QS(10, output_min, output_max), - F2QS(7, output_min, output_max), - F2QS(3, output_min, output_max), - F2QS(10, output_min, output_max), - F2QS(10, output_min, output_max), - F2QS(7, output_min, output_max), - }, - output_min, output_max, {4, 1, 2, 4, 1}, // Output shape - kTfLitePaddingSame, kTfLiteActNone, output_data); -} - -TF_LITE_MICRO_TEST(MaxPoolTestUInt8PaddingValidStride1) { +// Test group MAX 1 +TF_LITE_MICRO_TEST(SystemMaxPoolTestInt1) { using tflite::testing::F2QS; int8_t output_data[3]; - float input_min = 0; - float input_max = 15.9375; - float output_min = 0; - float output_max = 15.9375; + const float input_min = -128; + const float input_max = 127; + const 
float output_min = -128; + const float output_max = 127; int filter_width = 2; int filter_height = 2; int stride_width = 1; int stride_height = 1; + + const int kInput1Shape[] = {4, 1, 2, 4, 1}; + const int8_t kInput1Data[] = {1, 1, 1, 1, 1, 1, 1, 1}; + const int kOutput1Shape[] = {4, 1, 1, 3, 1}; + const int8_t kGolden1Data[] = {1, 1, 1}; + tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2QS(0, input_min, input_max), - F2QS(6, input_min, input_max), - F2QS(2, input_min, input_max), - F2QS(4, input_min, input_max), - F2QS(3, input_min, input_max), - F2QS(2, input_min, input_max), - F2QS(10, input_min, input_max), - F2QS(7, input_min, input_max), - }, + kInput1Shape, // Input shape + kInput1Data, input_min, input_max, filter_width, filter_height, stride_width, stride_height, - { - // Output values - F2QS(6, output_min, output_max), - F2QS(10, output_min, output_max), - F2QS(10, output_min, output_max), - }, - output_min, output_max, {4, 1, 1, 3, 1}, // Output shape + kGolden1Data, + output_min, output_max, kOutput1Shape, // Output shape + kTfLitePaddingValid, kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(LocalMaxPoolTestInt1) { + using tflite::testing::F2QS; + + int8_t output_data[3]; + const float input_min = -128; + const float input_max = 127; + const float output_min = -128; + const float output_max = 127; + int filter_width = 2; + int filter_height = 2; + int stride_width = 1; + int stride_height = 1; + +#pragma Bss(".Zdata") + const int kInput1Shape[] = {4, 1, 2, 4, 1}; + const int8_t kInput1Data[] = {1, 1, 1, 1, 1, 1, 1, 1}; + const int kOutput1Shape[] = {4, 1, 1, 3, 1}; + const int8_t kGolden1Data[] = {1, 1, 1}; +#pragma Bss() + + tflite::testing::TestMaxPoolQuantized( + kInput1Shape, // Input shape + kInput1Data, + input_min, input_max, filter_width, filter_height, stride_width, + stride_height, + kGolden1Data, + output_min, output_max, kOutput1Shape, // Output shape + kTfLitePaddingValid, 
kTfLiteActNone, output_data); +} + + +// Test group MAX 2 +TF_LITE_MICRO_TEST(SystemMaxPoolTestInt2) { + using tflite::testing::F2QS; + + int8_t output_data[45]; + const float input_min = -128; + const float input_max = 127; + const float output_min = -128; + const float output_max = 127; + int filter_width = 2; + int filter_height = 2; + int stride_width = 1; + int stride_height = 1; + + const int kInput2Shape[] = {4, 1, 6, 10, 1}; + const int8_t kInput2Data[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + const int kOutput2Shape[] = {4, 1, 5, 9, 1}; + const int8_t kGolden2Data[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1}; + + tflite::testing::TestMaxPoolQuantized( + kInput2Shape, // Input shape + kInput2Data, + input_min, input_max, filter_width, filter_height, stride_width, + stride_height, + kGolden2Data, + output_min, output_max, kOutput2Shape, // Output shape + kTfLitePaddingValid, kTfLiteActNone, output_data); +} + +TF_LITE_MICRO_TEST(LocalMaxPoolTestInt2) { + using tflite::testing::F2QS; + + int8_t output_data[45]; + const float input_min = -128; + const float input_max = 127; + const float output_min = -128; + const float output_max = 127; + int filter_width = 2; + int filter_height = 2; + int stride_width = 1; + int stride_height = 1; + + #pragma Bss(".Zdata") + const int kInput2Shape[] = {4, 1, 6, 10, 1}; + const int8_t kInput2Data[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + const int kOutput2Shape[] = {4, 1, 5, 9, 1}; + const int8_t kGolden2Data[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1}; +#pragma Bss() + + tflite::testing::TestMaxPoolQuantized( + kInput2Shape, // Input shape + kInput2Data, + input_min, input_max, filter_width, filter_height, stride_width, + stride_height, + kGolden2Data, + output_min, output_max, kOutput2Shape, // Output shape kTfLitePaddingValid, kTfLiteActNone, output_data); } From 9996df4d7c3cbd8fadf342f27df4ae3d225b56b0 Mon Sep 17 00:00:00 2001 From: jacco Date: Wed, 29 Apr 2020 12:37:40 +0200 Subject: [PATCH 34/45] Small fix in mli slicing code for fully connect kernel --- tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc b/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc index 70d1fda4c2b..89eae356f51 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc @@ -158,7 +158,7 @@ TfLiteStatus EvalMliQuantizedInt8(TfLiteContext* context, TfLiteNode* node, mli_mov_cfg_for_copy(©_config); const int weight_out_dimension = 0; const int out_tensor_dimension = 1; - const int batch_dimension = 0; + const int input_size_dimension = 1; int slice_size = mli_weights.shape[weight_out_dimension]; /* allocate the local buffers, and compute the slice size */ @@ -192,13 +192,14 @@ TfLiteStatus EvalMliQuantizedInt8(TfLiteContext* context, TfLiteNode* node, mli_mov_tensor_sync(w_slice.Sub(), ©_config, w_ptr); mli_mov_tensor_sync(b_slice.Sub(), ©_config, b_ptr); - TensorSlicer in_slice(&mli_in, batch_dimension, 1); + // Slice the input over the batches (one at a time with the size of a complete input) + TensorSlicer in_slice(&mli_in, input_size_dimension, mli_in.shape[input_size_dimension]); /* output tensor is alreade sliced in the output size dimension. out_ch_slice.Sub() is the tensor for the amount of output size of this itteration of the weight slice loop. 
This tensor needs to be further sliced over the batch */ - TensorSlicer out_slice(out_ch_slice.Sub(), batch_dimension, 1); + TensorSlicer out_slice(out_ch_slice.Sub(), out_tensor_dimension, slice_size); /* setup the pointers to the local or remote tensor to make the code * inside the loop easier. */ From 21e7a9fffa8461f670abe50d2ef6a1724597d352 Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Thu, 23 Apr 2020 14:09:21 +0300 Subject: [PATCH 35/45] Updated embARC MLI version for downloading + Package with pre-built libraries for various platforms --- .../micro/tools/make/ext_libs/arc_mli.inc | 26 +++++++++++-------- .../tools/make/targets/arc/arc_common.inc | 2 ++ .../tools/make/targets/arc_emsdp_makefile.inc | 3 +++ .../tools/make/third_party_downloads.inc | 8 +++--- 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/tensorflow/lite/micro/tools/make/ext_libs/arc_mli.inc b/tensorflow/lite/micro/tools/make/ext_libs/arc_mli.inc index ee3cc8113c1..a95b4550417 100644 --- a/tensorflow/lite/micro/tools/make/ext_libs/arc_mli.inc +++ b/tensorflow/lite/micro/tools/make/ext_libs/arc_mli.inc @@ -21,19 +21,9 @@ ifeq ($(TARGET_ARCH), arc) # by passing 'no_arc_mli' tag (make -f TAGS=no_arc_mli ...) ifeq ($(filter no_arc_mli,$(ALL_TAGS)),) - ALL_TAGS += arc_mli -ifeq ($(PRE_COMPILED_MLI),true) - # TODO: Replace with proper arc_mli pre-builts. 
- $(eval $(call add_third_party_download,$(EMBARC_OSP_URL),$(EMBARC_OSP_MD5),embarc_osp,)) - - MLI_INCLUDE_FOLDER = embarc_osp/library/embarc_mli/include - MLI_LIB = third_party/embarc_osp/library/embarc_mli/lib/arcem9d/libmli_iotdk.a - - THIRD_PARTY_CC_HDRS += \ - third_party/embarc_osp/LICENSE -else +ifeq ($(BUILD_ARC_MLI),true) MLI_LIB_DIR = arc_mli_$(basename $(TCF_FILE_NAME)) $(eval $(call add_third_party_download,$(EMBARC_MLI_URL),$(EMBARC_MLI_MD5),$(MLI_LIB_DIR),build_embarc_mli,$(TCF_FILE))) @@ -44,6 +34,20 @@ else THIRD_PARTY_CC_HDRS += \ third_party/$(MLI_LIB_DIR)/LICENSE +else +ifneq ($(ARC_MLI_PRE_COMPILED_TARGET),) + MLI_LIB_DIR = arc_mli_package + $(eval $(call add_third_party_download,$(EMBARC_MLI_PRE_COMPILED_URL),$(EMBARC_MLI_PRE_COMPILED_MD5),$(MLI_LIB_DIR),)) + + MLI_INCLUDE_FOLDER = $(MLI_LIB_DIR)/include + MLI_LIB = third_party/$(MLI_LIB_DIR)/bin/$(ARC_MLI_PRE_COMPILED_TARGET)/release/libmli.a + MICROLITE_LIBS += $(MAKEFILE_DIR)/downloads/$(MLI_LIB_DIR)/bin/$(ARC_MLI_PRE_COMPILED_TARGET)/release/libmli.a + + THIRD_PARTY_CC_HDRS += \ + third_party/$(MLI_LIB_DIR)/LICENSE +else +$(error Target for pre compiled ARC MLI library is not defined) +endif endif THIRD_PARTY_CC_HDRS += $(MLI_LIB) diff --git a/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc b/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc index 4a9a5ccdfc3..9462c3852f2 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc @@ -89,6 +89,8 @@ ifeq ($(ARC_TOOLCHAIN), mwdt) LCF_FILE ?= + BUILD_ARC_MLI ?= true + # The variable TCF_FILE_NAME stores the TCF file name (including .tcf extension), # this variable is used later to add the option to the linker/compiler flags. 
# This condition also handles the case when the user/makefile specifies diff --git a/tensorflow/lite/micro/tools/make/targets/arc_emsdp_makefile.inc b/tensorflow/lite/micro/tools/make/targets/arc_emsdp_makefile.inc index a84dd15e4e8..b81bcea0eb8 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc_emsdp_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc_emsdp_makefile.inc @@ -23,6 +23,9 @@ ifeq ($(TARGET), arc_emsdp) UBOOT_FILE := $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp/uboot.env UBOOT_FILE_NAME := $(notdir $(UBOOT_FILE)) + BUILD_ARC_MLI := false + ARC_MLI_PRE_COMPILED_TARGET := emsdp_em11d_em9d_dfss + include $(MAKEFILE_DIR)/targets/arc/arc_common.inc ARC_EXTRA_APP_SETTINGS = \ diff --git a/tensorflow/lite/micro/tools/make/third_party_downloads.inc b/tensorflow/lite/micro/tools/make/third_party_downloads.inc index ce24ba29542..db420b7fd1b 100644 --- a/tensorflow/lite/micro/tools/make/third_party_downloads.inc +++ b/tensorflow/lite/micro/tools/make/third_party_downloads.inc @@ -71,11 +71,11 @@ PERSON_MODEL_MD5 := "fe2934bd0788f1dcc7af3f0a954542ab" PERSON_MODEL_INT8_URL := "https://storage.googleapis.com/download.tensorflow.org/data/tf_lite_micro_person_data_int8_grayscale_2020_01_13.zip" PERSON_MODEL_INT8_MD5 := "8a7d2c70325f53136faea6dde517b8cc" -EMBARC_OSP_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_osp/archive/embarc_mli.zip" -EMBARC_OSP_MD5 := "9eaf7b3a1ed05872a03da9796672a776" +EMBARC_MLI_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/archive/7026ad09bb7f967324eb29e069f776bc44a08886.zip" +EMBARC_MLI_MD5 := "7eebd730db79c6834399f87e509115fb" -EMBARC_MLI_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/archive/4b6c6eed65395dced1564006be8188781af16035.zip" -EMBARC_MLI_MD5 := "47167553c17ff8c7cd59fb1afb90c304" +EMBARC_MLI_PRE_COMPILED_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/releases/download/Release_1.1_RC1/embARC_MLI_package.zip" 
+EMBARC_MLI_PRE_COMPILED_MD5 := "b85b8b89446757735342795367e37d22" XTENSA_HIFI4_URL :="https://github.com/foss-xtensa/nnlib-hifi4/raw/master/archive/xa_nnlib_04_07.zip" XTENSA_HIFI4_MD5 :="f234764928f9a42901df33a27e118c8b" From f9b6799aadacfc19032994bbb1c4eba67e53c598 Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Fri, 24 Apr 2020 13:31:42 +0300 Subject: [PATCH 36/45] Fixes in project generation for ARC specific projects --- tensorflow/lite/micro/tools/make/helper_functions.inc | 2 ++ .../lite/micro/tools/make/targets/arc/arc_common.inc | 2 +- .../lite/micro/tools/make/targets/arc/emsdp/emsdp.lcf | 4 ++-- .../lite/micro/tools/make/targets/arc_emsdp_makefile.inc | 7 +++++-- tensorflow/lite/micro/tools/make/targets/arc_makefile.inc | 2 ++ .../lite/micro/tools/make/templates/arc/README_ARC.md.tpl | 2 ++ .../micro/tools/make/templates/arc/README_ARC_EMSDP.md.tpl | 2 ++ 7 files changed, 16 insertions(+), 5 deletions(-) create mode 100644 tensorflow/lite/micro/tools/make/templates/arc/README_ARC.md.tpl create mode 100644 tensorflow/lite/micro/tools/make/templates/arc/README_ARC_EMSDP.md.tpl diff --git a/tensorflow/lite/micro/tools/make/helper_functions.inc b/tensorflow/lite/micro/tools/make/helper_functions.inc index 8d321d42490..1cf9afa8794 100644 --- a/tensorflow/lite/micro/tools/make/helper_functions.inc +++ b/tensorflow/lite/micro/tools/make/helper_functions.inc @@ -150,6 +150,8 @@ $(PRJDIR)$(3)/$(1)/Makefile: tensorflow/lite/micro/tools/make/templates/arc/arc_ sed -E 's#\%\{APP_DEBUG_CMD\}\%#$(ARC_APP_DEBUG_CMD)#g' | \ sed -E 's#\%\{EXTRA_EXECUTE_RULES\}\%#$(ARC_EXTRA_EXECUTE_RULES)#g' > $$@ +$(PRJDIR)$(3)/$(1)/%: tensorflow/lite/micro/tools/make/templates/arc/%.tpl + @cp $$< $$@ $(foreach var,$(ARC_TARGET_FILES_DIRS),$(eval $(call path_changing_copy_file,$(PRJDIR)$(3)/$(1),$(var)))) diff --git a/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc b/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc index 9462c3852f2..596f219d3d1 100644 --- 
a/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc/arc_common.inc @@ -105,7 +105,7 @@ endif PLATFORM_FLAGS = -tcf=$(TCF_FILE_NAME) -tcf_core_config - PLATFORM_FLAGS += -Hnocopyr -O3 -Hpurge -Hdense_prologue -Hon=Long_enums -fslp-vectorize-aggressive -ffunction-sections -fdata-sections + PLATFORM_FLAGS += -Hnocopyr -Hpurge -Hdense_prologue -Hon=Long_enums -fslp-vectorize-aggressive -ffunction-sections -fdata-sections # Use compact CRT. It requires pre-defined heap size PLATFORM_FLAGS += -Hcl -Hcrt_fast_memcpy -Hcrt_fast_memset diff --git a/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp.lcf b/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp.lcf index d17c807e250..c13dea5c6a0 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp.lcf +++ b/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp.lcf @@ -28,11 +28,11 @@ SECTIONS { .text? : { *('.text$crt*') } * (TEXT): {} * (LIT): {} - } > ICCM0 + } > SRAM GROUP BLOCK(4): { .Zdata? : {} - .stack ALIGN(4) SIZE(DEFINED _STACKSIZE?_STACKSIZE:8K): {} + .stack ALIGN(4) SIZE(DEFINED _STACKSIZE?_STACKSIZE:32K): {} .heap? 
ALIGN(4) SIZE(DEFINED _HEAPSIZE?_HEAPSIZE:8K): {} } > DCCM diff --git a/tensorflow/lite/micro/tools/make/targets/arc_emsdp_makefile.inc b/tensorflow/lite/micro/tools/make/targets/arc_emsdp_makefile.inc index b81bcea0eb8..211437bd9f4 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc_emsdp_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc_emsdp_makefile.inc @@ -54,8 +54,11 @@ ifeq ($(filter $(ARC_TARGET_FILES_DIRS), $(dir $(UBOOT_FILE))),) ARC_TARGET_FILES_DIRS += $(dir $(UBOOT_FILE)) endif - # for default EMSD configuration we can use default em9d rt libs + MAKE_PROJECT_FILES := $(filter-out README_MAKE.md, $(MAKE_PROJECT_FILES)) README_ARC_EMSDP.md + + # for default EMSDP configuration we can use em9d_va rt libs # for better performance runtime should be built for emsdp configuration - PLATFORM_LDFLAGS += -Hlib=em9d_voice_audio + # No hostlink library for smaller codesize purpose + PLATFORM_LDFLAGS += -Hlib=em9d_voice_audio -Hhostlib= endif diff --git a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc index d379eea86f1..9f5442b4c6c 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc_makefile.inc @@ -33,6 +33,8 @@ endif include $(MAKEFILE_DIR)/targets/arc/arc_common.inc +MAKE_PROJECT_FILES := $(filter-out README_MAKE.md, $(MAKE_PROJECT_FILES)) README_ARC.md + endif # $(TARGET) endif # $(TARGET_ARCH)... 
diff --git a/tensorflow/lite/micro/tools/make/templates/arc/README_ARC.md.tpl b/tensorflow/lite/micro/tools/make/templates/arc/README_ARC.md.tpl new file mode 100644 index 00000000000..b722b9c441d --- /dev/null +++ b/tensorflow/lite/micro/tools/make/templates/arc/README_ARC.md.tpl @@ -0,0 +1,2 @@ +# Mock Project Readme for common ARC target + diff --git a/tensorflow/lite/micro/tools/make/templates/arc/README_ARC_EMSDP.md.tpl b/tensorflow/lite/micro/tools/make/templates/arc/README_ARC_EMSDP.md.tpl new file mode 100644 index 00000000000..b3d9257f4d2 --- /dev/null +++ b/tensorflow/lite/micro/tools/make/templates/arc/README_ARC_EMSDP.md.tpl @@ -0,0 +1,2 @@ +# Mock Project Readme for ARC EMSDP target + From 0fece983977cbf914a3a413005b8de7648963735 Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Fri, 24 Apr 2020 17:45:52 +0300 Subject: [PATCH 37/45] ARC EMSDP specific patch of generated projects for examples --- .../micro_speech/arc_emsdp/Makefile.inc | 22 +++++++ .../person_detection/arc_emsdp/Makefile.inc | 19 ++++++ .../person_detection/arc_emsdp/emsdp.lcf | 61 ++++++++++++++++++ .../arc_emsdp/Makefile.inc | 16 +++++ .../arc_emsdp/emsdp.lcf | 63 +++++++++++++++++++ 5 files changed, 181 insertions(+) create mode 100644 tensorflow/lite/micro/examples/micro_speech/arc_emsdp/Makefile.inc create mode 100644 tensorflow/lite/micro/examples/person_detection/arc_emsdp/Makefile.inc create mode 100644 tensorflow/lite/micro/examples/person_detection/arc_emsdp/emsdp.lcf create mode 100644 tensorflow/lite/micro/examples/person_detection_experimental/arc_emsdp/Makefile.inc create mode 100644 tensorflow/lite/micro/examples/person_detection_experimental/arc_emsdp/emsdp.lcf diff --git a/tensorflow/lite/micro/examples/micro_speech/arc_emsdp/Makefile.inc b/tensorflow/lite/micro/examples/micro_speech/arc_emsdp/Makefile.inc new file mode 100644 index 00000000000..7fe4906cdf9 --- /dev/null +++ b/tensorflow/lite/micro/examples/micro_speech/arc_emsdp/Makefile.inc @@ -0,0 +1,22 @@ +ifeq 
($(TARGET), arc_emsdp) + + MICRO_SPEECH_HDRS += \ + micro_speech_patch.txt + + MICRO_SPEECH_TEST_HDRS += \ + micro_speech_patch.txt + + MICRO_SPEECH_MOCK_HDRS += \ + micro_speech_patch.txt + +%/micro_speech_patch.txt: %/emsdp.lcf %/Makefile + @cp tensorflow/lite/micro/examples/person_detection/arc_emsdp/emsdp.lcf $< + @echo emsdp.lcf > $@ + @sed -E -i 's#-Hheap=[^ ]*#\-Hheap=16K \-Hstack=16K#g' $(word 2, $^) + @sed -E -i 's#MLI_ONLY *\?= *false#MLI_ONLY \?= false\n\ + CXXFLAGS += -DSCRATCH_MEM_X_SIZE=0 -DSCRATCH_MEM_Y_SIZE=0 -DSCRATCH_MEM_Z_SIZE=0\ + CCFLAGS += -DSCRATCH_MEM_X_SIZE=0 -DSCRATCH_MEM_Y_SIZE=0 -DSCRATCH_MEM_Z_SIZE=0#'\ + $(word 2, $^) + @echo Makefile >> $@ + +endif diff --git a/tensorflow/lite/micro/examples/person_detection/arc_emsdp/Makefile.inc b/tensorflow/lite/micro/examples/person_detection/arc_emsdp/Makefile.inc new file mode 100644 index 00000000000..cb7ba57ecb1 --- /dev/null +++ b/tensorflow/lite/micro/examples/person_detection/arc_emsdp/Makefile.inc @@ -0,0 +1,19 @@ +ifeq ($(TARGET), arc_emsdp) + + person_detection_HDRS += \ + person_detection_patch.txt + + person_detection_TEST_HDRS += \ + person_detection_patch.txt + + +%/person_detection_patch.txt: %/emsdp.lcf %/Makefile + @cp tensorflow/lite/micro/examples/person_detection/arc_emsdp/emsdp.lcf $< + @echo emsdp.lcf > $@ + @sed -E -i 's#MLI_ONLY *\?= *false#MLI_ONLY \?= false\n\ + CXXFLAGS += -DSCRATCH_MEM_X_SIZE=0 -DSCRATCH_MEM_Y_SIZE=0 -DSCRATCH_MEM_Z_SIZE=0\ + CCFLAGS += -DSCRATCH_MEM_X_SIZE=0 -DSCRATCH_MEM_Y_SIZE=0 -DSCRATCH_MEM_Z_SIZE=0#'\ + $(word 2, $^) + @echo Makefile >> $@ + +endif diff --git a/tensorflow/lite/micro/examples/person_detection/arc_emsdp/emsdp.lcf b/tensorflow/lite/micro/examples/person_detection/arc_emsdp/emsdp.lcf new file mode 100644 index 00000000000..34ed267652c --- /dev/null +++ b/tensorflow/lite/micro/examples/person_detection/arc_emsdp/emsdp.lcf @@ -0,0 +1,61 @@ +# SYSTEM memory regions indicate where external memory might be located. 
+# The TCF has no specific knowledge of whether SYSTEM regions contain +# external memory or not. +# CCMWRAP memory regions indicate unusable portions of the address space +# due to CCM memory wrapping into upper addresses beyond its size + +MEMORY { + PSRAM : ORIGIN = 0x10000400, LENGTH = (0x01000000 >> 1) - 0x400 + SRAM : ORIGIN = 0x20000000, LENGTH = 0x00040000 + IVT : ORIGIN = 0x60000000, LENGTH = 0x400 + ICCM0 : ORIGIN = 0x60000400, LENGTH = (0x00020000 - 0x400) +# CCMWRAP0: ORIGIN = 0x60020000, LENGTH = 0x0ffe0000 + DCCM : ORIGIN = 0x80000000, LENGTH = 0x00020000 +# CCMWRAP1: ORIGIN = 0x80020000, LENGTH = 0x0ffe0000 + XCCM : ORIGIN = 0x90000000, LENGTH = 0x00004000 +# CCMWRAP2: ORIGIN = 0x90004000, LENGTH = 0x0fffc000 + YCCM : ORIGIN = 0xa0000000, LENGTH = 0x00004000 +# CCMWRAP3: ORIGIN = 0xa0004000, LENGTH = 0x0fffc000 + } + +SECTIONS { + + GROUP BLOCK(4) : { + .vectors (TEXT) SIZE(DEFINED _IVTSIZE?_IVTSIZE:756): {} = FILL(0xa5a5a5a5,4) + } > IVT + + GROUP BLOCK(4): { + .text? : { *('.text$crt*') } + * (TEXT): {} + * (LIT): {} + } > ICCM0 + + GROUP BLOCK(4): { + .rodata_in_data? : {} + } > PSRAM + + GROUP BLOCK(4): { + .debug_log? : {} + } > SRAM + + GROUP BLOCK(4): { + /* _SDA_BASE_ computed implicitly */ + .sdata?: {} + .sbss?: {} + * (DATA): {} + * (BSS): {} + .Zdata? : {} + .stack ALIGN(4) SIZE(DEFINED _STACKSIZE?_STACKSIZE:8K): {} + .heap? ALIGN(4) SIZE(DEFINED _HEAPSIZE?_HEAPSIZE:8K): {} + } > DCCM + + GROUP BLOCK(4): { + .Xdata? : {} + } > XCCM + + GROUP BLOCK(4): { + .Ydata? 
: {} + } > YCCM +} + + diff --git a/tensorflow/lite/micro/examples/person_detection_experimental/arc_emsdp/Makefile.inc b/tensorflow/lite/micro/examples/person_detection_experimental/arc_emsdp/Makefile.inc new file mode 100644 index 00000000000..94d73f903ed --- /dev/null +++ b/tensorflow/lite/micro/examples/person_detection_experimental/arc_emsdp/Makefile.inc @@ -0,0 +1,16 @@ +ifeq ($(TARGET), arc_emsdp) + + person_detection_HDRS += \ + person_detection_int8_patch.txt + + person_detection_TEST_HDRS += \ + person_detection_int8_patch.txt + + +%/person_detection_int8_patch.txt: %/emsdp.lcf %/Makefile + @cp tensorflow/lite/micro/examples/person_detection_experimental/arc_emsdp/emsdp.lcf $< + @echo emsdp.lcf > $@ + @sed -E -i 's#MLI_ONLY *\?= *false#MLI_ONLY \?= true#' $(word 2, $^) + @echo Makefile > $@ + +endif diff --git a/tensorflow/lite/micro/examples/person_detection_experimental/arc_emsdp/emsdp.lcf b/tensorflow/lite/micro/examples/person_detection_experimental/arc_emsdp/emsdp.lcf new file mode 100644 index 00000000000..98b7e1d911f --- /dev/null +++ b/tensorflow/lite/micro/examples/person_detection_experimental/arc_emsdp/emsdp.lcf @@ -0,0 +1,63 @@ +# SYSTEM memory regions indicate where external memory might be located. +# The TCF has no specific knowledge of whether SYSTEM regions contain +# external memory or not. 
+# CCMWRAP memory regions indicate unusable portions of the address space +# due to CCM memory wrapping into upper addresses beyond its size + +MEMORY { + PSRAM : ORIGIN = 0x10000400, LENGTH = (0x01000000 >> 1) - 0x400 + SRAM : ORIGIN = 0x20000000, LENGTH = 0x00040000 + IVT : ORIGIN = 0x60000000, LENGTH = 0x400 + ICCM0 : ORIGIN = 0x60000400, LENGTH = (0x00020000 - 0x400) +# CCMWRAP0: ORIGIN = 0x60020000, LENGTH = 0x0ffe0000 + DCCM : ORIGIN = 0x80000000, LENGTH = 0x00020000 +# CCMWRAP1: ORIGIN = 0x80020000, LENGTH = 0x0ffe0000 + XCCM : ORIGIN = 0x90000000, LENGTH = 0x00004000 +# CCMWRAP2: ORIGIN = 0x90004000, LENGTH = 0x0fffc000 + YCCM : ORIGIN = 0xa0000000, LENGTH = 0x00004000 +# CCMWRAP3: ORIGIN = 0xa0004000, LENGTH = 0x0fffc000 + } + +SECTIONS { + + GROUP BLOCK(4) : { + .vectors (TEXT) SIZE(DEFINED _IVTSIZE?_IVTSIZE:756): {} = FILL(0xa5a5a5a5,4) + } > IVT + + GROUP BLOCK(4): { + .text? : { *('.text$crt*') } + * (TEXT): {} + * (LIT): {} + } > ICCM0 + + GROUP BLOCK(4): { + .rodata_in_data? : {} + } > PSRAM + + GROUP BLOCK(4): { + /* _SDA_BASE_ computed implicitly */ + .sdata?: {} + .sbss?: {} + * (DATA): {} + * (BSS): {} + .debug_log? : {} + } > SRAM + + GROUP BLOCK(4): { +# TODO: Move tensor arena to DCCM when it will be possible +# .tensor_arena? : {} + .Zdata? : {} + .heap? ALIGN(4) SIZE(DEFINED _HEAPSIZE?_HEAPSIZE:8K): {} + .stack ALIGN(4) SIZE(DEFINED _STACKSIZE?_STACKSIZE:8K): {} + } > DCCM + + GROUP BLOCK(4): { + .Xdata? : {} + } > XCCM + + GROUP BLOCK(4): { + .Ydata? 
: {} + } > YCCM +} + + From afef62b9764bc08289006e3a1ea60cffa9c55888 Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Wed, 29 Apr 2020 14:42:14 +0300 Subject: [PATCH 38/45] ARC: Move shared lcf + Cleanup and comments --- .../micro_speech/arc_emsdp/Makefile.inc | 8 +- .../person_detection/arc_emsdp/Makefile.inc | 7 +- .../arc_emsdp/Makefile.inc | 5 + .../arc_emsdp/emsdp.lcf | 8 +- .../tools/make/targets/arc/emsdp/emsdp.lcf | 15 +- .../make/targets/arc/emsdp/emsdp_v2.lcf} | 7 +- .../tools/make/targets/arc/iotdk/iotdk.lcf | 47 - .../tools/make/targets/arc/iotdk/iotdk.tcf | 4621 ----------------- .../micro/tools/make/targets/arc/memory.lcf | 50 - 9 files changed, 39 insertions(+), 4729 deletions(-) rename tensorflow/lite/micro/{examples/person_detection/arc_emsdp/emsdp.lcf => tools/make/targets/arc/emsdp/emsdp_v2.lcf} (90%) delete mode 100644 tensorflow/lite/micro/tools/make/targets/arc/iotdk/iotdk.lcf delete mode 100644 tensorflow/lite/micro/tools/make/targets/arc/iotdk/iotdk.tcf delete mode 100644 tensorflow/lite/micro/tools/make/targets/arc/memory.lcf diff --git a/tensorflow/lite/micro/examples/micro_speech/arc_emsdp/Makefile.inc b/tensorflow/lite/micro/examples/micro_speech/arc_emsdp/Makefile.inc index 7fe4906cdf9..850263f0eb9 100644 --- a/tensorflow/lite/micro/examples/micro_speech/arc_emsdp/Makefile.inc +++ b/tensorflow/lite/micro/examples/micro_speech/arc_emsdp/Makefile.inc @@ -1,5 +1,11 @@ ifeq ($(TARGET), arc_emsdp) +# Patch of arc make project to adjust it specifically for micro speech example. +# In particular: +# - Extend Heap and stack size for application needs +# - Use Linker command file with better usage of fast memory +# - In case project was generated with MLI usage, reduce scratch buffers. 
+ MICRO_SPEECH_HDRS += \ micro_speech_patch.txt @@ -10,7 +16,7 @@ ifeq ($(TARGET), arc_emsdp) micro_speech_patch.txt %/micro_speech_patch.txt: %/emsdp.lcf %/Makefile - @cp tensorflow/lite/micro/examples/person_detection/arc_emsdp/emsdp.lcf $< + @cp tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp_v2.lcf $< @echo emsdp.lcf > $@ @sed -E -i 's#-Hheap=[^ ]*#\-Hheap=16K \-Hstack=16K#g' $(word 2, $^) @sed -E -i 's#MLI_ONLY *\?= *false#MLI_ONLY \?= false\n\ diff --git a/tensorflow/lite/micro/examples/person_detection/arc_emsdp/Makefile.inc b/tensorflow/lite/micro/examples/person_detection/arc_emsdp/Makefile.inc index cb7ba57ecb1..29a09466e83 100644 --- a/tensorflow/lite/micro/examples/person_detection/arc_emsdp/Makefile.inc +++ b/tensorflow/lite/micro/examples/person_detection/arc_emsdp/Makefile.inc @@ -1,5 +1,10 @@ ifeq ($(TARGET), arc_emsdp) +# Patch of arc make project to adjust it specifically +# for person detection example. In particular: +# - Use Linker command file with better usage of fast memory +# - In case project was generated with MLI usage, reduce scratch buffers. 
+ person_detection_HDRS += \ person_detection_patch.txt @@ -8,7 +13,7 @@ ifeq ($(TARGET), arc_emsdp) %/person_detection_patch.txt: %/emsdp.lcf %/Makefile - @cp tensorflow/lite/micro/examples/person_detection/arc_emsdp/emsdp.lcf $< + @cp tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp_v2.lcf $< @echo emsdp.lcf > $@ @sed -E -i 's#MLI_ONLY *\?= *false#MLI_ONLY \?= false\n\ CXXFLAGS += -DSCRATCH_MEM_X_SIZE=0 -DSCRATCH_MEM_Y_SIZE=0 -DSCRATCH_MEM_Z_SIZE=0\ diff --git a/tensorflow/lite/micro/examples/person_detection_experimental/arc_emsdp/Makefile.inc b/tensorflow/lite/micro/examples/person_detection_experimental/arc_emsdp/Makefile.inc index 94d73f903ed..c00f9b89953 100644 --- a/tensorflow/lite/micro/examples/person_detection_experimental/arc_emsdp/Makefile.inc +++ b/tensorflow/lite/micro/examples/person_detection_experimental/arc_emsdp/Makefile.inc @@ -1,5 +1,10 @@ ifeq ($(TARGET), arc_emsdp) +# Patch of arc make project to adjust it specifically +# for experimental person detection example. In particular: +# - Use Linker command file with better usage of fast memory +# - Stripout TFLM reference code by default. + person_detection_HDRS += \ person_detection_int8_patch.txt diff --git a/tensorflow/lite/micro/examples/person_detection_experimental/arc_emsdp/emsdp.lcf b/tensorflow/lite/micro/examples/person_detection_experimental/arc_emsdp/emsdp.lcf index 98b7e1d911f..2d7954217d3 100644 --- a/tensorflow/lite/micro/examples/person_detection_experimental/arc_emsdp/emsdp.lcf +++ b/tensorflow/lite/micro/examples/person_detection_experimental/arc_emsdp/emsdp.lcf @@ -1,6 +1,8 @@ -# SYSTEM memory regions indicate where external memory might be located. -# The TCF has no specific knowledge of whether SYSTEM regions contain -# external memory or not. 
+# Difference with common EMSDP LCF file (to reduce data access time): +# - move data from external PSRAM to on-chip memory +# - move text from SRAM to ICCM +# - TODO: Move tensor arena to DCCM to reduce data flow between fast and extrnal memory +# # CCMWRAP memory regions indicate unusable portions of the address space # due to CCM memory wrapping into upper addresses beyond its size diff --git a/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp.lcf b/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp.lcf index c13dea5c6a0..b01b4835071 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp.lcf +++ b/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp.lcf @@ -1,6 +1,15 @@ -# SYSTEM memory regions indicate where external memory might be located. -# The TCF has no specific knowledge of whether SYSTEM regions contain -# external memory or not. +# Common EMSDP LCF File for applications +# +# external SRAM memory is used for code, because some TFLM applications includes the whole +# set of supported kernels which doesn't fit to ICCM0. +# It could slow performance a bit. Smaller applications can use ICCM0 instead. +# +# External PSRAM is used for potentially big sections. In particular: +# - rodata_in data which typically includes protobuf with model. +# - other .data which typically includes tensor arena. 
+# +# stack and heap are kept in DCCM which is the closest memory to the core + # CCMWRAP memory regions indicate unusable portions of the address space # due to CCM memory wrapping into upper addresses beyond its size diff --git a/tensorflow/lite/micro/examples/person_detection/arc_emsdp/emsdp.lcf b/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp_v2.lcf similarity index 90% rename from tensorflow/lite/micro/examples/person_detection/arc_emsdp/emsdp.lcf rename to tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp_v2.lcf index 34ed267652c..a379fe69e21 100644 --- a/tensorflow/lite/micro/examples/person_detection/arc_emsdp/emsdp.lcf +++ b/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp_v2.lcf @@ -1,6 +1,7 @@ -# SYSTEM memory regions indicate where external memory might be located. -# The TCF has no specific knowledge of whether SYSTEM regions contain -# external memory or not. +# Difference with common EMSDP LCF file (to reduce data access time): +# - move data from external PSRAM to DCCM +# - move text from SRAM to ICCM +# # CCMWRAP memory regions indicate unusable portions of the address space # due to CCM memory wrapping into upper addresses beyond its size diff --git a/tensorflow/lite/micro/tools/make/targets/arc/iotdk/iotdk.lcf b/tensorflow/lite/micro/tools/make/targets/arc/iotdk/iotdk.lcf deleted file mode 100644 index da39ae911ff..00000000000 --- a/tensorflow/lite/micro/tools/make/targets/arc/iotdk/iotdk.lcf +++ /dev/null @@ -1,47 +0,0 @@ -# SYSTEM memory regions indicate where external memory might be located. -# The TCF has no specific knowledge of whether SYSTEM regions contain -# external memory or not. 
-# CCMWRAP memory regions indicate unusable portions of the address space -# due to CCM memory wrapping into upper addresses beyond its size - -MEMORY { -# SYSTEM0 : ORIGIN = 0x00000000, LENGTH = 0x20000000 - ICCM0 : ORIGIN = 0x20000000, LENGTH = 0x00040000 -# CCMWRAP0: ORIGIN = 0x20040000, LENGTH = 0x0ffc0000 -# SYSTEM1 : ORIGIN = 0x30000000, LENGTH = 0x50000000 - DCCM : ORIGIN = 0x80000000, LENGTH = 0x00020000 -# CCMWRAP1: ORIGIN = 0x80020000, LENGTH = 0x0ffe0000 -# SYSTEM2 : ORIGIN = 0x90000000, LENGTH = 0x30000000 - XCCM : ORIGIN = 0xc0000000, LENGTH = 0x00008000 -# CCMWRAP2: ORIGIN = 0xc0008000, LENGTH = 0x0fff8000 -# SYSTEM3 : ORIGIN = 0xd0000000, LENGTH = 0x10000000 - YCCM : ORIGIN = 0xe0000000, LENGTH = 0x00008000 -# CCMWRAP3: ORIGIN = 0xe0008000, LENGTH = 0x0fff8000 -# SYSTEM4 : ORIGIN = 0xf0000000, LENGTH = 0x10000000 - } -SECTIONS { - GROUP: { - .text? : { *('.text$crt*') } - * (TEXT): {} - * (LIT): {} - } > ICCM0 - - GROUP: { - /* _SDA_BASE_ computed implicitly */ - .sdata?: {} - .sbss?: {} - * (DATA): {} - * (BSS): {} - .stack ALIGN(4) SIZE(DEFINED _STACKSIZE?_STACKSIZE:32768): {} - .heap? ALIGN(4) SIZE(DEFINED _HEAPSIZE?_HEAPSIZE:0): {} - } > DCCM - GROUP: { - .Xdata? : {} - } > XCCM - GROUP: { - .Ydata? : {} - } > YCCM - GROUP BIND(0x0): { - .vectors (TEXT) SIZE(DEFINED _IVTSIZE?_IVTSIZE:684): {} = FILL(0xa5a5a5a5,4) - } - } diff --git a/tensorflow/lite/micro/tools/make/targets/arc/iotdk/iotdk.tcf b/tensorflow/lite/micro/tools/make/targets/arc/iotdk/iotdk.tcf deleted file mode 100644 index 004215a2f6a..00000000000 --- a/tensorflow/lite/micro/tools/make/targets/arc/iotdk/iotdk.tcf +++ /dev/null @@ -1,4621 +0,0 @@ - - - - - - - - - - - - - - - - - - - - 10*2) -# -# The speed of simulation can be greatly increased by using a faster JTAG clock, but a dependency will warn if it exceeds 1/2 of the cpu clock. 
-# --jtag_tclk 4 - -# execution_trace_level --- -# This traces committed instructions as they execute, and gathers statistics -# visible in the debugger for counting instructions & cycle delays. -# At the "stats" level ony the statistics are gathered and no trace is printed. -# "file" is equivalent to "full", but the results go to a trace .txt file instead. -# --execution_trace_level stats - -# generate_ipxact --- -# Generate ipxact.xml file describing the CPUisle or archipelago frontier -# --generate_ipxact false - -# ipxact_relative_path_names --- -# Use relative path names for Verilog files in the ipxact. -# Otherwise, absolute path names are used. -# --ipxact_relative_path_names true - -# optional_encryption --- -# When selected, encrypted RTL output is generated. -# --optional_encryption false - -# ignore_encrypt_license --- -# When selected, pretend the encryption license is missing. For testing. -# --ignore_encrypt_license false - -# ignore_clear_license --- -# When selected, pretend the cleartest license is missing. For testing. -# --ignore_clear_license false - - -######## Tool Configuration --- cgen.1_0 ######## - -# Create Tool Configuration --create cgen.1_0 "System.Tool Configuration" - -# mwdt_version --- Selects the MetaWare version to be used with the TCF file. -# Change from the default to an older or newer toolset version if you want the TCF file to be used with an older or newer version of the MetaWare tools. --mwdt_version K-2015.09 - -# code_base_addr --- -# The base address to assign to the executable code segment in the linker command file when there is no ICCM in the build. This value is ignored when there is an ICCM. -# --code_base_addr 0 - -# data_base_addr --- -# The base address to assign to the data segment in the linker command file when the data is not being mapped to a DCCM. This value is ignored when the data segment is mapped to a DCCM, as in that case the base address of the DCCM memory is used. 
-# -# A value of 0xffffffff means that the data segment will not be mapped to any specific address. -# --data_base_addr 4294967295 - - -######## IO Software --- com.arc.software.dfss.sw_io.1_0 ######## - -# Create IO Software --create com.arc.software.dfss.sw_io.1_0 "System.IO Software" - -# sw_io --- Command line option for Software element 'IO Software' --sw_io true - - -######## DSP Software --- com.arc.software.dfss.sw_dsp.1_0 ######## - -# Create DSP Software --create com.arc.software.dfss.sw_dsp.1_0 "System.DSP Software" - -# sw_dsp --- Command line option for Software element 'DSP Software' --sw_dsp true - - -######## Infrastructure Software --- com.arc.software.dfss.sw_infra.1_0 ######## - -# Create Infrastructure Software --create com.arc.software.dfss.sw_infra.1_0 "System.Infrastructure Software" - -# sw_infra --- Command line option for Software element 'Infrastructure Software' --sw_infra true - - -######## CPUisle --- com.arc.hardware.CPU_isle.1_0 ######## - -# Create CPUisle --create com.arc.hardware.CPU_isle.1_0 System.CPUisle - -# unique_name --- verilog module modifier prefix --unique_name "" - -# ArcNum --- The processor number as read back in the ARCNUM field of the IDENTITY register. --arc_num 1 - -# instances --- -# The number of instantiations of this core. -# --instances 1 - -# CPUFloorplan --- Floorplan giving relative placement of the RAMs for the given configuration of ARCv2HS or ARCv2EM in this CPUisle --cpu_floorplan em9d_xyccm - -# userCPUFloorplanPath --- Pathname of user floorplan for the CPU when using a hierarchical implementation --usercpufloorplan_path "" - -# pinLocationConstraintsFile --- Pathname+filename of the physical pin location constraints file or just "side1" (all pins on l.h.s) or "side2" (pins on top only) or "side3" (pins on r.h.s. 
only) or "side4" (pins on bottom only) to get a template file generated --pin_location_constraints_file "" - - -######## ARCv2EM --- com.arc.hardware.ARCv2EM.1_0 ######## - -# Create ARCv2EM --create com.arc.hardware.ARCv2EM.1_0 System.CPUisle.ARCv2EM - -# arcv2em --- Description to follow --arcv2em true - -# def_div2ref --- This specifies the clock division factor at reset. It is used for mss clock controller to generate core clock, and the value N means core is running at (1/N) x ref_clk. --def_div2ref 1 - -# addr_size --- This defines the address bus width (in bits). --addr_size 32 - -# pc_size --- This defines the program counter (in bits). --pc_size 32 - -# lpc_size --- This defines the size of the loop counter (in bits). --lpc_size 32 - -# halt_on_reset --- This defines whether the core is halted initially on reset. --halt_on_reset true - -# byte_order --- This defines the endianness of the core. --byte_order little - -# code_density_option --- This reduces the size of program memory by adding instructions that condense commonly used instruction patterns with some marginal increase in processor gate count. The added instructions are ENTER_S, LEAVE_S, JLI_S, BI, BIH. --code_density_option true - -# bitscan_option --- This adds instructions for efficient search of bits within a 32 bit word, including normalize (NORM, NORMH, NORMW) and find first or last set bit (FFS, FLS) instructions. --bitscan_option true - -# shift_option --- The Shift ISA option adds variable and multi-length shift rotation instructions: (0) No shift/rotation instructions (1) ASR16, ASR8, LSR8, LSL8, ROL8, ROR8 (2) ASRM, ASLM, LSRM, RORM (3) ASR16, ASR8, LSR8, LSL8, ROL8, ROR8, ASRM, ASLM, LSRM, RORM --shift_option 3 - -# swap_option --- This adds two instructions used to swap half-words or bytes in a 32b word. Useful for converting between little to big endianess and vice-versa. 
--swap_option true - -# div_rem_option --- The DIV/REM option adds non-blocking multi-cycle implementation of integer divide/remainder functions. Added instructions are DIV, DIVU (integer divide), REM and REMU (integer divide remainder).radix2 takes 33 cycles. radix4_enhanced takes 3 to 19 cycles per operation. --div_rem_option none - -# mpy_option --- The Multiplier ISA option allows selection between several multiplier configurations to tradeoff performance with silicon area. -# For select multiply options, when the DIV/REM option is also selected, some datapath resources will be shared between the multiply and divide pipeline to minimize total area. -# -# Cycle count (16-bit, lower 32-bit or upper 32-bit) for the different configurations is as follows: -#

-# 
-# option  16/L32/U32  Instructions
-# ------  ----------  ---------------------
-#       
-# none	  -/-/-     None
-# wlh1	  1/1/1     MPYW/U, MPY/U, MPYH/U
-# wlh2	  2/2/2     MPYW/U, MPY/U, MPYH/U
-# wlh3	  2/3/3     MPYW/U, MPY/U, MPYH/U
-# wlh4	  2/4/5     MPYW/U, MPY/U, MPYH/U
-# wlh5	  5/9/9     MPYW/U, MPY/U, MPYH/U
-# 
-# --mpy_option none - -# code_protection --- The ARC EM architecture divides the memory into 16 regions, which can be protected individually. This feature adds a 16-bit input to the processor core, one bit per region. When the protect bit is set, the processor disables any load or store to the corresponding region. An attempt to access a protected region raises an EV_ProtV exception. --code_protection true - -# stack_checking --- Stack checking is a mechanism for checking stack accesses and raising an exception when a stack overflow or underflow is detected. --stack_checking true - -# unaligned_option --- This enables unaligned loads and stores. --unaligned_option true - -# intvbase_preset --- This sets the interrupt vector base configuration register, VECBASE_AC_BUILD. The vector base address is aligned to a 1KB boundary, so the required address value should be divided by 1K (i.e. do not include the lower 10 bits). On reset, this register is loaded into the interrupt vector base address register, INT_VECTOR_BASE. --intvbase_preset 0 - -# rgf_impl --- This defines whether the register file is implemented using flip-flops, or with a hard macro. --rgf_impl flip_flops - -# rgf_num_regs --- This defines the size (in 32b register) of the processor register file. --rgf_num_regs 32 - -# rgf_wr_ports --- This defines the number of write ports on the register file. --rgf_wr_ports 2 - -# rgf_num_banks --- Dual register banks are useful if Fast IRQ has been configured, but may be selected even if not. --rgf_num_banks 2 - -# rgf_banked_regs --- This selects the number of registers that are replicated in the second register-file bank. --rgf_banked_regs 32 - -# turbo_boost --- This enables the Turbo Boost synthesis option. By enabling this option, the achievable clock frequency is increased, but at the cost of an additional cycle latency on branch instructions. 
--turbo_boost false - -# infer_alu_adder --- infer: datapath is described as behavioral code: A + B -# instantiate: datapath is instantiated as a detailed multi-stage code of a carry-lookahead adder. It is generally preferable to use the infer option and add directives for your target synthesizer. --infer_alu_adder infer - -# infer_mpy_wtree --- infer: datapath is described as behavioral code: A * B (applies to only wlh3, wlh4 and wlh5 designs) -# instantiate: datapath is instantiated as a detailed multi-stage code of a Wallace Tree multiplier It is generally preferable to use the infer option and add directives for your target synthesizer. --infer_mpy_wtree instantiate - -# power_domains --- Adds three separate power domains to the core, and propagates power-gate control signals to the top level of the core. Also generates UPF constraints and commands in the low-power scripts --power_domains true - -# dvfs --- Adds logic to the core to allow dynamic controlling of voltage and frequency and propagates the associated control signals to the top level of core --dvfs true - -# voltage_domains --- Creates a voltage domain split between RAM and std cell parts to support Ultra Low Voltage on cells and generates UPF constraints --voltage_domains false - -# mem_bus_option --- The core supports three bus protocols for accessing external memory: AHB & AHB-Lite. AHB-Lite-single means instruction fetch and data access share a single AHB-Lite port. AHB-Lite-dual means separate AHB-Lite port for each initiator. --mem_bus_option AHB-Lite-dual - -# mem_bus_reg_interface --- Specifies whether the memory bus interface is registered. --mem_bus_reg_interface true - -# dmi_burst_option --- This will enable high-throughput burst support on the DMI slave interfaces. By enabling this option, the peak DMI read throughput goes from 1 word per 3 cycles to N words per N+2 cycles, in which N is the AHB burst lengthDMI write throughput goes from 1 word per 3 cycles to 1 word per cycle. 
--dmi_burst_option false - -# has_dmp_peripheral --- This option enables the redirection of load/store accesses to one segment (1/16) of the addressable space to a dedicated peripheral bus. This offers high system integration and reduces overall system cost. --has_dmp_peripheral false - -# per_bus_option --- The core supports one bus protocol for accessing the peripheral space, when enabled: AHB-Lite. --per_bus_option AHB-Lite - -# per_bus_reg_interface --- Specifies whether the peripheral bus interface is registered. --per_bus_reg_interface false - -# clock_gating --- This enables the insertion of architectural clock gate elements in the design. By enabling this option, the clocks to various parts of the design will be disabled when the logic they drive is not in use to save power. --clock_gating true - -# byte_parity --- If parity protection on the CCMs is configured, this option is used to enable parity protection on a per-byte basis. Otherwise, parity will be per word basis --byte_parity false - -# prot_pipelined --- Check the box if CCM memories are configured for ECC, and you want single-bit errors to be corrected, written back to memory, and re-fetched. When unchecked, single bit errors are corrected when read from memory, but the offending memory location itself is not corrected with a writeback --prot_pipelined false - -# cct_test_ena --- When ECC is configured, this option enables automatic generation of error conditions in relevant testbench memories to exercise error detection and correction features --cct_test_ena false - - -######## AGU --- com.arc.hardware.AGU.1_0 ######## - -# Create AGU --create com.arc.hardware.AGU.1_0 System.CPUisle.ARCv2EM.AGU - -# agu_size --- Predefined configurations of modifiers, address -# pointers and offset registers -#
-# 
-#         address     address                     
-#         pointers    offset regs      modifiers  
-#        ----------- --------------- ------------ 
-# small:     4           2                 4      
-# medium:    8           4                 12     
-# large:     12          8                 24     
-# 
-# --agu_size small - -# agu_accord --- Enable the accordion stage if operating frequency is critical --agu_accord true - -# agu_wb_depth --- Write buffer depth --agu_wb_depth 2 - - -######## DSP --- com.arc.hardware.DSP.1_0 ######## - -# Create DSP --create com.arc.hardware.DSP.1_0 System.CPUisle.ARCv2EM.DSP - -# dsp_complex --- Enable/disable support for single cycle 16b+16b complex instructions and butterfly operations, else 2-cycle complex instructions only without butterfly support --dsp_complex true - -# dsp_itu --- Enable/disable support for ITU bit-accurate 1 bit fractional shift before accumulation, else 1-bit fractional shift result after accumulation only --dsp_itu true - -# dsp_divsqrt --- Enable/disable support for divide and square root operations: DIV(U), REM(U), SQRT --dsp_divsqrt radix2 - -# dsp_accshift --- Select support for accumulator shift operations: no supported, limited shift support only or full shift support and convergent rounding --dsp_accshift full - -# dsp_impl --- The datapath components may be inferred from Verilog for better area or optimized using carry-save components for better timing --dsp_impl optimized - - -######## Interrupt Controller --- com.arc.hardware.Interrupt_Controller.1_0 ######## - -# Create Interrupt Controller --create com.arc.hardware.Interrupt_Controller.1_0 "System.CPUisle.ARCv2EM.Interrupt Controller" - -# number_of_interrupts --- This is the total number of interrupts available to the core. Some interrupts are allocated statically to a specific interrupt line (for example, timer interrupts). For more information on Interrupt and register-file options, see DesignWare ARCv2 ISA Programmers Reference Manual. --number_of_interrupts 95 - -# number_of_levels --- Priority levels in the interrupt controller. --number_of_levels 4 - -# external_interrupts --- This is the total number of interrupt pins available for external system components. This parameter must be less than the total number of interrupts. 
--external_interrupts 60 - -# firq_option --- This enables the fast-interrupts option, (priority level 0 interrupts), which uses an alternate register bank (if configured) instead of saving the context to memory. --firq_option true - - -######## Timer 0 --- com.arc.hardware.Timer_0.1_0 ######## - -# Create Timer 0 --create com.arc.hardware.Timer_0.1_0 "System.CPUisle.ARCv2EM.Timer 0" - -# timer_0_int_level --- This sets the interrupt level (and implicitly the priority: level 0 is highest) of timer 0. --timer_0_int_level 1 - - -######## Timer 1 --- com.arc.hardware.Timer_1.1_0 ######## - -# Create Timer 1 --create com.arc.hardware.Timer_1.1_0 "System.CPUisle.ARCv2EM.Timer 1" - -# timer_1_int_level --- This sets the interrupt level (and implicitly the priority: level 0 is highest) of timer 1. --timer_1_int_level 0 - - -######## Watchdog Timer --- com.arc.hardware.Watchdog_Timer.1_0 ######## - -# Create Watchdog Timer --create com.arc.hardware.Watchdog_Timer.1_0 "System.CPUisle.ARCv2EM.Watchdog Timer" - -# watchdog_size --- Specifies the bit width of the internal counter used within the timer. --watchdog_size 16 - -# watchdog_clk --- Specifies whether the timer should be driven from a separate clock. --watchdog_clk true - - -######## Data Memory Initiator --- com.arc.hardware.Data_Memory_Initiator.1_0 ######## - -# Create Data Memory Initiator --create com.arc.hardware.Data_Memory_Initiator.1_0 "System.CPUisle.ARCv2EM.Data Memory Initiator" - -######## Instruction Fetch Queue --- com.arc.hardware.Instruction_Fetch_Queue.1_0 ######## - -# Create Instruction Fetch Queue --create com.arc.hardware.Instruction_Fetch_Queue.1_0 "System.CPUisle.ARCv2EM.Instruction Fetch Queue" - -# ifqueue_size --- This defines the number of entires in the Instruction Fetch Queue. --ifqueue_size 4 - -# ifqueue_burst_size --- This sets the burst size for bus data transfers (in 32-bit words). It cannot exceed the number of entries. 
--ifqueue_burst_size 2 - - -######## DCCM --- com.arc.hardware.DCCM.1_0 ######## - -# Create DCCM --create com.arc.hardware.DCCM.1_0 System.CPUisle.ARCv2EM.DCCM - -# dccm_size --- This defines the size of the Data Closely Coupled Memory (DCCM) in bytes --dccm_size 131072 - -# dccm_base --- Sets the initial memory region assignment for DCCM --dccm_base 8 - -# dccm_interleave --- Split DCCM into even/odd memory banks. --dccm_interleave false - -# dccm_prot --- Specifies the type of protection built for the DCCM. --dccm_prot None - -# dccm_prot_level --- Specifies the level protection. --dccm_prot_level Data_Only - -# dccm_prot_exceptions --- When the core is configured with ECC or Parity, cause exception generation hardware to be created for uncorrectable errors detected on the DCCM --dccm_prot_exceptions true - -# dccm_dmi --- This enables external access through a DMI (direct memory interface) port. --dccm_dmi true - - -######## ICCM0 --- com.arc.hardware.ICCM0.1_0 ######## - -# Create ICCM0 --create com.arc.hardware.ICCM0.1_0 System.CPUisle.ARCv2EM.ICCM0 - -# iccm0_size --- This defines the size of ICCM0 in bytes.This ICCM has 0 wait states. --iccm0_size 262144 - -# iccm0_base --- Sets the initial memory region assignment for ICCM0 --iccm0_base 2 - -# iccm0_wide --- Creates ICCM0 as 64b memory to reduce accesses. --iccm0_wide true - -# iccm0_prot --- Specifies the type of protection built for ICCM0. --iccm0_prot None - -# iccm0_prot_level --- Specifies the level of protection. --iccm0_prot_level Data_Only - -# iccm0_prot_exceptions --- When the core is configured with ECC or Parity, cause exception generation hardware to be created for uncorrectable errors detected on the ICCM0 --iccm0_prot_exceptions true - -# iccm0_dmi --- This enables external access through a DMI (direct memory interface) port. 
--iccm0_dmi true - - -######## XY --- com.arc.hardware.XY.1_0 ######## - -# Create XY --create com.arc.hardware.XY.1_0 System.CPUisle.ARCv2EM.XY - -# xy_config --- XY memory configuration: -# One memory: DCCM only. -# Two memories: DCCM + Y. -# Three memories: DCCM + X + Y. --xy_config dccm_x_y - -# xy_size --- Size of X and Y memories if included. -# X and Y memories both have the same configured size. --xy_size 32768 - -# xy_interleave --- Split XY memories into odd/even instances to enable single cycle unaligned access. --xy_interleave true - -# xy_x_base --- Base region for X memory. All accesses to this region will initiate a transfer on the X memory. --xy_x_base 12 - -# xy_y_base --- Base region for Y memory. All accesses to this region will initiate a transfer on the Y memory. --xy_y_base 14 - - -######## DMA Controller --- com.arc.hardware.DMA_Controller.1_0 ######## - -# Create DMA Controller --create com.arc.hardware.DMA_Controller.1_0 "System.CPUisle.ARCv2EM.DMA Controller" - -# dmac_channels --- This options specifies the number of DMA channels implemented in the DMA controller --dmac_channels 16 - -# dmac_fifo_depth --- This option specifies the DMA transfer FIFO depth in 32b words. 
--dmac_fifo_depth 4 - -# dmac_int_config --- None: the DMA controller cannot raise an interrupt -# Single-External: single done and single error interrupt signal for all DMA channels, and the interrupt signals are routed to a port at the top of the EM logical hierarchy -# Multiple-External: each DMA channel can be configured to raise separate (per-channel) done and error interrupts, and the interrupt signals are routed to ports at the top of the EM logical hierarchy -# Single-Internal: single done and single error interrupt signals for all DMA channels, and the interrupt signals are internal to the EM core -# Multiple-Internal: each DMA channel can be configured to raise separate (per-channel) done and error interrupts, and the interrupt signals are internal to the EM core --dmac_int_config Multiple-Internal - -# dmac_registers --- This option defines the number of DMA channels with their registers located in auxiliary space. --dmac_registers 16 - -# dmac_mem_if --- This option specifies whether the DMA controller system memory interface is integrated into the existing EM system memory interfaces or has its own interface. --dmac_mem_if separate - - -######## JTAG Interface --- com.arc.hardware.JTAG_Interface.1_0 ######## - -# Create JTAG Interface --create com.arc.hardware.JTAG_Interface.1_0 "System.CPUisle.ARCv2EM.JTAG Interface" - -######## Debug Interface --- com.arc.hardware.Debug_Interface.1_0 ######## - -# Create Debug Interface --create com.arc.hardware.Debug_Interface.1_0 "System.CPUisle.ARCv2EM.Debug Interface" - -######## Actionpoints --- com.arc.hardware.Actionpoints.1_0 ######## - -# Create Actionpoints --create com.arc.hardware.Actionpoints.1_0 System.CPUisle.ARCv2EM.Actionpoints - -# num_actionpoints --- This is the number of trigger events available. 
--num_actionpoints 8 - -# aps_feature --- Selects Actionpoint feature set --aps_feature min - - -######## SmaRT --- com.arc.hardware.SmaRT.1_0 ######## - -# Create SmaRT --create com.arc.hardware.SmaRT.1_0 System.CPUisle.ARCv2EM.SmaRT - -# smart_stack_entries --- This specifies the number of entries in the trace buffer. --smart_stack_entries 64 - -# smart_implementation --- Flip-flop = FF-based design. Memory = memory-based design (provides better density for larger trace buffers). --smart_implementation memory - - -######## Memory Protection Unit --- com.arc.hardware.Memory_Protection_Unit.1_0 ######## - -# Create Memory Protection Unit --create com.arc.hardware.Memory_Protection_Unit.1_0 "System.CPUisle.ARCv2EM.Memory Protection Unit" - -# mpu_num_regions --- Number of configured memory regions. --mpu_num_regions 16 - -# mpu_32b --- Set the minimal region size to be 32 byte instead of 2KB. --mpu_32b false - - -######## Floating-point unit --- com.arc.hardware.Floating_point_unit.1_0 ######## - -# Create Floating-point unit --create com.arc.hardware.Floating_point_unit.1_0 "System.CPUisle.ARCv2EM.Floating-point unit" - -# fpu_dp_assist --- This enables double-precision acceleration instructions. --fpu_dp_assist true - -# fpu_fma_option --- This enables the fused multiply-add & multiply-subtract instructions. --fpu_fma_option true - -# fpu_mas_cycles --- Make mul/add/sub multicycle to achieve a higher clock speed. 
--fpu_mas_cycles 2 - -# fpu_div_option --- This enables divide & square-root acceleration --fpu_div_option true - -# fpu_div_cycles --- "inferred" option infers DSP datapath elements from verilog operators for better area and "optimized" option selects hardware for better timing --fpu_div_cycles 17 - - -######## Performance Monitor --- com.arc.hardware.Performance_Monitor.1_0 ######## - -# Create Performance Monitor --create com.arc.hardware.Performance_Monitor.1_0 "System.CPUisle.ARCv2EM.Performance Monitor" - -# pct_counters --- Number of counters for performance monitoring. --pct_counters 8 - - -######## dsp_trig --- com.arc.hardware.dfss.dsp_trig.1_0 ######## - -# Create dsp_trig --create com.arc.hardware.dfss.dsp_trig.1_0 System.CPUisle.ARCv2EM.dsp_trig - -# dsp_trig --- Command line option for EIA extension component 'dsp_trig'. --dsp_trig true - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_gpio_4b0 --- com.arc.hardware.dfss.io_gpio_4b0.1_0 ######## - -# Create io_gpio_4b0 --create com.arc.hardware.dfss.io_gpio_4b0.1_0 System.CPUisle.ARCv2EM.io_gpio_4b0 - -# io_gpio_4b0 --- Command line option for EIA extension component 'io_gpio_4b0'. --io_gpio_4b0 true - -# io_gpio_4b0_debounce --- Selects the inclusion of Debounce logic --io_gpio_4b0_debounce 1 - -# io_gpio_4b0_readback_sync --- Selects the inclusion of metastability registers on the read back path when reading the external 'ext_porta' signal --io_gpio_4b0_readback_sync 1 - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_gpio_4b1 --- com.arc.hardware.dfss.io_gpio_4b1.1_0 ######## - -# Create io_gpio_4b1 --create com.arc.hardware.dfss.io_gpio_4b1.1_0 System.CPUisle.ARCv2EM.io_gpio_4b1 - -# io_gpio_4b1 --- Command line option for EIA extension component 'io_gpio_4b1'. --io_gpio_4b1 true - -# io_gpio_4b1_debounce --- Selects the inclusion of Debounce logic --io_gpio_4b1_debounce 1 - -# io_gpio_4b1_readback_sync --- Selects the inclusion of metastability registers on the read back path when reading the external 'ext_porta' signal --io_gpio_4b1_readback_sync 1 - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_gpio_4b2 --- com.arc.hardware.dfss.io_gpio_4b2.1_0 ######## - -# Create io_gpio_4b2 --create com.arc.hardware.dfss.io_gpio_4b2.1_0 System.CPUisle.ARCv2EM.io_gpio_4b2 - -# io_gpio_4b2 --- Command line option for EIA extension component 'io_gpio_4b2'. --io_gpio_4b2 true - -# io_gpio_4b2_debounce --- Selects the inclusion of Debounce logic --io_gpio_4b2_debounce 1 - -# io_gpio_4b2_readback_sync --- Selects the inclusion of metastability registers on the read back path when reading the external 'ext_porta' signal --io_gpio_4b2_readback_sync 1 - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_gpio_8b0 --- com.arc.hardware.dfss.io_gpio_8b0.1_0 ######## - -# Create io_gpio_8b0 --create com.arc.hardware.dfss.io_gpio_8b0.1_0 System.CPUisle.ARCv2EM.io_gpio_8b0 - -# io_gpio_8b0 --- Command line option for EIA extension component 'io_gpio_8b0'. --io_gpio_8b0 true - -# io_gpio_8b0_debounce --- Selects the inclusion of Debounce logic --io_gpio_8b0_debounce 1 - -# io_gpio_8b0_readback_sync --- Selects the inclusion of metastability registers on the read back path when reading the external 'ext_porta' signal --io_gpio_8b0_readback_sync 1 - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_gpio_8b1 --- com.arc.hardware.dfss.io_gpio_8b1.1_0 ######## - -# Create io_gpio_8b1 --create com.arc.hardware.dfss.io_gpio_8b1.1_0 System.CPUisle.ARCv2EM.io_gpio_8b1 - -# io_gpio_8b1 --- Command line option for EIA extension component 'io_gpio_8b1'. --io_gpio_8b1 true - -# io_gpio_8b1_debounce --- Selects the inclusion of Debounce logic --io_gpio_8b1_debounce 1 - -# io_gpio_8b1_readback_sync --- Selects the inclusion of metastability registers on the read back path when reading the external 'ext_porta' signal --io_gpio_8b1_readback_sync 1 - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_gpio_8b2 --- com.arc.hardware.dfss.io_gpio_8b2.1_0 ######## - -# Create io_gpio_8b2 --create com.arc.hardware.dfss.io_gpio_8b2.1_0 System.CPUisle.ARCv2EM.io_gpio_8b2 - -# io_gpio_8b2 --- Command line option for EIA extension component 'io_gpio_8b2'. --io_gpio_8b2 true - -# io_gpio_8b2_debounce --- Selects the inclusion of Debounce logic --io_gpio_8b2_debounce 1 - -# io_gpio_8b2_readback_sync --- Selects the inclusion of metastability registers on the read back path when reading the external 'ext_porta' signal --io_gpio_8b2_readback_sync 1 - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_gpio_8b3 --- com.arc.hardware.dfss.io_gpio_8b3.1_0 ######## - -# Create io_gpio_8b3 --create com.arc.hardware.dfss.io_gpio_8b3.1_0 System.CPUisle.ARCv2EM.io_gpio_8b3 - -# io_gpio_8b3 --- Command line option for EIA extension component 'io_gpio_8b3'. --io_gpio_8b3 true - -# io_gpio_8b3_debounce --- Selects the inclusion of Debounce logic --io_gpio_8b3_debounce 1 - -# io_gpio_8b3_readback_sync --- Selects the inclusion of metastability registers on the read back path when reading the external 'ext_porta' signal --io_gpio_8b3_readback_sync 1 - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_i2c_mst0 --- com.arc.hardware.dfss.io_i2c_mst0.1_0 ######## - -# Create io_i2c_mst0 --create com.arc.hardware.dfss.io_i2c_mst0.1_0 System.CPUisle.ARCv2EM.io_i2c_mst0 - -# io_i2c_mst0 --- Command line option for APEX extension component 'io_i2c_mst0'. --io_i2c_mst0 true - -# io_i2c_mst0_fs --- RX/TX FIFO size --io_i2c_mst0_fs 16 - -# io_i2c_mst0_dma_support --- Specifies whether the DMA handshake interface is included --io_i2c_mst0_dma_support None - -# io_i2c_mst0_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the serial clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than, lower than or equal to the serial clock frequency. --io_i2c_mst0_cdc_included 1 - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_i2c_mst1 --- com.arc.hardware.dfss.io_i2c_mst1.1_0 ######## - -# Create io_i2c_mst1 --create com.arc.hardware.dfss.io_i2c_mst1.1_0 System.CPUisle.ARCv2EM.io_i2c_mst1 - -# io_i2c_mst1 --- Command line option for APEX extension component 'io_i2c_mst1'. --io_i2c_mst1 true - -# io_i2c_mst1_fs --- RX/TX FIFO size --io_i2c_mst1_fs 16 - -# io_i2c_mst1_dma_support --- Specifies whether the DMA handshake interface is included --io_i2c_mst1_dma_support None - -# io_i2c_mst1_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the serial clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than, lower than or equal to the serial clock frequency. --io_i2c_mst1_cdc_included 1 - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_i2c_mst2 --- com.arc.hardware.dfss.io_i2c_mst2.1_0 ######## - -# Create io_i2c_mst2 --create com.arc.hardware.dfss.io_i2c_mst2.1_0 System.CPUisle.ARCv2EM.io_i2c_mst2 - -# io_i2c_mst2 --- Command line option for APEX extension component 'io_i2c_mst2'. --io_i2c_mst2 true - -# io_i2c_mst2_fs --- RX/TX FIFO size --io_i2c_mst2_fs 16 - -# io_i2c_mst2_dma_support --- Specifies whether the DMA handshake interface is included --io_i2c_mst2_dma_support None - -# io_i2c_mst2_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the serial clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than, lower than or equal to the serial clock frequency. --io_i2c_mst2_cdc_included 1 - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_spi_mst0 --- com.arc.hardware.dfss.io_spi_mst0.1_0 ######## - -# Create io_spi_mst0 --create com.arc.hardware.dfss.io_spi_mst0.1_0 System.CPUisle.ARCv2EM.io_spi_mst0 - -# io_spi_mst0 --- Command line option for APEX extension component 'io_spi_mst0'. --io_spi_mst0 true - -# io_spi_mst0_fz --- RX/TX FIFO depth --io_spi_mst0_fs 16 - -# io_spi_mst0_max_xfer_size --- This defines the maximum number of bits per word at the serial data port, which determines the FIFO width. --io_spi_mst0_max_xfer_size 16 - -# io_spi_mst0_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the peripheral clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than or equal to the peripheral clock frequency. --io_spi_mst0_cdc_included 1 - -# io_spi_mst0_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. --io_spi_mst0_dma_support Aux-Based - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_spi_mst1 --- com.arc.hardware.dfss.io_spi_mst1.1_0 ######## - -# Create io_spi_mst1 --create com.arc.hardware.dfss.io_spi_mst1.1_0 System.CPUisle.ARCv2EM.io_spi_mst1 - -# io_spi_mst1 --- Command line option for APEX extension component 'io_spi_mst1'. --io_spi_mst1 true - -# io_spi_mst1_fz --- RX/TX FIFO depth --io_spi_mst1_fs 16 - -# io_spi_mst1_max_xfer_size --- This defines the maximum number of bits per word at the serial data port, which determines the FIFO width. --io_spi_mst1_max_xfer_size 16 - -# io_spi_mst1_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the peripheral clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than or equal to the peripheral clock frequency. --io_spi_mst1_cdc_included 1 - -# io_spi_mst1_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. --io_spi_mst1_dma_support Aux-Based - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_spi_mst2 --- com.arc.hardware.dfss.io_spi_mst2.1_0 ######## - -# Create io_spi_mst2 --create com.arc.hardware.dfss.io_spi_mst2.1_0 System.CPUisle.ARCv2EM.io_spi_mst2 - -# io_spi_mst2 --- Command line option for APEX extension component 'io_spi_mst2'. --io_spi_mst2 true - -# io_spi_mst2_fz --- RX/TX FIFO depth --io_spi_mst2_fs 16 - -# io_spi_mst2_max_xfer_size --- This defines the maximum number of bits per word at the serial data port, which determines the FIFO width. --io_spi_mst2_max_xfer_size 16 - -# io_spi_mst2_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the peripheral clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than or equal to the peripheral clock frequency. --io_spi_mst2_cdc_included 1 - -# io_spi_mst2_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. --io_spi_mst2_dma_support Aux-Based - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_spi_slv0 --- com.arc.hardware.dfss.io_spi_slv0.1_0 ######## - -# Create io_spi_slv0 --create com.arc.hardware.dfss.io_spi_slv0.1_0 System.CPUisle.ARCv2EM.io_spi_slv0 - -# io_spi_slv0 --- Command line option for APEX extension component 'io_spi_slv0'. --io_spi_slv0 true - -# io_spi_slv0_fz --- RX/TX FIFO depth --io_spi_slv0_fs 16 - -# io_spi_slv0_max_xfer_size --- This defines the maximum number of bits per word at the serial data port, which determines the FIFO width. --io_spi_slv0_max_xfer_size 16 - -# io_spi_slv0_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. --io_spi_slv0_dma_support None - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_uart0 --- com.arc.hardware.dfss.io_uart0.1_0 ######## - -# Create io_uart0 --create com.arc.hardware.dfss.io_uart0.1_0 System.CPUisle.ARCv2EM.io_uart0 - -# io_uart0 --- Command line option for EIA extension component 'io_uart0'. --io_uart0 true - -# io_uart0_fifo_mode --- Set the UART FIFO mode --io_uart0_fifo_mode 16 - -# io_uart0_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. --io_uart0_dma_support None - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_uart1 --- com.arc.hardware.dfss.io_uart1.1_0 ######## - -# Create io_uart1 --create com.arc.hardware.dfss.io_uart1.1_0 System.CPUisle.ARCv2EM.io_uart1 - -# io_uart1 --- Command line option for EIA extension component 'io_uart1'. --io_uart1 true - -# io_uart1_fifo_mode --- Set the UART FIFO mode --io_uart1_fifo_mode 16 - -# io_uart1_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. --io_uart1_dma_support Aux-Based - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_uart2 --- com.arc.hardware.dfss.io_uart2.1_0 ######## - -# Create io_uart2 --create com.arc.hardware.dfss.io_uart2.1_0 System.CPUisle.ARCv2EM.io_uart2 - -# io_uart2 --- Command line option for EIA extension component 'io_uart2'. --io_uart2 true - -# io_uart2_fifo_mode --- Set the UART FIFO mode --io_uart2_fifo_mode 16 - -# io_uart2_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. --io_uart2_dma_support Aux-Based - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_uart3 --- com.arc.hardware.dfss.io_uart3.1_0 ######## - -# Create io_uart3 --create com.arc.hardware.dfss.io_uart3.1_0 System.CPUisle.ARCv2EM.io_uart3 - -# io_uart3 --- Command line option for EIA extension component 'io_uart3'. --io_uart3 true - -# io_uart3_fifo_mode --- Set the UART FIFO mode --io_uart3_fifo_mode 16 - -# io_uart3_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. --io_uart3_dma_support Aux-Based - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_creg_mst0 --- com.arc.hardware.dfss.io_creg_mst0.1_0 ######## - -# Create io_creg_mst0 --create com.arc.hardware.dfss.io_creg_mst0.1_0 System.CPUisle.ARCv2EM.io_creg_mst0 - -# io_creg_mst0 --- Command line option for EIA extension component 'io_creg_mst0'. --io_creg_mst0 true - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_creg_slv0 --- com.arc.hardware.dfss.io_creg_slv0.1_0 ######## - -# Create io_creg_slv0 --create com.arc.hardware.dfss.io_creg_slv0.1_0 System.CPUisle.ARCv2EM.io_creg_slv0 - -# io_creg_slv0 --- Command line option for EIA extension component 'io_creg_slv0'. --io_creg_slv0 true - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## subsys_bcr --- com.arc.hardware.dfss.subsys_bcr.1_0 ######## - -# Create subsys_bcr --create com.arc.hardware.dfss.subsys_bcr.1_0 System.CPUisle.ARCv2EM.subsys_bcr - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## subsys_infra --- com.arc.hardware.dfss.subsys_infra.1_0 ######## - -# Create subsys_infra --create com.arc.hardware.dfss.subsys_infra.1_0 System.subsys_infra - -# subsys_infra --- Command line option for EIA glue logic. --subsys_infra true - -# internal_interrupt --- Connect the IO interrupts internally --internal_interrupt true - -# internal_dma_handshake --- Connect the DMA handshake signals internally --internal_dma_handshake true - - -######## ARConnect --- com.arc.hardware.ARConnect.1_0 ######## - -# Create ARConnect --create com.arc.hardware.ARConnect.1_0 System.ARConnect - -# mcip_def_div2ref --- This specifies the clock division factor at reset. It is used for mss clock controller to generate ARConnect clock, and the value N means ARConnect is running at (1/N) x ref_clk. --mcip_def_div2ref 1 - -# mcip_has_intrpt --- This specifies whether the Inter-core Interrupt Unit exists --mcip_has_intrpt false - -# mcip_has_sema --- This specifies whether the Inter-core Semaphore Unit exists --mcip_has_sema false - -# mcip_sema_num --- This specifies the number of semaphores in the Inter-core Semaphores Unit --mcip_sema_num 16 - -# mcip_has_msg_sram --- This specifies whether the Inter-core Message Unit exists --mcip_has_msg_sram false - -# mcip_msg_sram_size --- This specifies the bytes of SRAM in the Inter-core Message Unit --mcip_msg_sram_size 512 - -# mcip_msg_1cycle --- True: The access path to message SRAM is 1 clock cycle; False: The access path to message SRAM 1.5 cycles. Note: The 1.5 cycles path use clock negetive edge for SRAM, but can acheive higher frequency. 
No performance difference caused by the value of this option --mcip_msg_1cycle false - -# mcip_has_debug --- This specifies whether the Inter-core Debug Unit exists --mcip_has_debug false - -# mcip_has_grtc --- This specifies whether the Global Real-Time Counter Unit exists --mcip_has_grtc false - -# mcip_has_pmu --- This specifies whether the external Power Management Unit exists --mcip_has_pmu true - -# mcip_power_domains --- This specifies whether the ARConnect Power Domain Management Unit exists --mcip_power_domains true - -# mcip_llm_size --- This specifies the KBytes of SRAM in the Low Latency Memory Unit --mcip_llm_size 32 - -# mcip_llm_base --- This specifies the default memory region of Low Latency Memory Unit --mcip_llm_base 2 - -# mcip_llm_ecc --- This specifies the ECC mode of SRAM in Low Latency Memory Unit. none = No checking; parity = Parity only; SECDED = single-error correction and double-error detection (SECDED) --mcip_llm_ecc SECDED - -# mcip_idu_cirq_num --- This specifies the number of common interrupts supported by IDU --mcip_idu_cirq_num 4 - -# mcip_bsu_dbw --- This specifies the data bus width of Bus Slave Unit --mcip_bsu_dbw 64 - -# mcip_bsu_type --- This specifies the bus protocol of Bus Slave Unit --mcip_bsu_type AXI - - -]]> - - - - - - - - - - - - - - - ICCM0 - - GROUP: { - /* _SDA_BASE_ computed implicitly */ - .sdata?: {} - .sbss?: {} - * (DATA): {} - * (BSS): {} - .stack ALIGN(4) SIZE(DEFINED _STACKSIZE?_STACKSIZE:32768): {} - .heap? ALIGN(4) SIZE(DEFINED _HEAPSIZE?_HEAPSIZE:0): {} - } > DCCM - GROUP: { - .Xdata? : {} - } > XCCM - GROUP: { - .Ydata? 
: {} - } > YCCM - GROUP BIND(0x0): { - .vectors (TEXT) SIZE(DEFINED _IVTSIZE?_IVTSIZE:684): {} = FILL(0xa5a5a5a5,4) - } - } - -]]> - - - - - - 0x07, sub_opcode => 0x1E , latency_cycles => 8) - -// User extension instruction - dsp_sin -extern long dsp_sin(long); -#pragma intrinsic(dsp_sin, opcode => 0x07, sub_opcode => 0x1F , latency_cycles => 8) - -// User extension instruction - dsp_tan -extern long dsp_tan(long); -#pragma intrinsic(dsp_tan, opcode => 0x07, sub_opcode => 0x22 , latency_cycles => 11) - -// User extension instruction - dsp_acos -extern long dsp_acos(long); -#pragma intrinsic(dsp_acos, opcode => 0x07, sub_opcode => 0x23 , latency_cycles => 31) - -// User extension instruction - dsp_asin -extern long dsp_asin(long); -#pragma intrinsic(dsp_asin, opcode => 0x07, sub_opcode => 0x24 , latency_cycles => 31) - -// User extension instruction - dsp_atan -extern long dsp_atan(long); -#pragma intrinsic(dsp_atan, opcode => 0x07, sub_opcode => 0x25 , latency_cycles => 13) - -// User extension instruction - dsp_sqrt -extern long dsp_sqrt(long); -#pragma intrinsic(dsp_sqrt, opcode => 0x07, sub_opcode => 0x20 , latency_cycles => 31) - -// User extension instruction - dsp_sqrt15 -extern long dsp_sqrt15(long); -#pragma intrinsic(dsp_sqrt15, opcode => 0x07, sub_opcode => 0x21 , latency_cycles => 15) - -#define APEX_COM_ARC_HARDWARE_DFSS_DSP_TRIG_PRESENT 1 -#define APEX_COM_ARC_HARDWARE_DFSS_IO_GPIO_4B0_IO_GPIO_4B0_PRESENT 1 - -// User extension aux register io_gpio_4b0_debounce -#define AR_IO_GPIO_4B0_DEBOUNCE 0x80017c48 -#pragma Aux_register(0x80017c48, name=>"io_gpio_4b0_debounce") - -// User extension aux register io_gpio_4b0_clken -#define AR_IO_GPIO_4B0_CLKEN 0x80017c80 -#pragma Aux_register(0x80017c80, name=>"io_gpio_4b0_clken") - -// User extension aux register io_gpio_4b0_swporta_dr -#define AR_IO_GPIO_4B0_SWPORTA_DR 0x80017c00 -#pragma Aux_register(0x80017c00, name=>"io_gpio_4b0_swporta_dr") - -// User extension aux register io_gpio_4b0_swporta_ddr -#define 
AR_IO_GPIO_4B0_SWPORTA_DDR 0x80017c04 -#pragma Aux_register(0x80017c04, name=>"io_gpio_4b0_swporta_ddr") - -// User extension aux register io_gpio_4b0_inten -#define AR_IO_GPIO_4B0_INTEN 0x80017c30 -#pragma Aux_register(0x80017c30, name=>"io_gpio_4b0_inten") - -// User extension aux register io_gpio_4b0_intmask -#define AR_IO_GPIO_4B0_INTMASK 0x80017c34 -#pragma Aux_register(0x80017c34, name=>"io_gpio_4b0_intmask") - -// User extension aux register io_gpio_4b0_inttype_level -#define AR_IO_GPIO_4B0_INTTYPE_LEVEL 0x80017c38 -#pragma Aux_register(0x80017c38, name=>"io_gpio_4b0_inttype_level") - -// User extension aux register io_gpio_4b0_int_polarity -#define AR_IO_GPIO_4B0_INT_POLARITY 0x80017c3c -#pragma Aux_register(0x80017c3c, name=>"io_gpio_4b0_int_polarity") - -// User extension aux register io_gpio_4b0_intstatus -#define AR_IO_GPIO_4B0_INTSTATUS 0x80017c40 -#pragma Aux_register(0x80017c40, name=>"io_gpio_4b0_intstatus") - -// User extension aux register io_gpio_4b0_raw_intstatus -#define AR_IO_GPIO_4B0_RAW_INTSTATUS 0x80017c44 -#pragma Aux_register(0x80017c44, name=>"io_gpio_4b0_raw_intstatus") - -// User extension aux register io_gpio_4b0_porta_eoi -#define AR_IO_GPIO_4B0_PORTA_EOI 0x80017c4c -#pragma Aux_register(0x80017c4c, name=>"io_gpio_4b0_porta_eoi") - -// User extension aux register io_gpio_4b0_ext_porta -#define AR_IO_GPIO_4B0_EXT_PORTA 0x80017c50 -#pragma Aux_register(0x80017c50, name=>"io_gpio_4b0_ext_porta") - -// User extension aux register io_gpio_4b0_ls_sync -#define AR_IO_GPIO_4B0_LS_SYNC 0x80017c60 -#pragma Aux_register(0x80017c60, name=>"io_gpio_4b0_ls_sync") - -// User extension aux register io_gpio_4b0_int_bothedge -#define AR_IO_GPIO_4B0_INT_BOTHEDGE 0x80017c68 -#pragma Aux_register(0x80017c68, name=>"io_gpio_4b0_int_bothedge") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_GPIO_4B1_IO_GPIO_4B1_PRESENT 1 - -// User extension aux register io_gpio_4b1_debounce -#define AR_IO_GPIO_4B1_DEBOUNCE 0x80017d48 -#pragma Aux_register(0x80017d48, 
name=>"io_gpio_4b1_debounce") - -// User extension aux register io_gpio_4b1_clken -#define AR_IO_GPIO_4B1_CLKEN 0x80017d80 -#pragma Aux_register(0x80017d80, name=>"io_gpio_4b1_clken") - -// User extension aux register io_gpio_4b1_swporta_dr -#define AR_IO_GPIO_4B1_SWPORTA_DR 0x80017d00 -#pragma Aux_register(0x80017d00, name=>"io_gpio_4b1_swporta_dr") - -// User extension aux register io_gpio_4b1_swporta_ddr -#define AR_IO_GPIO_4B1_SWPORTA_DDR 0x80017d04 -#pragma Aux_register(0x80017d04, name=>"io_gpio_4b1_swporta_ddr") - -// User extension aux register io_gpio_4b1_inten -#define AR_IO_GPIO_4B1_INTEN 0x80017d30 -#pragma Aux_register(0x80017d30, name=>"io_gpio_4b1_inten") - -// User extension aux register io_gpio_4b1_intmask -#define AR_IO_GPIO_4B1_INTMASK 0x80017d34 -#pragma Aux_register(0x80017d34, name=>"io_gpio_4b1_intmask") - -// User extension aux register io_gpio_4b1_inttype_level -#define AR_IO_GPIO_4B1_INTTYPE_LEVEL 0x80017d38 -#pragma Aux_register(0x80017d38, name=>"io_gpio_4b1_inttype_level") - -// User extension aux register io_gpio_4b1_int_polarity -#define AR_IO_GPIO_4B1_INT_POLARITY 0x80017d3c -#pragma Aux_register(0x80017d3c, name=>"io_gpio_4b1_int_polarity") - -// User extension aux register io_gpio_4b1_intstatus -#define AR_IO_GPIO_4B1_INTSTATUS 0x80017d40 -#pragma Aux_register(0x80017d40, name=>"io_gpio_4b1_intstatus") - -// User extension aux register io_gpio_4b1_raw_intstatus -#define AR_IO_GPIO_4B1_RAW_INTSTATUS 0x80017d44 -#pragma Aux_register(0x80017d44, name=>"io_gpio_4b1_raw_intstatus") - -// User extension aux register io_gpio_4b1_porta_eoi -#define AR_IO_GPIO_4B1_PORTA_EOI 0x80017d4c -#pragma Aux_register(0x80017d4c, name=>"io_gpio_4b1_porta_eoi") - -// User extension aux register io_gpio_4b1_ext_porta -#define AR_IO_GPIO_4B1_EXT_PORTA 0x80017d50 -#pragma Aux_register(0x80017d50, name=>"io_gpio_4b1_ext_porta") - -// User extension aux register io_gpio_4b1_ls_sync -#define AR_IO_GPIO_4B1_LS_SYNC 0x80017d60 -#pragma Aux_register(0x80017d60, 
name=>"io_gpio_4b1_ls_sync") - -// User extension aux register io_gpio_4b1_int_bothedge -#define AR_IO_GPIO_4B1_INT_BOTHEDGE 0x80017d68 -#pragma Aux_register(0x80017d68, name=>"io_gpio_4b1_int_bothedge") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_GPIO_4B2_IO_GPIO_4B2_PRESENT 1 - -// User extension aux register io_gpio_4b2_debounce -#define AR_IO_GPIO_4B2_DEBOUNCE 0x80017e48 -#pragma Aux_register(0x80017e48, name=>"io_gpio_4b2_debounce") - -// User extension aux register io_gpio_4b2_clken -#define AR_IO_GPIO_4B2_CLKEN 0x80017e80 -#pragma Aux_register(0x80017e80, name=>"io_gpio_4b2_clken") - -// User extension aux register io_gpio_4b2_swporta_dr -#define AR_IO_GPIO_4B2_SWPORTA_DR 0x80017e00 -#pragma Aux_register(0x80017e00, name=>"io_gpio_4b2_swporta_dr") - -// User extension aux register io_gpio_4b2_swporta_ddr -#define AR_IO_GPIO_4B2_SWPORTA_DDR 0x80017e04 -#pragma Aux_register(0x80017e04, name=>"io_gpio_4b2_swporta_ddr") - -// User extension aux register io_gpio_4b2_inten -#define AR_IO_GPIO_4B2_INTEN 0x80017e30 -#pragma Aux_register(0x80017e30, name=>"io_gpio_4b2_inten") - -// User extension aux register io_gpio_4b2_intmask -#define AR_IO_GPIO_4B2_INTMASK 0x80017e34 -#pragma Aux_register(0x80017e34, name=>"io_gpio_4b2_intmask") - -// User extension aux register io_gpio_4b2_inttype_level -#define AR_IO_GPIO_4B2_INTTYPE_LEVEL 0x80017e38 -#pragma Aux_register(0x80017e38, name=>"io_gpio_4b2_inttype_level") - -// User extension aux register io_gpio_4b2_int_polarity -#define AR_IO_GPIO_4B2_INT_POLARITY 0x80017e3c -#pragma Aux_register(0x80017e3c, name=>"io_gpio_4b2_int_polarity") - -// User extension aux register io_gpio_4b2_intstatus -#define AR_IO_GPIO_4B2_INTSTATUS 0x80017e40 -#pragma Aux_register(0x80017e40, name=>"io_gpio_4b2_intstatus") - -// User extension aux register io_gpio_4b2_raw_intstatus -#define AR_IO_GPIO_4B2_RAW_INTSTATUS 0x80017e44 -#pragma Aux_register(0x80017e44, name=>"io_gpio_4b2_raw_intstatus") - -// User extension aux register io_gpio_4b2_porta_eoi 
-#define AR_IO_GPIO_4B2_PORTA_EOI 0x80017e4c -#pragma Aux_register(0x80017e4c, name=>"io_gpio_4b2_porta_eoi") - -// User extension aux register io_gpio_4b2_ext_porta -#define AR_IO_GPIO_4B2_EXT_PORTA 0x80017e50 -#pragma Aux_register(0x80017e50, name=>"io_gpio_4b2_ext_porta") - -// User extension aux register io_gpio_4b2_ls_sync -#define AR_IO_GPIO_4B2_LS_SYNC 0x80017e60 -#pragma Aux_register(0x80017e60, name=>"io_gpio_4b2_ls_sync") - -// User extension aux register io_gpio_4b2_int_bothedge -#define AR_IO_GPIO_4B2_INT_BOTHEDGE 0x80017e68 -#pragma Aux_register(0x80017e68, name=>"io_gpio_4b2_int_bothedge") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_GPIO_8B0_IO_GPIO_8B0_PRESENT 1 - -// User extension aux register io_gpio_8b0_debounce -#define AR_IO_GPIO_8B0_DEBOUNCE 0x80017848 -#pragma Aux_register(0x80017848, name=>"io_gpio_8b0_debounce") - -// User extension aux register io_gpio_8b0_clken -#define AR_IO_GPIO_8B0_CLKEN 0x80017880 -#pragma Aux_register(0x80017880, name=>"io_gpio_8b0_clken") - -// User extension aux register io_gpio_8b0_swporta_dr -#define AR_IO_GPIO_8B0_SWPORTA_DR 0x80017800 -#pragma Aux_register(0x80017800, name=>"io_gpio_8b0_swporta_dr") - -// User extension aux register io_gpio_8b0_swporta_ddr -#define AR_IO_GPIO_8B0_SWPORTA_DDR 0x80017804 -#pragma Aux_register(0x80017804, name=>"io_gpio_8b0_swporta_ddr") - -// User extension aux register io_gpio_8b0_inten -#define AR_IO_GPIO_8B0_INTEN 0x80017830 -#pragma Aux_register(0x80017830, name=>"io_gpio_8b0_inten") - -// User extension aux register io_gpio_8b0_intmask -#define AR_IO_GPIO_8B0_INTMASK 0x80017834 -#pragma Aux_register(0x80017834, name=>"io_gpio_8b0_intmask") - -// User extension aux register io_gpio_8b0_inttype_level -#define AR_IO_GPIO_8B0_INTTYPE_LEVEL 0x80017838 -#pragma Aux_register(0x80017838, name=>"io_gpio_8b0_inttype_level") - -// User extension aux register io_gpio_8b0_int_polarity -#define AR_IO_GPIO_8B0_INT_POLARITY 0x8001783c -#pragma Aux_register(0x8001783c, 
name=>"io_gpio_8b0_int_polarity") - -// User extension aux register io_gpio_8b0_intstatus -#define AR_IO_GPIO_8B0_INTSTATUS 0x80017840 -#pragma Aux_register(0x80017840, name=>"io_gpio_8b0_intstatus") - -// User extension aux register io_gpio_8b0_raw_intstatus -#define AR_IO_GPIO_8B0_RAW_INTSTATUS 0x80017844 -#pragma Aux_register(0x80017844, name=>"io_gpio_8b0_raw_intstatus") - -// User extension aux register io_gpio_8b0_porta_eoi -#define AR_IO_GPIO_8B0_PORTA_EOI 0x8001784c -#pragma Aux_register(0x8001784c, name=>"io_gpio_8b0_porta_eoi") - -// User extension aux register io_gpio_8b0_ext_porta -#define AR_IO_GPIO_8B0_EXT_PORTA 0x80017850 -#pragma Aux_register(0x80017850, name=>"io_gpio_8b0_ext_porta") - -// User extension aux register io_gpio_8b0_ls_sync -#define AR_IO_GPIO_8B0_LS_SYNC 0x80017860 -#pragma Aux_register(0x80017860, name=>"io_gpio_8b0_ls_sync") - -// User extension aux register io_gpio_8b0_int_bothedge -#define AR_IO_GPIO_8B0_INT_BOTHEDGE 0x80017868 -#pragma Aux_register(0x80017868, name=>"io_gpio_8b0_int_bothedge") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_GPIO_8B1_IO_GPIO_8B1_PRESENT 1 - -// User extension aux register io_gpio_8b1_debounce -#define AR_IO_GPIO_8B1_DEBOUNCE 0x80017948 -#pragma Aux_register(0x80017948, name=>"io_gpio_8b1_debounce") - -// User extension aux register io_gpio_8b1_clken -#define AR_IO_GPIO_8B1_CLKEN 0x80017980 -#pragma Aux_register(0x80017980, name=>"io_gpio_8b1_clken") - -// User extension aux register io_gpio_8b1_swporta_dr -#define AR_IO_GPIO_8B1_SWPORTA_DR 0x80017900 -#pragma Aux_register(0x80017900, name=>"io_gpio_8b1_swporta_dr") - -// User extension aux register io_gpio_8b1_swporta_ddr -#define AR_IO_GPIO_8B1_SWPORTA_DDR 0x80017904 -#pragma Aux_register(0x80017904, name=>"io_gpio_8b1_swporta_ddr") - -// User extension aux register io_gpio_8b1_inten -#define AR_IO_GPIO_8B1_INTEN 0x80017930 -#pragma Aux_register(0x80017930, name=>"io_gpio_8b1_inten") - -// User extension aux register io_gpio_8b1_intmask -#define 
AR_IO_GPIO_8B1_INTMASK 0x80017934 -#pragma Aux_register(0x80017934, name=>"io_gpio_8b1_intmask") - -// User extension aux register io_gpio_8b1_inttype_level -#define AR_IO_GPIO_8B1_INTTYPE_LEVEL 0x80017938 -#pragma Aux_register(0x80017938, name=>"io_gpio_8b1_inttype_level") - -// User extension aux register io_gpio_8b1_int_polarity -#define AR_IO_GPIO_8B1_INT_POLARITY 0x8001793c -#pragma Aux_register(0x8001793c, name=>"io_gpio_8b1_int_polarity") - -// User extension aux register io_gpio_8b1_intstatus -#define AR_IO_GPIO_8B1_INTSTATUS 0x80017940 -#pragma Aux_register(0x80017940, name=>"io_gpio_8b1_intstatus") - -// User extension aux register io_gpio_8b1_raw_intstatus -#define AR_IO_GPIO_8B1_RAW_INTSTATUS 0x80017944 -#pragma Aux_register(0x80017944, name=>"io_gpio_8b1_raw_intstatus") - -// User extension aux register io_gpio_8b1_porta_eoi -#define AR_IO_GPIO_8B1_PORTA_EOI 0x8001794c -#pragma Aux_register(0x8001794c, name=>"io_gpio_8b1_porta_eoi") - -// User extension aux register io_gpio_8b1_ext_porta -#define AR_IO_GPIO_8B1_EXT_PORTA 0x80017950 -#pragma Aux_register(0x80017950, name=>"io_gpio_8b1_ext_porta") - -// User extension aux register io_gpio_8b1_ls_sync -#define AR_IO_GPIO_8B1_LS_SYNC 0x80017960 -#pragma Aux_register(0x80017960, name=>"io_gpio_8b1_ls_sync") - -// User extension aux register io_gpio_8b1_int_bothedge -#define AR_IO_GPIO_8B1_INT_BOTHEDGE 0x80017968 -#pragma Aux_register(0x80017968, name=>"io_gpio_8b1_int_bothedge") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_GPIO_8B2_IO_GPIO_8B2_PRESENT 1 - -// User extension aux register io_gpio_8b2_debounce -#define AR_IO_GPIO_8B2_DEBOUNCE 0x80017a48 -#pragma Aux_register(0x80017a48, name=>"io_gpio_8b2_debounce") - -// User extension aux register io_gpio_8b2_clken -#define AR_IO_GPIO_8B2_CLKEN 0x80017a80 -#pragma Aux_register(0x80017a80, name=>"io_gpio_8b2_clken") - -// User extension aux register io_gpio_8b2_swporta_dr -#define AR_IO_GPIO_8B2_SWPORTA_DR 0x80017a00 -#pragma Aux_register(0x80017a00, 
name=>"io_gpio_8b2_swporta_dr") - -// User extension aux register io_gpio_8b2_swporta_ddr -#define AR_IO_GPIO_8B2_SWPORTA_DDR 0x80017a04 -#pragma Aux_register(0x80017a04, name=>"io_gpio_8b2_swporta_ddr") - -// User extension aux register io_gpio_8b2_inten -#define AR_IO_GPIO_8B2_INTEN 0x80017a30 -#pragma Aux_register(0x80017a30, name=>"io_gpio_8b2_inten") - -// User extension aux register io_gpio_8b2_intmask -#define AR_IO_GPIO_8B2_INTMASK 0x80017a34 -#pragma Aux_register(0x80017a34, name=>"io_gpio_8b2_intmask") - -// User extension aux register io_gpio_8b2_inttype_level -#define AR_IO_GPIO_8B2_INTTYPE_LEVEL 0x80017a38 -#pragma Aux_register(0x80017a38, name=>"io_gpio_8b2_inttype_level") - -// User extension aux register io_gpio_8b2_int_polarity -#define AR_IO_GPIO_8B2_INT_POLARITY 0x80017a3c -#pragma Aux_register(0x80017a3c, name=>"io_gpio_8b2_int_polarity") - -// User extension aux register io_gpio_8b2_intstatus -#define AR_IO_GPIO_8B2_INTSTATUS 0x80017a40 -#pragma Aux_register(0x80017a40, name=>"io_gpio_8b2_intstatus") - -// User extension aux register io_gpio_8b2_raw_intstatus -#define AR_IO_GPIO_8B2_RAW_INTSTATUS 0x80017a44 -#pragma Aux_register(0x80017a44, name=>"io_gpio_8b2_raw_intstatus") - -// User extension aux register io_gpio_8b2_porta_eoi -#define AR_IO_GPIO_8B2_PORTA_EOI 0x80017a4c -#pragma Aux_register(0x80017a4c, name=>"io_gpio_8b2_porta_eoi") - -// User extension aux register io_gpio_8b2_ext_porta -#define AR_IO_GPIO_8B2_EXT_PORTA 0x80017a50 -#pragma Aux_register(0x80017a50, name=>"io_gpio_8b2_ext_porta") - -// User extension aux register io_gpio_8b2_ls_sync -#define AR_IO_GPIO_8B2_LS_SYNC 0x80017a60 -#pragma Aux_register(0x80017a60, name=>"io_gpio_8b2_ls_sync") - -// User extension aux register io_gpio_8b2_int_bothedge -#define AR_IO_GPIO_8B2_INT_BOTHEDGE 0x80017a68 -#pragma Aux_register(0x80017a68, name=>"io_gpio_8b2_int_bothedge") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_GPIO_8B3_IO_GPIO_8B3_PRESENT 1 - -// User extension aux register 
io_gpio_8b3_debounce -#define AR_IO_GPIO_8B3_DEBOUNCE 0x80017b48 -#pragma Aux_register(0x80017b48, name=>"io_gpio_8b3_debounce") - -// User extension aux register io_gpio_8b3_clken -#define AR_IO_GPIO_8B3_CLKEN 0x80017b80 -#pragma Aux_register(0x80017b80, name=>"io_gpio_8b3_clken") - -// User extension aux register io_gpio_8b3_swporta_dr -#define AR_IO_GPIO_8B3_SWPORTA_DR 0x80017b00 -#pragma Aux_register(0x80017b00, name=>"io_gpio_8b3_swporta_dr") - -// User extension aux register io_gpio_8b3_swporta_ddr -#define AR_IO_GPIO_8B3_SWPORTA_DDR 0x80017b04 -#pragma Aux_register(0x80017b04, name=>"io_gpio_8b3_swporta_ddr") - -// User extension aux register io_gpio_8b3_inten -#define AR_IO_GPIO_8B3_INTEN 0x80017b30 -#pragma Aux_register(0x80017b30, name=>"io_gpio_8b3_inten") - -// User extension aux register io_gpio_8b3_intmask -#define AR_IO_GPIO_8B3_INTMASK 0x80017b34 -#pragma Aux_register(0x80017b34, name=>"io_gpio_8b3_intmask") - -// User extension aux register io_gpio_8b3_inttype_level -#define AR_IO_GPIO_8B3_INTTYPE_LEVEL 0x80017b38 -#pragma Aux_register(0x80017b38, name=>"io_gpio_8b3_inttype_level") - -// User extension aux register io_gpio_8b3_int_polarity -#define AR_IO_GPIO_8B3_INT_POLARITY 0x80017b3c -#pragma Aux_register(0x80017b3c, name=>"io_gpio_8b3_int_polarity") - -// User extension aux register io_gpio_8b3_intstatus -#define AR_IO_GPIO_8B3_INTSTATUS 0x80017b40 -#pragma Aux_register(0x80017b40, name=>"io_gpio_8b3_intstatus") - -// User extension aux register io_gpio_8b3_raw_intstatus -#define AR_IO_GPIO_8B3_RAW_INTSTATUS 0x80017b44 -#pragma Aux_register(0x80017b44, name=>"io_gpio_8b3_raw_intstatus") - -// User extension aux register io_gpio_8b3_porta_eoi -#define AR_IO_GPIO_8B3_PORTA_EOI 0x80017b4c -#pragma Aux_register(0x80017b4c, name=>"io_gpio_8b3_porta_eoi") - -// User extension aux register io_gpio_8b3_ext_porta -#define AR_IO_GPIO_8B3_EXT_PORTA 0x80017b50 -#pragma Aux_register(0x80017b50, name=>"io_gpio_8b3_ext_porta") - -// User extension aux 
register io_gpio_8b3_ls_sync -#define AR_IO_GPIO_8B3_LS_SYNC 0x80017b60 -#pragma Aux_register(0x80017b60, name=>"io_gpio_8b3_ls_sync") - -// User extension aux register io_gpio_8b3_int_bothedge -#define AR_IO_GPIO_8B3_INT_BOTHEDGE 0x80017b68 -#pragma Aux_register(0x80017b68, name=>"io_gpio_8b3_int_bothedge") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_I2C_MST0_IO_I2C_MST0_PRESENT 1 - -// User extension aux register io_i2c_mst0_clken -#define AR_IO_I2C_MST0_CLKEN 0x800120c0 -#pragma Aux_register(0x800120c0, name=>"io_i2c_mst0_clken") - -// User extension aux register io_i2c_mst0_con -#define AR_IO_I2C_MST0_CON 0x80012000 -#pragma Aux_register(0x80012000, name=>"io_i2c_mst0_con") - -// User extension aux register io_i2c_mst0_tar -#define AR_IO_I2C_MST0_TAR 0x80012004 -#pragma Aux_register(0x80012004, name=>"io_i2c_mst0_tar") - -// User extension aux register io_i2c_mst0_data_cmd -#define AR_IO_I2C_MST0_DATA_CMD 0x80012010 -#pragma Aux_register(0x80012010, name=>"io_i2c_mst0_data_cmd") - -// User extension aux register io_i2c_mst0_ss_scl_hcnt -#define AR_IO_I2C_MST0_SS_SCL_HCNT 0x80012014 -#pragma Aux_register(0x80012014, name=>"io_i2c_mst0_ss_scl_hcnt") - -// User extension aux register io_i2c_mst0_ss_scl_lcnt -#define AR_IO_I2C_MST0_SS_SCL_LCNT 0x80012018 -#pragma Aux_register(0x80012018, name=>"io_i2c_mst0_ss_scl_lcnt") - -// User extension aux register io_i2c_mst0_fs_scl_hcnt -#define AR_IO_I2C_MST0_FS_SCL_HCNT 0x8001201c -#pragma Aux_register(0x8001201c, name=>"io_i2c_mst0_fs_scl_hcnt") - -// User extension aux register io_i2c_mst0_fs_scl_lcnt -#define AR_IO_I2C_MST0_FS_SCL_LCNT 0x80012020 -#pragma Aux_register(0x80012020, name=>"io_i2c_mst0_fs_scl_lcnt") - -// User extension aux register io_i2c_mst0_intr_stat -#define AR_IO_I2C_MST0_INTR_STAT 0x8001202c -#pragma Aux_register(0x8001202c, name=>"io_i2c_mst0_intr_stat") - -// User extension aux register io_i2c_mst0_intr_mask -#define AR_IO_I2C_MST0_INTR_MASK 0x80012030 -#pragma Aux_register(0x80012030, 
name=>"io_i2c_mst0_intr_mask") - -// User extension aux register io_i2c_mst0_raw_intr_stat -#define AR_IO_I2C_MST0_RAW_INTR_STAT 0x80012034 -#pragma Aux_register(0x80012034, name=>"io_i2c_mst0_raw_intr_stat") - -// User extension aux register io_i2c_mst0_rx_tl -#define AR_IO_I2C_MST0_RX_TL 0x80012038 -#pragma Aux_register(0x80012038, name=>"io_i2c_mst0_rx_tl") - -// User extension aux register io_i2c_mst0_tx_tl -#define AR_IO_I2C_MST0_TX_TL 0x8001203c -#pragma Aux_register(0x8001203c, name=>"io_i2c_mst0_tx_tl") - -// User extension aux register io_i2c_mst0_clr_intr -#define AR_IO_I2C_MST0_CLR_INTR 0x80012040 -#pragma Aux_register(0x80012040, name=>"io_i2c_mst0_clr_intr") - -// User extension aux register io_i2c_mst0_clr_rx_under -#define AR_IO_I2C_MST0_CLR_RX_UNDER 0x80012044 -#pragma Aux_register(0x80012044, name=>"io_i2c_mst0_clr_rx_under") - -// User extension aux register io_i2c_mst0_clr_rx_over -#define AR_IO_I2C_MST0_CLR_RX_OVER 0x80012048 -#pragma Aux_register(0x80012048, name=>"io_i2c_mst0_clr_rx_over") - -// User extension aux register io_i2c_mst0_clr_tx_over -#define AR_IO_I2C_MST0_CLR_TX_OVER 0x8001204c -#pragma Aux_register(0x8001204c, name=>"io_i2c_mst0_clr_tx_over") - -// User extension aux register io_i2c_mst0_clr_tx_abrt -#define AR_IO_I2C_MST0_CLR_TX_ABRT 0x80012054 -#pragma Aux_register(0x80012054, name=>"io_i2c_mst0_clr_tx_abrt") - -// User extension aux register io_i2c_mst0_clr_activity -#define AR_IO_I2C_MST0_CLR_ACTIVITY 0x8001205c -#pragma Aux_register(0x8001205c, name=>"io_i2c_mst0_clr_activity") - -// User extension aux register io_i2c_mst0_clr_stop_det -#define AR_IO_I2C_MST0_CLR_STOP_DET 0x80012060 -#pragma Aux_register(0x80012060, name=>"io_i2c_mst0_clr_stop_det") - -// User extension aux register io_i2c_mst0_clr_start_det -#define AR_IO_I2C_MST0_CLR_START_DET 0x80012064 -#pragma Aux_register(0x80012064, name=>"io_i2c_mst0_clr_start_det") - -// User extension aux register io_i2c_mst0_enable -#define AR_IO_I2C_MST0_ENABLE 0x8001206c 
-#pragma Aux_register(0x8001206c, name=>"io_i2c_mst0_enable") - -// User extension aux register io_i2c_mst0_status -#define AR_IO_I2C_MST0_STATUS 0x80012070 -#pragma Aux_register(0x80012070, name=>"io_i2c_mst0_status") - -// User extension aux register io_i2c_mst0_txflr -#define AR_IO_I2C_MST0_TXFLR 0x80012074 -#pragma Aux_register(0x80012074, name=>"io_i2c_mst0_txflr") - -// User extension aux register io_i2c_mst0_rxflr -#define AR_IO_I2C_MST0_RXFLR 0x80012078 -#pragma Aux_register(0x80012078, name=>"io_i2c_mst0_rxflr") - -// User extension aux register io_i2c_mst0_sda_hold -#define AR_IO_I2C_MST0_SDA_HOLD 0x8001207c -#pragma Aux_register(0x8001207c, name=>"io_i2c_mst0_sda_hold") - -// User extension aux register io_i2c_mst0_tx_abrt_source -#define AR_IO_I2C_MST0_TX_ABRT_SOURCE 0x80012080 -#pragma Aux_register(0x80012080, name=>"io_i2c_mst0_tx_abrt_source") - -// User extension aux register io_i2c_mst0_enable_status -#define AR_IO_I2C_MST0_ENABLE_STATUS 0x8001209c -#pragma Aux_register(0x8001209c, name=>"io_i2c_mst0_enable_status") - -// User extension aux register io_i2c_mst0_fs_spklen -#define AR_IO_I2C_MST0_FS_SPKLEN 0x800120a0 -#pragma Aux_register(0x800120a0, name=>"io_i2c_mst0_fs_spklen") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_I2C_MST1_IO_I2C_MST1_PRESENT 1 - -// User extension aux register io_i2c_mst1_clken -#define AR_IO_I2C_MST1_CLKEN 0x800121c0 -#pragma Aux_register(0x800121c0, name=>"io_i2c_mst1_clken") - -// User extension aux register io_i2c_mst1_con -#define AR_IO_I2C_MST1_CON 0x80012100 -#pragma Aux_register(0x80012100, name=>"io_i2c_mst1_con") - -// User extension aux register io_i2c_mst1_tar -#define AR_IO_I2C_MST1_TAR 0x80012104 -#pragma Aux_register(0x80012104, name=>"io_i2c_mst1_tar") - -// User extension aux register io_i2c_mst1_data_cmd -#define AR_IO_I2C_MST1_DATA_CMD 0x80012110 -#pragma Aux_register(0x80012110, name=>"io_i2c_mst1_data_cmd") - -// User extension aux register io_i2c_mst1_ss_scl_hcnt -#define AR_IO_I2C_MST1_SS_SCL_HCNT 
0x80012114 -#pragma Aux_register(0x80012114, name=>"io_i2c_mst1_ss_scl_hcnt") - -// User extension aux register io_i2c_mst1_ss_scl_lcnt -#define AR_IO_I2C_MST1_SS_SCL_LCNT 0x80012118 -#pragma Aux_register(0x80012118, name=>"io_i2c_mst1_ss_scl_lcnt") - -// User extension aux register io_i2c_mst1_fs_scl_hcnt -#define AR_IO_I2C_MST1_FS_SCL_HCNT 0x8001211c -#pragma Aux_register(0x8001211c, name=>"io_i2c_mst1_fs_scl_hcnt") - -// User extension aux register io_i2c_mst1_fs_scl_lcnt -#define AR_IO_I2C_MST1_FS_SCL_LCNT 0x80012120 -#pragma Aux_register(0x80012120, name=>"io_i2c_mst1_fs_scl_lcnt") - -// User extension aux register io_i2c_mst1_intr_stat -#define AR_IO_I2C_MST1_INTR_STAT 0x8001212c -#pragma Aux_register(0x8001212c, name=>"io_i2c_mst1_intr_stat") - -// User extension aux register io_i2c_mst1_intr_mask -#define AR_IO_I2C_MST1_INTR_MASK 0x80012130 -#pragma Aux_register(0x80012130, name=>"io_i2c_mst1_intr_mask") - -// User extension aux register io_i2c_mst1_raw_intr_stat -#define AR_IO_I2C_MST1_RAW_INTR_STAT 0x80012134 -#pragma Aux_register(0x80012134, name=>"io_i2c_mst1_raw_intr_stat") - -// User extension aux register io_i2c_mst1_rx_tl -#define AR_IO_I2C_MST1_RX_TL 0x80012138 -#pragma Aux_register(0x80012138, name=>"io_i2c_mst1_rx_tl") - -// User extension aux register io_i2c_mst1_tx_tl -#define AR_IO_I2C_MST1_TX_TL 0x8001213c -#pragma Aux_register(0x8001213c, name=>"io_i2c_mst1_tx_tl") - -// User extension aux register io_i2c_mst1_clr_intr -#define AR_IO_I2C_MST1_CLR_INTR 0x80012140 -#pragma Aux_register(0x80012140, name=>"io_i2c_mst1_clr_intr") - -// User extension aux register io_i2c_mst1_clr_rx_under -#define AR_IO_I2C_MST1_CLR_RX_UNDER 0x80012144 -#pragma Aux_register(0x80012144, name=>"io_i2c_mst1_clr_rx_under") - -// User extension aux register io_i2c_mst1_clr_rx_over -#define AR_IO_I2C_MST1_CLR_RX_OVER 0x80012148 -#pragma Aux_register(0x80012148, name=>"io_i2c_mst1_clr_rx_over") - -// User extension aux register io_i2c_mst1_clr_tx_over -#define 
AR_IO_I2C_MST1_CLR_TX_OVER 0x8001214c -#pragma Aux_register(0x8001214c, name=>"io_i2c_mst1_clr_tx_over") - -// User extension aux register io_i2c_mst1_clr_tx_abrt -#define AR_IO_I2C_MST1_CLR_TX_ABRT 0x80012154 -#pragma Aux_register(0x80012154, name=>"io_i2c_mst1_clr_tx_abrt") - -// User extension aux register io_i2c_mst1_clr_activity -#define AR_IO_I2C_MST1_CLR_ACTIVITY 0x8001215c -#pragma Aux_register(0x8001215c, name=>"io_i2c_mst1_clr_activity") - -// User extension aux register io_i2c_mst1_clr_stop_det -#define AR_IO_I2C_MST1_CLR_STOP_DET 0x80012160 -#pragma Aux_register(0x80012160, name=>"io_i2c_mst1_clr_stop_det") - -// User extension aux register io_i2c_mst1_clr_start_det -#define AR_IO_I2C_MST1_CLR_START_DET 0x80012164 -#pragma Aux_register(0x80012164, name=>"io_i2c_mst1_clr_start_det") - -// User extension aux register io_i2c_mst1_enable -#define AR_IO_I2C_MST1_ENABLE 0x8001216c -#pragma Aux_register(0x8001216c, name=>"io_i2c_mst1_enable") - -// User extension aux register io_i2c_mst1_status -#define AR_IO_I2C_MST1_STATUS 0x80012170 -#pragma Aux_register(0x80012170, name=>"io_i2c_mst1_status") - -// User extension aux register io_i2c_mst1_txflr -#define AR_IO_I2C_MST1_TXFLR 0x80012174 -#pragma Aux_register(0x80012174, name=>"io_i2c_mst1_txflr") - -// User extension aux register io_i2c_mst1_rxflr -#define AR_IO_I2C_MST1_RXFLR 0x80012178 -#pragma Aux_register(0x80012178, name=>"io_i2c_mst1_rxflr") - -// User extension aux register io_i2c_mst1_sda_hold -#define AR_IO_I2C_MST1_SDA_HOLD 0x8001217c -#pragma Aux_register(0x8001217c, name=>"io_i2c_mst1_sda_hold") - -// User extension aux register io_i2c_mst1_tx_abrt_source -#define AR_IO_I2C_MST1_TX_ABRT_SOURCE 0x80012180 -#pragma Aux_register(0x80012180, name=>"io_i2c_mst1_tx_abrt_source") - -// User extension aux register io_i2c_mst1_enable_status -#define AR_IO_I2C_MST1_ENABLE_STATUS 0x8001219c -#pragma Aux_register(0x8001219c, name=>"io_i2c_mst1_enable_status") - -// User extension aux register 
io_i2c_mst1_fs_spklen -#define AR_IO_I2C_MST1_FS_SPKLEN 0x800121a0 -#pragma Aux_register(0x800121a0, name=>"io_i2c_mst1_fs_spklen") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_I2C_MST2_IO_I2C_MST2_PRESENT 1 - -// User extension aux register io_i2c_mst2_clken -#define AR_IO_I2C_MST2_CLKEN 0x800122c0 -#pragma Aux_register(0x800122c0, name=>"io_i2c_mst2_clken") - -// User extension aux register io_i2c_mst2_con -#define AR_IO_I2C_MST2_CON 0x80012200 -#pragma Aux_register(0x80012200, name=>"io_i2c_mst2_con") - -// User extension aux register io_i2c_mst2_tar -#define AR_IO_I2C_MST2_TAR 0x80012204 -#pragma Aux_register(0x80012204, name=>"io_i2c_mst2_tar") - -// User extension aux register io_i2c_mst2_data_cmd -#define AR_IO_I2C_MST2_DATA_CMD 0x80012210 -#pragma Aux_register(0x80012210, name=>"io_i2c_mst2_data_cmd") - -// User extension aux register io_i2c_mst2_ss_scl_hcnt -#define AR_IO_I2C_MST2_SS_SCL_HCNT 0x80012214 -#pragma Aux_register(0x80012214, name=>"io_i2c_mst2_ss_scl_hcnt") - -// User extension aux register io_i2c_mst2_ss_scl_lcnt -#define AR_IO_I2C_MST2_SS_SCL_LCNT 0x80012218 -#pragma Aux_register(0x80012218, name=>"io_i2c_mst2_ss_scl_lcnt") - -// User extension aux register io_i2c_mst2_fs_scl_hcnt -#define AR_IO_I2C_MST2_FS_SCL_HCNT 0x8001221c -#pragma Aux_register(0x8001221c, name=>"io_i2c_mst2_fs_scl_hcnt") - -// User extension aux register io_i2c_mst2_fs_scl_lcnt -#define AR_IO_I2C_MST2_FS_SCL_LCNT 0x80012220 -#pragma Aux_register(0x80012220, name=>"io_i2c_mst2_fs_scl_lcnt") - -// User extension aux register io_i2c_mst2_intr_stat -#define AR_IO_I2C_MST2_INTR_STAT 0x8001222c -#pragma Aux_register(0x8001222c, name=>"io_i2c_mst2_intr_stat") - -// User extension aux register io_i2c_mst2_intr_mask -#define AR_IO_I2C_MST2_INTR_MASK 0x80012230 -#pragma Aux_register(0x80012230, name=>"io_i2c_mst2_intr_mask") - -// User extension aux register io_i2c_mst2_raw_intr_stat -#define AR_IO_I2C_MST2_RAW_INTR_STAT 0x80012234 -#pragma Aux_register(0x80012234, 
name=>"io_i2c_mst2_raw_intr_stat") - -// User extension aux register io_i2c_mst2_rx_tl -#define AR_IO_I2C_MST2_RX_TL 0x80012238 -#pragma Aux_register(0x80012238, name=>"io_i2c_mst2_rx_tl") - -// User extension aux register io_i2c_mst2_tx_tl -#define AR_IO_I2C_MST2_TX_TL 0x8001223c -#pragma Aux_register(0x8001223c, name=>"io_i2c_mst2_tx_tl") - -// User extension aux register io_i2c_mst2_clr_intr -#define AR_IO_I2C_MST2_CLR_INTR 0x80012240 -#pragma Aux_register(0x80012240, name=>"io_i2c_mst2_clr_intr") - -// User extension aux register io_i2c_mst2_clr_rx_under -#define AR_IO_I2C_MST2_CLR_RX_UNDER 0x80012244 -#pragma Aux_register(0x80012244, name=>"io_i2c_mst2_clr_rx_under") - -// User extension aux register io_i2c_mst2_clr_rx_over -#define AR_IO_I2C_MST2_CLR_RX_OVER 0x80012248 -#pragma Aux_register(0x80012248, name=>"io_i2c_mst2_clr_rx_over") - -// User extension aux register io_i2c_mst2_clr_tx_over -#define AR_IO_I2C_MST2_CLR_TX_OVER 0x8001224c -#pragma Aux_register(0x8001224c, name=>"io_i2c_mst2_clr_tx_over") - -// User extension aux register io_i2c_mst2_clr_tx_abrt -#define AR_IO_I2C_MST2_CLR_TX_ABRT 0x80012254 -#pragma Aux_register(0x80012254, name=>"io_i2c_mst2_clr_tx_abrt") - -// User extension aux register io_i2c_mst2_clr_activity -#define AR_IO_I2C_MST2_CLR_ACTIVITY 0x8001225c -#pragma Aux_register(0x8001225c, name=>"io_i2c_mst2_clr_activity") - -// User extension aux register io_i2c_mst2_clr_stop_det -#define AR_IO_I2C_MST2_CLR_STOP_DET 0x80012260 -#pragma Aux_register(0x80012260, name=>"io_i2c_mst2_clr_stop_det") - -// User extension aux register io_i2c_mst2_clr_start_det -#define AR_IO_I2C_MST2_CLR_START_DET 0x80012264 -#pragma Aux_register(0x80012264, name=>"io_i2c_mst2_clr_start_det") - -// User extension aux register io_i2c_mst2_enable -#define AR_IO_I2C_MST2_ENABLE 0x8001226c -#pragma Aux_register(0x8001226c, name=>"io_i2c_mst2_enable") - -// User extension aux register io_i2c_mst2_status -#define AR_IO_I2C_MST2_STATUS 0x80012270 -#pragma 
Aux_register(0x80012270, name=>"io_i2c_mst2_status") - -// User extension aux register io_i2c_mst2_txflr -#define AR_IO_I2C_MST2_TXFLR 0x80012274 -#pragma Aux_register(0x80012274, name=>"io_i2c_mst2_txflr") - -// User extension aux register io_i2c_mst2_rxflr -#define AR_IO_I2C_MST2_RXFLR 0x80012278 -#pragma Aux_register(0x80012278, name=>"io_i2c_mst2_rxflr") - -// User extension aux register io_i2c_mst2_sda_hold -#define AR_IO_I2C_MST2_SDA_HOLD 0x8001227c -#pragma Aux_register(0x8001227c, name=>"io_i2c_mst2_sda_hold") - -// User extension aux register io_i2c_mst2_tx_abrt_source -#define AR_IO_I2C_MST2_TX_ABRT_SOURCE 0x80012280 -#pragma Aux_register(0x80012280, name=>"io_i2c_mst2_tx_abrt_source") - -// User extension aux register io_i2c_mst2_enable_status -#define AR_IO_I2C_MST2_ENABLE_STATUS 0x8001229c -#pragma Aux_register(0x8001229c, name=>"io_i2c_mst2_enable_status") - -// User extension aux register io_i2c_mst2_fs_spklen -#define AR_IO_I2C_MST2_FS_SPKLEN 0x800122a0 -#pragma Aux_register(0x800122a0, name=>"io_i2c_mst2_fs_spklen") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_SPI_MST0_IO_SPI_MST0_PRESENT 1 - -// User extension aux register io_spi_mst0_ctrlr0 -#define AR_IO_SPI_MST0_CTRLR0 0x80010000 -#pragma Aux_register(0x80010000, name=>"io_spi_mst0_ctrlr0") - -// User extension aux register io_spi_mst0_ctrlr1 -#define AR_IO_SPI_MST0_CTRLR1 0x80010001 -#pragma Aux_register(0x80010001, name=>"io_spi_mst0_ctrlr1") - -// User extension aux register io_spi_mst0_spien -#define AR_IO_SPI_MST0_SPIEN 0x80010002 -#pragma Aux_register(0x80010002, name=>"io_spi_mst0_spien") - -// User extension aux register io_spi_mst0_ser -#define AR_IO_SPI_MST0_SER 0x80010004 -#pragma Aux_register(0x80010004, name=>"io_spi_mst0_ser") - -// User extension aux register io_spi_mst0_baudr -#define AR_IO_SPI_MST0_BAUDR 0x80010005 -#pragma Aux_register(0x80010005, name=>"io_spi_mst0_baudr") - -// User extension aux register io_spi_mst0_txftlr -#define AR_IO_SPI_MST0_TXFTLR 0x80010006 -#pragma 
Aux_register(0x80010006, name=>"io_spi_mst0_txftlr") - -// User extension aux register io_spi_mst0_rxftlr -#define AR_IO_SPI_MST0_RXFTLR 0x80010007 -#pragma Aux_register(0x80010007, name=>"io_spi_mst0_rxftlr") - -// User extension aux register io_spi_mst0_txflr -#define AR_IO_SPI_MST0_TXFLR 0x80010008 -#pragma Aux_register(0x80010008, name=>"io_spi_mst0_txflr") - -// User extension aux register io_spi_mst0_rxflr -#define AR_IO_SPI_MST0_RXFLR 0x80010009 -#pragma Aux_register(0x80010009, name=>"io_spi_mst0_rxflr") - -// User extension aux register io_spi_mst0_sr -#define AR_IO_SPI_MST0_SR 0x8001000a -#pragma Aux_register(0x8001000a, name=>"io_spi_mst0_sr") - -// User extension aux register io_spi_mst0_imr -#define AR_IO_SPI_MST0_IMR 0x8001000b -#pragma Aux_register(0x8001000b, name=>"io_spi_mst0_imr") - -// User extension aux register io_spi_mst0_isr -#define AR_IO_SPI_MST0_ISR 0x8001000c -#pragma Aux_register(0x8001000c, name=>"io_spi_mst0_isr") - -// User extension aux register io_spi_mst0_risr -#define AR_IO_SPI_MST0_RISR 0x8001000d -#pragma Aux_register(0x8001000d, name=>"io_spi_mst0_risr") - -// User extension aux register io_spi_mst0_txoicr -#define AR_IO_SPI_MST0_TXOICR 0x8001000e -#pragma Aux_register(0x8001000e, name=>"io_spi_mst0_txoicr") - -// User extension aux register io_spi_mst0_rxoicr -#define AR_IO_SPI_MST0_RXOICR 0x8001000f -#pragma Aux_register(0x8001000f, name=>"io_spi_mst0_rxoicr") - -// User extension aux register io_spi_mst0_rxuicr -#define AR_IO_SPI_MST0_RXUICR 0x80010010 -#pragma Aux_register(0x80010010, name=>"io_spi_mst0_rxuicr") - -// User extension aux register io_spi_mst0_icr -#define AR_IO_SPI_MST0_ICR 0x80010012 -#pragma Aux_register(0x80010012, name=>"io_spi_mst0_icr") - -// User extension aux register io_spi_mst0_clken -#define AR_IO_SPI_MST0_CLKEN 0x80010016 -#pragma Aux_register(0x80010016, name=>"io_spi_mst0_clken") - -// User extension aux register io_spi_mst0_dr -#define AR_IO_SPI_MST0_DR 0x80010018 -#pragma 
Aux_register(0x80010018, name=>"io_spi_mst0_dr") - -// User extension aux register io_spi_mst0_rx_sample_dly -#define AR_IO_SPI_MST0_RX_SAMPLE_DLY 0x8001003c -#pragma Aux_register(0x8001003c, name=>"io_spi_mst0_rx_sample_dly") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_SPI_MST1_IO_SPI_MST1_PRESENT 1 - -// User extension aux register io_spi_mst1_ctrlr0 -#define AR_IO_SPI_MST1_CTRLR0 0x80010100 -#pragma Aux_register(0x80010100, name=>"io_spi_mst1_ctrlr0") - -// User extension aux register io_spi_mst1_ctrlr1 -#define AR_IO_SPI_MST1_CTRLR1 0x80010101 -#pragma Aux_register(0x80010101, name=>"io_spi_mst1_ctrlr1") - -// User extension aux register io_spi_mst1_spien -#define AR_IO_SPI_MST1_SPIEN 0x80010102 -#pragma Aux_register(0x80010102, name=>"io_spi_mst1_spien") - -// User extension aux register io_spi_mst1_ser -#define AR_IO_SPI_MST1_SER 0x80010104 -#pragma Aux_register(0x80010104, name=>"io_spi_mst1_ser") - -// User extension aux register io_spi_mst1_baudr -#define AR_IO_SPI_MST1_BAUDR 0x80010105 -#pragma Aux_register(0x80010105, name=>"io_spi_mst1_baudr") - -// User extension aux register io_spi_mst1_txftlr -#define AR_IO_SPI_MST1_TXFTLR 0x80010106 -#pragma Aux_register(0x80010106, name=>"io_spi_mst1_txftlr") - -// User extension aux register io_spi_mst1_rxftlr -#define AR_IO_SPI_MST1_RXFTLR 0x80010107 -#pragma Aux_register(0x80010107, name=>"io_spi_mst1_rxftlr") - -// User extension aux register io_spi_mst1_txflr -#define AR_IO_SPI_MST1_TXFLR 0x80010108 -#pragma Aux_register(0x80010108, name=>"io_spi_mst1_txflr") - -// User extension aux register io_spi_mst1_rxflr -#define AR_IO_SPI_MST1_RXFLR 0x80010109 -#pragma Aux_register(0x80010109, name=>"io_spi_mst1_rxflr") - -// User extension aux register io_spi_mst1_sr -#define AR_IO_SPI_MST1_SR 0x8001010a -#pragma Aux_register(0x8001010a, name=>"io_spi_mst1_sr") - -// User extension aux register io_spi_mst1_imr -#define AR_IO_SPI_MST1_IMR 0x8001010b -#pragma Aux_register(0x8001010b, name=>"io_spi_mst1_imr") - -// User 
extension aux register io_spi_mst1_isr -#define AR_IO_SPI_MST1_ISR 0x8001010c -#pragma Aux_register(0x8001010c, name=>"io_spi_mst1_isr") - -// User extension aux register io_spi_mst1_risr -#define AR_IO_SPI_MST1_RISR 0x8001010d -#pragma Aux_register(0x8001010d, name=>"io_spi_mst1_risr") - -// User extension aux register io_spi_mst1_txoicr -#define AR_IO_SPI_MST1_TXOICR 0x8001010e -#pragma Aux_register(0x8001010e, name=>"io_spi_mst1_txoicr") - -// User extension aux register io_spi_mst1_rxoicr -#define AR_IO_SPI_MST1_RXOICR 0x8001010f -#pragma Aux_register(0x8001010f, name=>"io_spi_mst1_rxoicr") - -// User extension aux register io_spi_mst1_rxuicr -#define AR_IO_SPI_MST1_RXUICR 0x80010110 -#pragma Aux_register(0x80010110, name=>"io_spi_mst1_rxuicr") - -// User extension aux register io_spi_mst1_icr -#define AR_IO_SPI_MST1_ICR 0x80010112 -#pragma Aux_register(0x80010112, name=>"io_spi_mst1_icr") - -// User extension aux register io_spi_mst1_clken -#define AR_IO_SPI_MST1_CLKEN 0x80010116 -#pragma Aux_register(0x80010116, name=>"io_spi_mst1_clken") - -// User extension aux register io_spi_mst1_dr -#define AR_IO_SPI_MST1_DR 0x80010118 -#pragma Aux_register(0x80010118, name=>"io_spi_mst1_dr") - -// User extension aux register io_spi_mst1_rx_sample_dly -#define AR_IO_SPI_MST1_RX_SAMPLE_DLY 0x8001013c -#pragma Aux_register(0x8001013c, name=>"io_spi_mst1_rx_sample_dly") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_SPI_MST2_IO_SPI_MST2_PRESENT 1 - -// User extension aux register io_spi_mst2_ctrlr0 -#define AR_IO_SPI_MST2_CTRLR0 0x80010200 -#pragma Aux_register(0x80010200, name=>"io_spi_mst2_ctrlr0") - -// User extension aux register io_spi_mst2_ctrlr1 -#define AR_IO_SPI_MST2_CTRLR1 0x80010201 -#pragma Aux_register(0x80010201, name=>"io_spi_mst2_ctrlr1") - -// User extension aux register io_spi_mst2_spien -#define AR_IO_SPI_MST2_SPIEN 0x80010202 -#pragma Aux_register(0x80010202, name=>"io_spi_mst2_spien") - -// User extension aux register io_spi_mst2_ser -#define AR_IO_SPI_MST2_SER 
0x80010204 -#pragma Aux_register(0x80010204, name=>"io_spi_mst2_ser") - -// User extension aux register io_spi_mst2_baudr -#define AR_IO_SPI_MST2_BAUDR 0x80010205 -#pragma Aux_register(0x80010205, name=>"io_spi_mst2_baudr") - -// User extension aux register io_spi_mst2_txftlr -#define AR_IO_SPI_MST2_TXFTLR 0x80010206 -#pragma Aux_register(0x80010206, name=>"io_spi_mst2_txftlr") - -// User extension aux register io_spi_mst2_rxftlr -#define AR_IO_SPI_MST2_RXFTLR 0x80010207 -#pragma Aux_register(0x80010207, name=>"io_spi_mst2_rxftlr") - -// User extension aux register io_spi_mst2_txflr -#define AR_IO_SPI_MST2_TXFLR 0x80010208 -#pragma Aux_register(0x80010208, name=>"io_spi_mst2_txflr") - -// User extension aux register io_spi_mst2_rxflr -#define AR_IO_SPI_MST2_RXFLR 0x80010209 -#pragma Aux_register(0x80010209, name=>"io_spi_mst2_rxflr") - -// User extension aux register io_spi_mst2_sr -#define AR_IO_SPI_MST2_SR 0x8001020a -#pragma Aux_register(0x8001020a, name=>"io_spi_mst2_sr") - -// User extension aux register io_spi_mst2_imr -#define AR_IO_SPI_MST2_IMR 0x8001020b -#pragma Aux_register(0x8001020b, name=>"io_spi_mst2_imr") - -// User extension aux register io_spi_mst2_isr -#define AR_IO_SPI_MST2_ISR 0x8001020c -#pragma Aux_register(0x8001020c, name=>"io_spi_mst2_isr") - -// User extension aux register io_spi_mst2_risr -#define AR_IO_SPI_MST2_RISR 0x8001020d -#pragma Aux_register(0x8001020d, name=>"io_spi_mst2_risr") - -// User extension aux register io_spi_mst2_txoicr -#define AR_IO_SPI_MST2_TXOICR 0x8001020e -#pragma Aux_register(0x8001020e, name=>"io_spi_mst2_txoicr") - -// User extension aux register io_spi_mst2_rxoicr -#define AR_IO_SPI_MST2_RXOICR 0x8001020f -#pragma Aux_register(0x8001020f, name=>"io_spi_mst2_rxoicr") - -// User extension aux register io_spi_mst2_rxuicr -#define AR_IO_SPI_MST2_RXUICR 0x80010210 -#pragma Aux_register(0x80010210, name=>"io_spi_mst2_rxuicr") - -// User extension aux register io_spi_mst2_icr -#define AR_IO_SPI_MST2_ICR 0x80010212 
-#pragma Aux_register(0x80010212, name=>"io_spi_mst2_icr") - -// User extension aux register io_spi_mst2_clken -#define AR_IO_SPI_MST2_CLKEN 0x80010216 -#pragma Aux_register(0x80010216, name=>"io_spi_mst2_clken") - -// User extension aux register io_spi_mst2_dr -#define AR_IO_SPI_MST2_DR 0x80010218 -#pragma Aux_register(0x80010218, name=>"io_spi_mst2_dr") - -// User extension aux register io_spi_mst2_rx_sample_dly -#define AR_IO_SPI_MST2_RX_SAMPLE_DLY 0x8001023c -#pragma Aux_register(0x8001023c, name=>"io_spi_mst2_rx_sample_dly") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_SPI_SLV0_IO_SPI_SLV0_PRESENT 1 - -// User extension aux register io_spi_slv0_ctrlr0 -#define AR_IO_SPI_SLV0_CTRLR0 0x80011000 -#pragma Aux_register(0x80011000, name=>"io_spi_slv0_ctrlr0") - -// User extension aux register io_spi_slv0_spien -#define AR_IO_SPI_SLV0_SPIEN 0x80011002 -#pragma Aux_register(0x80011002, name=>"io_spi_slv0_spien") - -// User extension aux register io_spi_slv0_txftlr -#define AR_IO_SPI_SLV0_TXFTLR 0x80011006 -#pragma Aux_register(0x80011006, name=>"io_spi_slv0_txftlr") - -// User extension aux register io_spi_slv0_rxftlr -#define AR_IO_SPI_SLV0_RXFTLR 0x80011007 -#pragma Aux_register(0x80011007, name=>"io_spi_slv0_rxftlr") - -// User extension aux register io_spi_slv0_txflr -#define AR_IO_SPI_SLV0_TXFLR 0x80011008 -#pragma Aux_register(0x80011008, name=>"io_spi_slv0_txflr") - -// User extension aux register io_spi_slv0_rxflr -#define AR_IO_SPI_SLV0_RXFLR 0x80011009 -#pragma Aux_register(0x80011009, name=>"io_spi_slv0_rxflr") - -// User extension aux register io_spi_slv0_sr -#define AR_IO_SPI_SLV0_SR 0x8001100a -#pragma Aux_register(0x8001100a, name=>"io_spi_slv0_sr") - -// User extension aux register io_spi_slv0_imr -#define AR_IO_SPI_SLV0_IMR 0x8001100b -#pragma Aux_register(0x8001100b, name=>"io_spi_slv0_imr") - -// User extension aux register io_spi_slv0_isr -#define AR_IO_SPI_SLV0_ISR 0x8001100c -#pragma Aux_register(0x8001100c, name=>"io_spi_slv0_isr") - -// User 
extension aux register io_spi_slv0_risr -#define AR_IO_SPI_SLV0_RISR 0x8001100d -#pragma Aux_register(0x8001100d, name=>"io_spi_slv0_risr") - -// User extension aux register io_spi_slv0_txoicr -#define AR_IO_SPI_SLV0_TXOICR 0x8001100e -#pragma Aux_register(0x8001100e, name=>"io_spi_slv0_txoicr") - -// User extension aux register io_spi_slv0_rxoicr -#define AR_IO_SPI_SLV0_RXOICR 0x8001100f -#pragma Aux_register(0x8001100f, name=>"io_spi_slv0_rxoicr") - -// User extension aux register io_spi_slv0_rxuicr -#define AR_IO_SPI_SLV0_RXUICR 0x80011010 -#pragma Aux_register(0x80011010, name=>"io_spi_slv0_rxuicr") - -// User extension aux register io_spi_slv0_icr -#define AR_IO_SPI_SLV0_ICR 0x80011012 -#pragma Aux_register(0x80011012, name=>"io_spi_slv0_icr") - -// User extension aux register io_spi_slv0_clken -#define AR_IO_SPI_SLV0_CLKEN 0x80011016 -#pragma Aux_register(0x80011016, name=>"io_spi_slv0_clken") - -// User extension aux register io_spi_slv0_dr -#define AR_IO_SPI_SLV0_DR 0x80011018 -#pragma Aux_register(0x80011018, name=>"io_spi_slv0_dr") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_UART0_IO_UART0_PRESENT 1 - -// User extension aux register io_uart0_clken -#define AR_IO_UART0_CLKEN 0x800140c0 -#pragma Aux_register(0x800140c0, name=>"io_uart0_clken") - -// User extension aux register io_uart0_rbr_thr_dll -#define AR_IO_UART0_RBR_THR_DLL 0x80014000 -#pragma Aux_register(0x80014000, name=>"io_uart0_rbr_thr_dll") - -// User extension aux register io_uart0_ier_dlh -#define AR_IO_UART0_IER_DLH 0x80014004 -#pragma Aux_register(0x80014004, name=>"io_uart0_ier_dlh") - -// User extension aux register io_uart0_iir_fcr -#define AR_IO_UART0_IIR_FCR 0x80014008 -#pragma Aux_register(0x80014008, name=>"io_uart0_iir_fcr") - -// User extension aux register io_uart0_lcr -#define AR_IO_UART0_LCR 0x8001400c -#pragma Aux_register(0x8001400c, name=>"io_uart0_lcr") - -// User extension aux register io_uart0_mcr -#define AR_IO_UART0_MCR 0x80014010 -#pragma Aux_register(0x80014010, 
name=>"io_uart0_mcr") - -// User extension aux register io_uart0_lsr -#define AR_IO_UART0_LSR 0x80014014 -#pragma Aux_register(0x80014014, name=>"io_uart0_lsr") - -// User extension aux register io_uart0_msr -#define AR_IO_UART0_MSR 0x80014018 -#pragma Aux_register(0x80014018, name=>"io_uart0_msr") - -// User extension aux register io_uart0_usr -#define AR_IO_UART0_USR 0x8001407c -#pragma Aux_register(0x8001407c, name=>"io_uart0_usr") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_UART1_IO_UART1_PRESENT 1 - -// User extension aux register io_uart1_clken -#define AR_IO_UART1_CLKEN 0x800141c0 -#pragma Aux_register(0x800141c0, name=>"io_uart1_clken") - -// User extension aux register io_uart1_rbr_thr_dll -#define AR_IO_UART1_RBR_THR_DLL 0x80014100 -#pragma Aux_register(0x80014100, name=>"io_uart1_rbr_thr_dll") - -// User extension aux register io_uart1_ier_dlh -#define AR_IO_UART1_IER_DLH 0x80014104 -#pragma Aux_register(0x80014104, name=>"io_uart1_ier_dlh") - -// User extension aux register io_uart1_iir_fcr -#define AR_IO_UART1_IIR_FCR 0x80014108 -#pragma Aux_register(0x80014108, name=>"io_uart1_iir_fcr") - -// User extension aux register io_uart1_lcr -#define AR_IO_UART1_LCR 0x8001410c -#pragma Aux_register(0x8001410c, name=>"io_uart1_lcr") - -// User extension aux register io_uart1_mcr -#define AR_IO_UART1_MCR 0x80014110 -#pragma Aux_register(0x80014110, name=>"io_uart1_mcr") - -// User extension aux register io_uart1_lsr -#define AR_IO_UART1_LSR 0x80014114 -#pragma Aux_register(0x80014114, name=>"io_uart1_lsr") - -// User extension aux register io_uart1_msr -#define AR_IO_UART1_MSR 0x80014118 -#pragma Aux_register(0x80014118, name=>"io_uart1_msr") - -// User extension aux register io_uart1_usr -#define AR_IO_UART1_USR 0x8001417c -#pragma Aux_register(0x8001417c, name=>"io_uart1_usr") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_UART2_IO_UART2_PRESENT 1 - -// User extension aux register io_uart2_clken -#define AR_IO_UART2_CLKEN 0x800142c0 -#pragma Aux_register(0x800142c0, 
name=>"io_uart2_clken") - -// User extension aux register io_uart2_rbr_thr_dll -#define AR_IO_UART2_RBR_THR_DLL 0x80014200 -#pragma Aux_register(0x80014200, name=>"io_uart2_rbr_thr_dll") - -// User extension aux register io_uart2_ier_dlh -#define AR_IO_UART2_IER_DLH 0x80014204 -#pragma Aux_register(0x80014204, name=>"io_uart2_ier_dlh") - -// User extension aux register io_uart2_iir_fcr -#define AR_IO_UART2_IIR_FCR 0x80014208 -#pragma Aux_register(0x80014208, name=>"io_uart2_iir_fcr") - -// User extension aux register io_uart2_lcr -#define AR_IO_UART2_LCR 0x8001420c -#pragma Aux_register(0x8001420c, name=>"io_uart2_lcr") - -// User extension aux register io_uart2_mcr -#define AR_IO_UART2_MCR 0x80014210 -#pragma Aux_register(0x80014210, name=>"io_uart2_mcr") - -// User extension aux register io_uart2_lsr -#define AR_IO_UART2_LSR 0x80014214 -#pragma Aux_register(0x80014214, name=>"io_uart2_lsr") - -// User extension aux register io_uart2_msr -#define AR_IO_UART2_MSR 0x80014218 -#pragma Aux_register(0x80014218, name=>"io_uart2_msr") - -// User extension aux register io_uart2_usr -#define AR_IO_UART2_USR 0x8001427c -#pragma Aux_register(0x8001427c, name=>"io_uart2_usr") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_UART3_IO_UART3_PRESENT 1 - -// User extension aux register io_uart3_clken -#define AR_IO_UART3_CLKEN 0x800143c0 -#pragma Aux_register(0x800143c0, name=>"io_uart3_clken") - -// User extension aux register io_uart3_rbr_thr_dll -#define AR_IO_UART3_RBR_THR_DLL 0x80014300 -#pragma Aux_register(0x80014300, name=>"io_uart3_rbr_thr_dll") - -// User extension aux register io_uart3_ier_dlh -#define AR_IO_UART3_IER_DLH 0x80014304 -#pragma Aux_register(0x80014304, name=>"io_uart3_ier_dlh") - -// User extension aux register io_uart3_iir_fcr -#define AR_IO_UART3_IIR_FCR 0x80014308 -#pragma Aux_register(0x80014308, name=>"io_uart3_iir_fcr") - -// User extension aux register io_uart3_lcr -#define AR_IO_UART3_LCR 0x8001430c -#pragma Aux_register(0x8001430c, name=>"io_uart3_lcr") - 
-// User extension aux register io_uart3_mcr -#define AR_IO_UART3_MCR 0x80014310 -#pragma Aux_register(0x80014310, name=>"io_uart3_mcr") - -// User extension aux register io_uart3_lsr -#define AR_IO_UART3_LSR 0x80014314 -#pragma Aux_register(0x80014314, name=>"io_uart3_lsr") - -// User extension aux register io_uart3_msr -#define AR_IO_UART3_MSR 0x80014318 -#pragma Aux_register(0x80014318, name=>"io_uart3_msr") - -// User extension aux register io_uart3_usr -#define AR_IO_UART3_USR 0x8001437c -#pragma Aux_register(0x8001437c, name=>"io_uart3_usr") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_CREG_MST0_IO_CREG_MST0_PRESENT 1 - -// User extension aux register io_creg_mst0_ctrl -#define AR_IO_CREG_MST0_CTRL 0x80018000 -#pragma Aux_register(0x80018000, name=>"io_creg_mst0_ctrl") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_CREG_SLV0_IO_CREG_SLV0_PRESENT 1 - -// User extension aux register io_creg_slv0_obsr -#define AR_IO_CREG_SLV0_OBSR 0x80018080 -#pragma Aux_register(0x80018080, name=>"io_creg_slv0_obsr") -#define APEX_COM_ARC_HARDWARE_DFSS_SUBSYS_BCR_SUBSYS_BCR_PRESENT 1 - -// User extension aux register SUBSYS_BUILD -#define AR_SUBSYS_BUILD 0xf0 -#pragma Aux_register(0xf0, name=>"SUBSYS_BUILD") - -// User extension aux register SUBSYS_DSP_0_BUILD -#define AR_SUBSYS_DSP_0_BUILD 0xa00 -#pragma Aux_register(0xa00, name=>"SUBSYS_DSP_0_BUILD") - -// User extension aux register SUBSYS_DSP_0_CONFIG -#define AR_SUBSYS_DSP_0_CONFIG 0xa02 -#pragma Aux_register(0xa02, name=>"SUBSYS_DSP_0_CONFIG") - -// User extension aux register SUBSYS_IO_0_BUILD -#define AR_SUBSYS_IO_0_BUILD 0xa04 -#pragma Aux_register(0xa04, name=>"SUBSYS_IO_0_BUILD") - -// User extension aux register SUBSYS_IO_1_BUILD -#define AR_SUBSYS_IO_1_BUILD 0xa05 -#pragma Aux_register(0xa05, name=>"SUBSYS_IO_1_BUILD") -#define APEX_COM_ARC_HARDWARE_FLOATING_POINT_UNIT_FPU_PRESENT 1 - -// User extension aux register fpu_build -#define AR_FPU_BUILD 0xc8 -#pragma Aux_register(0xc8, name=>"fpu_build") - -// User extension aux 
register fpu_ctrl -#define AR_FPU_CTRL 0x300 -#pragma Aux_register(0x300, name=>"fpu_ctrl") - -// User extension aux register fpu_status -#define AR_FPU_STATUS 0x301 -#pragma Aux_register(0x301, name=>"fpu_status") - -// User extension instruction fsmadd -extern long fsmadd(long,long); -#pragma intrinsic(fsmadd,opcode=>6,sub_opcode=>5, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fsmsub -extern long fsmsub(long,long); -#pragma intrinsic(fsmsub,opcode=>6,sub_opcode=>6, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fsmul -extern long fsmul(long,long); -#pragma intrinsic(fsmul,opcode=>6,sub_opcode=>0, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fsadd -extern long fsadd(long,long); -#pragma intrinsic(fsadd,opcode=>6,sub_opcode=>1, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fssub -extern long fssub(long,long); -#pragma intrinsic(fssub,opcode=>6,sub_opcode=>2, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fcvt32 -extern long fcvt32(long,long); -#pragma intrinsic(fcvt32,opcode=>6,sub_opcode=>8, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fsdiv -extern long fsdiv(long,long); -#pragma intrinsic(fsdiv,opcode=>6,sub_opcode=>7, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fscmp -extern long fscmp(long,long); -#pragma intrinsic(fscmp,opcode=>6,sub_opcode=>3, effects=>"auxreg=0xc8:is_read:is_written; 
auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fscmp -extern long fscmp_f(long,long); -#pragma intrinsic(fscmp_f,opcode=>6,sub_opcode=>3, set_flags => 1, flags => "zncv", effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fscmpf -extern long fscmpf(long,long); -#pragma intrinsic(fscmpf,opcode=>6,sub_opcode=>4, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fscmpf -extern long fscmpf_f(long,long); -#pragma intrinsic(fscmpf_f,opcode=>6,sub_opcode=>4, set_flags => 1, flags => "zncv", effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fssqrt -extern long fssqrt(long); -#pragma intrinsic(fssqrt,opcode=>6,sub_opcode=>0, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") -#define APEX_COM_ARC_HARDWARE_FLOATING_POINT_UNIT_FPU_DP_ASSIST_PRESENT 1 - -// User extension aux register aux_dpfp1l -#define AR_AUX_DPFP1L 0x302 -#pragma Aux_register(0x302, name=>"aux_dpfp1l") - -// User extension aux register aux_dpfp1h -#define AR_AUX_DPFP1H 0x303 -#pragma Aux_register(0x303, name=>"aux_dpfp1h") - -// User extension aux register aux_dpfp2l -#define AR_AUX_DPFP2L 0x304 -#pragma Aux_register(0x304, name=>"aux_dpfp2l") - -// User extension aux register aux_dpfp2h -#define AR_AUX_DPFP2H 0x305 -#pragma Aux_register(0x305, name=>"aux_dpfp2h") - -// User extension instruction dmulh11 -extern long dmulh11(long,long); -#pragma intrinsic(dmulh11,opcode=>6,sub_opcode=>48,blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dmulh11 -extern long dmulh11_f(long,long); 
-#pragma intrinsic(dmulh11_f,opcode=>6,sub_opcode=>48, set_flags => 1, flags => "zncv",blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dmulh12 -extern long dmulh12(long,long); -#pragma intrinsic(dmulh12,opcode=>6,sub_opcode=>49,blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dmulh12 -extern long dmulh12_f(long,long); -#pragma intrinsic(dmulh12_f,opcode=>6,sub_opcode=>49, set_flags => 1, flags => "zncv",blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dmulh21 -extern long dmulh21(long,long); -#pragma intrinsic(dmulh21,opcode=>6,sub_opcode=>50,blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dmulh21 -extern long dmulh21_f(long,long); -#pragma intrinsic(dmulh21_f,opcode=>6,sub_opcode=>50, set_flags => 1, flags => "zncv",blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dmulh22 -extern long dmulh22(long,long); -#pragma intrinsic(dmulh22,opcode=>6,sub_opcode=>51,blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dmulh22 -extern long dmulh22_f(long,long); -#pragma intrinsic(dmulh22_f,opcode=>6,sub_opcode=>51, set_flags => 1, flags => "zncv",blocking_cycles=> 7, 
effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction daddh11 -extern long daddh11(long,long); -#pragma intrinsic(daddh11,opcode=>6,sub_opcode=>52,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction daddh11 -extern long daddh11_f(long,long); -#pragma intrinsic(daddh11_f,opcode=>6,sub_opcode=>52, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction daddh12 -extern long daddh12(long,long); -#pragma intrinsic(daddh12,opcode=>6,sub_opcode=>53,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction daddh12 -extern long daddh12_f(long,long); -#pragma intrinsic(daddh12_f,opcode=>6,sub_opcode=>53, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction daddh21 -extern long daddh21(long,long); -#pragma intrinsic(daddh21,opcode=>6,sub_opcode=>54,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction daddh21 -extern long daddh21_f(long,long); -#pragma intrinsic(daddh21_f,opcode=>6,sub_opcode=>54, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// 
User extension instruction daddh22 -extern long daddh22(long,long); -#pragma intrinsic(daddh22,opcode=>6,sub_opcode=>55,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction daddh22 -extern long daddh22_f(long,long); -#pragma intrinsic(daddh22_f,opcode=>6,sub_opcode=>55, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dsubh11 -extern long dsubh11(long,long); -#pragma intrinsic(dsubh11,opcode=>6,sub_opcode=>56,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dsubh11 -extern long dsubh11_f(long,long); -#pragma intrinsic(dsubh11_f,opcode=>6,sub_opcode=>56, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dsubh12 -extern long dsubh12(long,long); -#pragma intrinsic(dsubh12,opcode=>6,sub_opcode=>57,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dsubh12 -extern long dsubh12_f(long,long); -#pragma intrinsic(dsubh12_f,opcode=>6,sub_opcode=>57, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dsubh21 -extern long dsubh21(long,long); -#pragma intrinsic(dsubh21,opcode=>6,sub_opcode=>58,blocking_cycles=> 5, 
effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dsubh21 -extern long dsubh21_f(long,long); -#pragma intrinsic(dsubh21_f,opcode=>6,sub_opcode=>58, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dsubh22 -extern long dsubh22(long,long); -#pragma intrinsic(dsubh22,opcode=>6,sub_opcode=>59,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dsubh22 -extern long dsubh22_f(long,long); -#pragma intrinsic(dsubh22_f,opcode=>6,sub_opcode=>59, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dexcl1 -extern long dexcl1(long,long); -#pragma intrinsic(dexcl1,opcode=>6,sub_opcode=>60, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dexcl2 -extern long dexcl2(long,long); -#pragma intrinsic(dexcl2,opcode=>6,sub_opcode=>61, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - - -#endif - - -]]> - - - - - diff --git a/tensorflow/lite/micro/tools/make/targets/arc/memory.lcf b/tensorflow/lite/micro/tools/make/targets/arc/memory.lcf deleted file mode 100644 index 00cf0a3050b..00000000000 --- a/tensorflow/lite/micro/tools/make/targets/arc/memory.lcf +++ /dev/null @@ -1,50 +0,0 @@ - # SYSTEM memory regions indicate where external memory might be 
located. - # The TCF has no specific knowledge of whether SYSTEM regions contain - # external memory or not. - # CCMWRAP memory regions indicate unusable portions of the address space - # due to CCM memory wrapping into upper addresses beyond its size - - MEMORY { - ICCM0 : ORIGIN = 0x00000000, LENGTH = 0x00010000 - # CCMWRAP0: ORIGIN = 0x00010000, LENGTH = 0x0fff0000 - ICCM1 : ORIGIN = 0x10000000, LENGTH = 0x00080000 - # CCMWRAP1: ORIGIN = 0x10080000, LENGTH = 0x0ff80000 - # SYSTEM0 : ORIGIN = 0x20000000, LENGTH = 0x60000000 - DCCM : ORIGIN = 0x80000000, LENGTH = 0x00080000 - # CCMWRAP2: ORIGIN = 0x80080000, LENGTH = 0x0ff80000 - XCCM : ORIGIN = 0x90000000, LENGTH = 0x00008000 - # CCMWRAP3: ORIGIN = 0x90008000, LENGTH = 0x0fff8000 - YCCM : ORIGIN = 0xa0000000, LENGTH = 0x00008000 - # CCMWRAP4: ORIGIN = 0xa0008000, LENGTH = 0x0fff8000 - # SYSTEM1 : ORIGIN = 0xb0000000, LENGTH = 0x50000000 - } - SECTIONS { - GROUP BLOCK(4): { - .vectors (TEXT) SIZE(DEFINED _IVTSIZE?_IVTSIZE:132): {} - .text? : { *('.text$crt*') } - * (TEXT): {} - * (LIT): {} - .rodata_in_data?:{} - } > ICCM1 - - GROUP BLOCK(4): { - /* _SDA_BASE_ computed implicitly */ - .sdata?: {} - .sbss?: {} - .protobuf?: {} - * (DATA): {} - * (BSS): {} - .stack ALIGN(4) SIZE(DEFINED _STACKSIZE?_STACKSIZE:65536): {} - .heap? ALIGN(4) SIZE(DEFINED _HEAPSIZE?_HEAPSIZE:0): {} - .tensor_arena?: {} - } > DCCM - GROUP BLOCK(4): { - .Xdata? : {} - } > XCCM - GROUP BLOCK(4): { - .Ydata? 
: {} - } > YCCM - } - - - From 5b2f6d322cb4943548935b0fc52b528e18c4ad7d Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Thu, 30 Apr 2020 10:56:08 +0300 Subject: [PATCH 39/45] Cases with channel multiplier for DW conv (int8) temporarily fallback to reference code --- tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc index 081a40b23b5..2aad76bc042 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc @@ -69,8 +69,14 @@ bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input, const TfLiteDepthwiseConvParams* params) { const auto* affine_quantization = reinterpret_cast(filter->quantization.params); + const int in_ch = SizeOfDimension(input, 3); + const int filters_num = SizeOfDimension(filter, 3); + // MLI optimized version only supports int8 dataype, dilation factor of 1 and // per-axis quantization of weights (no broadcasting/per-tensor) + // TODO: ((in_ch == filters_num) || (in_ch == 1)) is a forbidding of + // channel multiplier logic for multichannel input. 
+ // To be removed after it will be supported in MLI bool ret_val = (filter->type == kTfLiteInt8) && (input->type == kTfLiteInt8) && (bias->type == kTfLiteInt32) && @@ -78,6 +84,7 @@ bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input, (params->dilation_height_factor == 1) && (affine_quantization->scale->size == filter->dims->data[kDepthwiseConvQuantizedDimension]) && + ((in_ch == filters_num) || (in_ch == 1)) && affine_quantization->scale->size <= (kMaxChannels * 2); return ret_val; } From ea1a6715ef2fc136b06986cdade85f6a084855be Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Fri, 1 May 2020 13:46:45 +0300 Subject: [PATCH 40/45] ARC related documentation in readme files --- .../lite/micro/examples/hello_world/README.md | 45 ++++ .../micro/examples/micro_speech/README.md | 51 +++++ .../micro/examples/person_detection/README.md | 52 +++++ .../person_detection_experimental/README.md | 54 +++++ .../lite/micro/kernels/arc_mli/README.md | 57 +++++ .../micro/tools/make/targets/arc/README.md | 214 ++++++++++++++++++ .../make/templates/arc/README_ARC.md.tpl | 45 +++- .../templates/arc/README_ARC_EMSDP.md.tpl | 48 +++- 8 files changed, 564 insertions(+), 2 deletions(-) create mode 100644 tensorflow/lite/micro/kernels/arc_mli/README.md create mode 100644 tensorflow/lite/micro/tools/make/targets/arc/README.md diff --git a/tensorflow/lite/micro/examples/hello_world/README.md b/tensorflow/lite/micro/examples/hello_world/README.md index 3f3fef67f28..a0a2b678157 100644 --- a/tensorflow/lite/micro/examples/hello_world/README.md +++ b/tensorflow/lite/micro/examples/hello_world/README.md @@ -15,6 +15,7 @@ animation. 
## Table of contents - [Understand the model](#understand-the-model) +- [Deploy to ARC EM SDP](#deploy-to-arc-em-sdp) - [Deploy to Arduino](#deploy-to-arduino) - [Deploy to ESP32](#deploy-to-esp32) - [Deploy to SparkFun Edge](#deploy-to-sparkfun-edge) @@ -30,6 +31,50 @@ Walk through this tutorial to understand what the model does, how it works, and how it was converted for use with TensorFlow Lite for Microcontrollers. +## Deploy to ARC EM SDP + +The following instructions will help you to build and deploy this example to +[ARC EM SDP](https://www.synopsys.com/dw/ipdir.php?ds=arc-em-software-development-platform) board. General information and instructions on using the board with TensorFlow Lite Micro can be found in the common [ARC targets description](/tensorflow/lite/micro/tools/make/targets/arc/README.md). + +### Initial Setup + +Follow the instructions on the [ARC EM SDP Initial Setup](/tensorflow/lite/micro/tools/make/targets/arc/README.md#ARC-EM-Software-Development-Platform-ARC-EM-SDP) to get and install all required tools for work with ARC EM SDP. + +### Generate Example Project + +The example project for ARC EM SDP platform can be generated with the following command: + + make -f tensorflow/lite/micro/tools/make/Makefile TARGET=arc_emsdp TAGS=no_arc_mli generate_hello_world_make_project + +### Build and Run Example + +For more detailed information on building and running examples see the appropriate sections of general descriptions of the [ARC EM SDP usage with TFLM](/tensorflow/lite/micro/tools/make/targets/arc/README.md#ARC-EM-Software-Development-Platform-ARC-EM-SDP). In the directory with generated project you can also find a *README_ARC_EMSDP.md* file with instructions and options on building and running. Here we only briefly mention main steps which are typically enough to get it started. + +1. You need to [connect the board](/tensorflow/lite/micro/tools/make/targets/arc/README.md#connect-the-board) and open an serial connection. + +2. 
Go to the generated example project director + + cd tensorflow/lite/micro/tools/make/gen/arc_emsdp_arc/prj/hello_world/make + +3. Build the example using + + make app + +4. To generate artefacts for self-boot of example from the board use + + make flash + +5. To run application from the board using microSD card: + * Copy the content of the created /bin folder into the root of microSD card. Note that the card must be formatted as FAT32 with default cluster size (but less than 32 Kbytes) + * Plug in the microSD card into the J11 connector. + * Push the RST button. If a red LED is lit beside RST button, push the CFG button. + +6. If you have the MetaWare Debugger installed in your environment: + * To run application from the console using it type `make run`. + * To stop the execution type `Ctrl+C` in the console several times. + +In both cases (step 5 and 6) you will see the application output in the serial terminal. + ## Deploy to Arduino The following instructions will help you build and deploy this sample diff --git a/tensorflow/lite/micro/examples/micro_speech/README.md b/tensorflow/lite/micro/examples/micro_speech/README.md index 7ccaa806366..ba55a7d8493 100644 --- a/tensorflow/lite/micro/examples/micro_speech/README.md +++ b/tensorflow/lite/micro/examples/micro_speech/README.md @@ -16,6 +16,7 @@ kilobytes of Flash. ## Table of contents +- [Deploy to ARC EM SDP](#deploy-to-arc-em-sdp) - [Deploy to Arduino](#deploy-to-arduino) - [Deploy to ESP32](#deploy-to-esp32) - [Deploy to SparkFun Edge](#deploy-to-sparkfun-edge) @@ -25,6 +26,56 @@ kilobytes of Flash. - [Run the tests on a development machine](#run-the-tests-on-a-development-machine) - [Train your own model](#train-your-own-model) +## Deploy to ARC EM SDP + +The following instructions will help you to build and deploy this example to +[ARC EM SDP](https://www.synopsys.com/dw/ipdir.php?ds=arc-em-software-development-platform) board. 
General information and instructions on using the board with TensorFlow Lite Micro can be found in the common [ARC targets description](/tensorflow/lite/micro/tools/make/targets/arc/README.md). + +This example is quantized with symmetric uint8 scheme. As noted in [kernels/arc_mli/README.md](/tensorflow/lite/micro/kernels/arc_mli/README.md), embARC MLI supports optimized kernels for int8 quantization only. Therefore, this example will only use TFLM reference kernels. + +The ARC EM SDP board contains the rich set of extension interfaces. You can choose any compatible microphone and modify [audio_provider.cc](/tensorflow/lite/micro/examples/micro_speech/audio_provider.cc) file accordingly to use input from your specific camera. By default, results of running this example are printed to the console. If you would like to instead implement some target-specific actions, you need to modify [command_responder.cc](/tensorflow/lite/micro/examples/micro_speech/command_responder.cc) accordingly. + +The reference implementations of these files are used by default on the EM SDP. + +### Initial setup + +Follow the instructions on the [ARC EM SDP Initial Setup](/tensorflow/lite/micro/tools/make/targets/arc/README.md#ARC-EM-Software-Development-Platform-ARC-EM-SDP) to get and install all required tools for work with ARC EM SDP. + +### Generate Example Project + +As default example doesn’t provide any output without real audio, it is recommended to get started with example for mock data. The project for ARC EM SDP platform can be generated with the following command: + + make -f tensorflow/lite/micro/tools/make/Makefile TARGET=arc_emsdp TAGS=no_arc_mli generate_micro_speech_mock_make_project + +### Build and Run Example + +For more detailed information on building and running examples see the appropriate sections of general descriptions of the [ARC EM SDP usage with TFLM](/tensorflow/lite/micro/tools/make/targets/arc/README.md#ARC-EM-Software-Development-Platform-ARC-EM-SDP). 
In the directory with generated project you can also find a *README_ARC_EMSDP.md* file with instructions and options on building and running. Here we only briefly mention main steps which are typically enough to get it started. + +1. You need to [connect the board](/tensorflow/lite/micro/tools/make/targets/arc/README.md#connect-the-board) and open an serial connection. + +2. Go to the generated example project director + + cd tensorflow/lite/micro/tools/make/gen/arc_emsdp_arc/prj/person_detection/make` + +3. Build the example using + + make app + +4. To generate artefacts for self-boot of example from the board use + + make flash + +5. To run application from the board using microSD card: + * Copy the content of the created /bin folder into the root of microSD card. Note that the card must be formatted as FAT32 with default cluster size (but less than 32 Kbytes) + * Plug in the microSD card into the J11 connector. + * Push the RST button. If a red LED is lit beside RST button, push the CFG button. + +6. If you have the MetaWare Debugger installed in your environment: + * To run application from the console using it type `make run`. + * To stop the execution type `Ctrl+C` in the console several times. + +In both cases (step 5 and 6) you will see the application output in the serial terminal. + ## Deploy to Arduino The following instructions will help you build and deploy this sample diff --git a/tensorflow/lite/micro/examples/person_detection/README.md b/tensorflow/lite/micro/examples/person_detection/README.md index 5ee7bda9914..ae47c4be0ff 100644 --- a/tensorflow/lite/micro/examples/person_detection/README.md +++ b/tensorflow/lite/micro/examples/person_detection/README.md @@ -6,6 +6,7 @@ run on systems with small amounts of memory such as microcontrollers and DSPs. 
## Table of contents - [Getting started](#getting-started) +- [Running on ARC EM SDP](#running-on-arc-em-sdp) - [Running on Arduino](#running-on-arduino) - [Running on ESP32](#running-on-esp32) - [Running on SparkFun Edge](#running-on-sparkfun-edge) @@ -13,6 +14,57 @@ run on systems with small amounts of memory such as microcontrollers and DSPs. - [Debugging image capture](#debugging-image-capture) - [Training your own model](#training-your-own-model) +## Running on ARC EM SDP + +The following instructions will help you to build and deploy this example to +[ARC EM SDP](https://www.synopsys.com/dw/ipdir.php?ds=arc-em-software-development-platform) board. General information and instructions on using the board with TensorFlow Lite Micro can be found in the common [ARC targets description](/tensorflow/lite/micro/tools/make/targets/arc/README.md). + +This example is quantized with symmetric uint8 scheme. As noted in [kernels/arc_mli/README.md](/tensorflow/lite/micro/kernels/arc_mli/README.md), embARC MLI supports optimized kernels for int8 quantization only. Therefore, this example will only use TFLM reference kernels. + +The ARC EM SDP board contains the reach set of extension interfaces. +You can choose any compatible camera and modify [image_provider.cc](/tensorflow/lite/micro/examples/person_detection/image_provider.cc) file accordingly to use input from your specific camera. By default, results of running this example are printed to the console. If you would like to instead implement some target-specific actions, you need to modify [detection_responder.cc](/tensorflow/lite/micro/examples/person_detection/detection_responder.cc) accordingly. + +The reference implementations of these files are used by default on the EM SDP. 
+ +### Initial setup + +Follow the instructions on the [ARC EM SDP Initial Setup](/tensorflow/lite/micro/tools/make/targets/arc/README.md#ARC-EM-Software-Development-Platform-ARC-EM-SDP) to get and install all required tools for work with ARC EM SDP. + +### Generate Example Project + +The example project for ARC EM SDP platform can be generated with the following command: + + make -f tensorflow/lite/micro/tools/make/Makefile TARGET=arc_emsdp TAGS=no_arc_mli generate_person_detection_make_project ` + +### Build and Run Example + +For more detailed information on building and running examples see the appropriate sections of general descriptions of the [ARC EM SDP usage with TFLM](/tensorflow/lite/micro/tools/make/targets/arc/README.md#ARC-EM-Software-Development-Platform-ARC-EM-SDP). In the directory with generated project you can also find a *README_ARC_EMSDP.md* file with instructions and options on building and running. Here we only briefly mention main steps which are typically enough to get it started. + +1. You need to [connect the board](/tensorflow/lite/micro/tools/make/targets/arc/README.md#connect-the-board) and open an serial connection. + +2. Go to the generated example project director + + cd tensorflow/lite/micro/tools/make/gen/arc_emsdp_arc/prj/person_detection/make` + +3. Build the example using + + make app + +4. To generate artefacts for self-boot of example from the board use + + make flash + +5. To run application from the board using microSD card: + * Copy the content of the created /bin folder into the root of microSD card. Note that the card must be formatted as FAT32 with default cluster size (but less than 32 Kbytes) + * Plug in the microSD card into the J11 connector. + * Push the RST button. If a red LED is lit beside RST button, push the CFG button. + +6. If you have the MetaWare Debugger installed in your environment: + * To run application from the console using it type `make run`. 
+ * To stop the execution type `Ctrl+C` in the console several times. + +In both cases (step 5 and 6) you will see the application output in the serial terminal. + ## Running on Arduino The following instructions will help you build and deploy this sample diff --git a/tensorflow/lite/micro/examples/person_detection_experimental/README.md b/tensorflow/lite/micro/examples/person_detection_experimental/README.md index d8aaa9ba383..af0186fb276 100644 --- a/tensorflow/lite/micro/examples/person_detection_experimental/README.md +++ b/tensorflow/lite/micro/examples/person_detection_experimental/README.md @@ -7,12 +7,66 @@ This uses the experimental int8 quantized version of the person detection model. ## Table of contents - [Getting started](#getting-started) +- [Running on ARC EM SDP](#running-on-arc-em-sdp) - [Running on Arduino](#running-on-arduino) - [Running on SparkFun Edge](#running-on-sparkfun-edge) - [Run the tests on a development machine](#run-the-tests-on-a-development-machine) - [Debugging image capture](#debugging-image-capture) - [Training your own model](#training-your-own-model) + +## Running on ARC EM SDP + +The following instructions will help you to build and deploy this example to +[ARC EM SDP](https://www.synopsys.com/dw/ipdir.php?ds=arc-em-software-development-platform) board. General information and instructions on using the board with TensorFlow Lite Micro can be found in the common [ARC targets description](/tensorflow/lite/micro/tools/make/targets/arc/README.md). + +This example uses asymmetric int8 quantization and can therefore leverage optimized int8 kernels from the embARC MLI library + +The ARC EM SDP board contains a rich set of extension interfaces. +You can choose any compatible camera and modify [image_provider.cc](/tensorflow/lite/micro/examples/person_detection_experimental/image_provider.cc) file accordingly to use input from your specific camera. By default, results of running this example are printed to the console. 
If you would like to instead implement some target-specific actions, you need to modify [detection_responder.cc](/tensorflow/lite/micro/examples/person_detection_experimental/detection_responder.cc) accordingly. + +The reference implementations of these files are used by default on the EM SDP. + +### Initial setup + +Follow the instructions on the [ARC EM SDP Initial Setup](/tensorflow/lite/micro/tools/make/targets/arc/README.md#ARC-EM-Software-Development-Platform-ARC-EM-SDP) to get and install all required tools for work with ARC EM SDP. + +### Generate Example Project + +The example project for ARC EM SDP platform can be generated with the following command: + + make -f tensorflow/lite/micro/tools/make/Makefile TARGET=arc_emsdp generate_person_detection_int8_make_project + +### Build and Run Example + +For more detailed information on building and running examples see the appropriate sections of general descriptions of the [ARC EM SDP usage with TFLM](/tensorflow/lite/micro/tools/make/targets/arc/README.md#ARC-EM-Software-Development-Platform-ARC-EM-SDP). In the directory with generated project you can also find a *README_ARC_EMSDP.md* file with instructions and options on building and running. Here we only briefly mention main steps which are typically enough to get it started. + +1. You need to [connect the board](/tensorflow/lite/micro/tools/make/targets/arc/README.md#connect-the-board) and open an serial connection. + +2. Go to the generated example project director + + cd tensorflow/lite/micro/tools/make/gen/arc_emsdp_arc/prj/person_detection_int8/make` + +3. Build the example using + + make app + +4. To generate artefacts for self-boot of example from the board use + + make flash + +5. To run application from the board using microSD card: + * Copy the content of the created /bin folder into the root of microSD card. 
Note that the card must be formatted as FAT32 with default cluster size (but less than 32 Kbytes) + * Plug in the microSD card into the J11 connector. + * Push the RST button. If a red LED is lit beside RST button, push the CFG button. + +6. If you have the MetaWare Debugger installed in your environment: + * To run application from the console using it type `make run`. + * To stop the execution type `Ctrl+C` in the console several times. + +In both cases (step 5 and 6) you will see the application output in the serial terminal. + + ## Running on Arduino The following instructions will help you build and deploy this sample diff --git a/tensorflow/lite/micro/kernels/arc_mli/README.md b/tensorflow/lite/micro/kernels/arc_mli/README.md new file mode 100644 index 00000000000..2b2e194e757 --- /dev/null +++ b/tensorflow/lite/micro/kernels/arc_mli/README.md @@ -0,0 +1,57 @@ +# EmbARC MLI Library Based Optimizations of TensorFlow Lite Micro Kernels for ARC Platforms. + +This folder contains kernel implementations which use optimized [embARC MLI Library](https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli). It allows acceleration of inference operations which use int8 (asymmetric quantization). + +## Usage + +embARC MLI Library is used by default to speed up execution of some kernels for asymmetrically quantized layers. This means that usual project generation for ARC specific target implies usage of embARC MLI. + +For example: + + make -f tensorflow/lite/micro/tools/make/Makefile TARGET=arc_emsdp generate_person_detection_int8_make_project + +In case MLI implementation can’t be used, kernels in this folder fallback to TFLM reference implementations. 
For applications which may not benefit from MLI library, projects can be generated without these implementations by adding `TAGS=no_arc_mli` in the command line, which can reduce overall code size: + + make -f tensorflow/lite/micro/tools/make/Makefile TARGET=arc_emsdp TAGS=no_arc_mli generate_person_detection_int8_make_project + +For ARC EM SDP board, a pre-compiled MLI library is downloaded and used in the application. For a custom target ARC-based platform, MLI sources are downloaded and compiled during project generation phase. To build library from sources for ARC EM SDP platform, add `BUILD_ARC_MLI=true` option to make command: + + make -f tensorflow/lite/micro/tools/make/Makefile TARGET=arc_emsdp BUILD_ARC_MLI=true generate_person_detection_int8_make_project. + +If an application exclusively uses accelerated MLI kernel implementations, one can strip out TFLM reference kernel implementations to reduce code size of application. Build application with `MLI_ONLY=true` option in generated project (after the project was built): + + cd tensorflow/lite/micro/tools/make/gen/arc_emsdp_arc/prj/person_detection_int8/make + + make app MLI_ONLY=true + +if you try this and application execution fails, then most probably MLI can’t be used for some nodes and you need to revert to using TFLM reference kernels. + + +## Limitations + +Currently, the MLI Library provides optimized implementation only for int8 (asymmetric) versions of the following kernels: +1. Convolution 2D – Per axis quantization only, `dilation_ratio==1` +2. Depthwise Convolution 2D – Per axis quantization only, `dilation_ratio==1` +3. Average Pooling +4. Max Pooling +5. Fully Connected + +Currently only [/tensorflow/lite/micro/examples/person_detection_experimental](/tensorflow/lite/micro/examples/person_detection_experimental) is quantized using this specification. Other examples can be executed on ARC-based targets, but will only use reference kernels. 
+ + +## Scratch Buffers and Slicing + +The following information applies only for ARC EM SDP and other targets with XY memory. embARC MLI uses specific optimizations which assume node operands are in XY memory and/or DCCM (Data Closely Coupled Memory). As operands might be quite big and may not fit in available XY memory, special slicing logic is applied which allows kernel calculations to be split into multiple parts. For this reason, internal static buffers are allocated in these X, Y and DCCM memory banks and used to execute sub-calculations. + +All this is performed automatically and is invisible to the user. Half of the DCCM memory bank and the full XY banks are occupied for MLI specific needs. If the user needs space in XY memory for other tasks, these arrays can be reduced by setting specific sizes. For this, add the following option to the build command, replacing `<size>` with the required values: + + EXT_CFLAGS="-DSCRATCH_MEM_Z_SIZE=<size> -DSCRATCH_MEM_X_SIZE=<size> -DSCRATCH_MEM_Y_SIZE=<size>" + +For example, to reduce sizes of arrays placed in DCCM and XCCM to 32k and 8k respectively, use the following command: + + make app EXT_CFLAGS="-DSCRATCH_MEM_Z_SIZE=32*1024 -DSCRATCH_MEM_X_SIZE=8*1024" + + +## License + +TensorFlow's code is covered by the Apache2 License included in the repository, and third party dependencies are covered by their respective licenses, in the third_party folder of this package. diff --git a/tensorflow/lite/micro/tools/make/targets/arc/README.md b/tensorflow/lite/micro/tools/make/targets/arc/README.md new file mode 100644 index 00000000000..8d20a4681ff --- /dev/null +++ b/tensorflow/lite/micro/tools/make/targets/arc/README.md @@ -0,0 +1,214 @@ +# Building TensorFlow Lite for Microcontrollers for Synopsys DesignWare ARC EM/HS Processors + +This document contains the general information on building and running TensorFlow Lite Micro for targets based on the Synopsys ARC EM/HS Processors.
+ +## Table of Contents + +- [Install the Synopsys DesignWare ARC MetaWare Development Toolkit](#install-the-synopsys-designware-arc-metaWare-development-toolkit) +- [ARC EM Software Development Platform (ARC EM SDP)](#ARC-EM-Software-Development-Platform-ARC-EM-SDP) +- [Custom ARC EM or HS Platform](#Custom-ARC-EMHS-Platform) + + +## Install the Synopsys DesignWare ARC MetaWare Development Toolkit + +The Synopsys DesignWare ARC MetaWare Development Toolkit (MWDT) is required to build and run Tensorflow Lite Micro applications for all ARC EM/HS targets. + +To license MWDT, please see further details [here](https://www.synopsys.com/dw/ipdir.php?ds=sw_metaware) + +To request an evaluation version of MWDT, please use the [Synopsys Eval Portal](https://eval.synopsys.com/) and follow the link for the MetaWare Development Toolkit (Important: Do not confuse this with MetaWare EV Development Toolkit or MetaWare Lite options also available on this page) + +Run the downloaded installer and follow the instructions to set up the toolchain on your platform. + +TensorFlow Lite for Microcontrollers builds are divided into two phases: Application Project Generation and Application Project Building/Running. The former phase requires \*nix environment while the latter does not. + +For basic project generation targeting [ARC EM Software Development Platform](#ARC-EM-Software-Development-Platform-ARC-EM-SDP), MetaWare is NOT required for the Project Generation Phase. 
However, it is required in the following cases: +- For project generation for custom (not EM SDP) targets +- To build microlib target library with all required TFLM objects for external use + +Please consider the above when choosing whether to install Windows or Linux or both versions of MWDT. + + +## ARC EM Software Development Platform (ARC EM SDP) + +This section describes how to deploy on an [ARC EM SDP board](https://www.synopsys.com/dw/ipdir.php?ds=arc-em-software-development-platform) + +### Initial Setup + +To use the EM SDP, you need the following hardware and software: + +#### ARC EM SDP +More information on the platform, including ordering information, can be found [here](https://www.synopsys.com/dw/ipdir.php?ds=arc-em-software-development-platform). + +#### MetaWare Development Toolkit +See [Install the Synopsys DesignWare ARC MetaWare Development Toolkit](#install-the-synopsys-designware-arc-metaWare-development-toolkit) section for instructions on toolchain installation. + +#### Digilent Adept 2 System Software Package +If you wish to use the MetaWare Debugger to debug your code, you need to also install the Digilent Adept 2 software, which includes the necessary drivers for connecting to the targets. This is available from the official [Digilent site](https://reference.digilentinc.com/reference/software/adept/start?redirect=1#software_downloads). You should install the “System” component, and Runtime. Utilities and SDK are NOT required. + +Digilent installation is NOT required if you plan to deploy to EM SDP via the SD card instead of using the debugger. + +#### Make Tool +A `'make'` tool is required for both phases of deploying Tensorflow Lite Micro applications on ARC EM SDP: +1. Application project generation +2. Working with generated application (build and run) + +For the first phase you need an environment and make tool compatible with Tensorflow Lite for Micro build system.
At the moment of this writing, this requires make >=3.82 and a *nix-like environment which supports shell and native commands for file manipulations. MWDT toolkit is not required for this phase. + +For the second phase, requirements are less strict. The gmake version delivered with MetaWare Development Toolkit is sufficient. There are no shell and *nix command dependencies, so Windows can be used + + +#### Serial Terminal Emulation Application +The Debug UART port of the EM SDP is used to print application output. The USB connection provides both the debug channel and RS232 transport. You can use any terminal emulation program (like [PuTTY](https://www.putty.org/)) to view UART output from the EM SDP. + +#### microSD Card +If you want to self-boot your application (start it independently from a debugger connection), you also need a microSD card with a minimum size of 512 MB and a way to write to the card from your development host + +### Connect the Board + +1. Make sure Boot switches of the board (S3) are configured in the next way: + +| Switch # | Switch position | +| :-------: | :----------------: | +| 1 | Low (0) | +| 2 | Low (0) | +| 3 | High (1) | +| 4 | Low (0) | + + +2. Connect the power supply included in the product package to the ARC EM SDP. +3. Connect the USB cable to connector J10 on the ARC EM SDP (near the RST and CFG buttons) and to an available USB port on your development host. +4. Determine the COM port assigned to the USB Serial Port (on Windows, using Device Manager is an easy way to do this) +5. Execute the serial terminal application you installed in the previous step and open the serial connection with the early defined COM port (speed 115200 baud; 8 bits; 1 stop bit; no parity). +6. Push the CFG button on the board. 
After a few seconds you should see the boot log in the terminal which begins as follows: + +``` +U-Boot + +CPU: ARC EM11D v5.0 at 40 MHz +Subsys:ARC Data Fusion IP Subsystem +Model: snps,emsdp +Board: ARC EM Software Development Platform v1.0 +… +``` + +### Generate Application Project for ARC EM SDP + +Before building an example or test application, you need to generate a TFLM project for this application from TensorFlow sources and external dependencies. To generate it for ARC EM SDP board you need to set `TARGET=arc_emsdp` on the make command line. For instance, to build the Person Detect test application, use a shell to execute the following command from the root directory of the TensorFlow repo: + + make -f tensorflow/lite/micro/tools/make/Makefile generate_person_detection_test_int8_make_project TARGET=arc_emsdp + +The application project will be generated into *tensorflow/lite/micro/tools/make/gen/arc_emsdp_arc/prj/person_detection_test_int8/make* + +Info on generating and building example applications for EM SDP (*tensorflow/lite/micro/examples*) can be found in the appropriate readme file placed in the same directory with the examples. In general, it’s the same process which described in this Readme. + +The [embARC MLI Library](https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli) is used by default to speed up execution of some kernels for asymmetrically quantized layers. Kernels which use MLI-based implementations are kept in the *tensorflow/lite/micro/kernels/arc_mli* folder. For applications which may not benefit from MLI library, the project can be generated without these implementations by adding `TAGS=no_arc_mli` in the command line. This can reduce code size when the optimized kernels are not required. + +For more options on embARC MLI usage see [kernels/arc_mli/README.md](/tensorflow/lite/micro/kernels/arc_mli/README.md). 
+ +### Build the Application + +You may need to adjust the following commands in order to use the appropriate make tool available in your environment (ie: `make` or `gmake`) + +1. Open command shell and change the working directory to the location which contains the generated project, as described in the previous section + +2. Clean previous build artifacts (optional) + + make clean + +3. Build application + + make app + +### Run the Application on the Board Using MetaWare Debugger + +In case you do not have access to the MetaWare Debugger or have chosen not to install the Digilent drivers, you can skip to the next section. + +To run the application from the console, use the following command: + + make run + +If application runs in an infinite loop, type `Ctrl+C` several times to exit the debugger. + +To run the application in the GUI debugger, use the following command: + + make debug + +In both cases you will see the application output in the serial terminal. + +### Run the Application on the Board from the microSD Card + +1. Use the following command in the same command shell you used for building the application, as described in the previous step + + make flash + +2. Copy the content of the created *./bin* folder into the root of microSD card. Note that the card must be formatted as FAT32 with default cluster size (but less than 32 Kbytes) +3. Plug in the microSD card into the J11 connector. +4. Push the RST button. If a red LED is lit beside RST button, push the CFG button. + +You will see the application output in the serial terminal. + + + +## Custom ARC EM/HS Platform + +This section describes how to deploy on a Custom ARC EM/HS platform defined only by a TCF (Tool Configuration File, created at CPU configuration time) and optional LCF (Linker Command File). 
In this case, the real hardware is unknown, and applications can be run only in the nSIM simulator included with the MetaWare toolkit + +### Initial Setup + +To work with a custom ARC EM/HS platform, you need the following: +* Synopsys MetaWare Development Toolkit version 2019.12 or higher +* Make tool (make or gmake) + +See [Install the Synopsys DesignWare ARC MetaWare Development Toolkit](#install-the-synopsys-designware-arc-metaWare-development-toolkit) section for instructions on toolchain installation. +See [MetaWare Development Toolkit](#MetaWare-Development-Toolkit) and [Make Tool](#Make-Tool) sections for instructions on toolchain installation and comments about make versions. + +### Generate Application Project + +Before building the application itself, you need to generate the project for this application from TensorFlow sources and external dependencies. To generate it for a custom TCF you need to set the following variables in the make command line: +* TARGET_ARCH=arc +* TCF_FILE=<path to TCF file> +* (optional) LCF_FILE=<path to LCF file> + +If you don’t supply an external LCF, the one embedded in the TCF will be used instead. + +For instance, to build **Person Detection** test application, use the following command from the root directory of the TensorFlow repo: + + make -f tensorflow/lite/micro/tools/make/Makefile generate_person_detection_test_int8_make_project TARGET_ARCH=arc TCF_FILE=<path to TCF file> LCF_FILE=<path to LCF file> + +The application project will be generated into *tensorflow/lite/micro/tools/make/gen/<tcf_file_basename>_arc/prj/person_detection_test_int8/make* + +The [embARC MLI Library](https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli) is used by default to speed up execution of some kernels for asymmetrically quantized layers. Kernels which use MLI-based implementations are kept in the *tensorflow/lite/micro/kernels/arc_mli* folder. For applications which may not benefit from MLI library, the project can be generated without these implementations by adding `TAGS=no_arc_mli` in the command line.
This can reduce code size when the optimized kernels are not required. + +For more options on embARC MLI usage see [kernels/arc_mli/README.md](/tensorflow/lite/micro/kernels/arc_mli/README.md). + +### Build the Application + +You may need to adjust the following commands in order to use the appropriate make tool available in your environment (ie: `make` or `gmake`) + +1. Open command shell and change the working directory to the location which contains the generated project, as described in the previous section + +2. Clean previous build artifacts (optional) + + make clean + +3. Build application + + make app + +### Run the Application with MetaWare Debugger on the nSim Simulator. + +To run application from the console, use the following command: + + make run + +If application runs in an infinite loop, type `Ctrl+C` several times to exit the debugger. + +To run the application in the GUI debugger, use the following command: + + make debug + +You will see the application output in the same console where you ran it. + +## License + +TensorFlow's code is covered by the Apache2 License included in the repository, and third-party dependencies are covered by their respective licenses, in the third_party folder of this package. diff --git a/tensorflow/lite/micro/tools/make/templates/arc/README_ARC.md.tpl b/tensorflow/lite/micro/tools/make/templates/arc/README_ARC.md.tpl index b722b9c441d..0ddaf3e0a81 100644 --- a/tensorflow/lite/micro/tools/make/templates/arc/README_ARC.md.tpl +++ b/tensorflow/lite/micro/tools/make/templates/arc/README_ARC.md.tpl @@ -1,2 +1,45 @@ -# Mock Project Readme for common ARC target +# TensorFlow Lite Micro ARC Make Project +This folder has been autogenerated by TensorFlow, and contains sources, headers, and project files needed to build a single TensorFlow Lite Micro application using make tool and a Synopsys DesignWare ARC processor compatible toolchain, specifically the ARC MetaWare Development Toolkit (MWDT). 
+ +This project has been generated for a target defined by TCF file only (Tool Configuration File). The real target board is unspecified, and applications can be run only in the nSIM simulator included with MWDT. + +See +[tensorflow/lite/micro](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/micro) +for details on how projects like this can be generated from the main source tree. + +## Usage + +See [Custom ARC EM/HS Platform](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/micro/tools/make/targets/arc/README.md#Custom-ARC-EMHS-Platform) section for more detailed information on requirements and usage of this project. + +The Makefile contains all the information on building and running the project. One can modify it to satisfy specific needs. Next actions are available out of the box. You may need to adjust the following commands in order to use the appropriate make tool available in your environment, ie: `make` or `gmake` + +1. Build the application. + + make app + +2. Build the application passing additional flags to compiler. + + make app EXT_CFLAGS=[additional compiler flags] + +3. Build the application and stripout TFLM reference kernel fallback implementations in order to reduce code size. This only has an effect in case the project was generated with MLI support. See more info in [EmbARC MLI Library Based Optimizations](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/micro/kernels/arc_mli/README.md). `false` is the default value. + + make app MLI_ONLY=[true|false] + +4. Delete all artifacts created during build. + + make clean + +5. Run the application with the nSIM simulator in console mode. + + make run + +6. Run the application with the nSIM simulator, but using the MetaWare Debugger GUI for further execution/debugging capabilities. 
+ + make debug + + + +## License + +TensorFlow's code is covered by the Apache2 License included in the repository, and third party dependencies are covered by their respective licenses, in the third_party folder of this package. diff --git a/tensorflow/lite/micro/tools/make/templates/arc/README_ARC_EMSDP.md.tpl b/tensorflow/lite/micro/tools/make/templates/arc/README_ARC_EMSDP.md.tpl index b3d9257f4d2..9d2801ed6b7 100644 --- a/tensorflow/lite/micro/tools/make/templates/arc/README_ARC_EMSDP.md.tpl +++ b/tensorflow/lite/micro/tools/make/templates/arc/README_ARC_EMSDP.md.tpl @@ -1,2 +1,48 @@ -# Mock Project Readme for ARC EMSDP target +# TensorFlow Lite Micro ARC Make Project for EM SDP Board. +This folder has been autogenerated by TensorFlow, and contains source, header, and project files needed to build a single TensorFlow Lite Micro target using make tool and a Synopsys DesignWare ARC processor compatible toolchain, specifically the ARC MetaWare Development Toolkit (MWDT). + +This project has been generated for the ARC EM Software Development Platform (EM SDP). The built application can be run only on this platform. + +See +[tensorflow/lite/micro](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/micro) +for details on how projects like this can be generated from the main source tree. + +## Usage + +See [ARC EM Software Development Platform](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/micro/tools/make/targets/arc/README.md#ARC-EM-Software-Development-Platform-ARC-EM-SDP) section for more detailed information on requirements and usage of this project. + +The Makefile contains all the information on building and running the project. One can modify it to satisfy specific needs. Next actions are available out of the box. You may need to adjust the following commands in order to use the appropriate make tool available in your environment, ie: `make` or `gmake`: + +1. Build the application. + + make app + +2.
Build the application passing additional flags to compiler. + + make app EXT_CFLAGS=[additional compiler flags] + +3. Build the application and stripout TFLM reference kernel fallback implementations in order to reduce code size. This only has an effect in case the project was generated with MLI support. See more info in [EmbARC MLI Library Based Optimizations](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/micro/kernels/arc_mli/README.md). `false` is the default value. + + make app MLI_ONLY=[true|false] + +4. Delete all artifacts created during build. + + make clean + +5. Run the application with the nSIM simulator in console mode. + + make run + +6. Load the application and open MetaWare Debugger GUI for further execution/debugging. + + make debug + +7. Generate necessary artefacts for self-booting execution from flash. See [reference to Run the application on the board from the micro SD card](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/micro/tools/make/targets/arc/README.md#Run-the-Application-on-the-Board-from-the-microSD-Card). + + make flash + + +## License + +TensorFlow's code is covered by the Apache2 License included in the repository, and third party dependencies are covered by their respective licenses, in the third_party folder of this package. 
From 9c36f4b4266a13501ebf131ded0fb5639c29ede7 Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Wed, 6 May 2020 19:28:16 +0300 Subject: [PATCH 41/45] EM SDP tcf file is removed (to be downloaded with MLI package) + minor fixes in Readmes --- .../micro/examples/micro_speech/README.md | 2 +- .../micro/examples/person_detection/README.md | 4 +- .../person_detection_experimental/README.md | 2 +- .../micro/tools/make/ext_libs/arc_mli.inc | 4 +- .../targets/arc/emsdp/emsdp_em11d_dfss.tcf | 4907 ----------------- .../tools/make/targets/arc_emsdp_makefile.inc | 17 +- .../tools/make/third_party_downloads.inc | 2 +- 7 files changed, 20 insertions(+), 4918 deletions(-) delete mode 100644 tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp_em11d_dfss.tcf diff --git a/tensorflow/lite/micro/examples/micro_speech/README.md b/tensorflow/lite/micro/examples/micro_speech/README.md index ba55a7d8493..3ab8ad24338 100644 --- a/tensorflow/lite/micro/examples/micro_speech/README.md +++ b/tensorflow/lite/micro/examples/micro_speech/README.md @@ -55,7 +55,7 @@ For more detailed information on building and running examples see the appropria 2. Go to the generated example project director - cd tensorflow/lite/micro/tools/make/gen/arc_emsdp_arc/prj/person_detection/make` + cd tensorflow/lite/micro/tools/make/gen/arc_emsdp_arc/prj/micro_speech_mock/make 3. 
Build the example using diff --git a/tensorflow/lite/micro/examples/person_detection/README.md b/tensorflow/lite/micro/examples/person_detection/README.md index ae47c4be0ff..d736d6f7cd5 100644 --- a/tensorflow/lite/micro/examples/person_detection/README.md +++ b/tensorflow/lite/micro/examples/person_detection/README.md @@ -34,7 +34,7 @@ Follow the instructions on the [ARC EM SDP Initial Setup](/tensorflow/lite/micro The example project for ARC EM SDP platform can be generated with the following command: - make -f tensorflow/lite/micro/tools/make/Makefile TARGET=arc_emsdp TAGS=no_arc_mli generate_person_detection_make_project ` + make -f tensorflow/lite/micro/tools/make/Makefile TARGET=arc_emsdp TAGS=no_arc_mli generate_person_detection_make_project ### Build and Run Example @@ -44,7 +44,7 @@ For more detailed information on building and running examples see the appropria 2. Go to the generated example project director - cd tensorflow/lite/micro/tools/make/gen/arc_emsdp_arc/prj/person_detection/make` + cd tensorflow/lite/micro/tools/make/gen/arc_emsdp_arc/prj/person_detection/make 3. Build the example using diff --git a/tensorflow/lite/micro/examples/person_detection_experimental/README.md b/tensorflow/lite/micro/examples/person_detection_experimental/README.md index af0186fb276..19a39ddd9a5 100644 --- a/tensorflow/lite/micro/examples/person_detection_experimental/README.md +++ b/tensorflow/lite/micro/examples/person_detection_experimental/README.md @@ -45,7 +45,7 @@ For more detailed information on building and running examples see the appropria 2. Go to the generated example project director - cd tensorflow/lite/micro/tools/make/gen/arc_emsdp_arc/prj/person_detection_int8/make` + cd tensorflow/lite/micro/tools/make/gen/arc_emsdp_arc/prj/person_detection_int8/make 3. 
Build the example using diff --git a/tensorflow/lite/micro/tools/make/ext_libs/arc_mli.inc b/tensorflow/lite/micro/tools/make/ext_libs/arc_mli.inc index a95b4550417..5dbb91dd368 100644 --- a/tensorflow/lite/micro/tools/make/ext_libs/arc_mli.inc +++ b/tensorflow/lite/micro/tools/make/ext_libs/arc_mli.inc @@ -24,7 +24,7 @@ ifeq ($(filter no_arc_mli,$(ALL_TAGS)),) ALL_TAGS += arc_mli ifeq ($(BUILD_ARC_MLI),true) - MLI_LIB_DIR = arc_mli_$(basename $(TCF_FILE_NAME)) + MLI_LIB_DIR ?= arc_mli_$(basename $(TCF_FILE_NAME)) $(eval $(call add_third_party_download,$(EMBARC_MLI_URL),$(EMBARC_MLI_MD5),$(MLI_LIB_DIR),build_embarc_mli,$(TCF_FILE))) @@ -36,7 +36,7 @@ ifeq ($(BUILD_ARC_MLI),true) third_party/$(MLI_LIB_DIR)/LICENSE else ifneq ($(ARC_MLI_PRE_COMPILED_TARGET),) - MLI_LIB_DIR = arc_mli_package + MLI_LIB_DIR ?= arc_mli_package $(eval $(call add_third_party_download,$(EMBARC_MLI_PRE_COMPILED_URL),$(EMBARC_MLI_PRE_COMPILED_MD5),$(MLI_LIB_DIR),)) MLI_INCLUDE_FOLDER = $(MLI_LIB_DIR)/include diff --git a/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp_em11d_dfss.tcf b/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp_em11d_dfss.tcf deleted file mode 100644 index 833fa9ca9b9..00000000000 --- a/tensorflow/lite/micro/tools/make/targets/arc/emsdp/emsdp_em11d_dfss.tcf +++ /dev/null @@ -1,4907 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - -# -# option 16/L32/U32 Instructions -# ------ ---------- --------------------- -# -# none -/-/- None -# wlh1 1/1/1 MPYW/U, MPY/U, MPYH/U -# wlh2 2/2/2 MPYW/U, MPY/U, MPYH/U -# wlh3 2/3/3 MPYW/U, MPY/U, MPYH/U -# wlh4 2/4/5 MPYW/U, MPY/U, MPYH/U -# wlh5 5/9/9 MPYW/U, MPY/U, MPYH/U -# -# --mpy_option none - -# code_protection --- The ARC EM architecture divides the memory into 16 regions, which can be protected individually. This feature adds a 16-bit input to the processor core, one bit per region. When the protect bit is set, the processor disables any load or store to the corresponding region. 
An attempt to access a protected region raises an EV_ProtV exception. --code_protection false - -# stack_checking --- Stack checking is a mechanism for checking stack accesses and raising an exception when a stack overflow or underflow is detected. --stack_checking true - -# unaligned_option --- This enables unaligned loads and stores. --unaligned_option true - -# intvbase_preset --- This sets the interrupt vector base configuration register, VECBASE_AC_BUILD. The vector base address is aligned to a 1KB boundary, so the required address value should be divided by 1K (i.e. do not include the lower 10 bits). On reset, this register is loaded into the interrupt vector base address register, INT_VECTOR_BASE. --intvbase_preset 0x0 - -# intvbase_preset_s --- This sets the secure interrupt vector base configuration register, VECBASE_AC_BUILD. The vector base address is aligned to a 1KB boundary, so the required address value should be divided by 1K (i.e. do not include the lower 10 bits). On reset, this register is loaded into the interrupt vector base address register, INT_VECTOR_BASE_S.This is effective only when 2+2 mode is enabled. --intvbase_preset_s 0x0 - -# intvbase_ext --- Set this option to drive the upper 22 bits of the interrupt base vector externally, into signal intvbase_in. --intvbase_ext false - -# nmi_option --- add Non-maskable external exception support --nmi_option false - -# rgf_impl --- This defines whether the register file is implemented using flip-flops, or with a hard macro. --rgf_impl flip_flops - -# rgf_num_regs --- This defines the size (in 32b register) of the processor register file. --rgf_num_regs 32 - -# rgf_wr_ports --- This defines the number of write ports on the register file. --rgf_wr_ports 2 - -# rgf_num_banks --- Dual register banks are useful if Fast IRQ has been configured, but may be selected even if not. 
--rgf_num_banks 2 - -# rgf_banked_regs --- This selects the number of registers that are replicated in the second register-file bank. --rgf_banked_regs 32 - -# turbo_boost --- This enables the Turbo Boost synthesis option. By enabling this option, the achievable clock frequency is increased, but at the cost of an additional cycle latency on branch instructions. --turbo_boost false - -# infer_alu_adder --- infer: datapath is described as behavioral code: A + B -# instantiate: datapath is instantiated as a detailed multi-stage code of a carry-lookahead adder. It is generally preferable to use the infer option and add directives for your target synthesizer. --infer_alu_adder infer - -# infer_mpy_wtree --- infer: datapath is described as behavioral code: A * B (applies to only wlh3, wlh4 and wlh5 designs) -# instantiate: datapath is instantiated as a detailed multi-stage code of a Wallace Tree multiplier It is generally preferable to use the infer option and add directives for your target synthesizer. --infer_mpy_wtree instantiate - -# scantest_ram_bypass_mux --- This mux is used to make logic trapped between flops and memory (aka shadow logic) to be covered by scantest without requiring advanced sequential ATPG on the memory to be applied. Will add delay to functional access time --scantest_ram_bypass_mux false - -# logic_bist --- This option will OR LBIST_EN with test_mode --logic_bist false - -# power_domains --- Adds three separate power domains to the core, and propagates power-gate control signals to the top level of the core. 
Also generates UPF constraints and commands in the low-power scripts --power_domains false - -# dvfs --- Adds logic to the core to allow dynamic controlling of voltage and frequency and propagates the associated control signals to the top level of core --dvfs false - -# voltage_domains --- Creates a voltage domain split between RAM and std cell parts to support Ultra Low Voltage on cells and generates UPF constraints --voltage_domains false - -# mem_bus_option --- The core supports two bus protocols for accessing external memory: AHB & AHB-Lite. AHB-Lite-single means instruction fetch and data access share a single AHB-Lite port. AHB-Lite-dual means separate AHB-Lite port for each initiator if present. --mem_bus_option AHB - -# mem_bus_reg_interface --- Specifies whether the memory bus interface is registered. --mem_bus_reg_interface true - -# dmi_burst_option --- This will enable high-throughput burst support on the DMI slave interfaces. By enabling this option, the peak DMI read throughput goes from 1 word per 3 cycles to N words per N+2 cycles, in which N is the AHB burst lengthDMI write throughput goes from 1 word per 3 cycles to 1 word per cycle. --dmi_burst_option true - -# has_dmp_peripheral --- This option enables the redirection of load/store accesses to one segment (1/16) of the addressable space to a dedicated peripheral bus. This offers high system integration and reduces overall system cost. --has_dmp_peripheral true - -# per0_base --- This option specifies the memory region assignment for this peripheral aperture --per0_base 15 - -# per0_limit --- This option specifies the end of this peripheral aperture --per0_limit 0 - -# per_bus_option --- The core supports one bus protocol for accessing the peripheral space, when enabled: AHB-Lite. --per_bus_option AHB-Lite - -# per_bus_reg_interface --- Specifies whether the peripheral bus interface is registered. 
--per_bus_reg_interface true - -# clock_gating --- This enables the insertion of architectural clock gate elements in the design. By enabling this option, the clocks to various parts of the design will be disabled when the logic they drive is not in use to save power. --clock_gating false - -# back_compat --- This enables the addition of rst_a input in the clkgate module to support backward compatibility with the older EM and Subsystem releases. --back_compat true - -# byte_parity --- If parity protection on the CCMs or Cache is configured, this option enables parity protection on a per-byte basis. Otherwise, parity is per word basis --byte_parity false - -# prot_pipelined --- Check the box if CCM memories are configured for ECC, and you want single-bit errors to be corrected, written back to memory, and re-fetched. When unchecked, single bit errors are corrected when read from memory, but the offending memory location itself is not corrected with a writeback, no influence on Cache protection --prot_pipelined false - -# cct_test_ena --- When ECC is configured, this option enables single bit error injection in CCT RAM models to demonstrate ECC protection on the RAMs. When enabled, the RAM models can only be used in HDL CCT simulation (no xCAM support) and are not intended for use in SoC level integration. --cct_test_ena false - -# err_prot_ehce --- Enabled enhanced ECC architecture for CCM. Instruction fetch with single bit error is not replayed; ecc cac modules are shared to reduce area and timing opt. --err_prot_ehce false - - -######## dsp_trig --- com.arc.hardware.dfss.dsp_trig.1_0 ######## - -# Create dsp_trig --create com.arc.hardware.dfss.dsp_trig.1_0 System.CPUisle.ARCv2EM.dsp_trig - -# dsp_trig --- Command line option for EIA extension component 'dsp_trig'. --dsp_trig true - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. 
Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_gpio0 --- com.arc.hardware.dfss.io_gpio0.1_0 ######## - -# Create io_gpio0 --create com.arc.hardware.dfss.io_gpio0.1_0 System.CPUisle.ARCv2EM.io_gpio0 - -# io_gpio0 --- Command line option for EIA extension component 'io_gpio0'. --io_gpio0 true - -# io_gpio0_debounce --- Selects the inclusion of Debounce logic --io_gpio0_debounce 1 - -# io_gpio0_readback_sync --- Selects the inclusion of metastability registers on the read back path when reading the external 'ext_porta' signal --io_gpio0_readback_sync 1 - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - -# io_gpio0_direction_rst_value --- Reset value of the SWPORTA_DDR register, which determines the direction (input/output) of the GPIO interface. 0: input, 1: output. --io_gpio0_direction_rst_value 0 - -# io_gpio0_output_rst_value --- Reset value of the SWPORTA_DR register, which determines the reset value of the GPIO output ports. Bits corresponding to input ports are ignored. --io_gpio0_output_rst_value 0x0 - - -######## io_i2c_mst0 --- com.arc.hardware.dfss.io_i2c_mst0.1_0 ######## - -# Create io_i2c_mst0 --create com.arc.hardware.dfss.io_i2c_mst0.1_0 System.CPUisle.ARCv2EM.io_i2c_mst0 - -# io_i2c_mst0 --- Command line option for APEX extension component 'io_i2c_mst0'. --io_i2c_mst0 true - -# io_i2c_mst0_fs --- RX/TX FIFO size --io_i2c_mst0_fs 16 - -# io_i2c_mst0_dma_support --- Specifies whether the DMA handshake interface is included --io_i2c_mst0_dma_support None - -# io_i2c_mst0_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the serial clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than, lower than or equal to the serial clock frequency. --io_i2c_mst0_cdc_included 0 - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_i2c_slv0 --- com.arc.hardware.dfss.io_i2c_slv0.1_0 ######## - -# Create io_i2c_slv0 --create com.arc.hardware.dfss.io_i2c_slv0.1_0 System.CPUisle.ARCv2EM.io_i2c_slv0 - -# io_i2c_slv0 --- Command line option for APEX extension component 'io_i2c_slv0'. --io_i2c_slv0 true - -# io_i2c_slv0_fs --- RX/TX FIFO size --io_i2c_slv0_fs 16 - -# io_i2c_slv0_dma_support --- Specifies whether the DMA handshake interface is included --io_i2c_slv0_dma_support None - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_spi_mst0 --- com.arc.hardware.dfss.io_spi_mst0.1_0 ######## - -# Create io_spi_mst0 --create com.arc.hardware.dfss.io_spi_mst0.1_0 System.CPUisle.ARCv2EM.io_spi_mst0 - -# io_spi_mst0 --- Command line option for APEX extension component 'io_spi_mst0'. --io_spi_mst0 true - -# io_spi_mst0_fz --- RX/TX FIFO depth --io_spi_mst0_fs 16 - -# io_spi_mst0_max_xfer_size --- This defines the maximum number of bits per word at the serial data port, which determines the FIFO width. --io_spi_mst0_max_xfer_size 16 - -# io_spi_mst0_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the peripheral clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than or equal to the peripheral clock frequency. --io_spi_mst0_cdc_included 0 - -# io_spi_mst0_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. --io_spi_mst0_dma_support Memory-Based - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## subsys_bcr --- com.arc.hardware.dfss.subsys_bcr.1_0 ######## - -# Create subsys_bcr --create com.arc.hardware.dfss.subsys_bcr.1_0 System.CPUisle.ARCv2EM.subsys_bcr - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_spi_mst1 --- com.arc.hardware.dfss.io_spi_mst1.1_0 ######## - -# Create io_spi_mst1 --create com.arc.hardware.dfss.io_spi_mst1.1_0 System.CPUisle.ARCv2EM.io_spi_mst1 - -# io_spi_mst1 --- Command line option for APEX extension component 'io_spi_mst1'. --io_spi_mst1 true - -# io_spi_mst1_fz --- RX/TX FIFO depth --io_spi_mst1_fs 16 - -# io_spi_mst1_max_xfer_size --- This defines the maximum number of bits per word at the serial data port, which determines the FIFO width. --io_spi_mst1_max_xfer_size 16 - -# io_spi_mst1_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the peripheral clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than or equal to the peripheral clock frequency. --io_spi_mst1_cdc_included 0 - -# io_spi_mst1_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. --io_spi_mst1_dma_support Memory-Based - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_spi_mst2 --- com.arc.hardware.dfss.io_spi_mst2.1_0 ######## - -# Create io_spi_mst2 --create com.arc.hardware.dfss.io_spi_mst2.1_0 System.CPUisle.ARCv2EM.io_spi_mst2 - -# io_spi_mst2 --- Command line option for APEX extension component 'io_spi_mst2'. --io_spi_mst2 true - -# io_spi_mst2_fz --- RX/TX FIFO depth --io_spi_mst2_fs 16 - -# io_spi_mst2_max_xfer_size --- This defines the maximum number of bits per word at the serial data port, which determines the FIFO width. --io_spi_mst2_max_xfer_size 16 - -# io_spi_mst2_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the peripheral clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than or equal to the peripheral clock frequency. --io_spi_mst2_cdc_included 0 - -# io_spi_mst2_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. --io_spi_mst2_dma_support None - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_spi_slv0 --- com.arc.hardware.dfss.io_spi_slv0.1_0 ######## - -# Create io_spi_slv0 --create com.arc.hardware.dfss.io_spi_slv0.1_0 System.CPUisle.ARCv2EM.io_spi_slv0 - -# io_spi_slv0 --- Command line option for APEX extension component 'io_spi_slv0'. --io_spi_slv0 true - -# io_spi_slv0_fz --- RX/TX FIFO depth --io_spi_slv0_fs 16 - -# io_spi_slv0_max_xfer_size --- This defines the maximum number of bits per word at the serial data port, which determines the FIFO width. --io_spi_slv0_max_xfer_size 16 - -# io_spi_slv0_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. --io_spi_slv0_dma_support Memory-Based - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_gpio1 --- com.arc.hardware.dfss.io_gpio1.1_0 ######## - -# Create io_gpio1 --create com.arc.hardware.dfss.io_gpio1.1_0 System.CPUisle.ARCv2EM.io_gpio1 - -# io_gpio1 --- Command line option for EIA extension component 'io_gpio1'. --io_gpio1 true - -# io_gpio1_debounce --- Selects the inclusion of Debounce logic --io_gpio1_debounce 1 - -# io_gpio1_readback_sync --- Selects the inclusion of metastability registers on the read back path when reading the external 'ext_porta' signal --io_gpio1_readback_sync 1 - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - -# io_gpio1_direction_rst_value --- Reset value of the SWPORTA_DDR register, which determines the direction (input/output) of the GPIO interface. 0: input, 1: output. --io_gpio1_direction_rst_value 0 - -# io_gpio1_output_rst_value --- Reset value of the SWPORTA_DR register, which determines the reset value of the GPIO output ports. Bits corresponding to input ports are ignored. --io_gpio1_output_rst_value 0x0 - - -######## io_gpio2 --- com.arc.hardware.dfss.io_gpio2.1_0 ######## - -# Create io_gpio2 --create com.arc.hardware.dfss.io_gpio2.1_0 System.CPUisle.ARCv2EM.io_gpio2 - -# io_gpio2 --- Command line option for EIA extension component 'io_gpio2'. --io_gpio2 true - -# io_gpio2_debounce --- Selects the inclusion of Debounce logic --io_gpio2_debounce 1 - -# io_gpio2_readback_sync --- Selects the inclusion of metastability registers on the read back path when reading the external 'ext_porta' signal --io_gpio2_readback_sync 1 - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - -# io_gpio2_direction_rst_value --- Reset value of the SWPORTA_DDR register, which determines the direction (input/output) of the GPIO interface. 0: input, 1: output. --io_gpio2_direction_rst_value 0 - -# io_gpio2_output_rst_value --- Reset value of the SWPORTA_DR register, which determines the reset value of the GPIO output ports. Bits corresponding to input ports are ignored. --io_gpio2_output_rst_value 0x0 - - -######## io_i2c_mst1 --- com.arc.hardware.dfss.io_i2c_mst1.1_0 ######## - -# Create io_i2c_mst1 --create com.arc.hardware.dfss.io_i2c_mst1.1_0 System.CPUisle.ARCv2EM.io_i2c_mst1 - -# io_i2c_mst1 --- Command line option for APEX extension component 'io_i2c_mst1'. --io_i2c_mst1 true - -# io_i2c_mst1_fs --- RX/TX FIFO size --io_i2c_mst1_fs 16 - -# io_i2c_mst1_dma_support --- Specifies whether the DMA handshake interface is included --io_i2c_mst1_dma_support None - -# io_i2c_mst1_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the serial clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than, lower than or equal to the serial clock frequency. --io_i2c_mst1_cdc_included 0 - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_i2c_mst2 --- com.arc.hardware.dfss.io_i2c_mst2.1_0 ######## - -# Create io_i2c_mst2 --create com.arc.hardware.dfss.io_i2c_mst2.1_0 System.CPUisle.ARCv2EM.io_i2c_mst2 - -# io_i2c_mst2 --- Command line option for APEX extension component 'io_i2c_mst2'. --io_i2c_mst2 true - -# io_i2c_mst2_fs --- RX/TX FIFO size --io_i2c_mst2_fs 16 - -# io_i2c_mst2_dma_support --- Specifies whether the DMA handshake interface is included --io_i2c_mst2_dma_support None - -# io_i2c_mst2_cdc_included --- Selects whether a clock-domain crossing (CDC) is included between the core clock and the serial clock. If no CDC is present, both clocks must be synchronous. Otherwise the core clock frequency may be higher than, lower than or equal to the serial clock frequency. --io_i2c_mst2_cdc_included 0 - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_uart0 --- com.arc.hardware.dfss.io_uart0.1_0 ######## - -# Create io_uart0 --create com.arc.hardware.dfss.io_uart0.1_0 System.CPUisle.ARCv2EM.io_uart0 - -# io_uart0 --- Command line option for EIA extension component 'io_uart0'. --io_uart0 true - -# io_uart0_fifo_mode --- Set the UART FIFO mode --io_uart0_fifo_mode 16 - -# io_uart0_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. --io_uart0_dma_support None - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_uart1 --- com.arc.hardware.dfss.io_uart1.1_0 ######## - -# Create io_uart1 --create com.arc.hardware.dfss.io_uart1.1_0 System.CPUisle.ARCv2EM.io_uart1 - -# io_uart1 --- Command line option for EIA extension component 'io_uart1'. --io_uart1 true - -# io_uart1_fifo_mode --- Set the UART FIFO mode --io_uart1_fifo_mode 16 - -# io_uart1_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. --io_uart1_dma_support None - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_uart2 --- com.arc.hardware.dfss.io_uart2.1_0 ######## - -# Create io_uart2 --create com.arc.hardware.dfss.io_uart2.1_0 System.CPUisle.ARCv2EM.io_uart2 - -# io_uart2 --- Command line option for EIA extension component 'io_uart2'. --io_uart2 true - -# io_uart2_fifo_mode --- Set the UART FIFO mode --io_uart2_fifo_mode 16 - -# io_uart2_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. --io_uart2_dma_support None - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_uart3 --- com.arc.hardware.dfss.io_uart3.1_0 ######## - -# Create io_uart3 --create com.arc.hardware.dfss.io_uart3.1_0 System.CPUisle.ARCv2EM.io_uart3 - -# io_uart3 --- Command line option for EIA extension component 'io_uart3'. --io_uart3 true - -# io_uart3_fifo_mode --- Set the UART FIFO mode --io_uart3_fifo_mode 16 - -# io_uart3_dma_support --- Selects whether support for the ARC EM DMA is included and whether the handshake interface should be connected to a memory-based or to an Aux-based DMA channel. --io_uart3_dma_support None - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_i2s_rx_mst0 --- com.arc.hardware.dfss.io_i2s_rx_mst0.1_0 ######## - -# Create io_i2s_rx_mst0 --create com.arc.hardware.dfss.io_i2s_rx_mst0.1_0 System.CPUisle.ARCv2EM.io_i2s_rx_mst0 - -# io_i2s_rx_mst0 --- Command line option for APEX extension component 'io_i2s_rx_mst0'. --io_i2s_rx_mst0 true - -# io_i2s_rx_mst0_fs --- RX FIFO size --io_i2s_rx_mst0_fs 8 - -# io_i2s_rx_mst0_fw --- RX FIFO width --io_i2s_rx_mst0_fw 16 - -# io_i2s_rx_mst0_dma_support --- Specifies whether the DMA handshake interface is included --io_i2s_rx_mst0_dma_support Memory-Based - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_i2s_tx_mst0 --- com.arc.hardware.dfss.io_i2s_tx_mst0.1_0 ######## - -# Create io_i2s_tx_mst0 --create com.arc.hardware.dfss.io_i2s_tx_mst0.1_0 System.CPUisle.ARCv2EM.io_i2s_tx_mst0 - -# io_i2s_tx_mst0 --- Command line option for APEX extension component 'io_i2s_tx_mst0'. --io_i2s_tx_mst0 true - -# io_i2s_tx_mst0_fs --- TX FIFO size --io_i2s_tx_mst0_fs 8 - -# io_i2s_tx_mst0_fw --- TX FIFO width --io_i2s_tx_mst0_fw 16 - -# io_i2s_tx_mst0_dma_support --- Specifies whether the DMA handshake interface is included --io_i2s_tx_mst0_dma_support Memory-Based - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## io_pdm_rx0 --- com.arc.hardware.dfss.io_pdm_rx0.1_0 ######## - -# Create io_pdm_rx0 --create com.arc.hardware.dfss.io_pdm_rx0.1_0 System.CPUisle.ARCv2EM.io_pdm_rx0 - -# io_pdm_rx0 --- Command line option for APEX extension component 'io_pdm_rx0'. --io_pdm_rx0 true - -# io_pdm_rx0_ch --- Number of Stereo Channels --io_pdm_rx0_ch 1 - -# io_pdm_rx0_fs --- RX FIFO size --io_pdm_rx0_fs 16 - -# io_pdm_rx0_ns --- Maximum number of CIC stages --io_pdm_rx0_ns 4 - -# io_pdm_rx0_ds --- Maximum delay in the COMB filter of the CIC filter --io_pdm_rx0_ds 2 - -# io_pdm_rx0_dma_support --- Specifies whether the DMA handshake interface is included --io_pdm_rx0_dma_support Memory-Based - -# assign_xpubit --- -# -# The User Mode Extension Enable register (XPU) controls user-mode access to extension instructions and state. Each extension group is assigned a bit within the XPU register, and this bit may be programmed to enable or disable user-mode access to the extensions within that group. -#

-# By default an extension is not assigned a bit in this register. This means the extension is always available. -#

-# If you wish to assign an XPU bit number, select this option. -# -# --assign_xpubit false - -# xpubit --- -# The XPU bit number for this extension. -# --xpubit 0 - - -######## DCCM --- com.arc.hardware.DCCM.1_0 ######## - -# Create DCCM --create com.arc.hardware.DCCM.1_0 System.CPUisle.ARCv2EM.DCCM - -# dccm_size --- This defines the size of the Data Closely Coupled Memory (DCCM) in bytes --dccm_size 131072 - -# dccm_base --- Sets the initial memory region assignment for DCCM --dccm_base 8 - -# dccm_interleave --- Split DCCM into even/odd memory banks. --dccm_interleave false - -# dccm_prot --- Specifies the type of protection built for the DCCM. --dccm_prot None - -# dccm_prot_level --- Specifies the level protection. --dccm_prot_level Data_Only - -# dccm_prot_exceptions --- When the core is configured with ECC or Parity, cause exception generation hardware to be created for uncorrectable errors detected on the DCCM --dccm_prot_exceptions true - -# dccm_sec_lvl --- Specifies the level of secure DCCM. --dccm_sec_lvl Non_Secure - -# dccm_dmi --- This enables external access through a DMI (direct memory interface) port. --dccm_dmi true - - -######## DMA Controller --- com.arc.hardware.DMA_Controller.1_0 ######## - -# Create DMA Controller --create com.arc.hardware.DMA_Controller.1_0 "System.CPUisle.ARCv2EM.DMA Controller" - -# dmac_channels --- This options specifies the number of DMA channels implemented in the DMA controller --dmac_channels 16 - -# dmac_fifo_depth --- This option specifies the DMA transfer FIFO depth in 32b words. 
--dmac_fifo_depth 2 - -# dmac_int_config --- None: the DMA controller cannot raise an interrupt -# Single-External: single done and single error interrupt signal for all DMA channels, and the interrupt signals are routed to a port at the top of the EM logical hierarchy -# Multiple-External: each DMA channel can be configured to raise separate (per-channel) done and error interrupts, and the interrupt signals are routed to ports at the top of the EM logical hierarchy -# Single-Internal: single done and single error interrupt signals for all DMA channels, and the interrupt signals are internal to the EM core -# Multiple-Internal: each DMA channel can be configured to raise separate (per-channel) done and error interrupts, and the interrupt signals are internal to the EM core --dmac_int_config Multiple-Internal - -# dmac_separate_error_interrupts --- This specifies whether there is a separate error interrupt per DMA channel, or just one. --dmac_separate_error_interrupts false - -# dmac_registers --- This option defines the number of DMA channels with their registers located in auxiliary space. --dmac_registers 0 - -# dmac_mem_if --- This option specifies whether the DMA controller system memory interface is integrated into the existing EM system memory interfaces or has its own interface. --dmac_mem_if integrated - -# dmac_per_if --- Internal vs DW peripheral interface. Specify (in hex) which channels have the DW interface, where bit 0 corresponds to DMA channel 0, bit 1 for DMA channel 1, etc. -# Example: 4 channel DMA controller where -dmac_per_if is set to 0x9 = DMA Channels 0 and 3 configured with the DW req interface, DMA Channels 1 and 2 configured with the internal req interface. 
--dmac_per_if 0x7e00 - - -######## DSP --- com.arc.hardware.DSP.1_0 ######## - -# Create DSP --create com.arc.hardware.DSP.1_0 System.CPUisle.ARCv2EM.DSP - -# dsp_complex --- Enable/disable support for single cycle 16b+16b complex instructions and butterfly operations, else 2-cycle complex instructions only without butterfly support --dsp_complex true - -# dsp_itu --- Enable/disable support for ITU bit-accurate 1 bit fractional shift before accumulation, else 1-bit fractional shift result after accumulation only --dsp_itu true - -# dsp_divsqrt --- Enable/disable support for divide and square root operations: DIV(U), REM(U), SQRT --dsp_divsqrt radix2 - -# dsp_accshift --- Select support for accumulator shift operations: no supported, limited shift support only or full shift support and convergent rounding --dsp_accshift full - -# dsp_impl --- The datapath components may be inferred from Verilog for better area or optimized using carry-save components for better timing --dsp_impl optimized - - -######## Data Cache --- com.arc.hardware.Data_Cache.1_0 ######## - -# Create Data Cache --create com.arc.hardware.Data_Cache.1_0 "System.CPUisle.ARCv2EM.Data Cache" - -# dc_size --- This defines the total size of the Data Cache in bytes. --dc_size 16384 - -# dc_ways --- This defines the number of cache ways. --dc_ways 2 - -# dc_bsize --- This defines the cache line length in bytes. --dc_bsize 32 - -# dc_feature_level --- Feature Level, indicates locking and debug feature level 00 = Basic cache, with no locking or debug features 01 = Lock and flush features supported 10 = Lock, flush and advanced debug features supported 11 = Reserved --dc_feature_level 2 - -# dc_uncached_region --- Enable an uncached region defined by aux reg --dc_uncached_region false - -# dc_prot --- Specifies the type of protection built for DCACHE. --dc_prot None - -# dc_prot_level --- Specifies the level of protection. 
--dc_prot_level Data_Only - -# dc_prot_exceptions --- Builds exception generation hardware for uncorrectable (fatal) errors detected on DCACHE. --dc_prot_exceptions true - - -######## Debug Interface --- com.arc.hardware.Debug_Interface.1_0 ######## - -# Create Debug Interface --create com.arc.hardware.Debug_Interface.1_0 "System.CPUisle.ARCv2EM.Debug Interface" - -# dbg_en_option --- Adds an enable pin to the existing debug interface --dbg_en_option false - -# secure_debug --- This enables secure debug feature --secure_debug false - -# scdbg_aux_unlk --- An internal demo module will be included when enable --scdbg_aux_unlk false - -# dbg_apb_option --- Adds an additional APB debug port alongside the BVCI one --dbg_apb_option false - - -######## ICCM0 --- com.arc.hardware.ICCM0.1_0 ######## - -# Create ICCM0 --create com.arc.hardware.ICCM0.1_0 System.CPUisle.ARCv2EM.ICCM0 - -# iccm0_size --- This defines the size of ICCM0 in bytes.This ICCM has 0 wait states. --iccm0_size 131072 - -# iccm0_base --- Sets the initial memory region assignment for ICCM0 --iccm0_base 6 - -# iccm0_wide --- Creates ICCM0 as 64b memory to reduce accesses. --iccm0_wide false - -# iccm0_prot --- Specifies the type of protection built for ICCM0. --iccm0_prot None - -# iccm0_prot_level --- Specifies the level of protection. --iccm0_prot_level Data_Only - -# iccm0_prot_exceptions --- When the core is configured with ECC or Parity, cause exception generation hardware to be created for uncorrectable errors detected on the ICCM0 --iccm0_prot_exceptions true - -# iccm0_sec_lvl --- Specifies the level of secure ICCM0. --iccm0_sec_lvl Non_Secure - -# iccm0_dmi --- This enables external access through a DMI (direct memory interface) port. 
--iccm0_dmi true - - -######## Instruction Cache --- com.arc.hardware.Instruction_Cache.1_0 ######## - -# Create Instruction Cache --create com.arc.hardware.Instruction_Cache.1_0 "System.CPUisle.ARCv2EM.Instruction Cache" - -# ic_size --- This defines the total size of the instruction cache in bytes. --ic_size 16384 - -# ic_ways --- This defines the number of cache ways --ic_ways 2 - -# ic_bsize --- This defines the cache line length in bytes. --ic_bsize 64 - -# ic_disable_on_reset --- The instruction cache may be enabled immediately after reset, depending on this option. If this option is enabled, the last cache operation is set to failed, and the direct cache-RAM access is enabled. Furthermore, the instruction cache is invalidated all cache lines are invalidated and unlocked, and the tag RAM is cleared. --ic_disable_on_reset false - -# ic_feature_level --- This defines the feature level of the cache. --ic_feature_level 1 - -# ic_pwr_opt_level --- This selects power-optimization options in the micro-architecture of the instruction cache. --ic_pwr_opt_level 0 - -# ic_prot --- Specifies the type of protection built for ICACHE. --ic_prot None - -# ic_prot_level --- Specifies the level of protection. --ic_prot_level Data_Only - -# ic_prot_exceptions --- Builds exception generation hardware for uncorrectable (fatal) errors detected on ICACHE. --ic_prot_exceptions true - - -######## Interrupt Controller --- com.arc.hardware.Interrupt_Controller.1_0 ######## - -# Create Interrupt Controller --create com.arc.hardware.Interrupt_Controller.1_0 "System.CPUisle.ARCv2EM.Interrupt Controller" - -# number_of_interrupts --- This is the total number of interrupts available to the core. Some interrupts are allocated statically to a specific interrupt line (for example, timer interrupts). For more information on Interrupt and register-file options, see DesignWare ARCv2 ISA Programmers Reference Manual. 
--number_of_interrupts 96 - -# number_of_levels --- Priority levels in the interrupt controller. --number_of_levels 4 - -# external_interrupts --- This is the total number of interrupt pins available for external system components. This parameter must be less than the total number of interrupts. --external_interrupts 77 - -# firq_option --- This enables the fast-interrupts option, (priority level 0 interrupts), which uses an alternate register bank (if configured) instead of saving the context to memory. --firq_option true - - -######## JTAG Interface --- com.arc.hardware.JTAG_Interface.1_0 ######## - -# Create JTAG Interface --create com.arc.hardware.JTAG_Interface.1_0 "System.CPUisle.ARCv2EM.JTAG Interface" - -######## Timer 0 --- com.arc.hardware.Timer_0.1_0 ######## - -# Create Timer 0 --create com.arc.hardware.Timer_0.1_0 "System.CPUisle.ARCv2EM.Timer 0" - -# timer_0_int_level --- This sets the interrupt level (and implicitly the priority: level 0 is highest) of timer 0. --timer_0_int_level 1 - - -######## Watchdog Timer --- com.arc.hardware.Watchdog_Timer.1_0 ######## - -# Create Watchdog Timer --create com.arc.hardware.Watchdog_Timer.1_0 "System.CPUisle.ARCv2EM.Watchdog Timer" - -# watchdog_size --- Specifies the bit width of timer's internal counter. --watchdog_size 32 - -# watchdog_clk --- Specifies whether the timer should be driven from a separate clock. --watchdog_clk false - - -######## Real-time Counter --- com.arc.hardware.Real_time_Counter.1_0 ######## - -# Create Real-time Counter --create com.arc.hardware.Real_time_Counter.1_0 "System.CPUisle.ARCv2EM.Real-time Counter" - -######## Performance Monitor --- com.arc.hardware.Performance_Monitor.1_0 ######## - -# Create Performance Monitor --create com.arc.hardware.Performance_Monitor.1_0 "System.CPUisle.ARCv2EM.Performance Monitor" - -# pct_counters --- Number of counters for performance monitoring. 
--pct_counters 8 - - -######## SmaRT --- com.arc.hardware.SmaRT.1_0 ######## - -# Create SmaRT --create com.arc.hardware.SmaRT.1_0 System.CPUisle.ARCv2EM.SmaRT - -# smart_stack_entries --- This specifies the number of entries in the trace buffer. --smart_stack_entries 8 - -# smart_implementation --- Flip-flop = FF-based design. Memory = memory-based design (provides better density for larger trace buffers). --smart_implementation flip-flop - - -######## XY --- com.arc.hardware.XY.1_0 ######## - -# Create XY --create com.arc.hardware.XY.1_0 System.CPUisle.ARCv2EM.XY - -# xy_config --- XY memory configuration: -# One memory: DCCM only. -# Two memories: DCCM + Y. -# Three memories: DCCM + X + Y. --xy_config dccm_x_y - -# xy_size --- Size of X and Y memories if included. -# X and Y memories both have the same configured size. --xy_size 16384 - -# xy_interleave --- Split XY memories into odd/even instances to enable single cycle unaligned access. --xy_interleave false - -# xy_x_base --- Base region for X memory. All accesses to this region will initiate a transfer on the X memory. --xy_x_base 9 - -# xy_y_base --- Base region for Y memory. All accesses to this region will initiate a transfer on the Y memory. --xy_y_base 10 - - -######## AGU --- com.arc.hardware.AGU.1_0 ######## - -# Create AGU --create com.arc.hardware.AGU.1_0 System.CPUisle.ARCv2EM.AGU - -# agu_size --- Predefined configurations of modifiers, address -# pointers and offset registers -#

-# 
-#         address     address                     
-#         pointers    offset regs      modifiers  
-#        ----------- --------------- ------------ 
-# small:     4           2                 4      
-# medium:    8           4                 12     
-# large:     12          8                 24     
-# 
-# --agu_size large - -# agu_accord --- Enable the accordion stage if operating frequency is critical --agu_accord true - -# agu_wb_depth --- Write buffer depth --agu_wb_depth 4 - - -######## Actionpoints --- com.arc.hardware.Actionpoints.1_0 ######## - -# Create Actionpoints --create com.arc.hardware.Actionpoints.1_0 System.CPUisle.ARCv2EM.Actionpoints - -# num_actionpoints --- This is the number of trigger events available. --num_actionpoints 8 - -# aps_feature --- Selects Actionpoint feature set --aps_feature min - - -######## Bit stream --- com.arc.hardware.Bit_stream.1_0 ######## - -# Create Bit stream --create com.arc.hardware.Bit_stream.1_0 "System.CPUisle.ARCv2EM.Bit stream" - -######## Floating-point unit --- com.arc.hardware.Floating_point_unit.1_0 ######## - -# Create Floating-point unit --create com.arc.hardware.Floating_point_unit.1_0 "System.CPUisle.ARCv2EM.Floating-point unit" - -# fpu_dp_assist --- This enables double-precision acceleration instructions. --fpu_dp_assist true - -# fpu_fma_option --- This enables the fused multiply-add & multiply-subtract instructions. --fpu_fma_option true - -# fpu_mas_cycles --- Make mul/add/sub multicycle to achieve a higher clock speed. --fpu_mas_cycles 2 - -# fpu_pipe_impl --- FPU pipelined implementation --fpu_pipe_impl true - -# fpu_div_option --- This enables divide & square-root acceleration --fpu_div_option true - -# fpu_div_cycles --- Controls div/sqrt implementation. --fpu_div_cycles 17 - - -######## Memory Protection Unit --- com.arc.hardware.Memory_Protection_Unit.1_0 ######## - -# Create Memory Protection Unit --create com.arc.hardware.Memory_Protection_Unit.1_0 "System.CPUisle.ARCv2EM.Memory Protection Unit" - -# mpu_num_regions --- Number of configured memory regions. --mpu_num_regions 16 - -# mpu_32b --- Set the minimal region size to be 32 byte instead of 2KB. 
--mpu_32b false - -# mpu_sid_option --- It will enable SID support in Secure Shield --mpu_sid_option false - - -######## Real-time trace producer --- com.arc.hardware.Real_time_trace_producer.1_0 ######## - -# Create Real-time trace producer --create com.arc.hardware.Real_time_trace_producer.1_0 "System.CPUisle.ARCv2EM.Real-time trace producer" - -# rtt_feature_level --- 'small' means that program trace only is available. `medium' adds data trace. `full' adds core and aux register trace. --rtt_feature_level full - - -######## ARCv2EM CCT --- cct.1_0 ######## - -# Create ARCv2EM CCT --create cct.1_0 "System.ARCv2EM CCT" - -# cct --- -# Option used to add a CCT to the design for command-line builds -# Without this architect can't add this component to a build -# via a cmdline -create command. -# with old scripts. -# --cct true - -# no_hostlink --- -# This prevents the inclusion of the hostlink library when compiling -# C or C++ programs. The resultant executable, if it contains printfs, -# will print to an internal fixed buffer __mwwrite_buf. -# Other hostlink operations that require debugger assistance, such as file -# opens, will fail. -# -# Hostlink references incur memory cycles at unpredictable times and -# so can perturb cycle-timing results. Without hostlink, -# the debugger will not in any way interfere with the target while it is running. -# Therefore this option is useful for simulation in which you want precisely the -# same cycle timing to occur each time you run, or for accurate power consumption results. -# --cct_no_hostlink false - -# has_subsystem_cct_flow --- -# The above option will check for the presence of subsystem component in the build configuration and suitably modifies the Makefile for the sub-system environment. 
-# --has_subsystem_cct_flow false - - -######## BusFabric --- com.arc.hardware.ARCv2MSS.BusFabric.1_0 ######## - -# Create BusFabric --create com.arc.hardware.ARCv2MSS.BusFabric.1_0 System.BusFabric - -######## ClkCtrl --- com.arc.hardware.ARCv2MSS.ClkCtrl.1_0 ######## - -# Create ClkCtrl --create com.arc.hardware.ARCv2MSS.ClkCtrl.1_0 System.ClkCtrl - -######## DSP Software --- com.arc.software.dfss.sw_dsp.1_0 ######## - -# Create DSP Software --create com.arc.software.dfss.sw_dsp.1_0 "System.DSP Software" - -# sw_dsp --- Command line option for Software element 'DSP Software' --sw_dsp true - - -######## EMSDP_BOARD --- com.arc.hardware.ARCv2MSS.EMSDP_BOARD.1_0 ######## - -# Create EMSDP_BOARD --create com.arc.hardware.ARCv2MSS.EMSDP_BOARD.1_0 System.EMSDP_BOARD - -# emsdp_sys_freq --- Select the core frequency. --emsdp_sys_freq 40 - - -######## IO Software --- com.arc.software.dfss.sw_io.1_0 ######## - -# Create IO Software --create com.arc.software.dfss.sw_io.1_0 "System.IO Software" - -# sw_io --- Command line option for Software element 'IO Software' --sw_io true - - -######## Implementation --- com.arc.hardware.implementation.1_0 ######## - -# Create Implementation --create com.arc.hardware.implementation.1_0 System.Implementation - -# ClockSpeed --- Target clock speed of the system --clock_speed 10 - -# DDR2_clk_Ratio --- DDR2 Clock Vs System Clock Ratio -# 2x -# 3x -# 4x --ddr2_clk_ratio 3x - -# ClockSkew --- The clock skew for the system --clock_skew 0.2 - -# HoldMargin --- Margin for hold time checks --hold_margin 0.05 - -# Floorplan --- Floorplan definition for relative placement of RAMs (at CPU-level) or the placement of the rams and CPU hard cores (at multicore level) --floorplan em4_sensor - -# JTAGFrequency --- Select the frequency of the JTAG clock Tck (in MHz). -# -# The JTAG clock speed has to be less than 1/2 of the cpu clock otherwise the signals on the BVCI interface are not guaranteed to be valid. 
-# -# NOTE: The RTL simulations will work when the JTAG clock frequency is set to half the CPU clock, however this may not be the case when simulating at gate level due to delays on the IO pads. -# -# The default is set to 10 MHz so that there is no conflict when simulating with an ARCangel3 at 30MHz. (30 > 10*2) -# -# The speed of simulation can be greatly increased by using a faster JTAG clock, but a dependency will warn if it exceeds 1/2 of the cpu clock. -# --jtag_tclk 4 - -# execution_trace_level --- -# This traces committed instructions as they execute, and gathers statistics -# visible in the debugger for counting instructions & cycle delays. -# At the "stats" level ony the statistics are gathered and no trace is printed. -# "file" is equivalent to "full", but the results go to a trace .txt file instead. -# --execution_trace_level stats - -# tb_trace --- -# Enable instruction execution trace. -# This is available to arc_dev licensees (internal developers) only. -# --tb_trace false - -# zero_based_arcnum --- -# In a multicore build, number ARCs from 0. -# If this is not selected, arcs are numbered from 1. -# (This provides the initial value to the arcnum signal.) -# --zero_based_arcnum true - -# generate_ipxact --- -# Generate ipxact.xml file describing the CPUisle or archipelago frontier -# --generate_ipxact false - -# ipxact_relative_path_names --- -# Use relative path names for Verilog files in the ipxact. -# Otherwise, absolute path names are used. -# --ipxact_relative_path_names true - -# optional_encryption --- -# When selected, encrypted RTL output is generated. -# --optional_encryption false - -# ignore_encrypt_license --- -# When selected, pretend the encryption license is missing. For testing. -# --ignore_encrypt_license false - -# ignore_clear_license --- -# When selected, pretend the cleartest license is missing. For testing. -# --ignore_clear_license false - -# OPTION_require_archipelago --- -# When selected, force use of archipelago. 
This is for testing purposes. -# --require_archipelago false - - -######## Infrastructure Software --- com.arc.software.dfss.sw_infra.1_0 ######## - -# Create Infrastructure Software --create com.arc.software.dfss.sw_infra.1_0 "System.Infrastructure Software" - -# sw_infra --- Command line option for Software element 'Infrastructure Software' --sw_infra true - -# templateName --- Template name --template_name siss_combo_sensor_dsp - - -######## subsys_infra --- com.arc.hardware.dfss.subsys_infra.1_0 ######## - -# Create subsys_infra --create com.arc.hardware.dfss.subsys_infra.1_0 System.subsys_infra - -# subsys_infra --- Command line option for EIA glue logic. --subsys_infra true - -# internal_interrupt --- Connect the IO interrupts internally --internal_interrupt true - -# internal_dma_handshake --- Connect the DMA handshake signals internally --internal_dma_handshake true - -# spi_tb_sw_test_mode --- -# This is a secret option, not seen by customers. -# If you check this, the SPI peripheral's testbenches will be set to SW test mode: -# The serial interface of the first SPI master io_spi_mstN peripheral is connected to all SPI slave peripherals io_spi_slvN. -# This is used for testing the SW drivers. -# --spi_tb_sw_test_mode false - -# i3c_tb_sw_test_mode --- -# This is a secret option, not seen by customers. -# If you check this, the I3C peripheral's testbenches will be set to SW test mode: -# The serial interface of the io_i3cN peripheral is connected to the I2C slave peripherals io_i2c_slv0. -# This is used for testing the SW drivers. -# --i3c_tb_sw_test_mode false - -# subsys_apex_offset --- Subsystem APEX address offset in the AUX address space. The aperture used by the subsystem is fixed to 0x0010_0000. In general, the APEX address offset must be in the range from 0x0010_0000 to 0xFFF0_0000. 
However, if your design includes the "UAUX Interface" component, then the APEX address offset must be in the range from 0x0010_0000 to 0x7FF0_0000 to avoid address conflicts with any UAUX components. --subsys_apex_offset 0x8000_0000 - -# subsys_uaux_offset --- Subsystem UAUX address offset in the UAUX address space. The UAUX address offset must be an integer multiple of 0x0010_0000 in the range from 0x0000_0000 to 0x7FF0_0000. The aperture reserved for the subsystem is fixed to 0x0010_0000. --subsys_uaux_offset 0x10_0000 - - -######## ARC_RTT --- com.arc.hardware.ARC_RTT.1_0 ######## - -# Create ARC_RTT --create com.arc.hardware.ARC_RTT.1_0 System.ARC_RTT - -# has_nexus_if --- Please select Nexus interface to offload the data from RTT --has_nexus_if true - -# has_on_chip_mem --- Please select the on-chip memory option to store the trace data in shared memory --has_on_chip_mem true - -# nexus_data_wdt --- Please select the Nexus Data Width to offload the data from RTT --nexus_data_wdt 16 - -# internal_memory_size --- Please select internal memory size to capture the trace data --internal_memory_size 16k - -# ram_type --- Please select Types of internal memories to be inferred for the logic --ram_type 1_PORT - -# power_domains --- Adds isolation signal inputs/power switch controls for use in UPF flow when configuring power domains. --rtt_power_domains false - - -######## Tool Configuration --- cgen.1_0 ######## - -# Create Tool Configuration --create cgen.1_0 "System.Tool Configuration" - -# mwdt_version --- Selects the MetaWare version to be used with the TCF file. -# Change from the default to an older or newer toolset version if you want the TCF file to be used with an older or newer version of the MetaWare tools. --mwdt_version O-2018.09 - -# code_base_addr --- -# The base address to assign to the executable code segment in the linker command file when there is no ICCM in the build. This value is ignored when there is an ICCM. 
-# --code_base_addr 0x0 - -# data_base_addr --- -# The base address to assign to the data segment in the linker command file when the data is not being mapped to a DCCM. This value is ignored when the data segment is mapped to a DCCM, as in that case the base address of the DCCM memory is used. -# -# A value of 0xffffffff means that the data segment will not be mapped to any specific address. -# --data_base_addr 0xffff_ffff - -# underscores_in_numbers --- Use underscores in hex numbers to improve readability. --underscores_in_numbers false - -# tcf_rebrand --- Alternate branding of TCF (not used) --rebrand false - - -]]>
-
- - - - - - - - - - - - - - ICCM0 - - GROUP BLOCK(4): { - /* _SDA_BASE_ computed implicitly */ - .sdata?: {} - .sbss?: {} - * (DATA): {} - * (BSS): {} - .stack ALIGN(4) SIZE(DEFINED _STACKSIZE?_STACKSIZE:65536): {} - .heap? ALIGN(4) SIZE(DEFINED _HEAPSIZE?_HEAPSIZE:0): {} - } > SYSTEM2 - GROUP BLOCK(4): { - .Xdata? : {} - } > XCCM - GROUP BLOCK(4): { - .Ydata? : {} - } > YCCM - GROUP BLOCK(4) : { - .vectors (TEXT) SIZE(DEFINED _IVTSIZE?_IVTSIZE:756): {} = FILL(0xa5a5a5a5,4) - } > IVT - } - -]]> - - - - - - 0x07, sub_opcode => 0x1E , latency_cycles => 8) - -// User extension instruction - dsp_sin -extern long dsp_sin(long); -#pragma intrinsic(dsp_sin, opcode => 0x07, sub_opcode => 0x1F , latency_cycles => 8) - -// User extension instruction - dsp_tan -extern long dsp_tan(long); -#pragma intrinsic(dsp_tan, opcode => 0x07, sub_opcode => 0x22 , latency_cycles => 11) - -// User extension instruction - dsp_acos -extern long dsp_acos(long); -#pragma intrinsic(dsp_acos, opcode => 0x07, sub_opcode => 0x23 , latency_cycles => 31) - -// User extension instruction - dsp_asin -extern long dsp_asin(long); -#pragma intrinsic(dsp_asin, opcode => 0x07, sub_opcode => 0x24 , latency_cycles => 31) - -// User extension instruction - dsp_atan -extern long dsp_atan(long); -#pragma intrinsic(dsp_atan, opcode => 0x07, sub_opcode => 0x25 , latency_cycles => 13) - -// User extension instruction - dsp_sqrt -extern long dsp_sqrt(long); -#pragma intrinsic(dsp_sqrt, opcode => 0x07, sub_opcode => 0x20 , latency_cycles => 31) - -// User extension instruction - dsp_sqrt15 -extern long dsp_sqrt15(long); -#pragma intrinsic(dsp_sqrt15, opcode => 0x07, sub_opcode => 0x21 , latency_cycles => 15) - -#define APEX_COM_ARC_HARDWARE_DFSS_DSP_TRIG_PRESENT 1 -#define APEX_COM_ARC_HARDWARE_DFSS_IO_GPIO0_PRESENT 1 - -// User extension aux register io_gpio0_debounce -#define AR_IO_GPIO0_DEBOUNCE 0x80017048 -#pragma Aux_register(0x80017048, name=>"io_gpio0_debounce") - -// User extension aux register 
io_gpio0_clken -#define AR_IO_GPIO0_CLKEN 0x80017080 -#pragma Aux_register(0x80017080, name=>"io_gpio0_clken") - -// User extension aux register io_gpio0_swporta_dr -#define AR_IO_GPIO0_SWPORTA_DR 0x80017000 -#pragma Aux_register(0x80017000, name=>"io_gpio0_swporta_dr") - -// User extension aux register io_gpio0_swporta_ddr -#define AR_IO_GPIO0_SWPORTA_DDR 0x80017004 -#pragma Aux_register(0x80017004, name=>"io_gpio0_swporta_ddr") - -// User extension aux register io_gpio0_inten -#define AR_IO_GPIO0_INTEN 0x80017030 -#pragma Aux_register(0x80017030, name=>"io_gpio0_inten") - -// User extension aux register io_gpio0_intmask -#define AR_IO_GPIO0_INTMASK 0x80017034 -#pragma Aux_register(0x80017034, name=>"io_gpio0_intmask") - -// User extension aux register io_gpio0_inttype_level -#define AR_IO_GPIO0_INTTYPE_LEVEL 0x80017038 -#pragma Aux_register(0x80017038, name=>"io_gpio0_inttype_level") - -// User extension aux register io_gpio0_int_polarity -#define AR_IO_GPIO0_INT_POLARITY 0x8001703c -#pragma Aux_register(0x8001703c, name=>"io_gpio0_int_polarity") - -// User extension aux register io_gpio0_intstatus -#define AR_IO_GPIO0_INTSTATUS 0x80017040 -#pragma Aux_register(0x80017040, name=>"io_gpio0_intstatus") - -// User extension aux register io_gpio0_raw_intstatus -#define AR_IO_GPIO0_RAW_INTSTATUS 0x80017044 -#pragma Aux_register(0x80017044, name=>"io_gpio0_raw_intstatus") - -// User extension aux register io_gpio0_porta_eoi -#define AR_IO_GPIO0_PORTA_EOI 0x8001704c -#pragma Aux_register(0x8001704c, name=>"io_gpio0_porta_eoi") - -// User extension aux register io_gpio0_ext_porta -#define AR_IO_GPIO0_EXT_PORTA 0x80017050 -#pragma Aux_register(0x80017050, name=>"io_gpio0_ext_porta") - -// User extension aux register io_gpio0_ls_sync -#define AR_IO_GPIO0_LS_SYNC 0x80017060 -#pragma Aux_register(0x80017060, name=>"io_gpio0_ls_sync") - -// User extension aux register io_gpio0_int_bothedge -#define AR_IO_GPIO0_INT_BOTHEDGE 0x80017068 -#pragma Aux_register(0x80017068, 
name=>"io_gpio0_int_bothedge") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_I2C_MST0_PRESENT 1 - -// User extension aux register io_i2c_mst0_clken -#define AR_IO_I2C_MST0_CLKEN 0x800120c0 -#pragma Aux_register(0x800120c0, name=>"io_i2c_mst0_clken") - -// User extension aux register io_i2c_mst0_con -#define AR_IO_I2C_MST0_CON 0x80012000 -#pragma Aux_register(0x80012000, name=>"io_i2c_mst0_con") - -// User extension aux register io_i2c_mst0_tar -#define AR_IO_I2C_MST0_TAR 0x80012004 -#pragma Aux_register(0x80012004, name=>"io_i2c_mst0_tar") - -// User extension aux register io_i2c_mst0_data_cmd -#define AR_IO_I2C_MST0_DATA_CMD 0x80012010 -#pragma Aux_register(0x80012010, name=>"io_i2c_mst0_data_cmd") - -// User extension aux register io_i2c_mst0_ss_scl_hcnt -#define AR_IO_I2C_MST0_SS_SCL_HCNT 0x80012014 -#pragma Aux_register(0x80012014, name=>"io_i2c_mst0_ss_scl_hcnt") - -// User extension aux register io_i2c_mst0_ss_scl_lcnt -#define AR_IO_I2C_MST0_SS_SCL_LCNT 0x80012018 -#pragma Aux_register(0x80012018, name=>"io_i2c_mst0_ss_scl_lcnt") - -// User extension aux register io_i2c_mst0_fs_scl_hcnt -#define AR_IO_I2C_MST0_FS_SCL_HCNT 0x8001201c -#pragma Aux_register(0x8001201c, name=>"io_i2c_mst0_fs_scl_hcnt") - -// User extension aux register io_i2c_mst0_fs_scl_lcnt -#define AR_IO_I2C_MST0_FS_SCL_LCNT 0x80012020 -#pragma Aux_register(0x80012020, name=>"io_i2c_mst0_fs_scl_lcnt") - -// User extension aux register io_i2c_mst0_intr_stat -#define AR_IO_I2C_MST0_INTR_STAT 0x8001202c -#pragma Aux_register(0x8001202c, name=>"io_i2c_mst0_intr_stat") - -// User extension aux register io_i2c_mst0_intr_mask -#define AR_IO_I2C_MST0_INTR_MASK 0x80012030 -#pragma Aux_register(0x80012030, name=>"io_i2c_mst0_intr_mask") - -// User extension aux register io_i2c_mst0_raw_intr_stat -#define AR_IO_I2C_MST0_RAW_INTR_STAT 0x80012034 -#pragma Aux_register(0x80012034, name=>"io_i2c_mst0_raw_intr_stat") - -// User extension aux register io_i2c_mst0_rx_tl -#define AR_IO_I2C_MST0_RX_TL 0x80012038 
-#pragma Aux_register(0x80012038, name=>"io_i2c_mst0_rx_tl") - -// User extension aux register io_i2c_mst0_tx_tl -#define AR_IO_I2C_MST0_TX_TL 0x8001203c -#pragma Aux_register(0x8001203c, name=>"io_i2c_mst0_tx_tl") - -// User extension aux register io_i2c_mst0_clr_intr -#define AR_IO_I2C_MST0_CLR_INTR 0x80012040 -#pragma Aux_register(0x80012040, name=>"io_i2c_mst0_clr_intr") - -// User extension aux register io_i2c_mst0_clr_rx_under -#define AR_IO_I2C_MST0_CLR_RX_UNDER 0x80012044 -#pragma Aux_register(0x80012044, name=>"io_i2c_mst0_clr_rx_under") - -// User extension aux register io_i2c_mst0_clr_rx_over -#define AR_IO_I2C_MST0_CLR_RX_OVER 0x80012048 -#pragma Aux_register(0x80012048, name=>"io_i2c_mst0_clr_rx_over") - -// User extension aux register io_i2c_mst0_clr_tx_over -#define AR_IO_I2C_MST0_CLR_TX_OVER 0x8001204c -#pragma Aux_register(0x8001204c, name=>"io_i2c_mst0_clr_tx_over") - -// User extension aux register io_i2c_mst0_clr_tx_abrt -#define AR_IO_I2C_MST0_CLR_TX_ABRT 0x80012054 -#pragma Aux_register(0x80012054, name=>"io_i2c_mst0_clr_tx_abrt") - -// User extension aux register io_i2c_mst0_clr_activity -#define AR_IO_I2C_MST0_CLR_ACTIVITY 0x8001205c -#pragma Aux_register(0x8001205c, name=>"io_i2c_mst0_clr_activity") - -// User extension aux register io_i2c_mst0_clr_stop_det -#define AR_IO_I2C_MST0_CLR_STOP_DET 0x80012060 -#pragma Aux_register(0x80012060, name=>"io_i2c_mst0_clr_stop_det") - -// User extension aux register io_i2c_mst0_clr_start_det -#define AR_IO_I2C_MST0_CLR_START_DET 0x80012064 -#pragma Aux_register(0x80012064, name=>"io_i2c_mst0_clr_start_det") - -// User extension aux register io_i2c_mst0_enable -#define AR_IO_I2C_MST0_ENABLE 0x8001206c -#pragma Aux_register(0x8001206c, name=>"io_i2c_mst0_enable") - -// User extension aux register io_i2c_mst0_status -#define AR_IO_I2C_MST0_STATUS 0x80012070 -#pragma Aux_register(0x80012070, name=>"io_i2c_mst0_status") - -// User extension aux register io_i2c_mst0_txflr -#define AR_IO_I2C_MST0_TXFLR 
0x80012074 -#pragma Aux_register(0x80012074, name=>"io_i2c_mst0_txflr") - -// User extension aux register io_i2c_mst0_rxflr -#define AR_IO_I2C_MST0_RXFLR 0x80012078 -#pragma Aux_register(0x80012078, name=>"io_i2c_mst0_rxflr") - -// User extension aux register io_i2c_mst0_sda_hold -#define AR_IO_I2C_MST0_SDA_HOLD 0x8001207c -#pragma Aux_register(0x8001207c, name=>"io_i2c_mst0_sda_hold") - -// User extension aux register io_i2c_mst0_tx_abrt_source -#define AR_IO_I2C_MST0_TX_ABRT_SOURCE 0x80012080 -#pragma Aux_register(0x80012080, name=>"io_i2c_mst0_tx_abrt_source") - -// User extension aux register io_i2c_mst0_enable_status -#define AR_IO_I2C_MST0_ENABLE_STATUS 0x8001209c -#pragma Aux_register(0x8001209c, name=>"io_i2c_mst0_enable_status") - -// User extension aux register io_i2c_mst0_fs_spklen -#define AR_IO_I2C_MST0_FS_SPKLEN 0x800120a0 -#pragma Aux_register(0x800120a0, name=>"io_i2c_mst0_fs_spklen") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_I2C_SLV0_PRESENT 1 - -// User extension aux register io_i2c_slv0_clken -#define AR_IO_I2C_SLV0_CLKEN 0x800130c0 -#pragma Aux_register(0x800130c0, name=>"io_i2c_slv0_clken") - -// User extension aux register io_i2c_slv0_con -#define AR_IO_I2C_SLV0_CON 0x80013000 -#pragma Aux_register(0x80013000, name=>"io_i2c_slv0_con") - -// User extension aux register io_i2c_slv0_sar -#define AR_IO_I2C_SLV0_SAR 0x80013008 -#pragma Aux_register(0x80013008, name=>"io_i2c_slv0_sar") - -// User extension aux register io_i2c_slv0_data_cmd -#define AR_IO_I2C_SLV0_DATA_CMD 0x80013010 -#pragma Aux_register(0x80013010, name=>"io_i2c_slv0_data_cmd") - -// User extension aux register io_i2c_slv0_intr_stat -#define AR_IO_I2C_SLV0_INTR_STAT 0x8001302c -#pragma Aux_register(0x8001302c, name=>"io_i2c_slv0_intr_stat") - -// User extension aux register io_i2c_slv0_intr_mask -#define AR_IO_I2C_SLV0_INTR_MASK 0x80013030 -#pragma Aux_register(0x80013030, name=>"io_i2c_slv0_intr_mask") - -// User extension aux register io_i2c_slv0_raw_intr_stat -#define 
AR_IO_I2C_SLV0_RAW_INTR_STAT 0x80013034 -#pragma Aux_register(0x80013034, name=>"io_i2c_slv0_raw_intr_stat") - -// User extension aux register io_i2c_slv0_rx_tl -#define AR_IO_I2C_SLV0_RX_TL 0x80013038 -#pragma Aux_register(0x80013038, name=>"io_i2c_slv0_rx_tl") - -// User extension aux register io_i2c_slv0_tx_tl -#define AR_IO_I2C_SLV0_TX_TL 0x8001303c -#pragma Aux_register(0x8001303c, name=>"io_i2c_slv0_tx_tl") - -// User extension aux register io_i2c_slv0_clr_intr -#define AR_IO_I2C_SLV0_CLR_INTR 0x80013040 -#pragma Aux_register(0x80013040, name=>"io_i2c_slv0_clr_intr") - -// User extension aux register io_i2c_slv0_clr_rx_under -#define AR_IO_I2C_SLV0_CLR_RX_UNDER 0x80013044 -#pragma Aux_register(0x80013044, name=>"io_i2c_slv0_clr_rx_under") - -// User extension aux register io_i2c_slv0_clr_rx_over -#define AR_IO_I2C_SLV0_CLR_RX_OVER 0x80013048 -#pragma Aux_register(0x80013048, name=>"io_i2c_slv0_clr_rx_over") - -// User extension aux register io_i2c_slv0_clr_tx_over -#define AR_IO_I2C_SLV0_CLR_TX_OVER 0x8001304c -#pragma Aux_register(0x8001304c, name=>"io_i2c_slv0_clr_tx_over") - -// User extension aux register io_i2c_slv0_clr_rd_req -#define AR_IO_I2C_SLV0_CLR_RD_REQ 0x80013050 -#pragma Aux_register(0x80013050, name=>"io_i2c_slv0_clr_rd_req") - -// User extension aux register io_i2c_slv0_clr_tx_abrt -#define AR_IO_I2C_SLV0_CLR_TX_ABRT 0x80013054 -#pragma Aux_register(0x80013054, name=>"io_i2c_slv0_clr_tx_abrt") - -// User extension aux register io_i2c_slv0_clr_rx_done -#define AR_IO_I2C_SLV0_CLR_RX_DONE 0x80013058 -#pragma Aux_register(0x80013058, name=>"io_i2c_slv0_clr_rx_done") - -// User extension aux register io_i2c_slv0_clr_activity -#define AR_IO_I2C_SLV0_CLR_ACTIVITY 0x8001305c -#pragma Aux_register(0x8001305c, name=>"io_i2c_slv0_clr_activity") - -// User extension aux register io_i2c_slv0_clr_stop_det -#define AR_IO_I2C_SLV0_CLR_STOP_DET 0x80013060 -#pragma Aux_register(0x80013060, name=>"io_i2c_slv0_clr_stop_det") - -// User extension aux register 
io_i2c_slv0_clr_start_det -#define AR_IO_I2C_SLV0_CLR_START_DET 0x80013064 -#pragma Aux_register(0x80013064, name=>"io_i2c_slv0_clr_start_det") - -// User extension aux register io_i2c_slv0_enable -#define AR_IO_I2C_SLV0_ENABLE 0x8001306c -#pragma Aux_register(0x8001306c, name=>"io_i2c_slv0_enable") - -// User extension aux register io_i2c_slv0_status -#define AR_IO_I2C_SLV0_STATUS 0x80013070 -#pragma Aux_register(0x80013070, name=>"io_i2c_slv0_status") - -// User extension aux register io_i2c_slv0_txflr -#define AR_IO_I2C_SLV0_TXFLR 0x80013074 -#pragma Aux_register(0x80013074, name=>"io_i2c_slv0_txflr") - -// User extension aux register io_i2c_slv0_rxflr -#define AR_IO_I2C_SLV0_RXFLR 0x80013078 -#pragma Aux_register(0x80013078, name=>"io_i2c_slv0_rxflr") - -// User extension aux register io_i2c_slv0_sda_hold -#define AR_IO_I2C_SLV0_SDA_HOLD 0x8001307c -#pragma Aux_register(0x8001307c, name=>"io_i2c_slv0_sda_hold") - -// User extension aux register io_i2c_slv0_tx_abrt_source -#define AR_IO_I2C_SLV0_TX_ABRT_SOURCE 0x80013080 -#pragma Aux_register(0x80013080, name=>"io_i2c_slv0_tx_abrt_source") - -// User extension aux register io_i2c_slv0_sda_setup -#define AR_IO_I2C_SLV0_SDA_SETUP 0x80013094 -#pragma Aux_register(0x80013094, name=>"io_i2c_slv0_sda_setup") - -// User extension aux register io_i2c_slv0_enable_status -#define AR_IO_I2C_SLV0_ENABLE_STATUS 0x8001309c -#pragma Aux_register(0x8001309c, name=>"io_i2c_slv0_enable_status") - -// User extension aux register io_i2c_slv0_fs_spklen -#define AR_IO_I2C_SLV0_FS_SPKLEN 0x800130a0 -#pragma Aux_register(0x800130a0, name=>"io_i2c_slv0_fs_spklen") - -// User extension aux register io_i2c_slv0_clr_restart_det -#define AR_IO_I2C_SLV0_CLR_RESTART_DET 0x800130a8 -#pragma Aux_register(0x800130a8, name=>"io_i2c_slv0_clr_restart_det") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_SPI_MST0_PRESENT 1 - -// User extension aux register io_spi_mst0_ctrlr0 -#define AR_IO_SPI_MST0_CTRLR0 0x80010000 -#pragma Aux_register(0x80010000, 
name=>"io_spi_mst0_ctrlr0") - -// User extension aux register io_spi_mst0_ctrlr1 -#define AR_IO_SPI_MST0_CTRLR1 0x80010001 -#pragma Aux_register(0x80010001, name=>"io_spi_mst0_ctrlr1") - -// User extension aux register io_spi_mst0_spien -#define AR_IO_SPI_MST0_SPIEN 0x80010002 -#pragma Aux_register(0x80010002, name=>"io_spi_mst0_spien") - -// User extension aux register io_spi_mst0_ser -#define AR_IO_SPI_MST0_SER 0x80010004 -#pragma Aux_register(0x80010004, name=>"io_spi_mst0_ser") - -// User extension aux register io_spi_mst0_baudr -#define AR_IO_SPI_MST0_BAUDR 0x80010005 -#pragma Aux_register(0x80010005, name=>"io_spi_mst0_baudr") - -// User extension aux register io_spi_mst0_txftlr -#define AR_IO_SPI_MST0_TXFTLR 0x80010006 -#pragma Aux_register(0x80010006, name=>"io_spi_mst0_txftlr") - -// User extension aux register io_spi_mst0_rxftlr -#define AR_IO_SPI_MST0_RXFTLR 0x80010007 -#pragma Aux_register(0x80010007, name=>"io_spi_mst0_rxftlr") - -// User extension aux register io_spi_mst0_txflr -#define AR_IO_SPI_MST0_TXFLR 0x80010008 -#pragma Aux_register(0x80010008, name=>"io_spi_mst0_txflr") - -// User extension aux register io_spi_mst0_rxflr -#define AR_IO_SPI_MST0_RXFLR 0x80010009 -#pragma Aux_register(0x80010009, name=>"io_spi_mst0_rxflr") - -// User extension aux register io_spi_mst0_sr -#define AR_IO_SPI_MST0_SR 0x8001000a -#pragma Aux_register(0x8001000a, name=>"io_spi_mst0_sr") - -// User extension aux register io_spi_mst0_imr -#define AR_IO_SPI_MST0_IMR 0x8001000b -#pragma Aux_register(0x8001000b, name=>"io_spi_mst0_imr") - -// User extension aux register io_spi_mst0_isr -#define AR_IO_SPI_MST0_ISR 0x8001000c -#pragma Aux_register(0x8001000c, name=>"io_spi_mst0_isr") - -// User extension aux register io_spi_mst0_risr -#define AR_IO_SPI_MST0_RISR 0x8001000d -#pragma Aux_register(0x8001000d, name=>"io_spi_mst0_risr") - -// User extension aux register io_spi_mst0_txoicr -#define AR_IO_SPI_MST0_TXOICR 0x8001000e -#pragma Aux_register(0x8001000e, 
name=>"io_spi_mst0_txoicr") - -// User extension aux register io_spi_mst0_rxoicr -#define AR_IO_SPI_MST0_RXOICR 0x8001000f -#pragma Aux_register(0x8001000f, name=>"io_spi_mst0_rxoicr") - -// User extension aux register io_spi_mst0_rxuicr -#define AR_IO_SPI_MST0_RXUICR 0x80010010 -#pragma Aux_register(0x80010010, name=>"io_spi_mst0_rxuicr") - -// User extension aux register io_spi_mst0_icr -#define AR_IO_SPI_MST0_ICR 0x80010012 -#pragma Aux_register(0x80010012, name=>"io_spi_mst0_icr") - -// User extension aux register io_spi_mst0_clken -#define AR_IO_SPI_MST0_CLKEN 0x80010016 -#pragma Aux_register(0x80010016, name=>"io_spi_mst0_clken") - -// User extension aux register io_spi_mst0_dr -#define AR_IO_SPI_MST0_DR 0x80010018 -#pragma Aux_register(0x80010018, name=>"io_spi_mst0_dr") - -// User extension aux register io_spi_mst0_rx_sample_dly -#define AR_IO_SPI_MST0_RX_SAMPLE_DLY 0x8001003c -#pragma Aux_register(0x8001003c, name=>"io_spi_mst0_rx_sample_dly") -#define APEX_COM_ARC_HARDWARE_DFSS_SUBSYS_BCR_PRESENT 1 - -// User extension aux register SUBSYS_BUILD -#define AR_SUBSYS_BUILD 0xf0 -#pragma Aux_register(0xf0, name=>"SUBSYS_BUILD") - -// User extension aux register SUBSYS_DSP_0_BUILD -#define AR_SUBSYS_DSP_0_BUILD 0xa00 -#pragma Aux_register(0xa00, name=>"SUBSYS_DSP_0_BUILD") - -// User extension aux register SUBSYS_DSP_0_CONFIG -#define AR_SUBSYS_DSP_0_CONFIG 0xa02 -#pragma Aux_register(0xa02, name=>"SUBSYS_DSP_0_CONFIG") - -// User extension aux register SUBSYS_IO_0_BUILD -#define AR_SUBSYS_IO_0_BUILD 0xa04 -#pragma Aux_register(0xa04, name=>"SUBSYS_IO_0_BUILD") - -// User extension aux register SUBSYS_IO_1_BUILD -#define AR_SUBSYS_IO_1_BUILD 0xa05 -#pragma Aux_register(0xa05, name=>"SUBSYS_IO_1_BUILD") - -// User extension aux register SUBSYS_IO_2_BUILD -#define AR_SUBSYS_IO_2_BUILD 0xa06 -#pragma Aux_register(0xa06, name=>"SUBSYS_IO_2_BUILD") - -// User extension aux register SUBSYS_UAUX_OFFSET -#define AR_SUBSYS_UAUX_OFFSET 0xa1e -#pragma Aux_register(0xa1e, 
name=>"SUBSYS_UAUX_OFFSET") - -// User extension aux register SUBSYS_APEX_OFFSET -#define AR_SUBSYS_APEX_OFFSET 0xa1f -#pragma Aux_register(0xa1f, name=>"SUBSYS_APEX_OFFSET") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_SPI_MST1_PRESENT 1 - -// User extension aux register io_spi_mst1_ctrlr0 -#define AR_IO_SPI_MST1_CTRLR0 0x80010100 -#pragma Aux_register(0x80010100, name=>"io_spi_mst1_ctrlr0") - -// User extension aux register io_spi_mst1_ctrlr1 -#define AR_IO_SPI_MST1_CTRLR1 0x80010101 -#pragma Aux_register(0x80010101, name=>"io_spi_mst1_ctrlr1") - -// User extension aux register io_spi_mst1_spien -#define AR_IO_SPI_MST1_SPIEN 0x80010102 -#pragma Aux_register(0x80010102, name=>"io_spi_mst1_spien") - -// User extension aux register io_spi_mst1_ser -#define AR_IO_SPI_MST1_SER 0x80010104 -#pragma Aux_register(0x80010104, name=>"io_spi_mst1_ser") - -// User extension aux register io_spi_mst1_baudr -#define AR_IO_SPI_MST1_BAUDR 0x80010105 -#pragma Aux_register(0x80010105, name=>"io_spi_mst1_baudr") - -// User extension aux register io_spi_mst1_txftlr -#define AR_IO_SPI_MST1_TXFTLR 0x80010106 -#pragma Aux_register(0x80010106, name=>"io_spi_mst1_txftlr") - -// User extension aux register io_spi_mst1_rxftlr -#define AR_IO_SPI_MST1_RXFTLR 0x80010107 -#pragma Aux_register(0x80010107, name=>"io_spi_mst1_rxftlr") - -// User extension aux register io_spi_mst1_txflr -#define AR_IO_SPI_MST1_TXFLR 0x80010108 -#pragma Aux_register(0x80010108, name=>"io_spi_mst1_txflr") - -// User extension aux register io_spi_mst1_rxflr -#define AR_IO_SPI_MST1_RXFLR 0x80010109 -#pragma Aux_register(0x80010109, name=>"io_spi_mst1_rxflr") - -// User extension aux register io_spi_mst1_sr -#define AR_IO_SPI_MST1_SR 0x8001010a -#pragma Aux_register(0x8001010a, name=>"io_spi_mst1_sr") - -// User extension aux register io_spi_mst1_imr -#define AR_IO_SPI_MST1_IMR 0x8001010b -#pragma Aux_register(0x8001010b, name=>"io_spi_mst1_imr") - -// User extension aux register io_spi_mst1_isr -#define AR_IO_SPI_MST1_ISR 
0x8001010c -#pragma Aux_register(0x8001010c, name=>"io_spi_mst1_isr") - -// User extension aux register io_spi_mst1_risr -#define AR_IO_SPI_MST1_RISR 0x8001010d -#pragma Aux_register(0x8001010d, name=>"io_spi_mst1_risr") - -// User extension aux register io_spi_mst1_txoicr -#define AR_IO_SPI_MST1_TXOICR 0x8001010e -#pragma Aux_register(0x8001010e, name=>"io_spi_mst1_txoicr") - -// User extension aux register io_spi_mst1_rxoicr -#define AR_IO_SPI_MST1_RXOICR 0x8001010f -#pragma Aux_register(0x8001010f, name=>"io_spi_mst1_rxoicr") - -// User extension aux register io_spi_mst1_rxuicr -#define AR_IO_SPI_MST1_RXUICR 0x80010110 -#pragma Aux_register(0x80010110, name=>"io_spi_mst1_rxuicr") - -// User extension aux register io_spi_mst1_icr -#define AR_IO_SPI_MST1_ICR 0x80010112 -#pragma Aux_register(0x80010112, name=>"io_spi_mst1_icr") - -// User extension aux register io_spi_mst1_clken -#define AR_IO_SPI_MST1_CLKEN 0x80010116 -#pragma Aux_register(0x80010116, name=>"io_spi_mst1_clken") - -// User extension aux register io_spi_mst1_dr -#define AR_IO_SPI_MST1_DR 0x80010118 -#pragma Aux_register(0x80010118, name=>"io_spi_mst1_dr") - -// User extension aux register io_spi_mst1_rx_sample_dly -#define AR_IO_SPI_MST1_RX_SAMPLE_DLY 0x8001013c -#pragma Aux_register(0x8001013c, name=>"io_spi_mst1_rx_sample_dly") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_SPI_MST2_PRESENT 1 - -// User extension aux register io_spi_mst2_ctrlr0 -#define AR_IO_SPI_MST2_CTRLR0 0x80010200 -#pragma Aux_register(0x80010200, name=>"io_spi_mst2_ctrlr0") - -// User extension aux register io_spi_mst2_ctrlr1 -#define AR_IO_SPI_MST2_CTRLR1 0x80010201 -#pragma Aux_register(0x80010201, name=>"io_spi_mst2_ctrlr1") - -// User extension aux register io_spi_mst2_spien -#define AR_IO_SPI_MST2_SPIEN 0x80010202 -#pragma Aux_register(0x80010202, name=>"io_spi_mst2_spien") - -// User extension aux register io_spi_mst2_ser -#define AR_IO_SPI_MST2_SER 0x80010204 -#pragma Aux_register(0x80010204, name=>"io_spi_mst2_ser") - -// 
User extension aux register io_spi_mst2_baudr -#define AR_IO_SPI_MST2_BAUDR 0x80010205 -#pragma Aux_register(0x80010205, name=>"io_spi_mst2_baudr") - -// User extension aux register io_spi_mst2_txftlr -#define AR_IO_SPI_MST2_TXFTLR 0x80010206 -#pragma Aux_register(0x80010206, name=>"io_spi_mst2_txftlr") - -// User extension aux register io_spi_mst2_rxftlr -#define AR_IO_SPI_MST2_RXFTLR 0x80010207 -#pragma Aux_register(0x80010207, name=>"io_spi_mst2_rxftlr") - -// User extension aux register io_spi_mst2_txflr -#define AR_IO_SPI_MST2_TXFLR 0x80010208 -#pragma Aux_register(0x80010208, name=>"io_spi_mst2_txflr") - -// User extension aux register io_spi_mst2_rxflr -#define AR_IO_SPI_MST2_RXFLR 0x80010209 -#pragma Aux_register(0x80010209, name=>"io_spi_mst2_rxflr") - -// User extension aux register io_spi_mst2_sr -#define AR_IO_SPI_MST2_SR 0x8001020a -#pragma Aux_register(0x8001020a, name=>"io_spi_mst2_sr") - -// User extension aux register io_spi_mst2_imr -#define AR_IO_SPI_MST2_IMR 0x8001020b -#pragma Aux_register(0x8001020b, name=>"io_spi_mst2_imr") - -// User extension aux register io_spi_mst2_isr -#define AR_IO_SPI_MST2_ISR 0x8001020c -#pragma Aux_register(0x8001020c, name=>"io_spi_mst2_isr") - -// User extension aux register io_spi_mst2_risr -#define AR_IO_SPI_MST2_RISR 0x8001020d -#pragma Aux_register(0x8001020d, name=>"io_spi_mst2_risr") - -// User extension aux register io_spi_mst2_txoicr -#define AR_IO_SPI_MST2_TXOICR 0x8001020e -#pragma Aux_register(0x8001020e, name=>"io_spi_mst2_txoicr") - -// User extension aux register io_spi_mst2_rxoicr -#define AR_IO_SPI_MST2_RXOICR 0x8001020f -#pragma Aux_register(0x8001020f, name=>"io_spi_mst2_rxoicr") - -// User extension aux register io_spi_mst2_rxuicr -#define AR_IO_SPI_MST2_RXUICR 0x80010210 -#pragma Aux_register(0x80010210, name=>"io_spi_mst2_rxuicr") - -// User extension aux register io_spi_mst2_icr -#define AR_IO_SPI_MST2_ICR 0x80010212 -#pragma Aux_register(0x80010212, name=>"io_spi_mst2_icr") - -// User 
extension aux register io_spi_mst2_clken -#define AR_IO_SPI_MST2_CLKEN 0x80010216 -#pragma Aux_register(0x80010216, name=>"io_spi_mst2_clken") - -// User extension aux register io_spi_mst2_dr -#define AR_IO_SPI_MST2_DR 0x80010218 -#pragma Aux_register(0x80010218, name=>"io_spi_mst2_dr") - -// User extension aux register io_spi_mst2_rx_sample_dly -#define AR_IO_SPI_MST2_RX_SAMPLE_DLY 0x8001023c -#pragma Aux_register(0x8001023c, name=>"io_spi_mst2_rx_sample_dly") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_SPI_SLV0_PRESENT 1 - -// User extension aux register io_spi_slv0_ctrlr0 -#define AR_IO_SPI_SLV0_CTRLR0 0x80011000 -#pragma Aux_register(0x80011000, name=>"io_spi_slv0_ctrlr0") - -// User extension aux register io_spi_slv0_spien -#define AR_IO_SPI_SLV0_SPIEN 0x80011002 -#pragma Aux_register(0x80011002, name=>"io_spi_slv0_spien") - -// User extension aux register io_spi_slv0_txftlr -#define AR_IO_SPI_SLV0_TXFTLR 0x80011006 -#pragma Aux_register(0x80011006, name=>"io_spi_slv0_txftlr") - -// User extension aux register io_spi_slv0_rxftlr -#define AR_IO_SPI_SLV0_RXFTLR 0x80011007 -#pragma Aux_register(0x80011007, name=>"io_spi_slv0_rxftlr") - -// User extension aux register io_spi_slv0_txflr -#define AR_IO_SPI_SLV0_TXFLR 0x80011008 -#pragma Aux_register(0x80011008, name=>"io_spi_slv0_txflr") - -// User extension aux register io_spi_slv0_rxflr -#define AR_IO_SPI_SLV0_RXFLR 0x80011009 -#pragma Aux_register(0x80011009, name=>"io_spi_slv0_rxflr") - -// User extension aux register io_spi_slv0_sr -#define AR_IO_SPI_SLV0_SR 0x8001100a -#pragma Aux_register(0x8001100a, name=>"io_spi_slv0_sr") - -// User extension aux register io_spi_slv0_imr -#define AR_IO_SPI_SLV0_IMR 0x8001100b -#pragma Aux_register(0x8001100b, name=>"io_spi_slv0_imr") - -// User extension aux register io_spi_slv0_isr -#define AR_IO_SPI_SLV0_ISR 0x8001100c -#pragma Aux_register(0x8001100c, name=>"io_spi_slv0_isr") - -// User extension aux register io_spi_slv0_risr -#define AR_IO_SPI_SLV0_RISR 0x8001100d -#pragma 
Aux_register(0x8001100d, name=>"io_spi_slv0_risr") - -// User extension aux register io_spi_slv0_txoicr -#define AR_IO_SPI_SLV0_TXOICR 0x8001100e -#pragma Aux_register(0x8001100e, name=>"io_spi_slv0_txoicr") - -// User extension aux register io_spi_slv0_rxoicr -#define AR_IO_SPI_SLV0_RXOICR 0x8001100f -#pragma Aux_register(0x8001100f, name=>"io_spi_slv0_rxoicr") - -// User extension aux register io_spi_slv0_rxuicr -#define AR_IO_SPI_SLV0_RXUICR 0x80011010 -#pragma Aux_register(0x80011010, name=>"io_spi_slv0_rxuicr") - -// User extension aux register io_spi_slv0_icr -#define AR_IO_SPI_SLV0_ICR 0x80011012 -#pragma Aux_register(0x80011012, name=>"io_spi_slv0_icr") - -// User extension aux register io_spi_slv0_clken -#define AR_IO_SPI_SLV0_CLKEN 0x80011016 -#pragma Aux_register(0x80011016, name=>"io_spi_slv0_clken") - -// User extension aux register io_spi_slv0_dr -#define AR_IO_SPI_SLV0_DR 0x80011018 -#pragma Aux_register(0x80011018, name=>"io_spi_slv0_dr") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_GPIO1_PRESENT 1 - -// User extension aux register io_gpio1_debounce -#define AR_IO_GPIO1_DEBOUNCE 0x80017148 -#pragma Aux_register(0x80017148, name=>"io_gpio1_debounce") - -// User extension aux register io_gpio1_clken -#define AR_IO_GPIO1_CLKEN 0x80017180 -#pragma Aux_register(0x80017180, name=>"io_gpio1_clken") - -// User extension aux register io_gpio1_swporta_dr -#define AR_IO_GPIO1_SWPORTA_DR 0x80017100 -#pragma Aux_register(0x80017100, name=>"io_gpio1_swporta_dr") - -// User extension aux register io_gpio1_swporta_ddr -#define AR_IO_GPIO1_SWPORTA_DDR 0x80017104 -#pragma Aux_register(0x80017104, name=>"io_gpio1_swporta_ddr") - -// User extension aux register io_gpio1_inten -#define AR_IO_GPIO1_INTEN 0x80017130 -#pragma Aux_register(0x80017130, name=>"io_gpio1_inten") - -// User extension aux register io_gpio1_intmask -#define AR_IO_GPIO1_INTMASK 0x80017134 -#pragma Aux_register(0x80017134, name=>"io_gpio1_intmask") - -// User extension aux register io_gpio1_inttype_level 
-#define AR_IO_GPIO1_INTTYPE_LEVEL 0x80017138 -#pragma Aux_register(0x80017138, name=>"io_gpio1_inttype_level") - -// User extension aux register io_gpio1_int_polarity -#define AR_IO_GPIO1_INT_POLARITY 0x8001713c -#pragma Aux_register(0x8001713c, name=>"io_gpio1_int_polarity") - -// User extension aux register io_gpio1_intstatus -#define AR_IO_GPIO1_INTSTATUS 0x80017140 -#pragma Aux_register(0x80017140, name=>"io_gpio1_intstatus") - -// User extension aux register io_gpio1_raw_intstatus -#define AR_IO_GPIO1_RAW_INTSTATUS 0x80017144 -#pragma Aux_register(0x80017144, name=>"io_gpio1_raw_intstatus") - -// User extension aux register io_gpio1_porta_eoi -#define AR_IO_GPIO1_PORTA_EOI 0x8001714c -#pragma Aux_register(0x8001714c, name=>"io_gpio1_porta_eoi") - -// User extension aux register io_gpio1_ext_porta -#define AR_IO_GPIO1_EXT_PORTA 0x80017150 -#pragma Aux_register(0x80017150, name=>"io_gpio1_ext_porta") - -// User extension aux register io_gpio1_ls_sync -#define AR_IO_GPIO1_LS_SYNC 0x80017160 -#pragma Aux_register(0x80017160, name=>"io_gpio1_ls_sync") - -// User extension aux register io_gpio1_int_bothedge -#define AR_IO_GPIO1_INT_BOTHEDGE 0x80017168 -#pragma Aux_register(0x80017168, name=>"io_gpio1_int_bothedge") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_GPIO2_PRESENT 1 - -// User extension aux register io_gpio2_debounce -#define AR_IO_GPIO2_DEBOUNCE 0x80017248 -#pragma Aux_register(0x80017248, name=>"io_gpio2_debounce") - -// User extension aux register io_gpio2_clken -#define AR_IO_GPIO2_CLKEN 0x80017280 -#pragma Aux_register(0x80017280, name=>"io_gpio2_clken") - -// User extension aux register io_gpio2_swporta_dr -#define AR_IO_GPIO2_SWPORTA_DR 0x80017200 -#pragma Aux_register(0x80017200, name=>"io_gpio2_swporta_dr") - -// User extension aux register io_gpio2_swporta_ddr -#define AR_IO_GPIO2_SWPORTA_DDR 0x80017204 -#pragma Aux_register(0x80017204, name=>"io_gpio2_swporta_ddr") - -// User extension aux register io_gpio2_inten -#define AR_IO_GPIO2_INTEN 0x80017230 
-#pragma Aux_register(0x80017230, name=>"io_gpio2_inten") - -// User extension aux register io_gpio2_intmask -#define AR_IO_GPIO2_INTMASK 0x80017234 -#pragma Aux_register(0x80017234, name=>"io_gpio2_intmask") - -// User extension aux register io_gpio2_inttype_level -#define AR_IO_GPIO2_INTTYPE_LEVEL 0x80017238 -#pragma Aux_register(0x80017238, name=>"io_gpio2_inttype_level") - -// User extension aux register io_gpio2_int_polarity -#define AR_IO_GPIO2_INT_POLARITY 0x8001723c -#pragma Aux_register(0x8001723c, name=>"io_gpio2_int_polarity") - -// User extension aux register io_gpio2_intstatus -#define AR_IO_GPIO2_INTSTATUS 0x80017240 -#pragma Aux_register(0x80017240, name=>"io_gpio2_intstatus") - -// User extension aux register io_gpio2_raw_intstatus -#define AR_IO_GPIO2_RAW_INTSTATUS 0x80017244 -#pragma Aux_register(0x80017244, name=>"io_gpio2_raw_intstatus") - -// User extension aux register io_gpio2_porta_eoi -#define AR_IO_GPIO2_PORTA_EOI 0x8001724c -#pragma Aux_register(0x8001724c, name=>"io_gpio2_porta_eoi") - -// User extension aux register io_gpio2_ext_porta -#define AR_IO_GPIO2_EXT_PORTA 0x80017250 -#pragma Aux_register(0x80017250, name=>"io_gpio2_ext_porta") - -// User extension aux register io_gpio2_ls_sync -#define AR_IO_GPIO2_LS_SYNC 0x80017260 -#pragma Aux_register(0x80017260, name=>"io_gpio2_ls_sync") - -// User extension aux register io_gpio2_int_bothedge -#define AR_IO_GPIO2_INT_BOTHEDGE 0x80017268 -#pragma Aux_register(0x80017268, name=>"io_gpio2_int_bothedge") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_I2C_MST1_PRESENT 1 - -// User extension aux register io_i2c_mst1_clken -#define AR_IO_I2C_MST1_CLKEN 0x800121c0 -#pragma Aux_register(0x800121c0, name=>"io_i2c_mst1_clken") - -// User extension aux register io_i2c_mst1_con -#define AR_IO_I2C_MST1_CON 0x80012100 -#pragma Aux_register(0x80012100, name=>"io_i2c_mst1_con") - -// User extension aux register io_i2c_mst1_tar -#define AR_IO_I2C_MST1_TAR 0x80012104 -#pragma Aux_register(0x80012104, 
name=>"io_i2c_mst1_tar") - -// User extension aux register io_i2c_mst1_data_cmd -#define AR_IO_I2C_MST1_DATA_CMD 0x80012110 -#pragma Aux_register(0x80012110, name=>"io_i2c_mst1_data_cmd") - -// User extension aux register io_i2c_mst1_ss_scl_hcnt -#define AR_IO_I2C_MST1_SS_SCL_HCNT 0x80012114 -#pragma Aux_register(0x80012114, name=>"io_i2c_mst1_ss_scl_hcnt") - -// User extension aux register io_i2c_mst1_ss_scl_lcnt -#define AR_IO_I2C_MST1_SS_SCL_LCNT 0x80012118 -#pragma Aux_register(0x80012118, name=>"io_i2c_mst1_ss_scl_lcnt") - -// User extension aux register io_i2c_mst1_fs_scl_hcnt -#define AR_IO_I2C_MST1_FS_SCL_HCNT 0x8001211c -#pragma Aux_register(0x8001211c, name=>"io_i2c_mst1_fs_scl_hcnt") - -// User extension aux register io_i2c_mst1_fs_scl_lcnt -#define AR_IO_I2C_MST1_FS_SCL_LCNT 0x80012120 -#pragma Aux_register(0x80012120, name=>"io_i2c_mst1_fs_scl_lcnt") - -// User extension aux register io_i2c_mst1_intr_stat -#define AR_IO_I2C_MST1_INTR_STAT 0x8001212c -#pragma Aux_register(0x8001212c, name=>"io_i2c_mst1_intr_stat") - -// User extension aux register io_i2c_mst1_intr_mask -#define AR_IO_I2C_MST1_INTR_MASK 0x80012130 -#pragma Aux_register(0x80012130, name=>"io_i2c_mst1_intr_mask") - -// User extension aux register io_i2c_mst1_raw_intr_stat -#define AR_IO_I2C_MST1_RAW_INTR_STAT 0x80012134 -#pragma Aux_register(0x80012134, name=>"io_i2c_mst1_raw_intr_stat") - -// User extension aux register io_i2c_mst1_rx_tl -#define AR_IO_I2C_MST1_RX_TL 0x80012138 -#pragma Aux_register(0x80012138, name=>"io_i2c_mst1_rx_tl") - -// User extension aux register io_i2c_mst1_tx_tl -#define AR_IO_I2C_MST1_TX_TL 0x8001213c -#pragma Aux_register(0x8001213c, name=>"io_i2c_mst1_tx_tl") - -// User extension aux register io_i2c_mst1_clr_intr -#define AR_IO_I2C_MST1_CLR_INTR 0x80012140 -#pragma Aux_register(0x80012140, name=>"io_i2c_mst1_clr_intr") - -// User extension aux register io_i2c_mst1_clr_rx_under -#define AR_IO_I2C_MST1_CLR_RX_UNDER 0x80012144 -#pragma Aux_register(0x80012144, 
name=>"io_i2c_mst1_clr_rx_under") - -// User extension aux register io_i2c_mst1_clr_rx_over -#define AR_IO_I2C_MST1_CLR_RX_OVER 0x80012148 -#pragma Aux_register(0x80012148, name=>"io_i2c_mst1_clr_rx_over") - -// User extension aux register io_i2c_mst1_clr_tx_over -#define AR_IO_I2C_MST1_CLR_TX_OVER 0x8001214c -#pragma Aux_register(0x8001214c, name=>"io_i2c_mst1_clr_tx_over") - -// User extension aux register io_i2c_mst1_clr_tx_abrt -#define AR_IO_I2C_MST1_CLR_TX_ABRT 0x80012154 -#pragma Aux_register(0x80012154, name=>"io_i2c_mst1_clr_tx_abrt") - -// User extension aux register io_i2c_mst1_clr_activity -#define AR_IO_I2C_MST1_CLR_ACTIVITY 0x8001215c -#pragma Aux_register(0x8001215c, name=>"io_i2c_mst1_clr_activity") - -// User extension aux register io_i2c_mst1_clr_stop_det -#define AR_IO_I2C_MST1_CLR_STOP_DET 0x80012160 -#pragma Aux_register(0x80012160, name=>"io_i2c_mst1_clr_stop_det") - -// User extension aux register io_i2c_mst1_clr_start_det -#define AR_IO_I2C_MST1_CLR_START_DET 0x80012164 -#pragma Aux_register(0x80012164, name=>"io_i2c_mst1_clr_start_det") - -// User extension aux register io_i2c_mst1_enable -#define AR_IO_I2C_MST1_ENABLE 0x8001216c -#pragma Aux_register(0x8001216c, name=>"io_i2c_mst1_enable") - -// User extension aux register io_i2c_mst1_status -#define AR_IO_I2C_MST1_STATUS 0x80012170 -#pragma Aux_register(0x80012170, name=>"io_i2c_mst1_status") - -// User extension aux register io_i2c_mst1_txflr -#define AR_IO_I2C_MST1_TXFLR 0x80012174 -#pragma Aux_register(0x80012174, name=>"io_i2c_mst1_txflr") - -// User extension aux register io_i2c_mst1_rxflr -#define AR_IO_I2C_MST1_RXFLR 0x80012178 -#pragma Aux_register(0x80012178, name=>"io_i2c_mst1_rxflr") - -// User extension aux register io_i2c_mst1_sda_hold -#define AR_IO_I2C_MST1_SDA_HOLD 0x8001217c -#pragma Aux_register(0x8001217c, name=>"io_i2c_mst1_sda_hold") - -// User extension aux register io_i2c_mst1_tx_abrt_source -#define AR_IO_I2C_MST1_TX_ABRT_SOURCE 0x80012180 -#pragma 
Aux_register(0x80012180, name=>"io_i2c_mst1_tx_abrt_source") - -// User extension aux register io_i2c_mst1_enable_status -#define AR_IO_I2C_MST1_ENABLE_STATUS 0x8001219c -#pragma Aux_register(0x8001219c, name=>"io_i2c_mst1_enable_status") - -// User extension aux register io_i2c_mst1_fs_spklen -#define AR_IO_I2C_MST1_FS_SPKLEN 0x800121a0 -#pragma Aux_register(0x800121a0, name=>"io_i2c_mst1_fs_spklen") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_I2C_MST2_PRESENT 1 - -// User extension aux register io_i2c_mst2_clken -#define AR_IO_I2C_MST2_CLKEN 0x800122c0 -#pragma Aux_register(0x800122c0, name=>"io_i2c_mst2_clken") - -// User extension aux register io_i2c_mst2_con -#define AR_IO_I2C_MST2_CON 0x80012200 -#pragma Aux_register(0x80012200, name=>"io_i2c_mst2_con") - -// User extension aux register io_i2c_mst2_tar -#define AR_IO_I2C_MST2_TAR 0x80012204 -#pragma Aux_register(0x80012204, name=>"io_i2c_mst2_tar") - -// User extension aux register io_i2c_mst2_data_cmd -#define AR_IO_I2C_MST2_DATA_CMD 0x80012210 -#pragma Aux_register(0x80012210, name=>"io_i2c_mst2_data_cmd") - -// User extension aux register io_i2c_mst2_ss_scl_hcnt -#define AR_IO_I2C_MST2_SS_SCL_HCNT 0x80012214 -#pragma Aux_register(0x80012214, name=>"io_i2c_mst2_ss_scl_hcnt") - -// User extension aux register io_i2c_mst2_ss_scl_lcnt -#define AR_IO_I2C_MST2_SS_SCL_LCNT 0x80012218 -#pragma Aux_register(0x80012218, name=>"io_i2c_mst2_ss_scl_lcnt") - -// User extension aux register io_i2c_mst2_fs_scl_hcnt -#define AR_IO_I2C_MST2_FS_SCL_HCNT 0x8001221c -#pragma Aux_register(0x8001221c, name=>"io_i2c_mst2_fs_scl_hcnt") - -// User extension aux register io_i2c_mst2_fs_scl_lcnt -#define AR_IO_I2C_MST2_FS_SCL_LCNT 0x80012220 -#pragma Aux_register(0x80012220, name=>"io_i2c_mst2_fs_scl_lcnt") - -// User extension aux register io_i2c_mst2_intr_stat -#define AR_IO_I2C_MST2_INTR_STAT 0x8001222c -#pragma Aux_register(0x8001222c, name=>"io_i2c_mst2_intr_stat") - -// User extension aux register io_i2c_mst2_intr_mask -#define 
AR_IO_I2C_MST2_INTR_MASK 0x80012230 -#pragma Aux_register(0x80012230, name=>"io_i2c_mst2_intr_mask") - -// User extension aux register io_i2c_mst2_raw_intr_stat -#define AR_IO_I2C_MST2_RAW_INTR_STAT 0x80012234 -#pragma Aux_register(0x80012234, name=>"io_i2c_mst2_raw_intr_stat") - -// User extension aux register io_i2c_mst2_rx_tl -#define AR_IO_I2C_MST2_RX_TL 0x80012238 -#pragma Aux_register(0x80012238, name=>"io_i2c_mst2_rx_tl") - -// User extension aux register io_i2c_mst2_tx_tl -#define AR_IO_I2C_MST2_TX_TL 0x8001223c -#pragma Aux_register(0x8001223c, name=>"io_i2c_mst2_tx_tl") - -// User extension aux register io_i2c_mst2_clr_intr -#define AR_IO_I2C_MST2_CLR_INTR 0x80012240 -#pragma Aux_register(0x80012240, name=>"io_i2c_mst2_clr_intr") - -// User extension aux register io_i2c_mst2_clr_rx_under -#define AR_IO_I2C_MST2_CLR_RX_UNDER 0x80012244 -#pragma Aux_register(0x80012244, name=>"io_i2c_mst2_clr_rx_under") - -// User extension aux register io_i2c_mst2_clr_rx_over -#define AR_IO_I2C_MST2_CLR_RX_OVER 0x80012248 -#pragma Aux_register(0x80012248, name=>"io_i2c_mst2_clr_rx_over") - -// User extension aux register io_i2c_mst2_clr_tx_over -#define AR_IO_I2C_MST2_CLR_TX_OVER 0x8001224c -#pragma Aux_register(0x8001224c, name=>"io_i2c_mst2_clr_tx_over") - -// User extension aux register io_i2c_mst2_clr_tx_abrt -#define AR_IO_I2C_MST2_CLR_TX_ABRT 0x80012254 -#pragma Aux_register(0x80012254, name=>"io_i2c_mst2_clr_tx_abrt") - -// User extension aux register io_i2c_mst2_clr_activity -#define AR_IO_I2C_MST2_CLR_ACTIVITY 0x8001225c -#pragma Aux_register(0x8001225c, name=>"io_i2c_mst2_clr_activity") - -// User extension aux register io_i2c_mst2_clr_stop_det -#define AR_IO_I2C_MST2_CLR_STOP_DET 0x80012260 -#pragma Aux_register(0x80012260, name=>"io_i2c_mst2_clr_stop_det") - -// User extension aux register io_i2c_mst2_clr_start_det -#define AR_IO_I2C_MST2_CLR_START_DET 0x80012264 -#pragma Aux_register(0x80012264, name=>"io_i2c_mst2_clr_start_det") - -// User extension aux 
register io_i2c_mst2_enable -#define AR_IO_I2C_MST2_ENABLE 0x8001226c -#pragma Aux_register(0x8001226c, name=>"io_i2c_mst2_enable") - -// User extension aux register io_i2c_mst2_status -#define AR_IO_I2C_MST2_STATUS 0x80012270 -#pragma Aux_register(0x80012270, name=>"io_i2c_mst2_status") - -// User extension aux register io_i2c_mst2_txflr -#define AR_IO_I2C_MST2_TXFLR 0x80012274 -#pragma Aux_register(0x80012274, name=>"io_i2c_mst2_txflr") - -// User extension aux register io_i2c_mst2_rxflr -#define AR_IO_I2C_MST2_RXFLR 0x80012278 -#pragma Aux_register(0x80012278, name=>"io_i2c_mst2_rxflr") - -// User extension aux register io_i2c_mst2_sda_hold -#define AR_IO_I2C_MST2_SDA_HOLD 0x8001227c -#pragma Aux_register(0x8001227c, name=>"io_i2c_mst2_sda_hold") - -// User extension aux register io_i2c_mst2_tx_abrt_source -#define AR_IO_I2C_MST2_TX_ABRT_SOURCE 0x80012280 -#pragma Aux_register(0x80012280, name=>"io_i2c_mst2_tx_abrt_source") - -// User extension aux register io_i2c_mst2_enable_status -#define AR_IO_I2C_MST2_ENABLE_STATUS 0x8001229c -#pragma Aux_register(0x8001229c, name=>"io_i2c_mst2_enable_status") - -// User extension aux register io_i2c_mst2_fs_spklen -#define AR_IO_I2C_MST2_FS_SPKLEN 0x800122a0 -#pragma Aux_register(0x800122a0, name=>"io_i2c_mst2_fs_spklen") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_UART0_PRESENT 1 - -// User extension aux register io_uart0_clken -#define AR_IO_UART0_CLKEN 0x800140c0 -#pragma Aux_register(0x800140c0, name=>"io_uart0_clken") - -// User extension aux register io_uart0_rbr_thr_dll -#define AR_IO_UART0_RBR_THR_DLL 0x80014000 -#pragma Aux_register(0x80014000, name=>"io_uart0_rbr_thr_dll") - -// User extension aux register io_uart0_ier_dlh -#define AR_IO_UART0_IER_DLH 0x80014004 -#pragma Aux_register(0x80014004, name=>"io_uart0_ier_dlh") - -// User extension aux register io_uart0_iir_fcr -#define AR_IO_UART0_IIR_FCR 0x80014008 -#pragma Aux_register(0x80014008, name=>"io_uart0_iir_fcr") - -// User extension aux register io_uart0_lcr 
-#define AR_IO_UART0_LCR 0x8001400c -#pragma Aux_register(0x8001400c, name=>"io_uart0_lcr") - -// User extension aux register io_uart0_mcr -#define AR_IO_UART0_MCR 0x80014010 -#pragma Aux_register(0x80014010, name=>"io_uart0_mcr") - -// User extension aux register io_uart0_lsr -#define AR_IO_UART0_LSR 0x80014014 -#pragma Aux_register(0x80014014, name=>"io_uart0_lsr") - -// User extension aux register io_uart0_msr -#define AR_IO_UART0_MSR 0x80014018 -#pragma Aux_register(0x80014018, name=>"io_uart0_msr") - -// User extension aux register io_uart0_usr -#define AR_IO_UART0_USR 0x8001407c -#pragma Aux_register(0x8001407c, name=>"io_uart0_usr") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_UART1_PRESENT 1 - -// User extension aux register io_uart1_clken -#define AR_IO_UART1_CLKEN 0x800141c0 -#pragma Aux_register(0x800141c0, name=>"io_uart1_clken") - -// User extension aux register io_uart1_rbr_thr_dll -#define AR_IO_UART1_RBR_THR_DLL 0x80014100 -#pragma Aux_register(0x80014100, name=>"io_uart1_rbr_thr_dll") - -// User extension aux register io_uart1_ier_dlh -#define AR_IO_UART1_IER_DLH 0x80014104 -#pragma Aux_register(0x80014104, name=>"io_uart1_ier_dlh") - -// User extension aux register io_uart1_iir_fcr -#define AR_IO_UART1_IIR_FCR 0x80014108 -#pragma Aux_register(0x80014108, name=>"io_uart1_iir_fcr") - -// User extension aux register io_uart1_lcr -#define AR_IO_UART1_LCR 0x8001410c -#pragma Aux_register(0x8001410c, name=>"io_uart1_lcr") - -// User extension aux register io_uart1_mcr -#define AR_IO_UART1_MCR 0x80014110 -#pragma Aux_register(0x80014110, name=>"io_uart1_mcr") - -// User extension aux register io_uart1_lsr -#define AR_IO_UART1_LSR 0x80014114 -#pragma Aux_register(0x80014114, name=>"io_uart1_lsr") - -// User extension aux register io_uart1_msr -#define AR_IO_UART1_MSR 0x80014118 -#pragma Aux_register(0x80014118, name=>"io_uart1_msr") - -// User extension aux register io_uart1_usr -#define AR_IO_UART1_USR 0x8001417c -#pragma Aux_register(0x8001417c, 
name=>"io_uart1_usr") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_UART2_PRESENT 1 - -// User extension aux register io_uart2_clken -#define AR_IO_UART2_CLKEN 0x800142c0 -#pragma Aux_register(0x800142c0, name=>"io_uart2_clken") - -// User extension aux register io_uart2_rbr_thr_dll -#define AR_IO_UART2_RBR_THR_DLL 0x80014200 -#pragma Aux_register(0x80014200, name=>"io_uart2_rbr_thr_dll") - -// User extension aux register io_uart2_ier_dlh -#define AR_IO_UART2_IER_DLH 0x80014204 -#pragma Aux_register(0x80014204, name=>"io_uart2_ier_dlh") - -// User extension aux register io_uart2_iir_fcr -#define AR_IO_UART2_IIR_FCR 0x80014208 -#pragma Aux_register(0x80014208, name=>"io_uart2_iir_fcr") - -// User extension aux register io_uart2_lcr -#define AR_IO_UART2_LCR 0x8001420c -#pragma Aux_register(0x8001420c, name=>"io_uart2_lcr") - -// User extension aux register io_uart2_mcr -#define AR_IO_UART2_MCR 0x80014210 -#pragma Aux_register(0x80014210, name=>"io_uart2_mcr") - -// User extension aux register io_uart2_lsr -#define AR_IO_UART2_LSR 0x80014214 -#pragma Aux_register(0x80014214, name=>"io_uart2_lsr") - -// User extension aux register io_uart2_msr -#define AR_IO_UART2_MSR 0x80014218 -#pragma Aux_register(0x80014218, name=>"io_uart2_msr") - -// User extension aux register io_uart2_usr -#define AR_IO_UART2_USR 0x8001427c -#pragma Aux_register(0x8001427c, name=>"io_uart2_usr") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_UART3_PRESENT 1 - -// User extension aux register io_uart3_clken -#define AR_IO_UART3_CLKEN 0x800143c0 -#pragma Aux_register(0x800143c0, name=>"io_uart3_clken") - -// User extension aux register io_uart3_rbr_thr_dll -#define AR_IO_UART3_RBR_THR_DLL 0x80014300 -#pragma Aux_register(0x80014300, name=>"io_uart3_rbr_thr_dll") - -// User extension aux register io_uart3_ier_dlh -#define AR_IO_UART3_IER_DLH 0x80014304 -#pragma Aux_register(0x80014304, name=>"io_uart3_ier_dlh") - -// User extension aux register io_uart3_iir_fcr -#define AR_IO_UART3_IIR_FCR 0x80014308 -#pragma 
Aux_register(0x80014308, name=>"io_uart3_iir_fcr") - -// User extension aux register io_uart3_lcr -#define AR_IO_UART3_LCR 0x8001430c -#pragma Aux_register(0x8001430c, name=>"io_uart3_lcr") - -// User extension aux register io_uart3_mcr -#define AR_IO_UART3_MCR 0x80014310 -#pragma Aux_register(0x80014310, name=>"io_uart3_mcr") - -// User extension aux register io_uart3_lsr -#define AR_IO_UART3_LSR 0x80014314 -#pragma Aux_register(0x80014314, name=>"io_uart3_lsr") - -// User extension aux register io_uart3_msr -#define AR_IO_UART3_MSR 0x80014318 -#pragma Aux_register(0x80014318, name=>"io_uart3_msr") - -// User extension aux register io_uart3_usr -#define AR_IO_UART3_USR 0x8001437c -#pragma Aux_register(0x8001437c, name=>"io_uart3_usr") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_I2S_RX_MST0_PRESENT 1 - -// User extension aux register io_i2s_rx_mst0_ier -#define AR_IO_I2S_RX_MST0_IER 0x8001a000 -#pragma Aux_register(0x8001a000, name=>"io_i2s_rx_mst0_ier") - -// User extension aux register io_i2s_rx_mst0_irer -#define AR_IO_I2S_RX_MST0_IRER 0x8001a004 -#pragma Aux_register(0x8001a004, name=>"io_i2s_rx_mst0_irer") - -// User extension aux register io_i2s_rx_mst0_cer -#define AR_IO_I2S_RX_MST0_CER 0x8001a00c -#pragma Aux_register(0x8001a00c, name=>"io_i2s_rx_mst0_cer") - -// User extension aux register io_i2s_rx_mst0_ccr -#define AR_IO_I2S_RX_MST0_CCR 0x8001a010 -#pragma Aux_register(0x8001a010, name=>"io_i2s_rx_mst0_ccr") - -// User extension aux register io_i2s_rx_mst0_rxffr -#define AR_IO_I2S_RX_MST0_RXFFR 0x8001a014 -#pragma Aux_register(0x8001a014, name=>"io_i2s_rx_mst0_rxffr") - -// User extension aux register io_i2s_rx_mst0_lrbr -#define AR_IO_I2S_RX_MST0_LRBR 0x8001a020 -#pragma Aux_register(0x8001a020, name=>"io_i2s_rx_mst0_lrbr") - -// User extension aux register io_i2s_rx_mst0_rrbr -#define AR_IO_I2S_RX_MST0_RRBR 0x8001a024 -#pragma Aux_register(0x8001a024, name=>"io_i2s_rx_mst0_rrbr") - -// User extension aux register io_i2s_rx_mst0_rer -#define 
AR_IO_I2S_RX_MST0_RER 0x8001a028 -#pragma Aux_register(0x8001a028, name=>"io_i2s_rx_mst0_rer") - -// User extension aux register io_i2s_rx_mst0_rcr -#define AR_IO_I2S_RX_MST0_RCR 0x8001a030 -#pragma Aux_register(0x8001a030, name=>"io_i2s_rx_mst0_rcr") - -// User extension aux register io_i2s_rx_mst0_isr -#define AR_IO_I2S_RX_MST0_ISR 0x8001a038 -#pragma Aux_register(0x8001a038, name=>"io_i2s_rx_mst0_isr") - -// User extension aux register io_i2s_rx_mst0_imr -#define AR_IO_I2S_RX_MST0_IMR 0x8001a03c -#pragma Aux_register(0x8001a03c, name=>"io_i2s_rx_mst0_imr") - -// User extension aux register io_i2s_rx_mst0_ror -#define AR_IO_I2S_RX_MST0_ROR 0x8001a040 -#pragma Aux_register(0x8001a040, name=>"io_i2s_rx_mst0_ror") - -// User extension aux register io_i2s_rx_mst0_rfcr -#define AR_IO_I2S_RX_MST0_RFCR 0x8001a048 -#pragma Aux_register(0x8001a048, name=>"io_i2s_rx_mst0_rfcr") - -// User extension aux register io_i2s_rx_mst0_rff -#define AR_IO_I2S_RX_MST0_RFF 0x8001a050 -#pragma Aux_register(0x8001a050, name=>"io_i2s_rx_mst0_rff") - -// User extension aux register io_i2s_rx_mst0_rxdma -#define AR_IO_I2S_RX_MST0_RXDMA 0x8001a1c0 -#pragma Aux_register(0x8001a1c0, name=>"io_i2s_rx_mst0_rxdma") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_I2S_TX_MST0_PRESENT 1 - -// User extension aux register io_i2s_tx_mst0_ier -#define AR_IO_I2S_TX_MST0_IER 0x80019000 -#pragma Aux_register(0x80019000, name=>"io_i2s_tx_mst0_ier") - -// User extension aux register io_i2s_tx_mst0_iter -#define AR_IO_I2S_TX_MST0_ITER 0x80019008 -#pragma Aux_register(0x80019008, name=>"io_i2s_tx_mst0_iter") - -// User extension aux register io_i2s_tx_mst0_cer -#define AR_IO_I2S_TX_MST0_CER 0x8001900c -#pragma Aux_register(0x8001900c, name=>"io_i2s_tx_mst0_cer") - -// User extension aux register io_i2s_tx_mst0_ccr -#define AR_IO_I2S_TX_MST0_CCR 0x80019010 -#pragma Aux_register(0x80019010, name=>"io_i2s_tx_mst0_ccr") - -// User extension aux register io_i2s_tx_mst0_txffr -#define AR_IO_I2S_TX_MST0_TXFFR 0x80019018 
-#pragma Aux_register(0x80019018, name=>"io_i2s_tx_mst0_txffr") - -// User extension aux register io_i2s_tx_mst0_lthr -#define AR_IO_I2S_TX_MST0_LTHR 0x80019020 -#pragma Aux_register(0x80019020, name=>"io_i2s_tx_mst0_lthr") - -// User extension aux register io_i2s_tx_mst0_rthr -#define AR_IO_I2S_TX_MST0_RTHR 0x80019024 -#pragma Aux_register(0x80019024, name=>"io_i2s_tx_mst0_rthr") - -// User extension aux register io_i2s_tx_mst0_ter -#define AR_IO_I2S_TX_MST0_TER 0x8001902c -#pragma Aux_register(0x8001902c, name=>"io_i2s_tx_mst0_ter") - -// User extension aux register io_i2s_tx_mst0_tcr -#define AR_IO_I2S_TX_MST0_TCR 0x80019034 -#pragma Aux_register(0x80019034, name=>"io_i2s_tx_mst0_tcr") - -// User extension aux register io_i2s_tx_mst0_isr -#define AR_IO_I2S_TX_MST0_ISR 0x80019038 -#pragma Aux_register(0x80019038, name=>"io_i2s_tx_mst0_isr") - -// User extension aux register io_i2s_tx_mst0_imr -#define AR_IO_I2S_TX_MST0_IMR 0x8001903c -#pragma Aux_register(0x8001903c, name=>"io_i2s_tx_mst0_imr") - -// User extension aux register io_i2s_tx_mst0_tor -#define AR_IO_I2S_TX_MST0_TOR 0x80019044 -#pragma Aux_register(0x80019044, name=>"io_i2s_tx_mst0_tor") - -// User extension aux register io_i2s_tx_mst0_tfcr -#define AR_IO_I2S_TX_MST0_TFCR 0x8001904c -#pragma Aux_register(0x8001904c, name=>"io_i2s_tx_mst0_tfcr") - -// User extension aux register io_i2s_tx_mst0_tff -#define AR_IO_I2S_TX_MST0_TFF 0x80019054 -#pragma Aux_register(0x80019054, name=>"io_i2s_tx_mst0_tff") - -// User extension aux register io_i2s_tx_mst0_txdma -#define AR_IO_I2S_TX_MST0_TXDMA 0x800191c8 -#pragma Aux_register(0x800191c8, name=>"io_i2s_tx_mst0_txdma") -#define APEX_COM_ARC_HARDWARE_DFSS_IO_PDM_RX0_PRESENT 1 - -// User extension aux register io_pdm_rx0_pdm_en -#define AR_IO_PDM_RX0_PDM_EN 0x8001b000 -#pragma Aux_register(0x8001b000, name=>"io_pdm_rx0_pdm_en") - -// User extension aux register io_pdm_rx0_pdm_ren -#define AR_IO_PDM_RX0_PDM_REN 0x8001b004 -#pragma Aux_register(0x8001b004, 
name=>"io_pdm_rx0_pdm_ren") - -// User extension aux register io_pdm_rx0_cer -#define AR_IO_PDM_RX0_CER 0x8001b00c -#pragma Aux_register(0x8001b00c, name=>"io_pdm_rx0_cer") - -// User extension aux register io_pdm_rx0_rxffr -#define AR_IO_PDM_RX0_RXFFR 0x8001b014 -#pragma Aux_register(0x8001b014, name=>"io_pdm_rx0_rxffr") - -// User extension aux register io_pdm_rx0_rer0 -#define AR_IO_PDM_RX0_RER0 0x8001b028 -#pragma Aux_register(0x8001b028, name=>"io_pdm_rx0_rer0") - -// User extension aux register io_pdm_rx0_isr -#define AR_IO_PDM_RX0_ISR 0x8001b038 -#pragma Aux_register(0x8001b038, name=>"io_pdm_rx0_isr") - -// User extension aux register io_pdm_rx0_imr -#define AR_IO_PDM_RX0_IMR 0x8001b03c -#pragma Aux_register(0x8001b03c, name=>"io_pdm_rx0_imr") - -// User extension aux register io_pdm_rx0_ror -#define AR_IO_PDM_RX0_ROR 0x8001b040 -#pragma Aux_register(0x8001b040, name=>"io_pdm_rx0_ror") - -// User extension aux register io_pdm_rx0_rfcr -#define AR_IO_PDM_RX0_RFCR 0x8001b048 -#pragma Aux_register(0x8001b048, name=>"io_pdm_rx0_rfcr") - -// User extension aux register io_pdm_rx0_rxdma -#define AR_IO_PDM_RX0_RXDMA 0x8001b1c0 -#pragma Aux_register(0x8001b1c0, name=>"io_pdm_rx0_rxdma") - -// User extension aux register io_pdm_rx0_pdm_rr -#define AR_IO_PDM_RX0_PDM_RR 0x8001b1d0 -#pragma Aux_register(0x8001b1d0, name=>"io_pdm_rx0_pdm_rr") - -// User extension aux register io_pdm_rx0_cic_n -#define AR_IO_PDM_RX0_CIC_N 0x8001b1d4 -#pragma Aux_register(0x8001b1d4, name=>"io_pdm_rx0_cic_n") - -// User extension aux register io_pdm_rx0_cic_d -#define AR_IO_PDM_RX0_CIC_D 0x8001b1d8 -#pragma Aux_register(0x8001b1d8, name=>"io_pdm_rx0_cic_d") - -// User extension aux register io_pdm_rx0_dcrc -#define AR_IO_PDM_RX0_DCRC 0x8001b1dc -#pragma Aux_register(0x8001b1dc, name=>"io_pdm_rx0_dcrc") - -// User extension aux register io_pdm_rx0_brc_b0 -#define AR_IO_PDM_RX0_BRC_B0 0x8001b1e0 -#pragma Aux_register(0x8001b1e0, name=>"io_pdm_rx0_brc_b0") - -// User extension aux register 
io_pdm_rx0_brc_clp -#define AR_IO_PDM_RX0_BRC_CLP 0x8001b1f0 -#pragma Aux_register(0x8001b1f0, name=>"io_pdm_rx0_brc_clp") -#define APEX_COM_ARC_HARDWARE_FLOATING_POINT_UNIT_FPU_PRESENT 1 - -// User extension aux register fpu_build -#define AR_FPU_BUILD 0xc8 -#pragma Aux_register(0xc8, name=>"fpu_build") - -// User extension aux register fpu_ctrl -#define AR_FPU_CTRL 0x300 -#pragma Aux_register(0x300, name=>"fpu_ctrl") - -// User extension aux register fpu_status -#define AR_FPU_STATUS 0x301 -#pragma Aux_register(0x301, name=>"fpu_status") - -// User extension instruction fsmadd -extern int fsmadd(int,int); -#pragma intrinsic(fsmadd,opcode=>6,sub_opcode=>5, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fsmsub -extern int fsmsub(int,int); -#pragma intrinsic(fsmsub,opcode=>6,sub_opcode=>6, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fsmul -extern int fsmul(int,int); -#pragma intrinsic(fsmul,opcode=>6,sub_opcode=>0, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fsadd -extern int fsadd(int,int); -#pragma intrinsic(fsadd,opcode=>6,sub_opcode=>1, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fssub -extern int fssub(int,int); -#pragma intrinsic(fssub,opcode=>6,sub_opcode=>2, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fcvt32 -extern int fcvt32(int,int); -#pragma intrinsic(fcvt32,opcode=>6,sub_opcode=>8, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fsdiv -extern int fsdiv(int,int); -#pragma 
intrinsic(fsdiv,opcode=>6,sub_opcode=>7, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fscmp -extern int fscmp(int,int); -#pragma intrinsic(fscmp,opcode=>6,sub_opcode=>3, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fscmp -extern int fscmp_f(int,int); -#pragma intrinsic(fscmp_f,opcode=>6,sub_opcode=>3, set_flags => 1, flags => "zncv", effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fscmpf -extern int fscmpf(int,int); -#pragma intrinsic(fscmpf,opcode=>6,sub_opcode=>4, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fscmpf -extern int fscmpf_f(int,int); -#pragma intrinsic(fscmpf_f,opcode=>6,sub_opcode=>4, set_flags => 1, flags => "zncv", effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") - -// User extension instruction fssqrt -extern int fssqrt(int); -#pragma intrinsic(fssqrt,opcode=>6,sub_opcode=>0, effects=>"auxreg=0xc8:is_read:is_written; auxreg=0x300:is_read:is_written; auxreg=0x301:is_read:is_written") -#define APEX_COM_ARC_HARDWARE_FLOATING_POINT_UNIT_FPU_DP_ASSIST_PRESENT 1 - -// User extension aux register aux_dpfp1l -#define AR_AUX_DPFP1L 0x302 -#pragma Aux_register(0x302, name=>"aux_dpfp1l") - -// User extension aux register aux_dpfp1h -#define AR_AUX_DPFP1H 0x303 -#pragma Aux_register(0x303, name=>"aux_dpfp1h") - -// User extension aux register aux_dpfp2l -#define AR_AUX_DPFP2L 0x304 -#pragma Aux_register(0x304, name=>"aux_dpfp2l") - -// User extension aux register aux_dpfp2h -#define AR_AUX_DPFP2H 0x305 -#pragma Aux_register(0x305, name=>"aux_dpfp2h") - -// User extension instruction dmulh11 -extern int dmulh11(int,int); 
-#pragma intrinsic(dmulh11,opcode=>6,sub_opcode=>48,blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dmulh11 -extern int dmulh11_f(int,int); -#pragma intrinsic(dmulh11_f,opcode=>6,sub_opcode=>48, set_flags => 1, flags => "zncv",blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dmulh12 -extern int dmulh12(int,int); -#pragma intrinsic(dmulh12,opcode=>6,sub_opcode=>49,blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dmulh12 -extern int dmulh12_f(int,int); -#pragma intrinsic(dmulh12_f,opcode=>6,sub_opcode=>49, set_flags => 1, flags => "zncv",blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dmulh21 -extern int dmulh21(int,int); -#pragma intrinsic(dmulh21,opcode=>6,sub_opcode=>50,blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dmulh21 -extern int dmulh21_f(int,int); -#pragma intrinsic(dmulh21_f,opcode=>6,sub_opcode=>50, set_flags => 1, flags => "zncv",blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dmulh22 -extern int dmulh22(int,int); -#pragma intrinsic(dmulh22,opcode=>6,sub_opcode=>51,blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; 
auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dmulh22 -extern int dmulh22_f(int,int); -#pragma intrinsic(dmulh22_f,opcode=>6,sub_opcode=>51, set_flags => 1, flags => "zncv",blocking_cycles=> 7, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction daddh11 -extern int daddh11(int,int); -#pragma intrinsic(daddh11,opcode=>6,sub_opcode=>52,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction daddh11 -extern int daddh11_f(int,int); -#pragma intrinsic(daddh11_f,opcode=>6,sub_opcode=>52, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction daddh12 -extern int daddh12(int,int); -#pragma intrinsic(daddh12,opcode=>6,sub_opcode=>53,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction daddh12 -extern int daddh12_f(int,int); -#pragma intrinsic(daddh12_f,opcode=>6,sub_opcode=>53, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction daddh21 -extern int daddh21(int,int); -#pragma intrinsic(daddh21,opcode=>6,sub_opcode=>54,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction daddh21 -extern int daddh21_f(int,int); -#pragma 
intrinsic(daddh21_f,opcode=>6,sub_opcode=>54, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction daddh22 -extern int daddh22(int,int); -#pragma intrinsic(daddh22,opcode=>6,sub_opcode=>55,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction daddh22 -extern int daddh22_f(int,int); -#pragma intrinsic(daddh22_f,opcode=>6,sub_opcode=>55, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dsubh11 -extern int dsubh11(int,int); -#pragma intrinsic(dsubh11,opcode=>6,sub_opcode=>56,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dsubh11 -extern int dsubh11_f(int,int); -#pragma intrinsic(dsubh11_f,opcode=>6,sub_opcode=>56, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dsubh12 -extern int dsubh12(int,int); -#pragma intrinsic(dsubh12,opcode=>6,sub_opcode=>57,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dsubh12 -extern int dsubh12_f(int,int); -#pragma intrinsic(dsubh12_f,opcode=>6,sub_opcode=>57, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; 
auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dsubh21 -extern int dsubh21(int,int); -#pragma intrinsic(dsubh21,opcode=>6,sub_opcode=>58,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dsubh21 -extern int dsubh21_f(int,int); -#pragma intrinsic(dsubh21_f,opcode=>6,sub_opcode=>58, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dsubh22 -extern int dsubh22(int,int); -#pragma intrinsic(dsubh22,opcode=>6,sub_opcode=>59,blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dsubh22 -extern int dsubh22_f(int,int); -#pragma intrinsic(dsubh22_f,opcode=>6,sub_opcode=>59, set_flags => 1, flags => "zncv",blocking_cycles=> 5, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dexcl1 -extern int dexcl1(int,int); -#pragma intrinsic(dexcl1,opcode=>6,sub_opcode=>60, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - -// User extension instruction dexcl2 -extern int dexcl2(int,int); -#pragma intrinsic(dexcl2,opcode=>6,sub_opcode=>61, effects=>"auxreg=0x302:is_read:is_written; auxreg=0x303:is_read:is_written; auxreg=0x304:is_read:is_written; auxreg=0x305:is_read:is_written") - - -#endif - - -]]> - - - - -
- diff --git a/tensorflow/lite/micro/tools/make/targets/arc_emsdp_makefile.inc b/tensorflow/lite/micro/tools/make/targets/arc_emsdp_makefile.inc index 211437bd9f4..405b9698cca 100644 --- a/tensorflow/lite/micro/tools/make/targets/arc_emsdp_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/arc_emsdp_makefile.inc @@ -18,14 +18,23 @@ ifeq ($(TARGET), arc_emsdp) TARGET_ARCH := arc ARC_TOOLCHAIN := mwdt - TCF_FILE := $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp/emsdp_em11d_dfss.tcf - LCF_FILE := $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp/emsdp.lcf - UBOOT_FILE := $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp/uboot.env - UBOOT_FILE_NAME := $(notdir $(UBOOT_FILE)) BUILD_ARC_MLI := false ARC_MLI_PRE_COMPILED_TARGET := emsdp_em11d_em9d_dfss +ifneq ($(filter no_arc_mli,$(ALL_TAGS)),) + MLI_LIB_DIR = arc_mli_package + $(eval $(call add_third_party_download,$(EMBARC_MLI_PRE_COMPILED_URL),$(EMBARC_MLI_PRE_COMPILED_MD5),$(MLI_LIB_DIR),)) +else ifeq ($(BUILD_ARC_MLI), true) + MLI_LIB_DIR = arc_mli_$(ARC_MLI_PRE_COMPILED_TARGET) +endif + + TCF_FILE = $(PWD)/$(MAKEFILE_DIR)/downloads/$(MLI_LIB_DIR)/hw/emsdp_em11d_em9d_dfss.tcf + LCF_FILE = $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp/emsdp.lcf + UBOOT_FILE := $(PWD)/$(MAKEFILE_DIR)/targets/arc/emsdp/uboot.env + UBOOT_FILE_NAME := $(notdir $(UBOOT_FILE)) + + include $(MAKEFILE_DIR)/targets/arc/arc_common.inc ARC_EXTRA_APP_SETTINGS = \ diff --git a/tensorflow/lite/micro/tools/make/third_party_downloads.inc b/tensorflow/lite/micro/tools/make/third_party_downloads.inc index db420b7fd1b..d90f8548f31 100644 --- a/tensorflow/lite/micro/tools/make/third_party_downloads.inc +++ b/tensorflow/lite/micro/tools/make/third_party_downloads.inc @@ -75,7 +75,7 @@ EMBARC_MLI_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embar EMBARC_MLI_MD5 := "7eebd730db79c6834399f87e509115fb" EMBARC_MLI_PRE_COMPILED_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/releases/download/Release_1.1_RC1/embARC_MLI_package.zip" 
-EMBARC_MLI_PRE_COMPILED_MD5 := "b85b8b89446757735342795367e37d22" +EMBARC_MLI_PRE_COMPILED_MD5 := "a66d6afff8daeb40bd3a99c42de048ab" XTENSA_HIFI4_URL :="https://github.com/foss-xtensa/nnlib-hifi4/raw/master/archive/xa_nnlib_04_07.zip" XTENSA_HIFI4_MD5 :="f234764928f9a42901df33a27e118c8b" From 175ec5e02e4f7bc1662c6a6b0bde2c50292ba638 Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Fri, 8 May 2020 19:01:07 +0300 Subject: [PATCH 42/45] arc_mli slicing: Got rid of hand-written MIN/MAX macro --- .../lite/micro/kernels/arc_mli/mli_slicers.cc | 10 +++++----- .../micro/kernels/arc_mli/scratch_buf_mgr.cc | 19 ++++++++++--------- .../micro/kernels/arc_mli/scratch_buffers.cc | 3 +-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tensorflow/lite/micro/kernels/arc_mli/mli_slicers.cc b/tensorflow/lite/micro/kernels/arc_mli/mli_slicers.cc index 91bae5caa38..11065f00646 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/mli_slicers.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/mli_slicers.cc @@ -15,8 +15,8 @@ limitations under the License. #include "mli_slicers.h" -#define MAX(A,B) (((A) > (B))? (A): (B)) -#define MIN(A,B) (((A) > (B))? (B): (A)) +#include + namespace tflite { namespace ops { @@ -75,11 +75,11 @@ void TensorSlicer::ComputeSubTensor(void) { // begin and end spans the complete input region including padding areas. const int begin = (int)sub_cfg_.offset[sliceDim_] - pad_pre_; // end is clipped to the end of the full input region. this is needed for cases where the last slice is smaller than the rest. 
- const int end = MIN(begin + sub_cfg_.size[sliceDim_] + overlap_, full_tensor_->shape[sliceDim_] + pad_post_); + const int end = std::min(begin + sub_cfg_.size[sliceDim_] + overlap_, full_tensor_->shape[sliceDim_] + pad_post_); // The start coordinate of the subtensor is clipped to zero - cfg_new.offset[sliceDim_] = MAX(begin, 0); + cfg_new.offset[sliceDim_] = std::max(begin, 0); // and the stop coordinate is clipped to the size of the full tensor - const int stop_coord = MIN(end, full_tensor_->shape[sliceDim_]); + const int stop_coord = std::min(end, static_cast(full_tensor_->shape[sliceDim_])); // compute the size of the subtensor cfg_new.size[sliceDim_] = stop_coord - cfg_new.offset[sliceDim_]; diff --git a/tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.cc b/tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.cc index d030d04170c..097908e30ab 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.cc @@ -15,9 +15,10 @@ limitations under the License. #include "tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.h" #include "tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h" + +#include #include -#define MAX(A,B) (((A) > (B))? (A): (B)) -#define MIN(A,B) (((A) > (B))? (B): (A)) + namespace tflite { namespace ops { @@ -242,19 +243,19 @@ TfLiteStatus arc_scratch_buffer_calc_slice_size_io( *out_slice_height = out_height; } else { // First compute how many lines fit into the input tensor, and compute how many output lines can be computed with that. - max_lines_in = MIN(in_height, in->capacity / line_size_in); + max_lines_in = std::min(in_height, static_cast(in->capacity) / line_size_in); if (max_lines_in >= in_height) { max_out_lines_for_input = out_height; } else if (2 * max_lines_in >= in_height) { // in this case only two slices are needed, so both could benefit from padding. take the MIN to get the worst case. 
- max_out_lines_for_input = (max_lines_in + MIN(padding_top, padding_bot) - kernel_height + 1) / stride_height; + max_out_lines_for_input = (max_lines_in + std::min(padding_top, padding_bot) - kernel_height + 1) / stride_height; } else { max_out_lines_for_input = (max_lines_in - kernel_height + 1) / stride_height; // TODO add padding exceptions and test by makin fit=false; } // Ten compute how many ouput lines fit into the output tensor. - max_lines_out = MIN(out_height, out->capacity / line_size_out); + max_lines_out = std::min(out_height, static_cast(out->capacity) / line_size_out); // the smallest of the two determines the slice height for the output, and the derived sliceheight for the input. - *out_slice_height = MIN(max_out_lines_for_input, max_lines_out); + *out_slice_height = std::min(max_out_lines_for_input, max_lines_out); *in_slice_height = *out_slice_height * stride_height; } @@ -282,11 +283,11 @@ TfLiteStatus arc_scratch_buffer_calc_slice_size_weights( *slice_channels = channels; } else { // First compute how many channels fit into the weights tensor - max_ch_weigths = MIN(channels, weights->capacity / ch_size_w); + max_ch_weigths = std::min(channels, static_cast(weights->capacity) / ch_size_w); // Ten compute how many channels fit into the bias tensor. - max_ch_bias = MIN(channels, bias->capacity / ch_size_b); + max_ch_bias = std::min(channels, static_cast(bias->capacity) / ch_size_b); // the smallest of the two determines the slice size - *slice_channels = MIN(max_ch_weigths, max_ch_bias); + *slice_channels = std::min(max_ch_weigths, max_ch_bias); } if (*slice_channels > 0) { diff --git a/tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.cc b/tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.cc index a770e4ccd66..6b56770f1f7 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.cc @@ -14,9 +14,8 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h" + #include -#define MAX(A,B) (((A) > (B))? (A): (B)) -#define MIN(A,B) (((A) > (B))? (B): (A)) namespace tflite { namespace ops { From f8867620dcc60433b9a83a5af5b96276e83127d6 Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Mon, 11 May 2020 12:36:37 +0300 Subject: [PATCH 43/45] Explanatory comments in slicing tests files (ARC specific) + URL to the latest embarc_MLI library --- tensorflow/lite/micro/kernels/arc_mli/README.md | 2 +- .../lite/micro/kernels/arc_mli/conv_slicing_test.cc | 9 +++++++++ .../micro/kernels/arc_mli/depthwise_conv_slicing_test.cc | 9 +++++++++ .../kernels/arc_mli/fully_connected_slicing_test.cc | 9 +++++++++ .../lite/micro/kernels/arc_mli/pooling_slicing_test.cc | 8 ++++++++ .../lite/micro/tools/make/third_party_downloads.inc | 8 ++++---- 6 files changed, 40 insertions(+), 5 deletions(-) diff --git a/tensorflow/lite/micro/kernels/arc_mli/README.md b/tensorflow/lite/micro/kernels/arc_mli/README.md index 2b2e194e757..33e46ca871d 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/README.md +++ b/tensorflow/lite/micro/kernels/arc_mli/README.md @@ -16,7 +16,7 @@ In case MLI implementation can’t be used, kernels in this folder fallback to T For ARC EM SDP board, a pre-compiled MLI library is downloaded and used in the application. For a custom target ARC-based platform, MLI sources are downloaded and compiled during project generation phase. To build library from sources for ARC EM SDP platform, add `BUILD_ARC_MLI=true` option to make command: - make -f tensorflow/lite/micro/tools/make/Makefile TARGET=arc_emsdp BUILD_ARC_MLI=true generate_person_detection_int8_make_project. 
+ make -f tensorflow/lite/micro/tools/make/Makefile TARGET=arc_emsdp BUILD_ARC_MLI=true generate_person_detection_int8_make_project If an application exclusively uses accelerated MLI kernel implementations, one can strip out TFLM reference kernel implementations to reduce code size of application. Build application with `MLI_ONLY=true` option in generated project (after the project was built): diff --git a/tensorflow/lite/micro/kernels/arc_mli/conv_slicing_test.cc b/tensorflow/lite/micro/kernels/arc_mli/conv_slicing_test.cc index 27e30856f6c..9eb9d6499dd 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/conv_slicing_test.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/conv_slicing_test.cc @@ -13,6 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +// This test checks that slicing logic doesn`t affect result of convolution +// kernel +// +// This test doesn`t replace default convolution test +// (tensorflow/lite/micro/kernels/conv_test.cc). It is added to the whole +// testset only in case MLI for ARC platform is used during generation (which is +// handled in arc_mli.inc). So such tests won`t be generated for other +// platforms. + #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/kernels/all_ops_resolver.h" diff --git a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv_slicing_test.cc b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv_slicing_test.cc index fb9dd46c1e4..e6a87ff82e6 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv_slicing_test.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv_slicing_test.cc @@ -13,6 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +// This test checks that slicing logic doesn`t affect result of depthwise +// convolution kernel +// +// This test doesn`t replace default depthwise convolution test +// (tensorflow/lite/micro/kernels/depthwise_conv_test.cc). It is added to the +// whole testset only in case MLI for ARC platform is used during generation +// (which is handled in arc_mli.inc). So such tests won`t be generated for other +// platforms. + #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" diff --git a/tensorflow/lite/micro/kernels/arc_mli/fully_connected_slicing_test.cc b/tensorflow/lite/micro/kernels/arc_mli/fully_connected_slicing_test.cc index 78cb2873c54..0bd264a5f1b 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/fully_connected_slicing_test.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/fully_connected_slicing_test.cc @@ -13,6 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +// This test checks that slicing logic doesn`t affect result of fully +// connected kernel +// +// This test doesn`t replace default fully connected test +// (tensorflow/lite/micro/kernels/fully_connected_test.cc). It is added to the +// whole testset only in case MLI for ARC platform is used during generation +// (which is handled in arc_mli.inc). So such tests won`t be generated for other +// platforms. 
+ #include #include "tensorflow/lite/c/builtin_op_data.h" diff --git a/tensorflow/lite/micro/kernels/arc_mli/pooling_slicing_test.cc b/tensorflow/lite/micro/kernels/arc_mli/pooling_slicing_test.cc index 63737a41791..381420f1f7d 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/pooling_slicing_test.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/pooling_slicing_test.cc @@ -13,6 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +// This test checks that slicing logic doesn`t affect result of pooling kernels +// +// This test doesn`t replace default pooling test +// (tensorflow/lite/micro/kernels/pooling.cc). It is added to the +// whole testset only in case MLI for ARC platform is used during generation +// (which is handled in arc_mli.inc). So such tests won`t be generated for other +// platforms. + #include #include "tensorflow/lite/c/builtin_op_data.h" diff --git a/tensorflow/lite/micro/tools/make/third_party_downloads.inc b/tensorflow/lite/micro/tools/make/third_party_downloads.inc index d90f8548f31..91f3f1b5263 100644 --- a/tensorflow/lite/micro/tools/make/third_party_downloads.inc +++ b/tensorflow/lite/micro/tools/make/third_party_downloads.inc @@ -71,11 +71,11 @@ PERSON_MODEL_MD5 := "fe2934bd0788f1dcc7af3f0a954542ab" PERSON_MODEL_INT8_URL := "https://storage.googleapis.com/download.tensorflow.org/data/tf_lite_micro_person_data_int8_grayscale_2020_01_13.zip" PERSON_MODEL_INT8_MD5 := "8a7d2c70325f53136faea6dde517b8cc" -EMBARC_MLI_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/archive/7026ad09bb7f967324eb29e069f776bc44a08886.zip" -EMBARC_MLI_MD5 := "7eebd730db79c6834399f87e509115fb" +EMBARC_MLI_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/archive/58284867ca52d1f43b25045e8601999d7359d986.zip" +EMBARC_MLI_MD5 := "2bf4982a327fdaa9d475803ce014d1ef" 
-EMBARC_MLI_PRE_COMPILED_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/releases/download/Release_1.1_RC1/embARC_MLI_package.zip" -EMBARC_MLI_PRE_COMPILED_MD5 := "a66d6afff8daeb40bd3a99c42de048ab" +EMBARC_MLI_PRE_COMPILED_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/releases/download/Release_1.1_RC2/embARC_MLI_package.zip" +EMBARC_MLI_PRE_COMPILED_MD5 := "a95ff9e0370434484f14e7e4114327f6" XTENSA_HIFI4_URL :="https://github.com/foss-xtensa/nnlib-hifi4/raw/master/archive/xa_nnlib_04_07.zip" XTENSA_HIFI4_MD5 :="f234764928f9a42901df33a27e118c8b" From 6e2654d882563116c2965215818b59c3abc8cc23 Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Tue, 12 May 2020 21:35:27 +0300 Subject: [PATCH 44/45] Removed named section pragmas from shared example code --- .../examples/person_detection_experimental/main_functions.cc | 2 -- .../person_detection_experimental/person_detection_test.cc | 2 -- 2 files changed, 4 deletions(-) diff --git a/tensorflow/lite/micro/examples/person_detection_experimental/main_functions.cc b/tensorflow/lite/micro/examples/person_detection_experimental/main_functions.cc index 552b52c9c51..719f16b2d36 100644 --- a/tensorflow/lite/micro/examples/person_detection_experimental/main_functions.cc +++ b/tensorflow/lite/micro/examples/person_detection_experimental/main_functions.cc @@ -42,9 +42,7 @@ TfLiteTensor* input = nullptr; // An area of memory to use for input, output, and intermediate arrays. constexpr int kTensorArenaSize = 125 * 1024; -#pragma Bss(".tensor_arena") static uint8_t tensor_arena[kTensorArenaSize]; -#pragma Bss() } // namespace // The name of this function is important for Arduino compatibility. 
diff --git a/tensorflow/lite/micro/examples/person_detection_experimental/person_detection_test.cc b/tensorflow/lite/micro/examples/person_detection_experimental/person_detection_test.cc index 9c7212648cc..b0979735d4f 100644 --- a/tensorflow/lite/micro/examples/person_detection_experimental/person_detection_test.cc +++ b/tensorflow/lite/micro/examples/person_detection_experimental/person_detection_test.cc @@ -28,9 +28,7 @@ limitations under the License. // Create an area of memory to use for input, output, and intermediate arrays. constexpr int tensor_arena_size = 125 * 1024; -#pragma Bss(".tensor_arena") uint8_t tensor_arena[tensor_arena_size]; -#pragma Bss() TF_LITE_MICRO_TESTS_BEGIN From 6ccf21ef6d284fc1fc262789523cbece1b22ddad Mon Sep 17 00:00:00 2001 From: Dmitry Zakharov Date: Fri, 15 May 2020 12:46:49 +0300 Subject: [PATCH 45/45] =?UTF-8?q?Cleanup=20of=20TODO=E2=80=99s=20in=20ARC?= =?UTF-8?q?=20specific=20files?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tensorflow/lite/micro/arc_emsdp/debug_log.cc | 1 - .../person_detection_experimental/arc_emsdp/emsdp.lcf | 3 --- tensorflow/lite/micro/kernels/arc_mli/conv.cc | 1 - tensorflow/lite/micro/kernels/arc_mli/conv_slicing_test.cc | 1 - tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc | 4 +--- .../micro/kernels/arc_mli/depthwise_conv_slicing_test.cc | 1 - tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc | 1 - tensorflow/lite/micro/kernels/arc_mli/pooling.cc | 1 - tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.cc | 5 ++--- 9 files changed, 3 insertions(+), 15 deletions(-) diff --git a/tensorflow/lite/micro/arc_emsdp/debug_log.cc b/tensorflow/lite/micro/arc_emsdp/debug_log.cc index b3b25f88ac1..fa9909f7372 100644 --- a/tensorflow/lite/micro/arc_emsdp/debug_log.cc +++ b/tensorflow/lite/micro/arc_emsdp/debug_log.cc @@ -55,7 +55,6 @@ typedef volatile struct dw_uart_reg { // to organize blocking loop for printing symbols. 
No input and no IRQ handling. // See embarc_osp repository for full EMSDP uart driver. // (https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_osp) -// TODO: Consider U-Boot API to do it in a less "hacky" way. void DbgUartSendStr(const char* s) { DW_UART_REG* uart_reg_ptr = (DW_UART_REG*)(EMSDP_DBG_UART_BASE); const char* src = s; diff --git a/tensorflow/lite/micro/examples/person_detection_experimental/arc_emsdp/emsdp.lcf b/tensorflow/lite/micro/examples/person_detection_experimental/arc_emsdp/emsdp.lcf index 2d7954217d3..95732d2a8b9 100644 --- a/tensorflow/lite/micro/examples/person_detection_experimental/arc_emsdp/emsdp.lcf +++ b/tensorflow/lite/micro/examples/person_detection_experimental/arc_emsdp/emsdp.lcf @@ -1,7 +1,6 @@ # Difference with common EMSDP LCF file (to reduce data access time): # - move data from external PSRAM to on-chip memory # - move text from SRAM to ICCM -# - TODO: Move tensor arena to DCCM to reduce data flow between fast and extrnal memory # # CCMWRAP memory regions indicate unusable portions of the address space # due to CCM memory wrapping into upper addresses beyond its size @@ -46,8 +45,6 @@ SECTIONS { } > SRAM GROUP BLOCK(4): { -# TODO: Move tensor arena to DCCM when it will be possible -# .tensor_arena? : {} .Zdata? : {} .heap? ALIGN(4) SIZE(DEFINED _HEAPSIZE?_HEAPSIZE:8K): {} .stack ALIGN(4) SIZE(DEFINED _STACKSIZE?_STACKSIZE:8K): {} diff --git a/tensorflow/lite/micro/kernels/arc_mli/conv.cc b/tensorflow/lite/micro/kernels/arc_mli/conv.cc index 4a2676821d9..b80d220a1cc 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/conv.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/conv.cc @@ -52,7 +52,6 @@ struct OpData { int output_shift; // Per channel output multiplier and shift. - // TODO(b/141139247): Allocate these dynamically when possible. 
int32_t per_channel_output_multiplier[kMaxChannels]; int32_t per_channel_output_shift[kMaxChannels]; diff --git a/tensorflow/lite/micro/kernels/arc_mli/conv_slicing_test.cc b/tensorflow/lite/micro/kernels/arc_mli/conv_slicing_test.cc index 9eb9d6499dd..7703bec3602 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/conv_slicing_test.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/conv_slicing_test.cc @@ -256,7 +256,6 @@ void TestConvQuantizedPerChannel( CreateQuantizedTensor(output_data, output_dims, output_scale, output_zero_point, "output_tensor"); - // TODO(njeff): Affine Quantization Params should be set on tensor creation. float input_scales[] = {1, input_scale}; int input_zero_points[] = {1, input_zero_point}; TfLiteAffineQuantization input_quant = {FloatArrayFromFloats(input_scales), diff --git a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc index 2aad76bc042..e46f4766fce 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc @@ -54,7 +54,6 @@ struct OpData { int output_shift; // Per channel output multiplier and shift. - // TODO(b/141139247): Allocate these dynamically when possible. int32_t per_channel_output_multiplier[kMaxChannels]; int32_t per_channel_output_shift[kMaxChannels]; @@ -74,9 +73,8 @@ bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input, // MLI optimized version only supports int8 dataype, dilation factor of 1 and // per-axis quantization of weights (no broadcasting/per-tensor) - // TODO: ((in_ch == filters_num) || (in_ch == 1)) is a forbidding of + // (in_ch == filters_num) || (in_ch == 1)) is a forbidding of // channel multiplier logic for multichannel input. 
- // To be removed after it will be supported in MLI bool ret_val = (filter->type == kTfLiteInt8) && (input->type == kTfLiteInt8) && (bias->type == kTfLiteInt32) && diff --git a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv_slicing_test.cc b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv_slicing_test.cc index e6a87ff82e6..03a9fcbb30b 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv_slicing_test.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv_slicing_test.cc @@ -152,7 +152,6 @@ void TestDepthwiseConvQuantizedPerChannel( CreateQuantizedTensor(output_data, output_dims, output_scale, input_zero_point, "output_tensor"); - // TODO(njeff): Affine Quantization Params should be set on tensor creation. float input_scales[] = {1, input_scale}; int input_zero_points[] = {1, input_zero_point}; TfLiteAffineQuantization input_quant = {FloatArrayFromFloats(input_scales), diff --git a/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc b/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc index 89eae356f51..c2e35dbc8dc 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc @@ -236,7 +236,6 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, op_params.weights_offset = -filter->params.zero_point; op_params.output_offset = output->params.zero_point; op_params.output_multiplier = data->output_multiplier; - // TODO(b/138810107): Figure out whether output shift should be inverted op_params.output_shift = -data->output_shift; op_params.quantized_activation_min = data->output_activation_min; op_params.quantized_activation_max = data->output_activation_max; diff --git a/tensorflow/lite/micro/kernels/arc_mli/pooling.cc b/tensorflow/lite/micro/kernels/arc_mli/pooling.cc index 79deacc23d9..0d79fc5dbcf 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/pooling.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/pooling.cc @@ -46,7 +46,6 @@ enum 
MliPoolingType { AveragePooling = 0, MaxPooling = 1 }; bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input, const TfLitePoolParams* params) { // MLI optimized version only supports int8 dataype and no fused Relu - // TODO: subject to add mli_saturate kernel return (input->type == kTfLiteInt8 && params->activation == kTfLiteActNone); } diff --git a/tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.cc b/tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.cc index 097908e30ab..1518513649f 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.cc @@ -54,7 +54,6 @@ static void get_arc_two_buffer_sizes(int request_size_1, int request_size_2, int } else { // In case only one buffer is available, // use only the max buffer, and split it. - // TODO compute optimal split ratio based on request ratio. *grant_size_1 = maxavailable / 2; *grant_size_2 = maxavailable / 2; } @@ -228,7 +227,7 @@ TfLiteStatus arc_scratch_buffer_calc_slice_size_io( const int padding_bot, int *in_slice_height, int *out_slice_height) { - const int height_dimension = 1; // todo: compute from rank + const int height_dimension = 1; const int in_height = in->shape[height_dimension]; const int out_height = out->shape[height_dimension]; const int line_size_in = mli_hlp_count_elem_num(in, height_dimension + 1) * mli_hlp_tensor_element_size(in); @@ -250,7 +249,7 @@ TfLiteStatus arc_scratch_buffer_calc_slice_size_io( // in this case only two slices are needed, so both could benefit from padding. take the MIN to get the worst case. 
max_out_lines_for_input = (max_lines_in + std::min(padding_top, padding_bot) - kernel_height + 1) / stride_height; } else { - max_out_lines_for_input = (max_lines_in - kernel_height + 1) / stride_height; // TODO add padding exceptions and test by makin fit=false; + max_out_lines_for_input = (max_lines_in - kernel_height + 1) / stride_height; } // Ten compute how many ouput lines fit into the output tensor. max_lines_out = std::min(out_height, static_cast<int>(out->capacity) / line_size_out);