From dbedeebd8d8bcade1c2606363aebb0a481822197 Mon Sep 17 00:00:00 2001
From: Advait Jain
Date: Wed, 7 Oct 2020 13:43:26 -0700
Subject: [PATCH] Switch TFLM DepthwiseConv2D kernels to flat namespace.

This is incremental progress towards a flat namespace for TFLM.
See https://abseil.io/tips/130 for more context.

Best effort change to the arc_mli implementation. All the others
(reference, cmsis-nn and xtensa_hifimini) build.

PiperOrigin-RevId: 335944013
Change-Id: I80b7cb5c6649f9036550417526e872635160556e
---
 .../micro/kernels/arc_mli/depthwise_conv.cc   |  15 +--
 .../micro/kernels/cmsis-nn/depthwise_conv.cc  |  15 +--
 .../lite/micro/kernels/depthwise_conv.cc      |  15 +--
 .../lite/micro/kernels/depthwise_conv_test.cc |   3 +-
 tensorflow/lite/micro/kernels/micro_ops.h     |   2 +-
 .../kernels/xtensa_hifimini/depthwise_conv.cc | 118 ++++++++----
 .../lite/micro/micro_mutable_op_resolver.h    |   3 +-
 7 files changed, 68 insertions(+), 103 deletions(-)

diff --git a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc
index ae98b996987..d30a5308708 100644
--- a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc
+++ b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc
@@ -31,9 +31,6 @@ limitations under the License.
 #include "tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h"

 namespace tflite {
-namespace ops {
-namespace micro {
-namespace depthwise_conv {
 namespace {

 constexpr int kInputTensor = 0;
@@ -127,8 +124,6 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
   return kTfLiteOk;
 }

-}  // namespace
-
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
   return context->AllocatePersistentBuffer(context, sizeof(OpData));
@@ -514,19 +509,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   return kTfLiteOk;
 }

-}  // namespace depthwise_conv
+}  // namespace

 TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
-  return {/*init=*/depthwise_conv::Init,
+  return {/*init=*/Init,
           /*free=*/nullptr,
-          /*prepare=*/depthwise_conv::Prepare,
-          /*invoke=*/depthwise_conv::Eval,
+          /*prepare=*/Prepare,
+          /*invoke=*/Eval,
           /*profiling_string=*/nullptr,
           /*builtin_code=*/0,
           /*custom_name=*/nullptr,
           /*version=*/0};
 }

-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc b/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc
index bc91f5f94c3..3a59b71c985 100644
--- a/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc
+++ b/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc
@@ -28,9 +28,6 @@ limitations under the License.
#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { -namespace ops { -namespace micro { -namespace depthwise_conv { namespace { constexpr int kInputTensor = 0; @@ -104,8 +101,6 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, return kTfLiteOk; } -} // namespace - void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); return context->AllocatePersistentBuffer(context, sizeof(OpData)); @@ -464,19 +459,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace depthwise_conv +} // namespace TfLiteRegistration Register_DEPTHWISE_CONV_2D() { - return {/*init=*/depthwise_conv::Init, + return {/*init=*/Init, /*free=*/nullptr, - /*prepare=*/depthwise_conv::Prepare, - /*invoke=*/depthwise_conv::Eval, + /*prepare=*/Prepare, + /*invoke=*/Eval, /*profiling_string=*/nullptr, /*builtin_code=*/0, /*custom_name=*/nullptr, /*version=*/0}; } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/tensorflow/lite/micro/kernels/depthwise_conv.cc b/tensorflow/lite/micro/kernels/depthwise_conv.cc index cfb457c2016..85b51233e90 100644 --- a/tensorflow/lite/micro/kernels/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/depthwise_conv.cc @@ -27,9 +27,6 @@ limitations under the License. #include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { -namespace ops { -namespace micro { -namespace depthwise_conv { namespace { constexpr int kInputTensor = 0; @@ -101,8 +98,6 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, return kTfLiteOk; } -} // namespace - void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); return context->AllocatePersistentBuffer(context, sizeof(OpData)); @@ -315,19 +310,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace depthwise_conv +} // namespace TfLiteRegistration Register_DEPTHWISE_CONV_2D() { - return {/*init=*/depthwise_conv::Init, + return {/*init=*/Init, /*free=*/nullptr, - /*prepare=*/depthwise_conv::Prepare, - /*invoke=*/depthwise_conv::Eval, + /*prepare=*/Prepare, + /*invoke=*/Eval, /*profiling_string=*/nullptr, /*builtin_code=*/0, /*custom_name=*/nullptr, /*version=*/0}; } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/tensorflow/lite/micro/kernels/depthwise_conv_test.cc b/tensorflow/lite/micro/kernels/depthwise_conv_test.cc index e16e9f893cb..d4be5b6b1da 100644 --- a/tensorflow/lite/micro/kernels/depthwise_conv_test.cc +++ b/tensorflow/lite/micro/kernels/depthwise_conv_test.cc @@ -47,8 +47,7 @@ TfLiteStatus ValidateDepthwiseConvGoldens( int outputs_array_data[] = {1, 3}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - const TfLiteRegistration registration = - tflite::ops::micro::Register_DEPTHWISE_CONV_2D(); + const TfLiteRegistration registration = Register_DEPTHWISE_CONV_2D(); micro::KernelRunner runner( registration, tensors, tensors_size, inputs_array, outputs_array, reinterpret_cast(conv_params), micro_test::reporter); diff --git a/tensorflow/lite/micro/kernels/micro_ops.h b/tensorflow/lite/micro/kernels/micro_ops.h index b21a12109b3..3210a24c7f5 100644 --- a/tensorflow/lite/micro/kernels/micro_ops.h +++ b/tensorflow/lite/micro/kernels/micro_ops.h @@ -32,6 +32,7 @@ namespace tflite { // have their Register function declarations in the tflite namespace. 
 TfLiteRegistration Register_CONV_2D();
+TfLiteRegistration Register_DEPTHWISE_CONV_2D();
 TfLiteRegistration Register_SHAPE();

 namespace ops {
@@ -47,7 +48,6 @@ TfLiteRegistration Register_CEIL();
 TfLiteRegistration* Register_CIRCULAR_BUFFER();
 TfLiteRegistration Register_CONCATENATION();
 TfLiteRegistration Register_COS();
-TfLiteRegistration Register_DEPTHWISE_CONV_2D();
 TfLiteRegistration Register_DEQUANTIZE();
 TfLiteRegistration Register_EQUAL();
 TfLiteRegistration Register_FLOOR();
diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc
index 0d79d56ad0c..73da39cf23b 100644
--- a/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc
+++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc
@@ -28,11 +28,38 @@ limitations under the License.
 #include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h"

 namespace tflite {
-namespace ops {
-namespace micro {
-namespace depthwise_conv {
-namespace xtensa {
-namespace hifimini {
+namespace {
+
+constexpr int kInputTensor = 0;
+constexpr int kFilterTensor = 1;
+constexpr int kBiasTensor = 2;
+constexpr int kOutputTensor = 0;
+
+// Depthwise conv is quantized along dimension 3:
+// https://www.tensorflow.org/lite/performance/quantization_spec
+constexpr int kDepthwiseConvQuantizedDimension = 3;
+
+struct OpData {
+  TfLitePaddingValues padding;
+  // The scaling factor from input to output (aka the 'real multiplier') can
+  // be represented as a fixed point multiplier plus a left shift.
+  int32_t output_multiplier;
+  int output_shift;
+
+  // Cached tensor zero point values for quantized operations.
+  int32_t input_zero_point;
+  int32_t output_zero_point;
+
+  // Per channel output multiplier and shift.
+  // TODO(b/141139247): Allocate these dynamically when possible.
+  int32_t* per_channel_output_multiplier;
+  int32_t* per_channel_output_shift;
+
+  // The range of the fused activation layer. For example for kNone and
+  // uint8_t these would be 0 and 255.
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+};

 inline void DepthwiseConvPerChannel(
     const DepthwiseParams& params, const int32_t* output_multiplier,
@@ -145,9 +172,10 @@ inline void DepthwiseConvPerChannel(

         // Apply quantized multiplier and accumulate result at 48bit
         // alignment:
-        acc_56 = micro::xtensa::hifimini::MultiplyByQuantizedMultiplier(
-            acc_24x2, output_multiplier[output_channel],
-            output_shift[output_channel]);
+        acc_56 =
+            ops::micro::xtensa::hifimini::MultiplyByQuantizedMultiplier(
+                acc_24x2, output_multiplier[output_channel],
+                output_shift[output_channel]);

         // Add output offset, cap activation, and assign to the output:
         acc_56 = AE_ADDQ56(acc_56, output_offset_56);
@@ -260,11 +288,11 @@ inline void DepthwiseConv4x32MatchingInputAndFilter(

     // Apply quantized multiplier and accumulate result at 48bit
     // alignment:
-    block_0_acc = micro::xtensa::hifimini::MultiplyByQuantizedMultiplier(
+    block_0_acc = ops::micro::xtensa::hifimini::MultiplyByQuantizedMultiplier(
         acc_24x2_0, mult, shift);
     // Apply quantized multiplier and accumulate result at 48bit
     // alignment:
-    block_1_acc = micro::xtensa::hifimini::MultiplyByQuantizedMultiplier(
+    block_1_acc = ops::micro::xtensa::hifimini::MultiplyByQuantizedMultiplier(
         acc_24x2_1, mult, shift);

     // Add output offset, cap activation, and assign to the output:
@@ -280,42 +308,6 @@ inline void DepthwiseConv4x32MatchingInputAndFilter(
   }
 }

-}  // namespace hifimini
-}  // namespace xtensa
-
-namespace {
-
-constexpr int kInputTensor = 0;
-constexpr int kFilterTensor = 1;
-constexpr int kBiasTensor = 2;
-constexpr int kOutputTensor = 0;
-
-// Depthwise conv is quantized along dimension 3:
-// https://www.tensorflow.org/lite/performance/quantization_spec
-constexpr int kDepthwiseConvQuantizedDimension = 3;
-
-struct OpData {
-  TfLitePaddingValues padding;
-  // The scaling factor from input to output (aka the 'real multiplier') can
-  // be represented as a fixed point multiplier plus a left shift.
-  int32_t output_multiplier;
-  int output_shift;
-
-  // Cached tensor zero point values for quantized operations.
-  int32_t input_zero_point;
-  int32_t output_zero_point;
-
-  // Per channel output multiplier and shift.
-  // TODO(b/141139247): Allocate these dynamically when possible.
-  int32_t* per_channel_output_multiplier;
-  int32_t* per_channel_output_shift;
-
-  // The range of the fused activation layer. For example for kNone and
-  // uint8_t these would be 0 and 255.
-  int32_t output_activation_min;
-  int32_t output_activation_max;
-};
-
 TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
                              TfLiteDepthwiseConvParams* params, int width,
                              int height, int filter_width, int filter_height,
@@ -353,8 +345,6 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
   return kTfLiteOk;
 }

-}  // namespace
-
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
   return context->AllocatePersistentBuffer(context, sizeof(OpData));
@@ -437,16 +427,16 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
   op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
   op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();

-  xtensa::hifimini::DepthwiseConvPerChannel(
-      op_params, data->per_channel_output_multiplier,
-      data->per_channel_output_shift, tflite::micro::GetTensorShape(input),
-      tflite::micro::GetTensorData<int8_t>(input),
-      tflite::micro::GetTensorShape(filter),
-      tflite::micro::GetTensorData<int8_t>(filter),
-      tflite::micro::GetTensorShape(bias),
-      tflite::micro::GetTensorData<int32_t>(bias),
-      tflite::micro::GetTensorShape(output),
-      tflite::micro::GetTensorData<int8_t>(output));
+  DepthwiseConvPerChannel(op_params, data->per_channel_output_multiplier,
+                          data->per_channel_output_shift,
+                          tflite::micro::GetTensorShape(input),
+                          tflite::micro::GetTensorData<int8_t>(input),
+                          tflite::micro::GetTensorShape(filter),
+                          tflite::micro::GetTensorData<int8_t>(filter),
+                          tflite::micro::GetTensorShape(bias),
+                          tflite::micro::GetTensorData<int32_t>(bias),
+                          tflite::micro::GetTensorShape(output),
+                          tflite::micro::GetTensorData<int8_t>(output));
 }

 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
@@ -473,7 +463,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   if (input_dims[0] == 1 && input_dims[1] == 4 && input_dims[2] == 1 &&
       input_dims[3] == 32 && filter_dims[0] == 1 && filter_dims[1] == 4 &&
       filter_dims[2] == 1 && filter_dims[3] == 32) {
-    xtensa::hifimini::DepthwiseConv4x32MatchingInputAndFilter(
+    DepthwiseConv4x32MatchingInputAndFilter(
         -op_data->input_zero_point, op_data->output_zero_point,
         std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max(),
         op_data->per_channel_output_multiplier,
@@ -500,19 +490,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   return kTfLiteOk;
 }

-}  // namespace depthwise_conv
+}  // namespace

 TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
-  return {/*init=*/depthwise_conv::Init,
+  return {/*init=*/Init,
           /*free=*/nullptr,
-          /*prepare=*/depthwise_conv::Prepare,
-          /*invoke=*/depthwise_conv::Eval,
+          /*prepare=*/Prepare,
+          /*invoke=*/Eval,
           /*profiling_string=*/nullptr,
           /*builtin_code=*/0,
           /*custom_name=*/nullptr,
           /*version=*/0};
 }

-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/tensorflow/lite/micro/micro_mutable_op_resolver.h b/tensorflow/lite/micro/micro_mutable_op_resolver.h
index c8475aadd19..d1d516573ef 100644
--- a/tensorflow/lite/micro/micro_mutable_op_resolver.h
+++ b/tensorflow/lite/micro/micro_mutable_op_resolver.h
@@ -160,8 +160,7 @@ class MicroMutableOpResolver : public MicroOpResolver {

   TfLiteStatus AddDepthwiseConv2D() {
     return AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D,
-                      tflite::ops::micro::Register_DEPTHWISE_CONV_2D(),
-                      ParseDepthwiseConv2D);
+                      Register_DEPTHWISE_CONV_2D(), ParseDepthwiseConv2D);
   }

   TfLiteStatus AddDequantize() {
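
Every per-kernel hunk above applies the same mechanical change: the Init/Prepare/Eval hooks move out of the nested tflite::ops::micro::depthwise_conv namespaces into an unnamed namespace, and Register_DEPTHWISE_CONV_2D() is exposed directly from tflite::. Below is a minimal, self-contained C++ sketch of that before/after structure; note that Registration, Context, and Node are simplified stand-ins for the real TfLiteRegistration, TfLiteContext, and TfLiteNode types, and the kernel bodies are stubs.

// Sketch only: simplified stand-ins, not the real TFLM definitions.
#include <cstddef>

struct Context;  // stand-in for TfLiteContext
struct Node;     // stand-in for TfLiteNode

struct Registration {  // stand-in for TfLiteRegistration
  void* (*init)(Context* context, const char* buffer, std::size_t length);
  int (*prepare)(Context* context, Node* node);
  int (*invoke)(Context* context, Node* node);
};

namespace tflite {
namespace {
// Before this patch the hooks lived in
// tflite::ops::micro::depthwise_conv::{Init, Prepare, Eval}. An unnamed
// namespace gives them internal linkage instead, so every kernel file can
// use the same short names without colliding at link time.

void* Init(Context* context, const char* buffer, std::size_t length) {
  return nullptr;  // Real kernels allocate a persistent OpData buffer here.
}

int Prepare(Context* context, Node* node) { return 0; }  // stub
int Eval(Context* context, Node* node) { return 0; }     // stub

}  // namespace

// The only externally visible symbol sits directly in tflite::, so callers
// write tflite::Register_DEPTHWISE_CONV_2D() rather than
// tflite::ops::micro::Register_DEPTHWISE_CONV_2D().
Registration Register_DEPTHWISE_CONV_2D() {
  return {/*init=*/Init, /*prepare=*/Prepare, /*invoke=*/Eval};
}

}  // namespace tflite

The internal linkage is the point of the abseil tip cited in the commit message: only the registration symbol is visible to other translation units. The one wrinkle, visible in the xtensa_hifimini hunks, is that helpers such as MultiplyByQuantizedMultiplier still live in ops::micro::xtensa::hifimini, so calls from the newly flattened file spell out the full qualification until those utilities are migrated as well.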