From dbedeebd8d8bcade1c2606363aebb0a481822197 Mon Sep 17 00:00:00 2001
From: Advait Jain
Date: Wed, 7 Oct 2020 13:43:26 -0700
Subject: [PATCH] Switch TFLM DepthwiseConv2D kernels to flat namespace.

This is incremental progress towards a flat namespace for TFLM.
See https://abseil.io/tips/130 for more context.

Best effort change to the arc_mli implementation. All the others
(reference, cmsis-nn and xtensa_hifimini) build.

PiperOrigin-RevId: 335944013
Change-Id: I80b7cb5c6649f9036550417526e872635160556e
---
 .../micro/kernels/arc_mli/depthwise_conv.cc   |  15 +--
 .../micro/kernels/cmsis-nn/depthwise_conv.cc  |  15 +--
 .../lite/micro/kernels/depthwise_conv.cc      |  15 +--
 .../lite/micro/kernels/depthwise_conv_test.cc |   3 +-
 tensorflow/lite/micro/kernels/micro_ops.h     |   2 +-
 .../kernels/xtensa_hifimini/depthwise_conv.cc | 118 ++++++++----
 .../lite/micro/micro_mutable_op_resolver.h    |   3 +-
 7 files changed, 68 insertions(+), 103 deletions(-)

diff --git a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc
index ae98b996987..d30a5308708 100644
--- a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc
+++ b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc
@@ -31,9 +31,6 @@ limitations under the License.
 #include "tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h"

 namespace tflite {
-namespace ops {
-namespace micro {
-namespace depthwise_conv {
 namespace {

 constexpr int kInputTensor = 0;
@@ -127,8 +124,6 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
   return kTfLiteOk;
 }

-}  // namespace
-
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
   return context->AllocatePersistentBuffer(context, sizeof(OpData));
@@ -514,19 +509,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   return kTfLiteOk;
 }

-}  // namespace depthwise_conv
+}  // namespace

 TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
-  return {/*init=*/depthwise_conv::Init,
+  return {/*init=*/Init,
           /*free=*/nullptr,
-          /*prepare=*/depthwise_conv::Prepare,
-          /*invoke=*/depthwise_conv::Eval,
+          /*prepare=*/Prepare,
+          /*invoke=*/Eval,
           /*profiling_string=*/nullptr,
           /*builtin_code=*/0,
           /*custom_name=*/nullptr,
           /*version=*/0};
 }

-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc b/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc
index bc91f5f94c3..3a59b71c985 100644
--- a/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc
+++ b/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc
@@ -28,9 +28,6 @@ limitations under the License.
#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { -namespace ops { -namespace micro { -namespace depthwise_conv { namespace { constexpr int kInputTensor = 0; @@ -104,8 +101,6 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, return kTfLiteOk; } -} // namespace - void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); return context->AllocatePersistentBuffer(context, sizeof(OpData)); @@ -464,19 +459,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace depthwise_conv +} // namespace TfLiteRegistration Register_DEPTHWISE_CONV_2D() { - return {/*init=*/depthwise_conv::Init, + return {/*init=*/Init, /*free=*/nullptr, - /*prepare=*/depthwise_conv::Prepare, - /*invoke=*/depthwise_conv::Eval, + /*prepare=*/Prepare, + /*invoke=*/Eval, /*profiling_string=*/nullptr, /*builtin_code=*/0, /*custom_name=*/nullptr, /*version=*/0}; } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/tensorflow/lite/micro/kernels/depthwise_conv.cc b/tensorflow/lite/micro/kernels/depthwise_conv.cc index cfb457c2016..85b51233e90 100644 --- a/tensorflow/lite/micro/kernels/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/depthwise_conv.cc @@ -27,9 +27,6 @@ limitations under the License. #include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { -namespace ops { -namespace micro { -namespace depthwise_conv { namespace { constexpr int kInputTensor = 0; @@ -101,8 +98,6 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, return kTfLiteOk; } -} // namespace - void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); return context->AllocatePersistentBuffer(context, sizeof(OpData)); @@ -315,19 +310,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -} // namespace depthwise_conv +} // namespace TfLiteRegistration Register_DEPTHWISE_CONV_2D() { - return {/*init=*/depthwise_conv::Init, + return {/*init=*/Init, /*free=*/nullptr, - /*prepare=*/depthwise_conv::Prepare, - /*invoke=*/depthwise_conv::Eval, + /*prepare=*/Prepare, + /*invoke=*/Eval, /*profiling_string=*/nullptr, /*builtin_code=*/0, /*custom_name=*/nullptr, /*version=*/0}; } -} // namespace micro -} // namespace ops } // namespace tflite diff --git a/tensorflow/lite/micro/kernels/depthwise_conv_test.cc b/tensorflow/lite/micro/kernels/depthwise_conv_test.cc index e16e9f893cb..d4be5b6b1da 100644 --- a/tensorflow/lite/micro/kernels/depthwise_conv_test.cc +++ b/tensorflow/lite/micro/kernels/depthwise_conv_test.cc @@ -47,8 +47,7 @@ TfLiteStatus ValidateDepthwiseConvGoldens( int outputs_array_data[] = {1, 3}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - const TfLiteRegistration registration = - tflite::ops::micro::Register_DEPTHWISE_CONV_2D(); + const TfLiteRegistration registration = Register_DEPTHWISE_CONV_2D(); micro::KernelRunner runner( registration, tensors, tensors_size, inputs_array, outputs_array, reinterpret_cast(conv_params), micro_test::reporter); diff --git a/tensorflow/lite/micro/kernels/micro_ops.h b/tensorflow/lite/micro/kernels/micro_ops.h index b21a12109b3..3210a24c7f5 100644 --- a/tensorflow/lite/micro/kernels/micro_ops.h +++ b/tensorflow/lite/micro/kernels/micro_ops.h @@ -32,6 +32,7 @@ namespace tflite { // have their Register function declarations in the tflite namespace. 
 TfLiteRegistration Register_CONV_2D();
+TfLiteRegistration Register_DEPTHWISE_CONV_2D();
 TfLiteRegistration Register_SHAPE();

 namespace ops {
@@ -47,7 +48,6 @@ TfLiteRegistration Register_CEIL();
 TfLiteRegistration* Register_CIRCULAR_BUFFER();
 TfLiteRegistration Register_CONCATENATION();
 TfLiteRegistration Register_COS();
-TfLiteRegistration Register_DEPTHWISE_CONV_2D();
 TfLiteRegistration Register_DEQUANTIZE();
 TfLiteRegistration Register_EQUAL();
 TfLiteRegistration Register_FLOOR();
diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc
index 0d79d56ad0c..73da39cf23b 100644
--- a/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc
+++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc
@@ -28,11 +28,38 @@ limitations under the License.
 #include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h"

 namespace tflite {
-namespace ops {
-namespace micro {
-namespace depthwise_conv {
-namespace xtensa {
-namespace hifimini {
+namespace {
+
+constexpr int kInputTensor = 0;
+constexpr int kFilterTensor = 1;
+constexpr int kBiasTensor = 2;
+constexpr int kOutputTensor = 0;
+
+// Depthwise conv is quantized along dimension 3:
+// https://www.tensorflow.org/lite/performance/quantization_spec
+constexpr int kDepthwiseConvQuantizedDimension = 3;
+
+struct OpData {
+  TfLitePaddingValues padding;
+  // The scaling factor from input to output (aka the 'real multiplier') can
+  // be represented as a fixed point multiplier plus a left shift.
+  int32_t output_multiplier;
+  int output_shift;
+
+  // Cached tensor zero point values for quantized operations.
+  int32_t input_zero_point;
+  int32_t output_zero_point;
+
+  // Per channel output multiplier and shift.
+  // TODO(b/141139247): Allocate these dynamically when possible.
+  int32_t* per_channel_output_multiplier;
+  int32_t* per_channel_output_shift;
+
+  // The range of the fused activation layer. For example for kNone and
+  // uint8_t these would be 0 and 255.
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+};

 inline void DepthwiseConvPerChannel(
     const DepthwiseParams& params, const int32_t* output_multiplier,
@@ -145,9 +172,10 @@ inline void DepthwiseConvPerChannel(

         // Apply quantized multiplier and accumulate result at 48bit
         // alignment:
-        acc_56 = micro::xtensa::hifimini::MultiplyByQuantizedMultiplier(
-            acc_24x2, output_multiplier[output_channel],
-            output_shift[output_channel]);
+        acc_56 =
+            ops::micro::xtensa::hifimini::MultiplyByQuantizedMultiplier(
+                acc_24x2, output_multiplier[output_channel],
+                output_shift[output_channel]);

         // Add output offset, cap activation, and assign to the output:
         acc_56 = AE_ADDQ56(acc_56, output_offset_56);
@@ -260,11 +288,11 @@ inline void DepthwiseConv4x32MatchingInputAndFilter(

     // Apply quantized multiplier and accumulate result at 48bit
     // alignment:
-    block_0_acc = micro::xtensa::hifimini::MultiplyByQuantizedMultiplier(
+    block_0_acc = ops::micro::xtensa::hifimini::MultiplyByQuantizedMultiplier(
         acc_24x2_0, mult, shift);
     // Apply quantized multiplier and accumulate result at 48bit
     // alignment:
-    block_1_acc = micro::xtensa::hifimini::MultiplyByQuantizedMultiplier(
+    block_1_acc = ops::micro::xtensa::hifimini::MultiplyByQuantizedMultiplier(
         acc_24x2_1, mult, shift);

     // Add output offset, cap activation, and assign to the output:
@@ -280,42 +308,6 @@ inline void DepthwiseConv4x32MatchingInputAndFilter(
   }
 }

-}  // namespace hifimini
-}  // namespace xtensa
-
-namespace {
-
-constexpr int kInputTensor = 0;
-constexpr int kFilterTensor = 1;
-constexpr int kBiasTensor = 2;
-constexpr int kOutputTensor = 0;
-
-// Depthwise conv is quantized along dimension 3:
-// https://www.tensorflow.org/lite/performance/quantization_spec
-constexpr int kDepthwiseConvQuantizedDimension = 3;
-
-struct OpData {
-  TfLitePaddingValues padding;
-  // The scaling factor from input to output (aka the 'real multiplier') can
-  // be represented as a fixed point multiplier plus a left shift.
-  int32_t output_multiplier;
-  int output_shift;
-
-  // Cached tensor zero point values for quantized operations.
-  int32_t input_zero_point;
-  int32_t output_zero_point;
-
-  // Per channel output multiplier and shift.
-  // TODO(b/141139247): Allocate these dynamically when possible.
-  int32_t* per_channel_output_multiplier;
-  int32_t* per_channel_output_shift;
-
-  // The range of the fused activation layer. For example for kNone and
-  // uint8_t these would be 0 and 255.
-  int32_t output_activation_min;
-  int32_t output_activation_max;
-};
-
 TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
                              TfLiteDepthwiseConvParams* params, int width,
                              int height, int filter_width, int filter_height,
@@ -353,8 +345,6 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
   return kTfLiteOk;
 }

-}  // namespace
-
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
   return context->AllocatePersistentBuffer(context, sizeof(OpData));
@@ -437,16 +427,16 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
   op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
   op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();

-  xtensa::hifimini::DepthwiseConvPerChannel(
-      op_params, data->per_channel_output_multiplier,
-      data->per_channel_output_shift, tflite::micro::GetTensorShape(input),
-      tflite::micro::GetTensorData<int8_t>(input),
-      tflite::micro::GetTensorShape(filter),
-      tflite::micro::GetTensorData<int8_t>(filter),
-      tflite::micro::GetTensorShape(bias),
-      tflite::micro::GetTensorData<int32_t>(bias),
-      tflite::micro::GetTensorShape(output),
-      tflite::micro::GetTensorData<int8_t>(output));
+  DepthwiseConvPerChannel(op_params, data->per_channel_output_multiplier,
+                          data->per_channel_output_shift,
+                          tflite::micro::GetTensorShape(input),
+                          tflite::micro::GetTensorData<int8_t>(input),
+                          tflite::micro::GetTensorShape(filter),
+                          tflite::micro::GetTensorData<int8_t>(filter),
+                          tflite::micro::GetTensorShape(bias),
+                          tflite::micro::GetTensorData<int32_t>(bias),
+                          tflite::micro::GetTensorShape(output),
+                          tflite::micro::GetTensorData<int8_t>(output));
 }

 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
@@ -473,7 +463,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   if (input_dims[0] == 1 && input_dims[1] == 4 && input_dims[2] == 1 &&
       input_dims[3] == 32 && filter_dims[0] == 1 && filter_dims[1] == 4 &&
       filter_dims[2] == 1 && filter_dims[3] == 32) {
-    xtensa::hifimini::DepthwiseConv4x32MatchingInputAndFilter(
+    DepthwiseConv4x32MatchingInputAndFilter(
         -op_data->input_zero_point, op_data->output_zero_point,
         std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max(),
         op_data->per_channel_output_multiplier,
@@ -500,19 +490,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   return kTfLiteOk;
 }

-}  // namespace depthwise_conv
+}  // namespace

 TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
-  return {/*init=*/depthwise_conv::Init,
+  return {/*init=*/Init,
           /*free=*/nullptr,
-          /*prepare=*/depthwise_conv::Prepare,
-          /*invoke=*/depthwise_conv::Eval,
+          /*prepare=*/Prepare,
+          /*invoke=*/Eval,
           /*profiling_string=*/nullptr,
           /*builtin_code=*/0,
           /*custom_name=*/nullptr,
           /*version=*/0};
 }

-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/tensorflow/lite/micro/micro_mutable_op_resolver.h b/tensorflow/lite/micro/micro_mutable_op_resolver.h
index c8475aadd19..d1d516573ef 100644
--- a/tensorflow/lite/micro/micro_mutable_op_resolver.h
+++ b/tensorflow/lite/micro/micro_mutable_op_resolver.h
@@ -160,8 +160,7 @@ class MicroMutableOpResolver : public MicroOpResolver {

   TfLiteStatus AddDepthwiseConv2D() {
     return AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D,
-                      tflite::ops::micro::Register_DEPTHWISE_CONV_2D(),
-                      ParseDepthwiseConv2D);
+                      Register_DEPTHWISE_CONV_2D(), ParseDepthwiseConv2D);
   }

   TfLiteStatus AddDequantize() {
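
Every per-kernel hunk above applies the same mechanical change: the Init/Prepare/Eval hooks move out of the nested tflite::ops::micro::depthwise_conv namespaces into an unnamed namespace, and Register_DEPTHWISE_CONV_2D() is exposed directly from tflite::. Below is a minimal, self-contained C++ sketch of that before/after structure; note that Registration, Context, and Node are simplified stand-ins for the real TfLiteRegistration, TfLiteContext, and TfLiteNode types, and the kernel bodies are stubs.

// Sketch only: simplified stand-ins, not the real TFLM definitions.
#include <cstddef>

struct Context;  // stand-in for TfLiteContext
struct Node;     // stand-in for TfLiteNode

struct Registration {  // stand-in for TfLiteRegistration
  void* (*init)(Context* context, const char* buffer, std::size_t length);
  int (*prepare)(Context* context, Node* node);
  int (*invoke)(Context* context, Node* node);
};

namespace tflite {
namespace {
// Before this patch the hooks lived in
// tflite::ops::micro::depthwise_conv::{Init, Prepare, Eval}. An unnamed
// namespace gives them internal linkage instead, so every kernel file can
// use the same short names without colliding at link time.

void* Init(Context* context, const char* buffer, std::size_t length) {
  return nullptr;  // Real kernels allocate a persistent OpData buffer here.
}

int Prepare(Context* context, Node* node) { return 0; }  // stub
int Eval(Context* context, Node* node) { return 0; }     // stub

}  // namespace

// The only externally visible symbol sits directly in tflite::, so callers
// write tflite::Register_DEPTHWISE_CONV_2D() rather than
// tflite::ops::micro::Register_DEPTHWISE_CONV_2D().
Registration Register_DEPTHWISE_CONV_2D() {
  return {/*init=*/Init, /*prepare=*/Prepare, /*invoke=*/Eval};
}

}  // namespace tflite

The internal linkage is the point of the abseil tip cited in the commit message: only the registration symbol is visible to other translation units. The one wrinkle, visible in the xtensa_hifimini hunks, is that helpers such as MultiplyByQuantizedMultiplier still live in ops::micro::xtensa::hifimini, so calls from the newly flattened file spell out the full qualification until those utilities are migrated as well.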