Split delegate_test.cc: move tests of InvokeWithCPUFallback into
a new unit test, interpreter_utils_test.cc, to match the name of the
file interpreter_utils.cc, which defines InvokeWithCPUFallback.

This required moving the test infrastructure that is now shared
between delegate_test.cc and interpreter_utils_test.cc into a separate
compilation unit, delegate_test_util.{h,cc}.

PiperOrigin-RevId: 345034742
Change-Id: If91f0d70d0b4d8c160685f13d6e313dd16fd5425
Author: Fergus Henderson, 2020-12-01 09:21:35 -08:00; committed by TensorFlower Gardener
Parent: d10afb111a
Commit: 0b2e5e42d5
6 changed files with 848 additions and 656 deletions
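For context, the API under test is InterpreterUtils::InvokeWithCPUFallback, declared in tensorflow/lite/delegates/interpreter_utils.h. A minimal sketch of how a client calls it (assuming an interpreter that already has a delegate applied; model building and error handling elided):

  #include "tensorflow/lite/delegates/interpreter_utils.h"

  // Invoke the graph; if a delegate kernel fails, the runtime undoes all
  // delegates and re-runs the original execution plan on the CPU path.
  TfLiteStatus status =
      tflite::delegates::InterpreterUtils::InvokeWithCPUFallback(
          interpreter.get());
  if (status == kTfLiteDelegateError) {
    // A delegate failed, but outputs were still produced via CPU fallback.
  }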

tensorflow/lite/delegates/BUILD

@@ -67,18 +67,20 @@ cc_test(
)
cc_test(
name = "delegate_test",
name = "interpreter_utils_test",
size = "small",
srcs = ["delegate_test.cc"],
srcs = ["interpreter_utils_test.cc"],
features = ["-dynamic_link_test_srcs"], # see go/dynamic_link_test_srcs
deps = [
":delegate_test_util",
":interpreter_utils",
":utils",
"//tensorflow/lite:framework",
"//tensorflow/lite:kernel_api",
"//tensorflow/lite:string",
"//tensorflow/lite:util",
"//tensorflow/lite:version",
"//tensorflow/lite/core/api",
"//tensorflow/lite/c:common",
"//tensorflow/lite/kernels:builtin_ops",
"//tensorflow/lite/kernels:kernel_util",
"//tensorflow/lite/kernels/internal:compatibility",
@@ -87,5 +89,59 @@ cc_test(
"//tensorflow/lite/testing:util",
"//third_party/eigen3",
"@com_google_googletest//:gtest",
"@flatbuffers",
],
)
cc_test(
name = "delegate_test",
size = "small",
srcs = ["delegate_test.cc"],
features = ["-dynamic_link_test_srcs"], # see go/dynamic_link_test_srcs
deps = [
":delegate_test_util",
":interpreter_utils",
":utils",
"//tensorflow/lite:framework",
"//tensorflow/lite:kernel_api",
"//tensorflow/lite:string",
"//tensorflow/lite:util",
"//tensorflow/lite:version",
"//tensorflow/lite/c:common",
"//tensorflow/lite/kernels:builtin_ops",
"//tensorflow/lite/kernels:kernel_util",
"//tensorflow/lite/kernels/internal:compatibility",
"//tensorflow/lite/schema:schema_conversion_utils",
"//tensorflow/lite/schema:schema_fbs",
"//tensorflow/lite/testing:util",
"//third_party/eigen3",
"@com_google_googletest//:gtest",
"@flatbuffers",
],
)
cc_library(
name = "delegate_test_util",
testonly = True,
srcs = ["delegate_test_util.cc"],
hdrs = ["delegate_test_util.h"],
deps = [
":interpreter_utils",
":utils",
"//tensorflow/lite:builtin_ops",
"//tensorflow/lite:framework",
"//tensorflow/lite:string",
"//tensorflow/lite:util",
"//tensorflow/lite:version",
"//tensorflow/lite/c:common",
"//tensorflow/lite/kernels:builtin_ops",
"//tensorflow/lite/kernels:kernel_util",
"//tensorflow/lite/kernels/internal:compatibility",
"//tensorflow/lite/schema:schema_conversion_utils",
"//tensorflow/lite/schema:schema_fbs",
"//tensorflow/lite/testing:util",
"//third_party/eigen3",
"@com_google_googletest//:gtest",
"@flatbuffers",
],
)

tensorflow/lite/delegates/delegate_test.cc

@@ -14,352 +14,32 @@ limitations under the License.
==============================================================================*/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <memory>
#include <utility>
#include <vector>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "third_party/eigen3/Eigen/Core"
#include "tensorflow/lite/builtin_op_data.h"
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/delegates/interpreter_utils.h"
#include "tensorflow/lite/delegates/utils.h"
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/delegates/delegate_test_util.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/interpreter_builder.h"
#include "tensorflow/lite/kernels/builtin_op_kernels.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/schema/schema_conversion_utils.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/testing/util.h"
#include "tensorflow/lite/util.h"
#include "tensorflow/lite/version.h"
namespace tflite {
namespace {
namespace delegates {
// Build a kernel registration for a custom addition op that adds its two
// tensor inputs to produce a tensor output.
TfLiteRegistration AddOpRegistration() {
TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr};
using test_utils::TestDelegate;
using test_utils::TestFP16Delegation;
reg.custom_name = "my_add";
reg.builtin_code = tflite::BuiltinOperator_CUSTOM;
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input1));
const TfLiteTensor* input2;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 1, &input2));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
// Verify that the two inputs have the same shape.
TF_LITE_ENSURE_EQ(context, input1->dims->size, input2->dims->size);
for (int i = 0; i < input1->dims->size; ++i) {
TF_LITE_ENSURE_EQ(context, input1->dims->data[i], input2->dims->data[i]);
}
// Set output shape to match input shape.
TF_LITE_ENSURE_STATUS(context->ResizeTensor(
context, output, TfLiteIntArrayCopy(input1->dims)));
return kTfLiteOk;
};
reg.invoke = [](TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* a0;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &a0));
TF_LITE_ENSURE(context, a0);
TF_LITE_ENSURE(context, a0->data.f);
const TfLiteTensor* a1;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 1, &a1));
TF_LITE_ENSURE(context, a1);
TF_LITE_ENSURE(context, a1->data.f);
TfLiteTensor* out;
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &out));
TF_LITE_ENSURE(context, out);
TF_LITE_ENSURE(context, out->data.f);
// Set output data to element-wise sum of input data.
int num = a0->dims->data[0];
for (int i = 0; i < num; i++) {
out->data.f[i] = a0->data.f[i] + a1->data.f[i];
}
return kTfLiteOk;
};
return reg;
}
} // namespace
// TestDelegate is a friend of Interpreter to access RemoveAllDelegates().
class TestDelegate : public ::testing::Test {
protected:
void SetUp() override {
interpreter_.reset(new Interpreter);
interpreter_->AddTensors(5);
interpreter_->SetInputs({0, 1});
interpreter_->SetOutputs({3, 4});
TfLiteQuantizationParams quant;
interpreter_->SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {3},
quant);
interpreter_->SetTensorParametersReadWrite(1, kTfLiteFloat32, "", {3},
quant);
interpreter_->SetTensorParametersReadWrite(2, kTfLiteFloat32, "", {3},
quant);
interpreter_->SetTensorParametersReadWrite(3, kTfLiteFloat32, "", {3},
quant);
interpreter_->SetTensorParametersReadWrite(4, kTfLiteFloat32, "", {3},
quant);
TfLiteRegistration reg = AddOpRegistration();
interpreter_->AddNodeWithParameters({0, 0}, {2}, nullptr, 0, nullptr, &reg);
interpreter_->AddNodeWithParameters({1, 1}, {3}, nullptr, 0, nullptr, &reg);
interpreter_->AddNodeWithParameters({2, 1}, {4}, nullptr, 0, nullptr, &reg);
}
void TearDown() override {
// The interpreter relies on the delegate to free its resources properly,
// so the delegate must outlive the interpreter.
interpreter_.reset();
delegate_.reset();
}
TfLiteBufferHandle last_allocated_handle_ = kTfLiteNullBufferHandle;
TfLiteBufferHandle AllocateBufferHandle() { return ++last_allocated_handle_; }
TfLiteStatus RemoveAllDelegates() {
return interpreter_->RemoveAllDelegates();
}
protected:
class SimpleDelegate {
public:
// Create a simple implementation of a TfLiteDelegate. We use the C++ class
// SimpleDelegate, which can produce a handle to a TfLiteDelegate that is
// value-copyable and compatible with TfLite.
// fail_node_prepare: To simulate failure of Delegate node's Prepare().
// min_ops_per_subset: If >0, partitioning preview is used to choose only
// those subsets with at least min_ops_per_subset nodes.
// fail_node_invoke: To simulate failure of Delegate node's Invoke().
// automatic_shape_propagation: This assumes that the runtime will propagate
// shapes using the original execution plan.
explicit SimpleDelegate(const std::vector<int>& nodes,
int64_t delegate_flags = kTfLiteDelegateFlagsNone,
bool fail_node_prepare = false,
int min_ops_per_subset = 0,
bool fail_node_invoke = false,
bool automatic_shape_propagation = false)
: nodes_(nodes),
fail_delegate_node_prepare_(fail_node_prepare),
min_ops_per_subset_(min_ops_per_subset),
fail_delegate_node_invoke_(fail_node_invoke),
automatic_shape_propagation_(automatic_shape_propagation) {
delegate_.Prepare = [](TfLiteContext* context,
TfLiteDelegate* delegate) -> TfLiteStatus {
auto* simple = static_cast<SimpleDelegate*>(delegate->data_);
TfLiteIntArray* nodes_to_separate =
TfLiteIntArrayCreate(simple->nodes_.size());
// Mark nodes that we want in TfLiteIntArray* structure.
int index = 0;
for (auto node_index : simple->nodes_) {
nodes_to_separate->data[index++] = node_index;
// Make sure the node is added.
TfLiteNode* node;
TfLiteRegistration* reg;
context->GetNodeAndRegistration(context, node_index, &node, &reg);
TFLITE_CHECK_EQ(reg->builtin_code, tflite::BuiltinOperator_CUSTOM);
TFLITE_CHECK_EQ(strcmp(reg->custom_name, "my_add"), 0);
}
// Check that all nodes are available
TfLiteIntArray* execution_plan;
TF_LITE_ENSURE_STATUS(
context->GetExecutionPlan(context, &execution_plan));
for (int exec_index = 0; exec_index < execution_plan->size;
exec_index++) {
int node_index = execution_plan->data[exec_index];
TfLiteNode* node;
TfLiteRegistration* reg;
context->GetNodeAndRegistration(context, node_index, &node, &reg);
if (exec_index == node_index) {
// Check op details only if it wasn't delegated already.
TFLITE_CHECK_EQ(reg->builtin_code, tflite::BuiltinOperator_CUSTOM);
TFLITE_CHECK_EQ(strcmp(reg->custom_name, "my_add"), 0);
}
}
// Get preview of delegate partitioning from the context.
TfLiteDelegateParams* params_array;
int num_partitions;
TFLITE_CHECK_EQ(
context->PreviewDelegatePartitioning(
context, nodes_to_separate, &params_array, &num_partitions),
kTfLiteOk);
if (simple->min_ops_per_subset() > 0) {
// Build a new vector of ops from subsets with at least the minimum
// size.
std::vector<int> allowed_ops;
for (int idx = 0; idx < num_partitions; ++idx) {
const auto* nodes_in_subset = params_array[idx].nodes_to_replace;
if (nodes_in_subset->size < simple->min_ops_per_subset()) continue;
allowed_ops.insert(allowed_ops.end(), nodes_in_subset->data,
nodes_in_subset->data + nodes_in_subset->size);
}
// Free existing nodes_to_separate & initialize a new array with
// allowed_ops.
TfLiteIntArrayFree(nodes_to_separate);
nodes_to_separate = TfLiteIntArrayCreate(allowed_ops.size());
memcpy(nodes_to_separate->data, allowed_ops.data(),
sizeof(int) * nodes_to_separate->size);
}
// Another call to PreviewDelegatePartitioning should be okay, since
// partitioning memory is managed by context.
TFLITE_CHECK_EQ(
context->PreviewDelegatePartitioning(
context, nodes_to_separate, &params_array, &num_partitions),
kTfLiteOk);
context->ReplaceNodeSubsetsWithDelegateKernels(
context, simple->FakeFusedRegistration(), nodes_to_separate,
delegate);
TfLiteIntArrayFree(nodes_to_separate);
return kTfLiteOk;
};
delegate_.CopyToBufferHandle = [](TfLiteContext* context,
TfLiteDelegate* delegate,
TfLiteBufferHandle buffer_handle,
TfLiteTensor* tensor) -> TfLiteStatus {
// TODO(b/156586986): Implement tests to test buffer copying logic.
return kTfLiteOk;
};
delegate_.CopyFromBufferHandle =
[](TfLiteContext* context, TfLiteDelegate* delegate,
TfLiteBufferHandle buffer_handle,
TfLiteTensor* output) -> TfLiteStatus {
TFLITE_CHECK_GE(buffer_handle, -1);
TFLITE_CHECK_EQ(output->buffer_handle, buffer_handle);
const float floats[] = {6., 6., 6.};
int num = output->dims->data[0];
for (int i = 0; i < num; i++) {
output->data.f[i] = floats[i];
}
return kTfLiteOk;
};
delegate_.FreeBufferHandle =
[](TfLiteContext* context, TfLiteDelegate* delegate,
TfLiteBufferHandle* handle) { *handle = kTfLiteNullBufferHandle; };
// Store a type-punned pointer to this SimpleDelegate structure.
delegate_.data_ = static_cast<void*>(this);
delegate_.flags = delegate_flags;
}
TfLiteRegistration FakeFusedRegistration() {
TfLiteRegistration reg = {nullptr};
reg.custom_name = "fake_fused_op";
// Different flavors of the delegate kernel's Invoke(), dependent on
// testing parameters.
if (fail_delegate_node_invoke_) {
reg.invoke = [](TfLiteContext* context,
TfLiteNode* node) -> TfLiteStatus {
return kTfLiteError;
};
} else {
reg.invoke = [](TfLiteContext* context,
TfLiteNode* node) -> TfLiteStatus {
// Set output data to the element-wise sum of the input data.
const TfLiteTensor* a0;
const TfLiteTensor* a1;
if (node->inputs->size == 2) {
a0 = GetInput(context, node, 0);
a1 = GetInput(context, node, 1);
} else {
a0 = GetInput(context, node, 0);
a1 = a0;
}
TfLiteTensor* out;
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &out));
int num = 1;
for (int i = 0; i < a0->dims->size; ++i) {
num *= a0->dims->data[i];
}
for (int i = 0; i < num; i++) {
out->data.f[i] = a0->data.f[i] + a1->data.f[i];
}
if (out->buffer_handle != kTfLiteNullBufferHandle) {
// Make the data stale so that CopyFromBufferHandle can be invoked
out->data_is_stale = true;
}
return kTfLiteOk;
};
}
// Different flavors of the delegate kernel's Prepare(), dependent on
// testing parameters.
if (automatic_shape_propagation_) {
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
// Shapes should already be propagated by the runtime; we just need to
// check them.
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input1));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
const int input_dims_size = input1->dims->size;
TF_LITE_ENSURE(context, output->dims->size == input_dims_size);
for (int i = 0; i < input_dims_size; ++i) {
TF_LITE_ENSURE(context,
output->dims->data[i] == input1->dims->data[i]);
}
return kTfLiteOk;
};
} else if (fail_delegate_node_prepare_) {
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
return kTfLiteError;
};
} else {
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
// Set output size to input size
const TfLiteTensor* input1;
const TfLiteTensor* input2;
if (node->inputs->size == 2) {
input1 = GetInput(context, node, 0);
input2 = GetInput(context, node, 1);
} else {
input1 = GetInput(context, node, 0);
input2 = input1;
}
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
TF_LITE_ENSURE_STATUS(context->ResizeTensor(
context, output, TfLiteIntArrayCopy(input1->dims)));
return kTfLiteOk;
};
}
return reg;
}
TfLiteDelegate* get_tf_lite_delegate() { return &delegate_; }
int min_ops_per_subset() { return min_ops_per_subset_; }
private:
std::vector<int> nodes_;
TfLiteDelegate delegate_;
bool fail_delegate_node_prepare_ = false;
int min_ops_per_subset_ = 0;
bool fail_delegate_node_invoke_ = false;
bool automatic_shape_propagation_ = false;
};
std::unique_ptr<Interpreter> interpreter_;
std::unique_ptr<SimpleDelegate> delegate_, delegate2_;
};
namespace {
TEST_F(TestDelegate, BasicDelegate) {
@@ -444,34 +124,6 @@ TEST_F(TestDelegate, DelegateNodeInvokeFailure) {
}
}
TEST_F(TestDelegate, DelegateNodeInvokeFailureFallback) {
delegate_ = std::unique_ptr<SimpleDelegate>(new SimpleDelegate(
{0, 1, 2}, kTfLiteDelegateFlagsNone, false /**fail_node_prepare**/,
0 /**min_ops_per_subset**/, true /**fail_node_invoke**/));
ASSERT_EQ(
interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()),
kTfLiteOk);
// Delegation modified execution plan.
ASSERT_EQ(interpreter_->execution_plan().size(), 1);
std::vector<float> input = {1.0f, 2.0f, 3.0f};
std::vector<float> expected_output = {2.0f, 4.0f, 6.0f};
constexpr int kOutputTensorIndex = 3;
memcpy(interpreter_->typed_tensor<float>(0), input.data(), 3 * sizeof(float));
memcpy(interpreter_->typed_tensor<float>(1), input.data(), 3 * sizeof(float));
EXPECT_EQ(
delegates::InterpreterUtils::InvokeWithCPUFallback(interpreter_.get()),
kTfLiteDelegateError);
// Delegation removed, returning to original execution plan.
ASSERT_EQ(interpreter_->execution_plan().size(), 3);
// Check outputs.
TfLiteTensor* tensor = interpreter_->tensor(kOutputTensorIndex);
for (int i = 0; i < 3; ++i) {
EXPECT_EQ(tensor->data.f[i], expected_output[i]) << i;
}
}
TEST_F(TestDelegate, SecondDelegationPrepareFailure) {
// First delegate only supports nodes 1, 2. Gets applied successfully.
// This delegate should support dynamic tensors, otherwise the second won't be
@@ -940,44 +592,6 @@ TEST_F(TestDelegate, TestRequirePropagatedShapes_MultipleDelegates) {
}
}
TEST_F(TestDelegate, TestFallbackWithMultipleDelegates) {
// First delegate only supports node 0.
// This delegate should support dynamic tensors, otherwise the second won't be
// applied.
delegate_ = std::unique_ptr<SimpleDelegate>(
new SimpleDelegate({0}, kTfLiteDelegateFlagsAllowDynamicTensors));
// Second delegate supports nodes 1 & 2, and makes the graph immutable.
delegate2_ = std::unique_ptr<SimpleDelegate>(new SimpleDelegate(
{1, 2}, kTfLiteDelegateFlagsNone, false /**fail_node_prepare**/,
0 /**min_ops_per_subset**/, true /**fail_node_invoke**/));
// Pre-delegation execution plan should have three nodes.
ASSERT_EQ(interpreter_->execution_plan().size(), 3);
ASSERT_EQ(
interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()),
kTfLiteOk);
ASSERT_EQ(
interpreter_->ModifyGraphWithDelegate(delegate2_->get_tf_lite_delegate()),
kTfLiteOk);
// Should be two delegate nodes.
ASSERT_EQ(interpreter_->execution_plan().size(), 2);
std::vector<float> input = {1.0f, 2.0f, 3.0f};
std::vector<float> expected_output = {2.0f, 4.0f, 6.0f};
constexpr int kOutputTensorIndex = 2;
TfLiteTensor* tensor = interpreter_->tensor(kOutputTensorIndex);
memcpy(interpreter_->typed_tensor<float>(0), input.data(), 3 * sizeof(float));
memcpy(interpreter_->typed_tensor<float>(1), input.data(), 3 * sizeof(float));
EXPECT_EQ(
delegates::InterpreterUtils::InvokeWithCPUFallback(interpreter_.get()),
kTfLiteDelegateError);
// All delegates should be undone.
EXPECT_EQ(interpreter_->execution_plan().size(), 3);
for (int i = 0; i < 3; ++i) {
EXPECT_EQ(tensor->data.f[i], expected_output[i]) << i;
}
}
TEST_F(TestDelegate, ReleaseNonPersistentMemoryWithDelegates) {
// First delegate only supports node 0.
// This delegate should support dynamic tensors, otherwise the second won't be
@@ -1297,232 +911,6 @@ TEST_F(TestDelegateWithDynamicTensors, ShapePropagation_FlagNotSet) {
// Tests for FP16 graphs
// =====================
// Tests delegate functionality related to FP16 graphs.
// Model architecture:
// 1->DEQ->2 4->DEQ->5 7->DEQ->8 10->DEQ->11
// | | | |
// 0----->ADD->3----->ADD->6----->MUL->9------>ADD-->12
// Input: 0, Output: 12.
// All constants are 2, so the function is: (x + 2 + 2) * 2 + 2 = 2x + 10
//
// Delegate only supports ADD, so there can be up to two delegated partitions.
// TODO(b/156707497): Add more cases here once we have landed CPU kernels
// supporting FP16.
class TestFP16Delegation : public ::testing::TestWithParam<int> {
protected:
void SetUp() override {
interpreter_.reset(new Interpreter);
interpreter_->AddTensors(13);
interpreter_->SetInputs({0});
interpreter_->SetOutputs({12});
float16_const_ = Eigen::half_impl::float_to_half_rtne(2.f);
// TENSORS.
TfLiteQuantizationParams quant;
// Input.
interpreter_->SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {1},
quant);
// fp16 constant, dequantize output, Add0 output.
interpreter_->SetTensorParametersReadOnly(
1, kTfLiteFloat16, "", {1}, quant,
reinterpret_cast<const char*>(&float16_const_), sizeof(TfLiteFloat16));
interpreter_->SetTensorParametersReadWrite(2, kTfLiteFloat32, "", {1},
quant);
interpreter_->SetTensorParametersReadWrite(3, kTfLiteFloat32, "", {1},
quant);
// fp16 constant, dequantize output, Add1 output.
interpreter_->SetTensorParametersReadOnly(
4, kTfLiteFloat16, "", {1}, quant,
reinterpret_cast<const char*>(&float16_const_), sizeof(TfLiteFloat16));
interpreter_->SetTensorParametersReadWrite(5, kTfLiteFloat32, "", {1},
quant);
interpreter_->SetTensorParametersReadWrite(6, kTfLiteFloat32, "", {1},
quant);
// fp16 constant, dequantize output, Mul0 output.
interpreter_->SetTensorParametersReadOnly(
7, kTfLiteFloat16, "", {1}, quant,
reinterpret_cast<const char*>(&float16_const_), sizeof(TfLiteFloat16));
interpreter_->SetTensorParametersReadWrite(8, kTfLiteFloat32, "", {1},
quant);
interpreter_->SetTensorParametersReadWrite(9, kTfLiteFloat32, "", {1},
quant);
// fp16 constant, dequantize output, Add2 output.
interpreter_->SetTensorParametersReadOnly(
10, kTfLiteFloat16, "", {1}, quant,
reinterpret_cast<const char*>(&float16_const_), sizeof(TfLiteFloat16));
interpreter_->SetTensorParametersReadWrite(11, kTfLiteFloat32, "", {1},
quant);
interpreter_->SetTensorParametersReadWrite(12, kTfLiteFloat32, "", {1},
quant);
// NODES.
auto* add_reg = ops::builtin::Register_ADD();
auto* mul_reg = ops::builtin::Register_MUL();
auto* deq_reg = ops::builtin::Register_DEQUANTIZE();
add_reg->builtin_code = kTfLiteBuiltinAdd;
deq_reg->builtin_code = kTfLiteBuiltinDequantize;
mul_reg->builtin_code = kTfLiteBuiltinMul;
TfLiteAddParams* builtin_data0 =
reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
TfLiteAddParams* builtin_data1 =
reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
TfLiteMulParams* builtin_data2 =
reinterpret_cast<TfLiteMulParams*>(malloc(sizeof(TfLiteMulParams)));
TfLiteAddParams* builtin_data3 =
reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
builtin_data0->activation = kTfLiteActNone;
builtin_data1->activation = kTfLiteActNone;
builtin_data2->activation = kTfLiteActNone;
builtin_data3->activation = kTfLiteActNone;
interpreter_->AddNodeWithParameters({1}, {2}, nullptr, 0, nullptr, deq_reg);
interpreter_->AddNodeWithParameters({0, 2}, {3}, nullptr, 0, builtin_data0,
add_reg);
interpreter_->AddNodeWithParameters({4}, {5}, nullptr, 0, nullptr, deq_reg);
interpreter_->AddNodeWithParameters({3, 5}, {6}, nullptr, 0, builtin_data1,
add_reg);
interpreter_->AddNodeWithParameters({7}, {8}, nullptr, 0, nullptr, deq_reg);
interpreter_->AddNodeWithParameters({6, 8}, {9}, nullptr, 0, builtin_data2,
mul_reg);
interpreter_->AddNodeWithParameters({10}, {11}, nullptr, 0, nullptr,
deq_reg);
interpreter_->AddNodeWithParameters({9, 11}, {12}, nullptr, 0,
builtin_data3, add_reg);
}
void VerifyInvoke() {
std::vector<float> input = {3.0f};
std::vector<float> expected_output = {16.0f};
const int input_tensor_idx = interpreter_->inputs()[0];
const int output_tensor_idx = interpreter_->outputs()[0];
memcpy(interpreter_->typed_tensor<float>(input_tensor_idx), input.data(),
sizeof(float));
ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk);
TfLiteTensor* output_tensor = interpreter_->tensor(output_tensor_idx);
for (int i = 0; i < 1; ++i) {
EXPECT_EQ(output_tensor->data.f[i], expected_output[i]) << i;
}
}
void TearDown() override { interpreter_.reset(); }
protected:
class FP16Delegate {
public:
// Uses FP16GraphPartitionHelper to accept ADD nodes with fp16 input.
explicit FP16Delegate(int num_delegated_subsets,
bool fail_node_prepare = false,
bool fail_node_invoke = false)
: num_delegated_subsets_(num_delegated_subsets),
fail_delegate_node_prepare_(fail_node_prepare),
fail_delegate_node_invoke_(fail_node_invoke) {
delegate_.Prepare = [](TfLiteContext* context,
TfLiteDelegate* delegate) -> TfLiteStatus {
auto* fp16_delegate = static_cast<FP16Delegate*>(delegate->data_);
// FP16 graph partitioning.
delegates::IsNodeSupportedFn node_supported_fn =
[=](TfLiteContext* context, TfLiteNode* node,
TfLiteRegistration* registration,
std::string* unsupported_details) -> bool {
return registration->builtin_code == kTfLiteBuiltinAdd;
};
delegates::FP16GraphPartitionHelper partition_helper(context,
node_supported_fn);
TfLiteIntArray* nodes_to_separate = nullptr;
if (partition_helper.Partition(nullptr) != kTfLiteOk) {
nodes_to_separate = TfLiteIntArrayCreate(0);
} else {
std::vector<int> ops_to_replace =
partition_helper.GetNodesOfFirstNLargestPartitions(
fp16_delegate->num_delegated_subsets());
nodes_to_separate = ConvertVectorToTfLiteIntArray(ops_to_replace);
}
context->ReplaceNodeSubsetsWithDelegateKernels(
context, fp16_delegate->FakeFusedRegistration(), nodes_to_separate,
delegate);
TfLiteIntArrayFree(nodes_to_separate);
return kTfLiteOk;
};
delegate_.CopyFromBufferHandle =
[](TfLiteContext* context, TfLiteDelegate* delegate,
TfLiteBufferHandle buffer_handle,
TfLiteTensor* output) -> TfLiteStatus { return kTfLiteOk; };
delegate_.FreeBufferHandle = nullptr;
delegate_.CopyToBufferHandle = nullptr;
// Store a type-punned pointer to this FP16Delegate structure.
delegate_.data_ = static_cast<void*>(this);
delegate_.flags = kTfLiteDelegateFlagsNone;
}
TfLiteRegistration FakeFusedRegistration() {
TfLiteRegistration reg = {nullptr};
reg.custom_name = "fake_fp16_add_op";
// Different flavors of the delegate kernel's Invoke(), dependent on
// testing parameters.
if (fail_delegate_node_invoke_) {
reg.invoke = [](TfLiteContext* context,
TfLiteNode* node) -> TfLiteStatus {
return kTfLiteError;
};
} else {
reg.invoke = [](TfLiteContext* context,
TfLiteNode* node) -> TfLiteStatus {
float output = 0;
for (int i = 0; i < node->inputs->size; ++i) {
const TfLiteTensor* input_tensor = GetInput(context, node, i);
if (input_tensor->type == kTfLiteFloat32) {
output += input_tensor->data.f[0];
} else {
// All constants are 2.
output += 2;
}
}
TfLiteTensor* out = GetOutput(context, node, 0);
out->data.f[0] = output;
return kTfLiteOk;
};
}
// Different flavors of the delegate kernel's Prepare(), dependent on
// testing parameters.
if (fail_delegate_node_prepare_) {
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
return kTfLiteError;
};
} else {
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
// Set output size to input size
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
TF_LITE_ENSURE_STATUS(context->ResizeTensor(
context, output, TfLiteIntArrayCopy(input->dims)));
return kTfLiteOk;
};
}
return reg;
}
TfLiteDelegate* get_tf_lite_delegate() { return &delegate_; }
int num_delegated_subsets() { return num_delegated_subsets_; }
private:
TfLiteDelegate delegate_;
int num_delegated_subsets_;
bool fail_delegate_node_prepare_ = false;
bool fail_delegate_node_invoke_ = false;
};
std::unique_ptr<Interpreter> interpreter_;
std::unique_ptr<FP16Delegate> delegate_;
Eigen::half float16_const_;
};
TEST_P(TestFP16Delegation, NonDelegatedInterpreterWorks) {
ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
VerifyInvoke();
@@ -1551,38 +939,11 @@ TEST_P(TestFP16Delegation, DelegatePrepareFails) {
VerifyInvoke();
}
TEST_P(TestFP16Delegation, DelegateInvokeWithCPUFallback) {
delegate_ = std::unique_ptr<FP16Delegate>(new FP16Delegate(
/**num_delegated_subsets**/ GetParam(), /**fail_node_prepare**/ false,
/**fail_node_invoke**/ true));
ASSERT_EQ(
interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()),
kTfLiteOk);
std::vector<float> input = {3.0f};
std::vector<float> expected_output = {16.0f};
const int input_tensor_idx = interpreter_->inputs()[0];
const int output_tensor_idx = interpreter_->outputs()[0];
memcpy(interpreter_->typed_tensor<float>(input_tensor_idx), input.data(),
sizeof(float));
EXPECT_EQ(
delegates::InterpreterUtils::InvokeWithCPUFallback(interpreter_.get()),
kTfLiteDelegateError);
TfLiteTensor* output_tensor = interpreter_->tensor(output_tensor_idx);
for (int i = 0; i < 1; ++i) {
EXPECT_EQ(output_tensor->data.f[i], expected_output[i]) << i;
}
ASSERT_EQ(interpreter_->execution_plan().size(), 8);
VerifyInvoke();
}
INSTANTIATE_TEST_SUITE_P(TestFP16Delegation, TestFP16Delegation,
::testing::Values(1, 2));
} // namespace
} // anonymous namespace
} // namespace delegates
} // namespace tflite
int main(int argc, char** argv) {

tensorflow/lite/delegates/delegate_test_util.cc

@@ -0,0 +1,493 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/delegate_test_util.h"
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <memory>
#include <string>
#include <vector>
#include <gtest/gtest.h>
#include "third_party/eigen3/Eigen/Core"
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/delegates/utils.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/builtin_op_kernels.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/string_type.h"
#include "tensorflow/lite/util.h"
namespace tflite {
namespace delegates {
namespace test_utils {
TfLiteRegistration AddOpRegistration() {
TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr};
reg.custom_name = "my_add";
reg.builtin_code = tflite::BuiltinOperator_CUSTOM;
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input1));
const TfLiteTensor* input2;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 1, &input2));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
// Verify that the two inputs have the same shape.
TF_LITE_ENSURE_EQ(context, input1->dims->size, input2->dims->size);
for (int i = 0; i < input1->dims->size; ++i) {
TF_LITE_ENSURE_EQ(context, input1->dims->data[i], input2->dims->data[i]);
}
// Set output shape to match input shape.
TF_LITE_ENSURE_STATUS(context->ResizeTensor(
context, output, TfLiteIntArrayCopy(input1->dims)));
return kTfLiteOk;
};
reg.invoke = [](TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* a0;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &a0));
TF_LITE_ENSURE(context, a0);
TF_LITE_ENSURE(context, a0->data.f);
const TfLiteTensor* a1;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 1, &a1));
TF_LITE_ENSURE(context, a1);
TF_LITE_ENSURE(context, a1->data.f);
TfLiteTensor* out;
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &out));
TF_LITE_ENSURE(context, out);
TF_LITE_ENSURE(context, out->data.f);
// Set output data to element-wise sum of input data.
int num = a0->dims->data[0];
for (int i = 0; i < num; i++) {
out->data.f[i] = a0->data.f[i] + a1->data.f[i];
}
return kTfLiteOk;
};
return reg;
}
void TestDelegate::SetUp() {
interpreter_.reset(new Interpreter);
interpreter_->AddTensors(5);
interpreter_->SetInputs({0, 1});
interpreter_->SetOutputs({3, 4});
TfLiteQuantizationParams quant;
interpreter_->SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {3}, quant);
interpreter_->SetTensorParametersReadWrite(1, kTfLiteFloat32, "", {3}, quant);
interpreter_->SetTensorParametersReadWrite(2, kTfLiteFloat32, "", {3}, quant);
interpreter_->SetTensorParametersReadWrite(3, kTfLiteFloat32, "", {3}, quant);
interpreter_->SetTensorParametersReadWrite(4, kTfLiteFloat32, "", {3}, quant);
TfLiteRegistration reg = AddOpRegistration();
interpreter_->AddNodeWithParameters({0, 0}, {2}, nullptr, 0, nullptr, &reg);
interpreter_->AddNodeWithParameters({1, 1}, {3}, nullptr, 0, nullptr, &reg);
interpreter_->AddNodeWithParameters({2, 1}, {4}, nullptr, 0, nullptr, &reg);
}
void TestDelegate::TearDown() {
// The interpreter relies on the delegate to free its resources properly,
// so the delegate must outlive the interpreter.
interpreter_.reset();
delegate_.reset();
}
TestDelegate::SimpleDelegate::SimpleDelegate(const std::vector<int>& nodes,
int64_t delegate_flags,
bool fail_node_prepare,
int min_ops_per_subset,
bool fail_node_invoke,
bool automatic_shape_propagation)
: nodes_(nodes),
fail_delegate_node_prepare_(fail_node_prepare),
min_ops_per_subset_(min_ops_per_subset),
fail_delegate_node_invoke_(fail_node_invoke),
automatic_shape_propagation_(automatic_shape_propagation) {
delegate_.Prepare = [](TfLiteContext* context,
TfLiteDelegate* delegate) -> TfLiteStatus {
auto* simple = static_cast<SimpleDelegate*>(delegate->data_);
TfLiteIntArray* nodes_to_separate =
TfLiteIntArrayCreate(simple->nodes_.size());
// Mark nodes that we want in TfLiteIntArray* structure.
int index = 0;
for (auto node_index : simple->nodes_) {
nodes_to_separate->data[index++] = node_index;
// Make sure the node is added.
TfLiteNode* node;
TfLiteRegistration* reg;
context->GetNodeAndRegistration(context, node_index, &node, &reg);
TFLITE_CHECK_EQ(reg->builtin_code, tflite::BuiltinOperator_CUSTOM);
TFLITE_CHECK_EQ(strcmp(reg->custom_name, "my_add"), 0);
}
// Check that all nodes are available
TfLiteIntArray* execution_plan;
TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &execution_plan));
for (int exec_index = 0; exec_index < execution_plan->size; exec_index++) {
int node_index = execution_plan->data[exec_index];
TfLiteNode* node;
TfLiteRegistration* reg;
context->GetNodeAndRegistration(context, node_index, &node, &reg);
if (exec_index == node_index) {
// Check op details only if it wasn't delegated already.
TFLITE_CHECK_EQ(reg->builtin_code, tflite::BuiltinOperator_CUSTOM);
TFLITE_CHECK_EQ(strcmp(reg->custom_name, "my_add"), 0);
}
}
// Get preview of delegate partitioning from the context.
TfLiteDelegateParams* params_array;
int num_partitions;
TFLITE_CHECK_EQ(
context->PreviewDelegatePartitioning(context, nodes_to_separate,
&params_array, &num_partitions),
kTfLiteOk);
if (simple->min_ops_per_subset() > 0) {
// Build a new vector of ops from subsets with at least the minimum
// size.
std::vector<int> allowed_ops;
for (int idx = 0; idx < num_partitions; ++idx) {
const auto* nodes_in_subset = params_array[idx].nodes_to_replace;
if (nodes_in_subset->size < simple->min_ops_per_subset()) continue;
allowed_ops.insert(allowed_ops.end(), nodes_in_subset->data,
nodes_in_subset->data + nodes_in_subset->size);
}
// Free existing nodes_to_separate & initialize a new array with
// allowed_ops.
TfLiteIntArrayFree(nodes_to_separate);
nodes_to_separate = TfLiteIntArrayCreate(allowed_ops.size());
memcpy(nodes_to_separate->data, allowed_ops.data(),
sizeof(int) * nodes_to_separate->size);
}
// Another call to PreviewDelegatePartitioning should be okay, since
// partitioning memory is managed by context.
TFLITE_CHECK_EQ(
context->PreviewDelegatePartitioning(context, nodes_to_separate,
&params_array, &num_partitions),
kTfLiteOk);
context->ReplaceNodeSubsetsWithDelegateKernels(
context, simple->FakeFusedRegistration(), nodes_to_separate, delegate);
TfLiteIntArrayFree(nodes_to_separate);
return kTfLiteOk;
};
delegate_.CopyToBufferHandle = [](TfLiteContext* context,
TfLiteDelegate* delegate,
TfLiteBufferHandle buffer_handle,
TfLiteTensor* tensor) -> TfLiteStatus {
// TODO(b/156586986): Implement tests to test buffer copying logic.
return kTfLiteOk;
};
delegate_.CopyFromBufferHandle = [](TfLiteContext* context,
TfLiteDelegate* delegate,
TfLiteBufferHandle buffer_handle,
TfLiteTensor* output) -> TfLiteStatus {
TFLITE_CHECK_GE(buffer_handle, -1);
TFLITE_CHECK_EQ(output->buffer_handle, buffer_handle);
const float floats[] = {6., 6., 6.};
int num = output->dims->data[0];
for (int i = 0; i < num; i++) {
output->data.f[i] = floats[i];
}
return kTfLiteOk;
};
delegate_.FreeBufferHandle =
[](TfLiteContext* context, TfLiteDelegate* delegate,
TfLiteBufferHandle* handle) { *handle = kTfLiteNullBufferHandle; };
// Store a type-punned pointer to this SimpleDelegate structure.
delegate_.data_ = static_cast<void*>(this);
delegate_.flags = delegate_flags;
}
TfLiteRegistration TestDelegate::SimpleDelegate::FakeFusedRegistration() {
TfLiteRegistration reg = {nullptr};
reg.custom_name = "fake_fused_op";
// Different flavors of the delegate kernel's Invoke(), dependent on
// testing parameters.
if (fail_delegate_node_invoke_) {
reg.invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
return kTfLiteError;
};
} else {
reg.invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
// Set output data to the element-wise sum of the input data.
const TfLiteTensor* a0;
const TfLiteTensor* a1;
if (node->inputs->size == 2) {
a0 = GetInput(context, node, 0);
a1 = GetInput(context, node, 1);
} else {
a0 = GetInput(context, node, 0);
a1 = a0;
}
TfLiteTensor* out;
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &out));
int num = 1;
for (int i = 0; i < a0->dims->size; ++i) {
num *= a0->dims->data[i];
}
for (int i = 0; i < num; i++) {
out->data.f[i] = a0->data.f[i] + a1->data.f[i];
}
if (out->buffer_handle != kTfLiteNullBufferHandle) {
// Make the data stale so that CopyFromBufferHandle can be invoked
out->data_is_stale = true;
}
return kTfLiteOk;
};
}
// Different flavors of the delegate kernel's Prepare(), dependent on
// testing parameters.
if (automatic_shape_propagation_) {
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
// Shapes should already be propagated by the runtime; we just need to
// check them.
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input1));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
const int input_dims_size = input1->dims->size;
TF_LITE_ENSURE(context, output->dims->size == input_dims_size);
for (int i = 0; i < input_dims_size; ++i) {
TF_LITE_ENSURE(context, output->dims->data[i] == input1->dims->data[i]);
}
return kTfLiteOk;
};
} else if (fail_delegate_node_prepare_) {
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
return kTfLiteError;
};
} else {
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
// Set output size to input size
const TfLiteTensor* input1;
const TfLiteTensor* input2;
if (node->inputs->size == 2) {
input1 = GetInput(context, node, 0);
input2 = GetInput(context, node, 1);
} else {
input1 = GetInput(context, node, 0);
input2 = input1;
}
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
TF_LITE_ENSURE_STATUS(context->ResizeTensor(
context, output, TfLiteIntArrayCopy(input1->dims)));
return kTfLiteOk;
};
}
return reg;
}
void TestFP16Delegation::SetUp() {
interpreter_.reset(new Interpreter);
interpreter_->AddTensors(13);
interpreter_->SetInputs({0});
interpreter_->SetOutputs({12});
float16_const_ = Eigen::half_impl::float_to_half_rtne(2.f);
// TENSORS.
TfLiteQuantizationParams quant;
// Input.
interpreter_->SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {1}, quant);
// fp16 constant, dequantize output, Add0 output.
interpreter_->SetTensorParametersReadOnly(
1, kTfLiteFloat16, "", {1}, quant,
reinterpret_cast<const char*>(&float16_const_), sizeof(TfLiteFloat16));
interpreter_->SetTensorParametersReadWrite(2, kTfLiteFloat32, "", {1}, quant);
interpreter_->SetTensorParametersReadWrite(3, kTfLiteFloat32, "", {1}, quant);
// fp16 constant, dequantize output, Add1 output.
interpreter_->SetTensorParametersReadOnly(
4, kTfLiteFloat16, "", {1}, quant,
reinterpret_cast<const char*>(&float16_const_), sizeof(TfLiteFloat16));
interpreter_->SetTensorParametersReadWrite(5, kTfLiteFloat32, "", {1}, quant);
interpreter_->SetTensorParametersReadWrite(6, kTfLiteFloat32, "", {1}, quant);
// fp16 constant, dequantize output, Mul0 output.
interpreter_->SetTensorParametersReadOnly(
7, kTfLiteFloat16, "", {1}, quant,
reinterpret_cast<const char*>(&float16_const_), sizeof(TfLiteFloat16));
interpreter_->SetTensorParametersReadWrite(8, kTfLiteFloat32, "", {1}, quant);
interpreter_->SetTensorParametersReadWrite(9, kTfLiteFloat32, "", {1}, quant);
// fp16 constant, dequantize output, Add2 output.
interpreter_->SetTensorParametersReadOnly(
10, kTfLiteFloat16, "", {1}, quant,
reinterpret_cast<const char*>(&float16_const_), sizeof(TfLiteFloat16));
interpreter_->SetTensorParametersReadWrite(11, kTfLiteFloat32, "", {1},
quant);
interpreter_->SetTensorParametersReadWrite(12, kTfLiteFloat32, "", {1},
quant);
// NODES.
auto* add_reg = ops::builtin::Register_ADD();
auto* mul_reg = ops::builtin::Register_MUL();
auto* deq_reg = ops::builtin::Register_DEQUANTIZE();
add_reg->builtin_code = kTfLiteBuiltinAdd;
deq_reg->builtin_code = kTfLiteBuiltinDequantize;
mul_reg->builtin_code = kTfLiteBuiltinMul;
TfLiteAddParams* builtin_data0 =
reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
TfLiteAddParams* builtin_data1 =
reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
TfLiteMulParams* builtin_data2 =
reinterpret_cast<TfLiteMulParams*>(malloc(sizeof(TfLiteMulParams)));
TfLiteAddParams* builtin_data3 =
reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
builtin_data0->activation = kTfLiteActNone;
builtin_data1->activation = kTfLiteActNone;
builtin_data2->activation = kTfLiteActNone;
builtin_data3->activation = kTfLiteActNone;
interpreter_->AddNodeWithParameters({1}, {2}, nullptr, 0, nullptr, deq_reg);
interpreter_->AddNodeWithParameters({0, 2}, {3}, nullptr, 0, builtin_data0,
add_reg);
interpreter_->AddNodeWithParameters({4}, {5}, nullptr, 0, nullptr, deq_reg);
interpreter_->AddNodeWithParameters({3, 5}, {6}, nullptr, 0, builtin_data1,
add_reg);
interpreter_->AddNodeWithParameters({7}, {8}, nullptr, 0, nullptr, deq_reg);
interpreter_->AddNodeWithParameters({6, 8}, {9}, nullptr, 0, builtin_data2,
mul_reg);
interpreter_->AddNodeWithParameters({10}, {11}, nullptr, 0, nullptr, deq_reg);
interpreter_->AddNodeWithParameters({9, 11}, {12}, nullptr, 0, builtin_data3,
add_reg);
}
void TestFP16Delegation::VerifyInvoke() {
std::vector<float> input = {3.0f};
std::vector<float> expected_output = {16.0f};
const int input_tensor_idx = interpreter_->inputs()[0];
const int output_tensor_idx = interpreter_->outputs()[0];
memcpy(interpreter_->typed_tensor<float>(input_tensor_idx), input.data(),
sizeof(float));
ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk);
TfLiteTensor* output_tensor = interpreter_->tensor(output_tensor_idx);
for (int i = 0; i < 1; ++i) {
EXPECT_EQ(output_tensor->data.f[i], expected_output[i]) << i;
}
}
TestFP16Delegation::FP16Delegate::FP16Delegate(int num_delegated_subsets,
bool fail_node_prepare,
bool fail_node_invoke)
: num_delegated_subsets_(num_delegated_subsets),
fail_delegate_node_prepare_(fail_node_prepare),
fail_delegate_node_invoke_(fail_node_invoke) {
delegate_.Prepare = [](TfLiteContext* context,
TfLiteDelegate* delegate) -> TfLiteStatus {
auto* fp16_delegate = static_cast<FP16Delegate*>(delegate->data_);
// FP16 graph partitioning.
delegates::IsNodeSupportedFn node_supported_fn =
[=](TfLiteContext* context, TfLiteNode* node,
TfLiteRegistration* registration,
std::string* unsupported_details) -> bool {
return registration->builtin_code == kTfLiteBuiltinAdd;
};
delegates::FP16GraphPartitionHelper partition_helper(context,
node_supported_fn);
TfLiteIntArray* nodes_to_separate = nullptr;
if (partition_helper.Partition(nullptr) != kTfLiteOk) {
nodes_to_separate = TfLiteIntArrayCreate(0);
} else {
std::vector<int> ops_to_replace =
partition_helper.GetNodesOfFirstNLargestPartitions(
fp16_delegate->num_delegated_subsets());
nodes_to_separate = ConvertVectorToTfLiteIntArray(ops_to_replace);
}
context->ReplaceNodeSubsetsWithDelegateKernels(
context, fp16_delegate->FakeFusedRegistration(), nodes_to_separate,
delegate);
TfLiteIntArrayFree(nodes_to_separate);
return kTfLiteOk;
};
delegate_.CopyFromBufferHandle =
[](TfLiteContext* context, TfLiteDelegate* delegate,
TfLiteBufferHandle buffer_handle,
TfLiteTensor* output) -> TfLiteStatus { return kTfLiteOk; };
delegate_.FreeBufferHandle = nullptr;
delegate_.CopyToBufferHandle = nullptr;
// Store a type-punned pointer to this FP16Delegate structure.
delegate_.data_ = static_cast<void*>(this);
delegate_.flags = kTfLiteDelegateFlagsNone;
}
TfLiteRegistration TestFP16Delegation::FP16Delegate::FakeFusedRegistration() {
TfLiteRegistration reg = {nullptr};
reg.custom_name = "fake_fp16_add_op";
// Different flavors of the delegate kernel's Invoke(), dependent on
// testing parameters.
if (fail_delegate_node_invoke_) {
reg.invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
return kTfLiteError;
};
} else {
reg.invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
float output = 0;
for (int i = 0; i < node->inputs->size; ++i) {
const TfLiteTensor* input_tensor = GetInput(context, node, i);
if (input_tensor->type == kTfLiteFloat32) {
output += input_tensor->data.f[0];
} else {
// All constants are 2.
output += 2;
}
}
TfLiteTensor* out = GetOutput(context, node, 0);
out->data.f[0] = output;
return kTfLiteOk;
};
}
// Different flavors of the delegate kernel's Prepare(), dependent on
// testing parameters.
if (fail_delegate_node_prepare_) {
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
return kTfLiteError;
};
} else {
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
// Set output size to input size
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
TF_LITE_ENSURE_STATUS(context->ResizeTensor(
context, output, TfLiteIntArrayCopy(input->dims)));
return kTfLiteOk;
};
}
return reg;
}
} // namespace test_utils
} // namespace delegates
} // namespace tflite

tensorflow/lite/delegates/delegate_test_util.h

@@ -0,0 +1,138 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_DELEGATES_DELEGATE_TEST_UTIL_H_
#define TENSORFLOW_LITE_DELEGATES_DELEGATE_TEST_UTIL_H_
#include <stdint.h>
#include <memory>
#include <vector>
#include <gtest/gtest.h>
#include "third_party/eigen3/Eigen/Core"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
namespace tflite {
namespace delegates {
namespace test_utils {
// Build a kernel registration for a custom addition op that adds its two
// tensor inputs to produce a tensor output.
TfLiteRegistration AddOpRegistration();
// TestDelegate is a friend of Interpreter to access RemoveAllDelegates().
class TestDelegate : public ::testing::Test {
protected:
void SetUp() override;
void TearDown() override;
TfLiteBufferHandle last_allocated_handle_ = kTfLiteNullBufferHandle;
TfLiteBufferHandle AllocateBufferHandle() { return ++last_allocated_handle_; }
TfLiteStatus RemoveAllDelegates() {
return interpreter_->RemoveAllDelegates();
}
protected:
class SimpleDelegate {
public:
// Create a simple implementation of a TfLiteDelegate. We use the C++ class
// SimpleDelegate, which can produce a handle to a TfLiteDelegate that is
// value-copyable and compatible with TfLite.
// fail_node_prepare: To simulate failure of Delegate node's Prepare().
// min_ops_per_subset: If >0, partitioning preview is used to choose only
// those subsets with at least min_ops_per_subset nodes.
// fail_node_invoke: To simulate failure of Delegate node's Invoke().
// automatic_shape_propagation: This assumes that the runtime will propagate
// shapes using the original execution plan.
explicit SimpleDelegate(const std::vector<int>& nodes,
int64_t delegate_flags = kTfLiteDelegateFlagsNone,
bool fail_node_prepare = false,
int min_ops_per_subset = 0,
bool fail_node_invoke = false,
bool automatic_shape_propagation = false);
TfLiteRegistration FakeFusedRegistration();
TfLiteDelegate* get_tf_lite_delegate() { return &delegate_; }
int min_ops_per_subset() { return min_ops_per_subset_; }
private:
std::vector<int> nodes_;
TfLiteDelegate delegate_;
bool fail_delegate_node_prepare_ = false;
int min_ops_per_subset_ = 0;
bool fail_delegate_node_invoke_ = false;
bool automatic_shape_propagation_ = false;
};
std::unique_ptr<Interpreter> interpreter_;
std::unique_ptr<SimpleDelegate> delegate_, delegate2_;
};
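// Example (a sketch drawn from TestFallbackWithMultipleDelegates in
// interpreter_utils_test.cc): a test constructs a SimpleDelegate that claims
// nodes 1 and 2 and simulates an Invoke() failure, then applies it to the
// fixture's interpreter:
//
//   delegate2_ = std::unique_ptr<SimpleDelegate>(new SimpleDelegate(
//       {1, 2}, kTfLiteDelegateFlagsNone, false /**fail_node_prepare**/,
//       0 /**min_ops_per_subset**/, true /**fail_node_invoke**/));
//   interpreter_->ModifyGraphWithDelegate(delegate2_->get_tf_lite_delegate());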
// Tests delegate functionality related to FP16 graphs.
// Model architecture:
// 1->DEQ->2 4->DEQ->5 7->DEQ->8 10->DEQ->11
// | | | |
// 0----->ADD->3----->ADD->6----->MUL->9------>ADD-->12
// Input: 0, Output: 12.
// All constants are 2, so the function is: (x + 2 + 2) * 2 + 2 = 2x + 10
//
// Delegate only supports ADD, so there can be up to two delegated partitions.
// TODO(b/156707497): Add more cases here once we have landed CPU kernels
// supporting FP16.
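// For example, with input x = 3 the model computes (3 + 2 + 2) * 2 + 2 = 16,
// i.e. 2x + 10; VerifyInvoke() feeds {3.0f} and expects {16.0f}.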
class TestFP16Delegation : public ::testing::TestWithParam<int> {
protected:
void SetUp() override;
void VerifyInvoke();
void TearDown() override { interpreter_.reset(); }
protected:
class FP16Delegate {
public:
// Uses FP16GraphPartitionHelper to accept ADD nodes with fp16 input.
explicit FP16Delegate(int num_delegated_subsets,
bool fail_node_prepare = false,
bool fail_node_invoke = false);
TfLiteRegistration FakeFusedRegistration();
TfLiteDelegate* get_tf_lite_delegate() { return &delegate_; }
int num_delegated_subsets() { return num_delegated_subsets_; }
private:
TfLiteDelegate delegate_;
int num_delegated_subsets_;
bool fail_delegate_node_prepare_ = false;
bool fail_delegate_node_invoke_ = false;
};
std::unique_ptr<Interpreter> interpreter_;
std::unique_ptr<FP16Delegate> delegate_;
Eigen::half float16_const_;
};
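// Example (a sketch drawn from TestFP16Delegation::DelegateInvokeWithCPUFallback
// in interpreter_utils_test.cc): delegate GetParam() partitions, simulate an
// Invoke() failure, and expect the CPU fallback to still produce correct output:
//
//   delegate_ = std::unique_ptr<FP16Delegate>(new FP16Delegate(
//       /**num_delegated_subsets**/ GetParam(), /**fail_node_prepare**/ false,
//       /**fail_node_invoke**/ true));
//   interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate());
//   EXPECT_EQ(InterpreterUtils::InvokeWithCPUFallback(interpreter_.get()),
//             kTfLiteDelegateError);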
} // namespace test_utils
} // namespace delegates
} // namespace tflite
#endif  // TENSORFLOW_LITE_DELEGATES_DELEGATE_TEST_UTIL_H_

tensorflow/lite/delegates/interpreter_utils_test.cc

@@ -0,0 +1,142 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/interpreter_utils.h"
#include <string.h>
#include <memory>
#include <vector>
#include <gtest/gtest.h>
#include "tensorflow/lite/delegates/delegate_test_util.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/testing/util.h"
namespace tflite {
namespace delegates {
using test_utils::TestDelegate;
using test_utils::TestFP16Delegation;
namespace {
TEST_F(TestDelegate, DelegateNodeInvokeFailureFallback) {
delegate_ = std::unique_ptr<SimpleDelegate>(new SimpleDelegate(
{0, 1, 2}, kTfLiteDelegateFlagsNone, false /**fail_node_prepare**/,
0 /**min_ops_per_subset**/, true /**fail_node_invoke**/));
ASSERT_EQ(
interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()),
kTfLiteOk);
// Delegation modified execution plan.
ASSERT_EQ(interpreter_->execution_plan().size(), 1);
std::vector<float> input = {1.0f, 2.0f, 3.0f};
std::vector<float> expected_output = {2.0f, 4.0f, 6.0f};
constexpr int kOutputTensorIndex = 3;
memcpy(interpreter_->typed_tensor<float>(0), input.data(), 3 * sizeof(float));
memcpy(interpreter_->typed_tensor<float>(1), input.data(), 3 * sizeof(float));
EXPECT_EQ(
delegates::InterpreterUtils::InvokeWithCPUFallback(interpreter_.get()),
kTfLiteDelegateError);
// Delegation removed, returning to original execution plan.
ASSERT_EQ(interpreter_->execution_plan().size(), 3);
// Check outputs.
TfLiteTensor* tensor = interpreter_->tensor(kOutputTensorIndex);
for (int i = 0; i < 3; ++i) {
EXPECT_EQ(tensor->data.f[i], expected_output[i]) << i;
}
}
TEST_F(TestDelegate, TestFallbackWithMultipleDelegates) {
// First delegate only supports node 0.
// This delegate should support dynamic tensors, otherwise the second won't be
// applied.
delegate_ = std::unique_ptr<SimpleDelegate>(
new SimpleDelegate({0}, kTfLiteDelegateFlagsAllowDynamicTensors));
// Second delegate supports nodes 1 & 2, and makes the graph immutable.
delegate2_ = std::unique_ptr<SimpleDelegate>(new SimpleDelegate(
{1, 2}, kTfLiteDelegateFlagsNone, false /**fail_node_prepare**/,
0 /**min_ops_per_subset**/, true /**fail_node_invoke**/));
// Pre-delegation execution plan should have three nodes.
ASSERT_EQ(interpreter_->execution_plan().size(), 3);
ASSERT_EQ(
interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()),
kTfLiteOk);
ASSERT_EQ(
interpreter_->ModifyGraphWithDelegate(delegate2_->get_tf_lite_delegate()),
kTfLiteOk);
// Should be two delegate nodes.
ASSERT_EQ(interpreter_->execution_plan().size(), 2);
std::vector<float> input = {1.0f, 2.0f, 3.0f};
std::vector<float> expected_output = {2.0f, 4.0f, 6.0f};
constexpr int kOutputTensorIndex = 2;
TfLiteTensor* tensor = interpreter_->tensor(kOutputTensorIndex);
memcpy(interpreter_->typed_tensor<float>(0), input.data(), 3 * sizeof(float));
memcpy(interpreter_->typed_tensor<float>(1), input.data(), 3 * sizeof(float));
EXPECT_EQ(
delegates::InterpreterUtils::InvokeWithCPUFallback(interpreter_.get()),
kTfLiteDelegateError);
// All delegates should be undone.
EXPECT_EQ(interpreter_->execution_plan().size(), 3);
for (int i = 0; i < 3; ++i) {
EXPECT_EQ(tensor->data.f[i], expected_output[i]) << i;
}
}
TEST_P(TestFP16Delegation, DelegateInvokeWithCPUFallback) {
delegate_ = std::unique_ptr<FP16Delegate>(new FP16Delegate(
/**num_delegated_subsets**/ GetParam(), /**fail_node_prepare**/ false,
/**fail_node_invoke**/ true));
ASSERT_EQ(
interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()),
kTfLiteOk);
std::vector<float> input = {3.0f};
std::vector<float> expected_output = {16.0f};
const int input_tensor_idx = interpreter_->inputs()[0];
const int output_tensor_idx = interpreter_->outputs()[0];
memcpy(interpreter_->typed_tensor<float>(input_tensor_idx), input.data(),
sizeof(float));
EXPECT_EQ(
delegates::InterpreterUtils::InvokeWithCPUFallback(interpreter_.get()),
kTfLiteDelegateError);
TfLiteTensor* output_tensor = interpreter_->tensor(output_tensor_idx);
for (int i = 0; i < 1; ++i) {
EXPECT_EQ(output_tensor->data.f[i], expected_output[i]) << i;
}
ASSERT_EQ(interpreter_->execution_plan().size(), 8);
VerifyInvoke();
}
INSTANTIATE_TEST_SUITE_P(TestFP16Delegation, TestFP16Delegation,
::testing::Values(1, 2));
} // anonymous namespace
} // namespace delegates
} // namespace tflite
int main(int argc, char** argv) {
::tflite::LogToStderr();
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

tensorflow/lite/interpreter.h

@@ -40,10 +40,12 @@ limitations under the License.
namespace tflite {
class InterpreterTest;
class TestDelegate;
class InterpreterTest; // Class for friend declarations.
namespace delegates {
class InterpreterUtils; // Class for friend declarations.
namespace test_utils {
class TestDelegate; // Class for friend declarations.
} // namespace test_utils
} // namespace delegates
/// An interpreter for a graph of nodes that input and output from tensors.
@@ -662,8 +664,8 @@ class Interpreter {
};
friend class InterpreterBuilder;
friend class tflite::InterpreterTest;
friend class tflite::TestDelegate;
friend class tflite::delegates::InterpreterUtils;
friend class tflite::delegates::test_utils::TestDelegate;
/// Set the value of an external context.
static void SetExternalContext(struct TfLiteContext* context,