Split delegate_test.cc: move tests of InvokeWithCPUFallback into
a new unit test, interpreter_utils_test.cc, to match the name of the
file interpreter_utils.cc, which defines InvokeWithCPUFallback.

This required moving the test infrastructure that is now shared
between delegate_test.cc and interpreter_utils_test.cc into a separate
compilation unit, delegate_test_util.{h,cc}.

PiperOrigin-RevId: 345034742
Change-Id: If91f0d70d0b4d8c160685f13d6e313dd16fd5425
Author: Fergus Henderson, 2020-12-01 09:21:35 -08:00; committed by TensorFlower Gardener
Parent: d10afb111a
Commit: 0b2e5e42d5
6 changed files with 848 additions and 656 deletions
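For context, the API under test is InterpreterUtils::InvokeWithCPUFallback, declared in tensorflow/lite/delegates/interpreter_utils.h. A minimal sketch of how a client calls it (assuming an interpreter that already has a delegate applied; model building and error handling elided):

  #include "tensorflow/lite/delegates/interpreter_utils.h"

  // Invoke the graph; if a delegate kernel fails, the runtime undoes all
  // delegates and re-runs the original execution plan on the CPU path.
  TfLiteStatus status =
      tflite::delegates::InterpreterUtils::InvokeWithCPUFallback(
          interpreter.get());
  if (status == kTfLiteDelegateError) {
    // A delegate failed, but outputs were still produced via CPU fallback.
  }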

tensorflow/lite/delegates/BUILD

@@ -67,18 +67,20 @@ cc_test(
)
cc_test(
name = "delegate_test",
name = "interpreter_utils_test",
size = "small",
srcs = ["delegate_test.cc"],
srcs = ["interpreter_utils_test.cc"],
features = ["-dynamic_link_test_srcs"], # see go/dynamic_link_test_srcs
deps = [
":delegate_test_util",
":interpreter_utils",
":utils",
"//tensorflow/lite:framework",
"//tensorflow/lite:kernel_api",
"//tensorflow/lite:string",
"//tensorflow/lite:util",
"//tensorflow/lite:version",
"//tensorflow/lite/core/api",
"//tensorflow/lite/c:common",
"//tensorflow/lite/kernels:builtin_ops",
"//tensorflow/lite/kernels:kernel_util",
"//tensorflow/lite/kernels/internal:compatibility",
@@ -87,5 +89,59 @@ cc_test(
"//tensorflow/lite/testing:util",
"//third_party/eigen3",
"@com_google_googletest//:gtest",
"@flatbuffers",
],
)
cc_test(
name = "delegate_test",
size = "small",
srcs = ["delegate_test.cc"],
features = ["-dynamic_link_test_srcs"], # see go/dynamic_link_test_srcs
deps = [
":delegate_test_util",
":interpreter_utils",
":utils",
"//tensorflow/lite:framework",
"//tensorflow/lite:kernel_api",
"//tensorflow/lite:string",
"//tensorflow/lite:util",
"//tensorflow/lite:version",
"//tensorflow/lite/c:common",
"//tensorflow/lite/kernels:builtin_ops",
"//tensorflow/lite/kernels:kernel_util",
"//tensorflow/lite/kernels/internal:compatibility",
"//tensorflow/lite/schema:schema_conversion_utils",
"//tensorflow/lite/schema:schema_fbs",
"//tensorflow/lite/testing:util",
"//third_party/eigen3",
"@com_google_googletest//:gtest",
"@flatbuffers",
],
)
cc_library(
name = "delegate_test_util",
testonly = True,
srcs = ["delegate_test_util.cc"],
hdrs = ["delegate_test_util.h"],
deps = [
":interpreter_utils",
":utils",
"//tensorflow/lite:builtin_ops",
"//tensorflow/lite:framework",
"//tensorflow/lite:string",
"//tensorflow/lite:util",
"//tensorflow/lite:version",
"//tensorflow/lite/c:common",
"//tensorflow/lite/kernels:builtin_ops",
"//tensorflow/lite/kernels:kernel_util",
"//tensorflow/lite/kernels/internal:compatibility",
"//tensorflow/lite/schema:schema_conversion_utils",
"//tensorflow/lite/schema:schema_fbs",
"//tensorflow/lite/testing:util",
"//third_party/eigen3",
"@com_google_googletest//:gtest",
"@flatbuffers",
],
)

tensorflow/lite/delegates/delegate_test.cc

@@ -14,352 +14,32 @@ limitations under the License.
==============================================================================*/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <memory>
#include <utility>
#include <vector>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "third_party/eigen3/Eigen/Core"
#include "tensorflow/lite/builtin_op_data.h"
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/delegates/interpreter_utils.h"
#include "tensorflow/lite/delegates/utils.h"
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/delegates/delegate_test_util.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/interpreter_builder.h"
#include "tensorflow/lite/kernels/builtin_op_kernels.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/schema/schema_conversion_utils.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/testing/util.h"
#include "tensorflow/lite/util.h"
#include "tensorflow/lite/version.h"
namespace tflite {
namespace {
namespace delegates {
// Build a kernel registration for a custom addition op that adds its two
// tensor inputs to produce a tensor output.
TfLiteRegistration AddOpRegistration() {
TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr};
using test_utils::TestDelegate;
using test_utils::TestFP16Delegation;
reg.custom_name = "my_add";
reg.builtin_code = tflite::BuiltinOperator_CUSTOM;
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input1));
const TfLiteTensor* input2;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 1, &input2));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
// Verify that the two inputs have the same shape.
TF_LITE_ENSURE_EQ(context, input1->dims->size, input2->dims->size);
for (int i = 0; i < input1->dims->size; ++i) {
TF_LITE_ENSURE_EQ(context, input1->dims->data[i], input2->dims->data[i]);
}
// Set output shape to match input shape.
TF_LITE_ENSURE_STATUS(context->ResizeTensor(
context, output, TfLiteIntArrayCopy(input1->dims)));
return kTfLiteOk;
};
reg.invoke = [](TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* a0;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &a0));
TF_LITE_ENSURE(context, a0);
TF_LITE_ENSURE(context, a0->data.f);
const TfLiteTensor* a1;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 1, &a1));
TF_LITE_ENSURE(context, a1);
TF_LITE_ENSURE(context, a1->data.f);
TfLiteTensor* out;
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &out));
TF_LITE_ENSURE(context, out);
TF_LITE_ENSURE(context, out->data.f);
// Set output data to element-wise sum of input data.
int num = a0->dims->data[0];
for (int i = 0; i < num; i++) {
out->data.f[i] = a0->data.f[i] + a1->data.f[i];
}
return kTfLiteOk;
};
return reg;
}
} // namespace
// TestDelegate is a friend of Interpreter to access RemoveAllDelegates().
class TestDelegate : public ::testing::Test {
protected:
void SetUp() override {
interpreter_.reset(new Interpreter);
interpreter_->AddTensors(5);
interpreter_->SetInputs({0, 1});
interpreter_->SetOutputs({3, 4});
TfLiteQuantizationParams quant;
interpreter_->SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {3},
quant);
interpreter_->SetTensorParametersReadWrite(1, kTfLiteFloat32, "", {3},
quant);
interpreter_->SetTensorParametersReadWrite(2, kTfLiteFloat32, "", {3},
quant);
interpreter_->SetTensorParametersReadWrite(3, kTfLiteFloat32, "", {3},
quant);
interpreter_->SetTensorParametersReadWrite(4, kTfLiteFloat32, "", {3},
quant);
TfLiteRegistration reg = AddOpRegistration();
interpreter_->AddNodeWithParameters({0, 0}, {2}, nullptr, 0, nullptr, &reg);
interpreter_->AddNodeWithParameters({1, 1}, {3}, nullptr, 0, nullptr, &reg);
interpreter_->AddNodeWithParameters({2, 1}, {4}, nullptr, 0, nullptr, &reg);
}
void TearDown() override {
// The interpreter relies on the delegate to free its resources properly,
// so the delegate must outlive the interpreter.
interpreter_.reset();
delegate_.reset();
}
TfLiteBufferHandle last_allocated_handle_ = kTfLiteNullBufferHandle;
TfLiteBufferHandle AllocateBufferHandle() { return ++last_allocated_handle_; }
TfLiteStatus RemoveAllDelegates() {
return interpreter_->RemoveAllDelegates();
}
protected:
class SimpleDelegate {
public:
// Create a simple implementation of a TfLiteDelegate. We use the C++ class
// SimpleDelegate, which can produce a handle to a TfLiteDelegate that is
// value-copyable and compatible with TfLite.
// fail_node_prepare: To simulate failure of Delegate node's Prepare().
// min_ops_per_subset: If >0, partitioning preview is used to choose only
// those subsets with at least min_ops_per_subset nodes.
// fail_node_invoke: To simulate failure of Delegate node's Invoke().
// automatic_shape_propagation: This assumes that the runtime will propagate
// shapes using the original execution plan.
explicit SimpleDelegate(const std::vector<int>& nodes,
int64_t delegate_flags = kTfLiteDelegateFlagsNone,
bool fail_node_prepare = false,
int min_ops_per_subset = 0,
bool fail_node_invoke = false,
bool automatic_shape_propagation = false)
: nodes_(nodes),
fail_delegate_node_prepare_(fail_node_prepare),
min_ops_per_subset_(min_ops_per_subset),
fail_delegate_node_invoke_(fail_node_invoke),
automatic_shape_propagation_(automatic_shape_propagation) {
delegate_.Prepare = [](TfLiteContext* context,
TfLiteDelegate* delegate) -> TfLiteStatus {
auto* simple = static_cast<SimpleDelegate*>(delegate->data_);
TfLiteIntArray* nodes_to_separate =
TfLiteIntArrayCreate(simple->nodes_.size());
// Mark nodes that we want in TfLiteIntArray* structure.
int index = 0;
for (auto node_index : simple->nodes_) {
nodes_to_separate->data[index++] = node_index;
// Make sure the node is added.
TfLiteNode* node;
TfLiteRegistration* reg;
context->GetNodeAndRegistration(context, node_index, &node, &reg);
TFLITE_CHECK_EQ(reg->builtin_code, tflite::BuiltinOperator_CUSTOM);
TFLITE_CHECK_EQ(strcmp(reg->custom_name, "my_add"), 0);
}
// Check that all nodes are available
TfLiteIntArray* execution_plan;
TF_LITE_ENSURE_STATUS(
context->GetExecutionPlan(context, &execution_plan));
for (int exec_index = 0; exec_index < execution_plan->size;
exec_index++) {
int node_index = execution_plan->data[exec_index];
TfLiteNode* node;
TfLiteRegistration* reg;
context->GetNodeAndRegistration(context, node_index, &node, &reg);
if (exec_index == node_index) {
// Check op details only if it wasn't delegated already.
TFLITE_CHECK_EQ(reg->builtin_code, tflite::BuiltinOperator_CUSTOM);
TFLITE_CHECK_EQ(strcmp(reg->custom_name, "my_add"), 0);
}
}
// Get preview of delegate partitioning from the context.
TfLiteDelegateParams* params_array;
int num_partitions;
TFLITE_CHECK_EQ(
context->PreviewDelegatePartitioning(
context, nodes_to_separate, &params_array, &num_partitions),
kTfLiteOk);
if (simple->min_ops_per_subset() > 0) {
// Build a new vector of ops from subsets with at least the minimum
// size.
std::vector<int> allowed_ops;
for (int idx = 0; idx < num_partitions; ++idx) {
const auto* nodes_in_subset = params_array[idx].nodes_to_replace;
if (nodes_in_subset->size < simple->min_ops_per_subset()) continue;
allowed_ops.insert(allowed_ops.end(), nodes_in_subset->data,
nodes_in_subset->data + nodes_in_subset->size);
}
// Free existing nodes_to_separate & initialize a new array with
// allowed_ops.
TfLiteIntArrayFree(nodes_to_separate);
nodes_to_separate = TfLiteIntArrayCreate(allowed_ops.size());
memcpy(nodes_to_separate->data, allowed_ops.data(),
sizeof(int) * nodes_to_separate->size);
}
// Another call to PreviewDelegatePartitioning should be okay, since
// partitioning memory is managed by context.
TFLITE_CHECK_EQ(
context->PreviewDelegatePartitioning(
context, nodes_to_separate, &params_array, &num_partitions),
kTfLiteOk);
context->ReplaceNodeSubsetsWithDelegateKernels(
context, simple->FakeFusedRegistration(), nodes_to_separate,
delegate);
TfLiteIntArrayFree(nodes_to_separate);
return kTfLiteOk;
};
delegate_.CopyToBufferHandle = [](TfLiteContext* context,
TfLiteDelegate* delegate,
TfLiteBufferHandle buffer_handle,
TfLiteTensor* tensor) -> TfLiteStatus {
// TODO(b/156586986): Implement tests to test buffer copying logic.
return kTfLiteOk;
};
delegate_.CopyFromBufferHandle =
[](TfLiteContext* context, TfLiteDelegate* delegate,
TfLiteBufferHandle buffer_handle,
TfLiteTensor* output) -> TfLiteStatus {
TFLITE_CHECK_GE(buffer_handle, -1);
TFLITE_CHECK_EQ(output->buffer_handle, buffer_handle);
const float floats[] = {6., 6., 6.};
int num = output->dims->data[0];
for (int i = 0; i < num; i++) {
output->data.f[i] = floats[i];
}
return kTfLiteOk;
};
delegate_.FreeBufferHandle =
[](TfLiteContext* context, TfLiteDelegate* delegate,
TfLiteBufferHandle* handle) { *handle = kTfLiteNullBufferHandle; };
// Store a type-punned pointer to this SimpleDelegate structure.
delegate_.data_ = static_cast<void*>(this);
delegate_.flags = delegate_flags;
}
TfLiteRegistration FakeFusedRegistration() {
TfLiteRegistration reg = {nullptr};
reg.custom_name = "fake_fused_op";
// Different flavors of the delegate kernel's Invoke(), dependent on
// testing parameters.
if (fail_delegate_node_invoke_) {
reg.invoke = [](TfLiteContext* context,
TfLiteNode* node) -> TfLiteStatus {
return kTfLiteError;
};
} else {
reg.invoke = [](TfLiteContext* context,
TfLiteNode* node) -> TfLiteStatus {
// Set output data to the element-wise sum of the input data.
const TfLiteTensor* a0;
const TfLiteTensor* a1;
if (node->inputs->size == 2) {
a0 = GetInput(context, node, 0);
a1 = GetInput(context, node, 1);
} else {
a0 = GetInput(context, node, 0);
a1 = a0;
}
TfLiteTensor* out;
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &out));
int num = 1;
for (int i = 0; i < a0->dims->size; ++i) {
num *= a0->dims->data[i];
}
for (int i = 0; i < num; i++) {
out->data.f[i] = a0->data.f[i] + a1->data.f[i];
}
if (out->buffer_handle != kTfLiteNullBufferHandle) {
// Make the data stale so that CopyFromBufferHandle can be invoked
out->data_is_stale = true;
}
return kTfLiteOk;
};
}
// Different flavors of the delegate kernel's Prepare(), dependent on
// testing parameters.
if (automatic_shape_propagation_) {
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
// Shapes should already be propagated by the runtime; we just need to
// check them.
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input1));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
const int input_dims_size = input1->dims->size;
TF_LITE_ENSURE(context, output->dims->size == input_dims_size);
for (int i = 0; i < input_dims_size; ++i) {
TF_LITE_ENSURE(context,
output->dims->data[i] == input1->dims->data[i]);
}
return kTfLiteOk;
};
} else if (fail_delegate_node_prepare_) {
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
return kTfLiteError;
};
} else {
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
// Set output size to input size
const TfLiteTensor* input1;
const TfLiteTensor* input2;
if (node->inputs->size == 2) {
input1 = GetInput(context, node, 0);
input2 = GetInput(context, node, 1);
} else {
input1 = GetInput(context, node, 0);
input2 = input1;
}
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
TF_LITE_ENSURE_STATUS(context->ResizeTensor(
context, output, TfLiteIntArrayCopy(input1->dims)));
return kTfLiteOk;
};
}
return reg;
}
TfLiteDelegate* get_tf_lite_delegate() { return &delegate_; }
int min_ops_per_subset() { return min_ops_per_subset_; }
private:
std::vector<int> nodes_;
TfLiteDelegate delegate_;
bool fail_delegate_node_prepare_ = false;
int min_ops_per_subset_ = 0;
bool fail_delegate_node_invoke_ = false;
bool automatic_shape_propagation_ = false;
};
std::unique_ptr<Interpreter> interpreter_;
std::unique_ptr<SimpleDelegate> delegate_, delegate2_;
};
namespace {
TEST_F(TestDelegate, BasicDelegate) {
@@ -444,34 +124,6 @@ TEST_F(TestDelegate, DelegateNodeInvokeFailure) {
}
}
TEST_F(TestDelegate, DelegateNodeInvokeFailureFallback) {
delegate_ = std::unique_ptr<SimpleDelegate>(new SimpleDelegate(
{0, 1, 2}, kTfLiteDelegateFlagsNone, false /**fail_node_prepare**/,
0 /**min_ops_per_subset**/, true /**fail_node_invoke**/));
ASSERT_EQ(
interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()),
kTfLiteOk);
// Delegation modified execution plan.
ASSERT_EQ(interpreter_->execution_plan().size(), 1);
std::vector<float> input = {1.0f, 2.0f, 3.0f};
std::vector<float> expected_output = {2.0f, 4.0f, 6.0f};
constexpr int kOutputTensorIndex = 3;
memcpy(interpreter_->typed_tensor<float>(0), input.data(), 3 * sizeof(float));
memcpy(interpreter_->typed_tensor<float>(1), input.data(), 3 * sizeof(float));
EXPECT_EQ(
delegates::InterpreterUtils::InvokeWithCPUFallback(interpreter_.get()),
kTfLiteDelegateError);
// Delegation removed, returning to original execution plan.
ASSERT_EQ(interpreter_->execution_plan().size(), 3);
// Check outputs.
TfLiteTensor* tensor = interpreter_->tensor(kOutputTensorIndex);
for (int i = 0; i < 3; ++i) {
EXPECT_EQ(tensor->data.f[i], expected_output[i]) << i;
}
}
TEST_F(TestDelegate, SecondDelegationPrepareFailure) {
// First delegate only supports nodes 1, 2. Gets applied successfully.
// This delegate should support dynamic tensors, otherwise the second won't be
@@ -940,44 +592,6 @@ TEST_F(TestDelegate, TestRequirePropagatedShapes_MultipleDelegates) {
}
}
TEST_F(TestDelegate, TestFallbackWithMultipleDelegates) {
// First delegate only supports node 0.
// This delegate should support dynamic tensors, otherwise the second won't be
// applied.
delegate_ = std::unique_ptr<SimpleDelegate>(
new SimpleDelegate({0}, kTfLiteDelegateFlagsAllowDynamicTensors));
// Second delegate supports nodes 1 & 2, and makes the graph immutable.
delegate2_ = std::unique_ptr<SimpleDelegate>(new SimpleDelegate(
{1, 2}, kTfLiteDelegateFlagsNone, false /**fail_node_prepare**/,
0 /**min_ops_per_subset**/, true /**fail_node_invoke**/));
// Pre-delegation execution plan should have three nodes.
ASSERT_EQ(interpreter_->execution_plan().size(), 3);
ASSERT_EQ(
interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()),
kTfLiteOk);
ASSERT_EQ(
interpreter_->ModifyGraphWithDelegate(delegate2_->get_tf_lite_delegate()),
kTfLiteOk);
// Should be two delegate nodes.
ASSERT_EQ(interpreter_->execution_plan().size(), 2);
std::vector<float> input = {1.0f, 2.0f, 3.0f};
std::vector<float> expected_output = {2.0f, 4.0f, 6.0f};
constexpr int kOutputTensorIndex = 2;
TfLiteTensor* tensor = interpreter_->tensor(kOutputTensorIndex);
memcpy(interpreter_->typed_tensor<float>(0), input.data(), 3 * sizeof(float));
memcpy(interpreter_->typed_tensor<float>(1), input.data(), 3 * sizeof(float));
EXPECT_EQ(
delegates::InterpreterUtils::InvokeWithCPUFallback(interpreter_.get()),
kTfLiteDelegateError);
// All delegates should be undone.
EXPECT_EQ(interpreter_->execution_plan().size(), 3);
for (int i = 0; i < 3; ++i) {
EXPECT_EQ(tensor->data.f[i], expected_output[i]) << i;
}
}
TEST_F(TestDelegate, ReleaseNonPersistentMemoryWithDelegates) {
// First delegate only supports node 0.
// This delegate should support dynamic tensors, otherwise the second won't be
@@ -1297,232 +911,6 @@ TEST_F(TestDelegateWithDynamicTensors, ShapePropagation_FlagNotSet) {
// Tests for FP16 graphs
// =====================
// Tests delegate functionality related to FP16 graphs.
// Model architecture:
// 1->DEQ->2 4->DEQ->5 7->DEQ->8 10->DEQ->11
// | | | |
// 0----->ADD->3----->ADD->6----->MUL->9------>ADD-->12
// Input: 0, Output: 12.
// All constants are 2, so the function is: (x + 2 + 2) * 2 + 2 = 2x + 10
//
// Delegate only supports ADD, so there can be up to two delegated partitions.
// TODO(b/156707497): Add more cases here once we have landed CPU kernels
// supporting FP16.
class TestFP16Delegation : public ::testing::TestWithParam<int> {
protected:
void SetUp() override {
interpreter_.reset(new Interpreter);
interpreter_->AddTensors(13);
interpreter_->SetInputs({0});
interpreter_->SetOutputs({12});
float16_const_ = Eigen::half_impl::float_to_half_rtne(2.f);
// TENSORS.
TfLiteQuantizationParams quant;
// Input.
interpreter_->SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {1},
quant);
// fp16 constant, dequantize output, Add0 output.
interpreter_->SetTensorParametersReadOnly(
1, kTfLiteFloat16, "", {1}, quant,
reinterpret_cast<const char*>(&float16_const_), sizeof(TfLiteFloat16));
interpreter_->SetTensorParametersReadWrite(2, kTfLiteFloat32, "", {1},
quant);
interpreter_->SetTensorParametersReadWrite(3, kTfLiteFloat32, "", {1},
quant);
// fp16 constant, dequantize output, Add1 output.
interpreter_->SetTensorParametersReadOnly(
4, kTfLiteFloat16, "", {1}, quant,
reinterpret_cast<const char*>(&float16_const_), sizeof(TfLiteFloat16));
interpreter_->SetTensorParametersReadWrite(5, kTfLiteFloat32, "", {1},
quant);
interpreter_->SetTensorParametersReadWrite(6, kTfLiteFloat32, "", {1},
quant);
// fp16 constant, dequantize output, Mul0 output.
interpreter_->SetTensorParametersReadOnly(
7, kTfLiteFloat16, "", {1}, quant,
reinterpret_cast<const char*>(&float16_const_), sizeof(TfLiteFloat16));
interpreter_->SetTensorParametersReadWrite(8, kTfLiteFloat32, "", {1},
quant);
interpreter_->SetTensorParametersReadWrite(9, kTfLiteFloat32, "", {1},
quant);
// fp16 constant, dequantize output, Add2 output.
interpreter_->SetTensorParametersReadOnly(
10, kTfLiteFloat16, "", {1}, quant,
reinterpret_cast<const char*>(&float16_const_), sizeof(TfLiteFloat16));
interpreter_->SetTensorParametersReadWrite(11, kTfLiteFloat32, "", {1},
quant);
interpreter_->SetTensorParametersReadWrite(12, kTfLiteFloat32, "", {1},
quant);
// NODES.
auto* add_reg = ops::builtin::Register_ADD();
auto* mul_reg = ops::builtin::Register_MUL();
auto* deq_reg = ops::builtin::Register_DEQUANTIZE();
add_reg->builtin_code = kTfLiteBuiltinAdd;
deq_reg->builtin_code = kTfLiteBuiltinDequantize;
mul_reg->builtin_code = kTfLiteBuiltinMul;
TfLiteAddParams* builtin_data0 =
reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
TfLiteAddParams* builtin_data1 =
reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
TfLiteMulParams* builtin_data2 =
reinterpret_cast<TfLiteMulParams*>(malloc(sizeof(TfLiteMulParams)));
TfLiteAddParams* builtin_data3 =
reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
builtin_data0->activation = kTfLiteActNone;
builtin_data1->activation = kTfLiteActNone;
builtin_data2->activation = kTfLiteActNone;
builtin_data3->activation = kTfLiteActNone;
interpreter_->AddNodeWithParameters({1}, {2}, nullptr, 0, nullptr, deq_reg);
interpreter_->AddNodeWithParameters({0, 2}, {3}, nullptr, 0, builtin_data0,
add_reg);
interpreter_->AddNodeWithParameters({4}, {5}, nullptr, 0, nullptr, deq_reg);
interpreter_->AddNodeWithParameters({3, 5}, {6}, nullptr, 0, builtin_data1,
add_reg);
interpreter_->AddNodeWithParameters({7}, {8}, nullptr, 0, nullptr, deq_reg);
interpreter_->AddNodeWithParameters({6, 8}, {9}, nullptr, 0, builtin_data2,
mul_reg);
interpreter_->AddNodeWithParameters({10}, {11}, nullptr, 0, nullptr,
deq_reg);
interpreter_->AddNodeWithParameters({9, 11}, {12}, nullptr, 0,
builtin_data3, add_reg);
}
void VerifyInvoke() {
std::vector<float> input = {3.0f};
std::vector<float> expected_output = {16.0f};
const int input_tensor_idx = interpreter_->inputs()[0];
const int output_tensor_idx = interpreter_->outputs()[0];
memcpy(interpreter_->typed_tensor<float>(input_tensor_idx), input.data(),
sizeof(float));
ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk);
TfLiteTensor* output_tensor = interpreter_->tensor(output_tensor_idx);
for (int i = 0; i < 1; ++i) {
EXPECT_EQ(output_tensor->data.f[i], expected_output[i]) << i;
}
}
void TearDown() override { interpreter_.reset(); }
protected:
class FP16Delegate {
public:
// Uses FP16GraphPartitionHelper to accept ADD nodes with fp16 input.
explicit FP16Delegate(int num_delegated_subsets,
bool fail_node_prepare = false,
bool fail_node_invoke = false)
: num_delegated_subsets_(num_delegated_subsets),
fail_delegate_node_prepare_(fail_node_prepare),
fail_delegate_node_invoke_(fail_node_invoke) {
delegate_.Prepare = [](TfLiteContext* context,
TfLiteDelegate* delegate) -> TfLiteStatus {
auto* fp16_delegate = static_cast<FP16Delegate*>(delegate->data_);
// FP16 graph partitioning.
delegates::IsNodeSupportedFn node_supported_fn =
[=](TfLiteContext* context, TfLiteNode* node,
TfLiteRegistration* registration,
std::string* unsupported_details) -> bool {
return registration->builtin_code == kTfLiteBuiltinAdd;
};
delegates::FP16GraphPartitionHelper partition_helper(context,
node_supported_fn);
TfLiteIntArray* nodes_to_separate = nullptr;
if (partition_helper.Partition(nullptr) != kTfLiteOk) {
nodes_to_separate = TfLiteIntArrayCreate(0);
} else {
std::vector<int> ops_to_replace =
partition_helper.GetNodesOfFirstNLargestPartitions(
fp16_delegate->num_delegated_subsets());
nodes_to_separate = ConvertVectorToTfLiteIntArray(ops_to_replace);
}
context->ReplaceNodeSubsetsWithDelegateKernels(
context, fp16_delegate->FakeFusedRegistration(), nodes_to_separate,
delegate);
TfLiteIntArrayFree(nodes_to_separate);
return kTfLiteOk;
};
delegate_.CopyFromBufferHandle =
[](TfLiteContext* context, TfLiteDelegate* delegate,
TfLiteBufferHandle buffer_handle,
TfLiteTensor* output) -> TfLiteStatus { return kTfLiteOk; };
delegate_.FreeBufferHandle = nullptr;
delegate_.CopyToBufferHandle = nullptr;
// Store a type-punned pointer to this FP16Delegate structure.
delegate_.data_ = static_cast<void*>(this);
delegate_.flags = kTfLiteDelegateFlagsNone;
}
TfLiteRegistration FakeFusedRegistration() {
TfLiteRegistration reg = {nullptr};
reg.custom_name = "fake_fp16_add_op";
// Different flavors of the delegate kernel's Invoke(), dependent on
// testing parameters.
if (fail_delegate_node_invoke_) {
reg.invoke = [](TfLiteContext* context,
TfLiteNode* node) -> TfLiteStatus {
return kTfLiteError;
};
} else {
reg.invoke = [](TfLiteContext* context,
TfLiteNode* node) -> TfLiteStatus {
float output = 0;
for (int i = 0; i < node->inputs->size; ++i) {
const TfLiteTensor* input_tensor = GetInput(context, node, i);
if (input_tensor->type == kTfLiteFloat32) {
output += input_tensor->data.f[0];
} else {
// All constants are 2.
output += 2;
}
}
TfLiteTensor* out = GetOutput(context, node, 0);
out->data.f[0] = output;
return kTfLiteOk;
};
}
// Different flavors of the delegate kernel's Prepare(), dependent on
// testing parameters.
if (fail_delegate_node_prepare_) {
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
return kTfLiteError;
};
} else {
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
// Set output size to input size
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
TF_LITE_ENSURE_STATUS(context->ResizeTensor(
context, output, TfLiteIntArrayCopy(input->dims)));
return kTfLiteOk;
};
}
return reg;
}
TfLiteDelegate* get_tf_lite_delegate() { return &delegate_; }
int num_delegated_subsets() { return num_delegated_subsets_; }
private:
TfLiteDelegate delegate_;
int num_delegated_subsets_;
bool fail_delegate_node_prepare_ = false;
bool fail_delegate_node_invoke_ = false;
};
std::unique_ptr<Interpreter> interpreter_;
std::unique_ptr<FP16Delegate> delegate_;
Eigen::half float16_const_;
};
TEST_P(TestFP16Delegation, NonDelegatedInterpreterWorks) {
ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
VerifyInvoke();
@@ -1551,38 +939,11 @@ TEST_P(TestFP16Delegation, DelegatePrepareFails) {
VerifyInvoke();
}
TEST_P(TestFP16Delegation, DelegateInvokeWithCPUFallback) {
delegate_ = std::unique_ptr<FP16Delegate>(new FP16Delegate(
/**num_delegated_subsets**/ GetParam(), /**fail_node_prepare**/ false,
/**fail_node_invoke**/ true));
ASSERT_EQ(
interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()),
kTfLiteOk);
std::vector<float> input = {3.0f};
std::vector<float> expected_output = {16.0f};
const int input_tensor_idx = interpreter_->inputs()[0];
const int output_tensor_idx = interpreter_->outputs()[0];
memcpy(interpreter_->typed_tensor<float>(input_tensor_idx), input.data(),
sizeof(float));
EXPECT_EQ(
delegates::InterpreterUtils::InvokeWithCPUFallback(interpreter_.get()),
kTfLiteDelegateError);
TfLiteTensor* output_tensor = interpreter_->tensor(output_tensor_idx);
for (int i = 0; i < 1; ++i) {
EXPECT_EQ(output_tensor->data.f[i], expected_output[i]) << i;
}
ASSERT_EQ(interpreter_->execution_plan().size(), 8);
VerifyInvoke();
}
INSTANTIATE_TEST_SUITE_P(TestFP16Delegation, TestFP16Delegation,
::testing::Values(1, 2));
} // namespace
} // anonymous namespace
} // namespace delegates
} // namespace tflite
int main(int argc, char** argv) {

tensorflow/lite/delegates/delegate_test_util.cc

@@ -0,0 +1,493 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/delegate_test_util.h"
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <memory>
#include <string>
#include <vector>
#include <gtest/gtest.h>
#include "third_party/eigen3/Eigen/Core"
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/delegates/utils.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/builtin_op_kernels.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/string_type.h"
#include "tensorflow/lite/util.h"
namespace tflite {
namespace delegates {
namespace test_utils {
TfLiteRegistration AddOpRegistration() {
TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr};
reg.custom_name = "my_add";
reg.builtin_code = tflite::BuiltinOperator_CUSTOM;
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input1));
const TfLiteTensor* input2;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 1, &input2));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
// Verify that the two inputs have the same shape.
TF_LITE_ENSURE_EQ(context, input1->dims->size, input2->dims->size);
for (int i = 0; i < input1->dims->size; ++i) {
TF_LITE_ENSURE_EQ(context, input1->dims->data[i], input2->dims->data[i]);
}
// Set output shape to match input shape.
TF_LITE_ENSURE_STATUS(context->ResizeTensor(
context, output, TfLiteIntArrayCopy(input1->dims)));
return kTfLiteOk;
};
reg.invoke = [](TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* a0;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &a0));
TF_LITE_ENSURE(context, a0);
TF_LITE_ENSURE(context, a0->data.f);
const TfLiteTensor* a1;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 1, &a1));
TF_LITE_ENSURE(context, a1);
TF_LITE_ENSURE(context, a1->data.f);
TfLiteTensor* out;
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &out));
TF_LITE_ENSURE(context, out);
TF_LITE_ENSURE(context, out->data.f);
// Set output data to element-wise sum of input data.
int num = a0->dims->data[0];
for (int i = 0; i < num; i++) {
out->data.f[i] = a0->data.f[i] + a1->data.f[i];
}
return kTfLiteOk;
};
return reg;
}
void TestDelegate::SetUp() {
interpreter_.reset(new Interpreter);
interpreter_->AddTensors(5);
interpreter_->SetInputs({0, 1});
interpreter_->SetOutputs({3, 4});
TfLiteQuantizationParams quant;
interpreter_->SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {3}, quant);
interpreter_->SetTensorParametersReadWrite(1, kTfLiteFloat32, "", {3}, quant);
interpreter_->SetTensorParametersReadWrite(2, kTfLiteFloat32, "", {3}, quant);
interpreter_->SetTensorParametersReadWrite(3, kTfLiteFloat32, "", {3}, quant);
interpreter_->SetTensorParametersReadWrite(4, kTfLiteFloat32, "", {3}, quant);
TfLiteRegistration reg = AddOpRegistration();
interpreter_->AddNodeWithParameters({0, 0}, {2}, nullptr, 0, nullptr, &reg);
interpreter_->AddNodeWithParameters({1, 1}, {3}, nullptr, 0, nullptr, &reg);
interpreter_->AddNodeWithParameters({2, 1}, {4}, nullptr, 0, nullptr, &reg);
}
void TestDelegate::TearDown() {
// The interpreter relies on the delegate to free its resources properly,
// so the delegate must outlive the interpreter.
interpreter_.reset();
delegate_.reset();
}
TestDelegate::SimpleDelegate::SimpleDelegate(const std::vector<int>& nodes,
int64_t delegate_flags,
bool fail_node_prepare,
int min_ops_per_subset,
bool fail_node_invoke,
bool automatic_shape_propagation)
: nodes_(nodes),
fail_delegate_node_prepare_(fail_node_prepare),
min_ops_per_subset_(min_ops_per_subset),
fail_delegate_node_invoke_(fail_node_invoke),
automatic_shape_propagation_(automatic_shape_propagation) {
delegate_.Prepare = [](TfLiteContext* context,
TfLiteDelegate* delegate) -> TfLiteStatus {
auto* simple = static_cast<SimpleDelegate*>(delegate->data_);
TfLiteIntArray* nodes_to_separate =
TfLiteIntArrayCreate(simple->nodes_.size());
// Mark nodes that we want in TfLiteIntArray* structure.
int index = 0;
for (auto node_index : simple->nodes_) {
nodes_to_separate->data[index++] = node_index;
// Make sure the node is added.
TfLiteNode* node;
TfLiteRegistration* reg;
context->GetNodeAndRegistration(context, node_index, &node, &reg);
TFLITE_CHECK_EQ(reg->builtin_code, tflite::BuiltinOperator_CUSTOM);
TFLITE_CHECK_EQ(strcmp(reg->custom_name, "my_add"), 0);
}
// Check that all nodes are available
TfLiteIntArray* execution_plan;
TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &execution_plan));
for (int exec_index = 0; exec_index < execution_plan->size; exec_index++) {
int node_index = execution_plan->data[exec_index];
TfLiteNode* node;
TfLiteRegistration* reg;
context->GetNodeAndRegistration(context, node_index, &node, &reg);
if (exec_index == node_index) {
// Check op details only if it wasn't delegated already.
TFLITE_CHECK_EQ(reg->builtin_code, tflite::BuiltinOperator_CUSTOM);
TFLITE_CHECK_EQ(strcmp(reg->custom_name, "my_add"), 0);
}
}
// Get preview of delegate partitioning from the context.
TfLiteDelegateParams* params_array;
int num_partitions;
TFLITE_CHECK_EQ(
context->PreviewDelegatePartitioning(context, nodes_to_separate,
&params_array, &num_partitions),
kTfLiteOk);
if (simple->min_ops_per_subset() > 0) {
// Build a new vector of ops from subsets with at least the minimum
// size.
std::vector<int> allowed_ops;
for (int idx = 0; idx < num_partitions; ++idx) {
const auto* nodes_in_subset = params_array[idx].nodes_to_replace;
if (nodes_in_subset->size < simple->min_ops_per_subset()) continue;
allowed_ops.insert(allowed_ops.end(), nodes_in_subset->data,
nodes_in_subset->data + nodes_in_subset->size);
}
// Free existing nodes_to_separate & initialize a new array with
// allowed_ops.
TfLiteIntArrayFree(nodes_to_separate);
nodes_to_separate = TfLiteIntArrayCreate(allowed_ops.size());
memcpy(nodes_to_separate->data, allowed_ops.data(),
sizeof(int) * nodes_to_separate->size);
}
// Another call to PreviewDelegatePartitioning should be okay, since
// partitioning memory is managed by context.
TFLITE_CHECK_EQ(
context->PreviewDelegatePartitioning(context, nodes_to_separate,
&params_array, &num_partitions),
kTfLiteOk);
context->ReplaceNodeSubsetsWithDelegateKernels(
context, simple->FakeFusedRegistration(), nodes_to_separate, delegate);
TfLiteIntArrayFree(nodes_to_separate);
return kTfLiteOk;
};
delegate_.CopyToBufferHandle = [](TfLiteContext* context,
TfLiteDelegate* delegate,
TfLiteBufferHandle buffer_handle,
TfLiteTensor* tensor) -> TfLiteStatus {
// TODO(b/156586986): Implement tests to test buffer copying logic.
return kTfLiteOk;
};
delegate_.CopyFromBufferHandle = [](TfLiteContext* context,
TfLiteDelegate* delegate,
TfLiteBufferHandle buffer_handle,
TfLiteTensor* output) -> TfLiteStatus {
TFLITE_CHECK_GE(buffer_handle, -1);
TFLITE_CHECK_EQ(output->buffer_handle, buffer_handle);
const float floats[] = {6., 6., 6.};
int num = output->dims->data[0];
for (int i = 0; i < num; i++) {
output->data.f[i] = floats[i];
}
return kTfLiteOk;
};
delegate_.FreeBufferHandle =
[](TfLiteContext* context, TfLiteDelegate* delegate,
TfLiteBufferHandle* handle) { *handle = kTfLiteNullBufferHandle; };
// Store a type-punned pointer to this SimpleDelegate structure.
delegate_.data_ = static_cast<void*>(this);
delegate_.flags = delegate_flags;
}
TfLiteRegistration TestDelegate::SimpleDelegate::FakeFusedRegistration() {
TfLiteRegistration reg = {nullptr};
reg.custom_name = "fake_fused_op";
// Different flavors of the delegate kernel's Invoke(), dependent on
// testing parameters.
if (fail_delegate_node_invoke_) {
reg.invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
return kTfLiteError;
};
} else {
reg.invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
// Set output data to the element-wise sum of the input data.
const TfLiteTensor* a0;
const TfLiteTensor* a1;
if (node->inputs->size == 2) {
a0 = GetInput(context, node, 0);
a1 = GetInput(context, node, 1);
} else {
a0 = GetInput(context, node, 0);
a1 = a0;
}
TfLiteTensor* out;
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &out));
int num = 1;
for (int i = 0; i < a0->dims->size; ++i) {
num *= a0->dims->data[i];
}
for (int i = 0; i < num; i++) {
out->data.f[i] = a0->data.f[i] + a1->data.f[i];
}
if (out->buffer_handle != kTfLiteNullBufferHandle) {
// Make the data stale so that CopyFromBufferHandle can be invoked
out->data_is_stale = true;
}
return kTfLiteOk;
};
}
// Different flavors of the delegate kernel's Prepare(), dependent on
// testing parameters.
if (automatic_shape_propagation_) {
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
// Shapes should already be propagated by the runtime; we just need to
// check them.
const TfLiteTensor* input1;
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input1));
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
const int input_dims_size = input1->dims->size;
TF_LITE_ENSURE(context, output->dims->size == input_dims_size);
for (int i = 0; i < input_dims_size; ++i) {
TF_LITE_ENSURE(context, output->dims->data[i] == input1->dims->data[i]);
}
return kTfLiteOk;
};
} else if (fail_delegate_node_prepare_) {
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
return kTfLiteError;
};
} else {
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
// Set output size to input size
const TfLiteTensor* input1;
const TfLiteTensor* input2;
if (node->inputs->size == 2) {
input1 = GetInput(context, node, 0);
input2 = GetInput(context, node, 1);
} else {
input1 = GetInput(context, node, 0);
input2 = input1;
}
TfLiteTensor* output;
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
TF_LITE_ENSURE_STATUS(context->ResizeTensor(
context, output, TfLiteIntArrayCopy(input1->dims)));
return kTfLiteOk;
};
}
return reg;
}
void TestFP16Delegation::SetUp() {
interpreter_.reset(new Interpreter);
interpreter_->AddTensors(13);
interpreter_->SetInputs({0});
interpreter_->SetOutputs({12});
float16_const_ = Eigen::half_impl::float_to_half_rtne(2.f);
// TENSORS.
TfLiteQuantizationParams quant;
// Input.
interpreter_->SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {1}, quant);
// fp16 constant, dequantize output, Add0 output.
interpreter_->SetTensorParametersReadOnly(
1, kTfLiteFloat16, "", {1}, quant,
reinterpret_cast<const char*>(&float16_const_), sizeof(TfLiteFloat16));
interpreter_->SetTensorParametersReadWrite(2, kTfLiteFloat32, "", {1}, quant);
interpreter_->SetTensorParametersReadWrite(3, kTfLiteFloat32, "", {1}, quant);
// fp16 constant, dequantize output, Add1 output.
interpreter_->SetTensorParametersReadOnly(
4, kTfLiteFloat16, "", {1}, quant,
reinterpret_cast<const char*>(&float16_const_), sizeof(TfLiteFloat16));
interpreter_->SetTensorParametersReadWrite(5, kTfLiteFloat32, "", {1}, quant);
interpreter_->SetTensorParametersReadWrite(6, kTfLiteFloat32, "", {1}, quant);
// fp16 constant, dequantize output, Mul0 output.
interpreter_->SetTensorParametersReadOnly(
7, kTfLiteFloat16, "", {1}, quant,
reinterpret_cast<const char*>(&float16_const_), sizeof(TfLiteFloat16));
interpreter_->SetTensorParametersReadWrite(8, kTfLiteFloat32, "", {1}, quant);
interpreter_->SetTensorParametersReadWrite(9, kTfLiteFloat32, "", {1}, quant);
// fp16 constant, dequantize output, Add2 output.
interpreter_->SetTensorParametersReadOnly(
10, kTfLiteFloat16, "", {1}, quant,
reinterpret_cast<const char*>(&float16_const_), sizeof(TfLiteFloat16));
interpreter_->SetTensorParametersReadWrite(11, kTfLiteFloat32, "", {1},
quant);
interpreter_->SetTensorParametersReadWrite(12, kTfLiteFloat32, "", {1},
quant);
// NODES.
auto* add_reg = ops::builtin::Register_ADD();
auto* mul_reg = ops::builtin::Register_MUL();
auto* deq_reg = ops::builtin::Register_DEQUANTIZE();
add_reg->builtin_code = kTfLiteBuiltinAdd;
deq_reg->builtin_code = kTfLiteBuiltinDequantize;
mul_reg->builtin_code = kTfLiteBuiltinMul;
TfLiteAddParams* builtin_data0 =
reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
TfLiteAddParams* builtin_data1 =
reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
TfLiteMulParams* builtin_data2 =
reinterpret_cast<TfLiteMulParams*>(malloc(sizeof(TfLiteMulParams)));
TfLiteAddParams* builtin_data3 =
reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
builtin_data0->activation = kTfLiteActNone;
builtin_data1->activation = kTfLiteActNone;
builtin_data2->activation = kTfLiteActNone;
builtin_data3->activation = kTfLiteActNone;
interpreter_->AddNodeWithParameters({1}, {2}, nullptr, 0, nullptr, deq_reg);
interpreter_->AddNodeWithParameters({0, 2}, {3}, nullptr, 0, builtin_data0,
add_reg);
interpreter_->AddNodeWithParameters({4}, {5}, nullptr, 0, nullptr, deq_reg);
interpreter_->AddNodeWithParameters({3, 5}, {6}, nullptr, 0, builtin_data1,
add_reg);
interpreter_->AddNodeWithParameters({7}, {8}, nullptr, 0, nullptr, deq_reg);
interpreter_->AddNodeWithParameters({6, 8}, {9}, nullptr, 0, builtin_data2,
mul_reg);
interpreter_->AddNodeWithParameters({10}, {11}, nullptr, 0, nullptr, deq_reg);
interpreter_->AddNodeWithParameters({9, 11}, {12}, nullptr, 0, builtin_data3,
add_reg);
}
void TestFP16Delegation::VerifyInvoke() {
std::vector<float> input = {3.0f};
std::vector<float> expected_output = {16.0f};
const int input_tensor_idx = interpreter_->inputs()[0];
const int output_tensor_idx = interpreter_->outputs()[0];
memcpy(interpreter_->typed_tensor<float>(input_tensor_idx), input.data(),
sizeof(float));
ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk);
TfLiteTensor* output_tensor = interpreter_->tensor(output_tensor_idx);
for (int i = 0; i < 1; ++i) {
EXPECT_EQ(output_tensor->data.f[i], expected_output[i]) << i;
}
}
TestFP16Delegation::FP16Delegate::FP16Delegate(int num_delegated_subsets,
bool fail_node_prepare,
bool fail_node_invoke)
: num_delegated_subsets_(num_delegated_subsets),
fail_delegate_node_prepare_(fail_node_prepare),
fail_delegate_node_invoke_(fail_node_invoke) {
delegate_.Prepare = [](TfLiteContext* context,
TfLiteDelegate* delegate) -> TfLiteStatus {
auto* fp16_delegate = static_cast<FP16Delegate*>(delegate->data_);
// FP16 graph partitioning.
delegates::IsNodeSupportedFn node_supported_fn =
[=](TfLiteContext* context, TfLiteNode* node,
TfLiteRegistration* registration,
std::string* unsupported_details) -> bool {
return registration->builtin_code == kTfLiteBuiltinAdd;
};
delegates::FP16GraphPartitionHelper partition_helper(context,
node_supported_fn);
TfLiteIntArray* nodes_to_separate = nullptr;
if (partition_helper.Partition(nullptr) != kTfLiteOk) {
nodes_to_separate = TfLiteIntArrayCreate(0);
} else {
std::vector<int> ops_to_replace =
partition_helper.GetNodesOfFirstNLargestPartitions(
fp16_delegate->num_delegated_subsets());
nodes_to_separate = ConvertVectorToTfLiteIntArray(ops_to_replace);
}
context->ReplaceNodeSubsetsWithDelegateKernels(
context, fp16_delegate->FakeFusedRegistration(), nodes_to_separate,
delegate);
TfLiteIntArrayFree(nodes_to_separate);
return kTfLiteOk;
};
delegate_.CopyFromBufferHandle =
[](TfLiteContext* context, TfLiteDelegate* delegate,
TfLiteBufferHandle buffer_handle,
TfLiteTensor* output) -> TfLiteStatus { return kTfLiteOk; };
delegate_.FreeBufferHandle = nullptr;
delegate_.CopyToBufferHandle = nullptr;
// Store a type-punned pointer to this FP16Delegate structure.
delegate_.data_ = static_cast<void*>(this);
delegate_.flags = kTfLiteDelegateFlagsNone;
}
TfLiteRegistration TestFP16Delegation::FP16Delegate::FakeFusedRegistration() {
TfLiteRegistration reg = {nullptr};
reg.custom_name = "fake_fp16_add_op";
// Different flavors of the delegate kernel's Invoke(), dependent on
// testing parameters.
if (fail_delegate_node_invoke_) {
reg.invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
return kTfLiteError;
};
} else {
reg.invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
float output = 0;
for (int i = 0; i < node->inputs->size; ++i) {
const TfLiteTensor* input_tensor = GetInput(context, node, i);
if (input_tensor->type == kTfLiteFloat32) {
output += input_tensor->data.f[0];
} else {
// All constants are 2.
output += 2;
}
}
TfLiteTensor* out = GetOutput(context, node, 0);
out->data.f[0] = output;
return kTfLiteOk;
};
}
// Different flavors of the delegate kernel's Prepare(), dependent on
// testing parameters.
if (fail_delegate_node_prepare_) {
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
return kTfLiteError;
};
} else {
reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
// Set output size to input size
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
TF_LITE_ENSURE_STATUS(context->ResizeTensor(
context, output, TfLiteIntArrayCopy(input->dims)));
return kTfLiteOk;
};
}
return reg;
}
} // namespace test_utils
} // namespace delegates
} // namespace tflite

tensorflow/lite/delegates/delegate_test_util.h

@@ -0,0 +1,138 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_DELEGATES_DELEGATE_TEST_UTIL_H_
#define TENSORFLOW_LITE_DELEGATES_DELEGATE_TEST_UTIL_H_
#include <stdint.h>
#include <memory>
#include <vector>
#include <gtest/gtest.h>
#include "third_party/eigen3/Eigen/Core"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
namespace tflite {
namespace delegates {
namespace test_utils {
// Build a kernel registration for a custom addition op that adds its two
// tensor inputs to produce a tensor output.
TfLiteRegistration AddOpRegistration();
// TestDelegate is a friend of Interpreter to access RemoveAllDelegates().
class TestDelegate : public ::testing::Test {
protected:
void SetUp() override;
void TearDown() override;
TfLiteBufferHandle last_allocated_handle_ = kTfLiteNullBufferHandle;
TfLiteBufferHandle AllocateBufferHandle() { return ++last_allocated_handle_; }
TfLiteStatus RemoveAllDelegates() {
return interpreter_->RemoveAllDelegates();
}
protected:
class SimpleDelegate {
public:
// Create a simple implementation of a TfLiteDelegate. We use the C++ class
// SimpleDelegate, which can produce a handle to a TfLiteDelegate that is
// value-copyable and compatible with TfLite.
// fail_node_prepare: To simulate failure of Delegate node's Prepare().
// min_ops_per_subset: If >0, partitioning preview is used to choose only
// those subsets with at least min_ops_per_subset nodes.
// fail_node_invoke: To simulate failure of Delegate node's Invoke().
// automatic_shape_propagation: This assumes that the runtime will propagate
// shapes using the original execution plan.
explicit SimpleDelegate(const std::vector<int>& nodes,
int64_t delegate_flags = kTfLiteDelegateFlagsNone,
bool fail_node_prepare = false,
int min_ops_per_subset = 0,
bool fail_node_invoke = false,
bool automatic_shape_propagation = false);
TfLiteRegistration FakeFusedRegistration();
TfLiteDelegate* get_tf_lite_delegate() { return &delegate_; }
int min_ops_per_subset() { return min_ops_per_subset_; }
private:
std::vector<int> nodes_;
TfLiteDelegate delegate_;
bool fail_delegate_node_prepare_ = false;
int min_ops_per_subset_ = 0;
bool fail_delegate_node_invoke_ = false;
bool automatic_shape_propagation_ = false;
};
std::unique_ptr<Interpreter> interpreter_;
std::unique_ptr<SimpleDelegate> delegate_, delegate2_;
};
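// Example (a sketch drawn from TestFallbackWithMultipleDelegates in
// interpreter_utils_test.cc): a test constructs a SimpleDelegate that claims
// nodes 1 and 2 and simulates an Invoke() failure, then applies it to the
// fixture's interpreter:
//
//   delegate2_ = std::unique_ptr<SimpleDelegate>(new SimpleDelegate(
//       {1, 2}, kTfLiteDelegateFlagsNone, false /**fail_node_prepare**/,
//       0 /**min_ops_per_subset**/, true /**fail_node_invoke**/));
//   interpreter_->ModifyGraphWithDelegate(delegate2_->get_tf_lite_delegate());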
// Tests delegate functionality related to FP16 graphs.
// Model architecture:
// 1->DEQ->2 4->DEQ->5 7->DEQ->8 10->DEQ->11
// | | | |
// 0----->ADD->3----->ADD->6----->MUL->9------>ADD-->12
// Input: 0, Output: 12.
// All constants are 2, so the function is: (x + 2 + 2) * 2 + 2 = 2x + 10
//
// Delegate only supports ADD, so there can be up to two delegated partitions.
// TODO(b/156707497): Add more cases here once we have landed CPU kernels
// supporting FP16.
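// For example, with input x = 3 the model computes (3 + 2 + 2) * 2 + 2 = 16,
// i.e. 2x + 10; VerifyInvoke() feeds {3.0f} and expects {16.0f}.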
class TestFP16Delegation : public ::testing::TestWithParam<int> {
protected:
void SetUp() override;
void VerifyInvoke();
void TearDown() override { interpreter_.reset(); }
protected:
class FP16Delegate {
public:
// Uses FP16GraphPartitionHelper to accept ADD nodes with fp16 input.
explicit FP16Delegate(int num_delegated_subsets,
bool fail_node_prepare = false,
bool fail_node_invoke = false);
TfLiteRegistration FakeFusedRegistration();
TfLiteDelegate* get_tf_lite_delegate() { return &delegate_; }
int num_delegated_subsets() { return num_delegated_subsets_; }
private:
TfLiteDelegate delegate_;
int num_delegated_subsets_;
bool fail_delegate_node_prepare_ = false;
bool fail_delegate_node_invoke_ = false;
};
std::unique_ptr<Interpreter> interpreter_;
std::unique_ptr<FP16Delegate> delegate_;
Eigen::half float16_const_;
};
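// Example (a sketch drawn from TestFP16Delegation::DelegateInvokeWithCPUFallback
// in interpreter_utils_test.cc): delegate GetParam() partitions, simulate an
// Invoke() failure, and expect the CPU fallback to still produce correct output:
//
//   delegate_ = std::unique_ptr<FP16Delegate>(new FP16Delegate(
//       /**num_delegated_subsets**/ GetParam(), /**fail_node_prepare**/ false,
//       /**fail_node_invoke**/ true));
//   interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate());
//   EXPECT_EQ(InterpreterUtils::InvokeWithCPUFallback(interpreter_.get()),
//             kTfLiteDelegateError);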
} // namespace test_utils
} // namespace delegates
} // namespace tflite
#endif  // TENSORFLOW_LITE_DELEGATES_DELEGATE_TEST_UTIL_H_

tensorflow/lite/delegates/interpreter_utils_test.cc

@@ -0,0 +1,142 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/interpreter_utils.h"
#include <string.h>
#include <memory>
#include <vector>
#include <gtest/gtest.h>
#include "tensorflow/lite/delegates/delegate_test_util.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/testing/util.h"
namespace tflite {
namespace delegates {
using test_utils::TestDelegate;
using test_utils::TestFP16Delegation;
namespace {
TEST_F(TestDelegate, DelegateNodeInvokeFailureFallback) {
delegate_ = std::unique_ptr<SimpleDelegate>(new SimpleDelegate(
{0, 1, 2}, kTfLiteDelegateFlagsNone, false /**fail_node_prepare**/,
0 /**min_ops_per_subset**/, true /**fail_node_invoke**/));
ASSERT_EQ(
interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()),
kTfLiteOk);
// Delegation modified execution plan.
ASSERT_EQ(interpreter_->execution_plan().size(), 1);
std::vector<float> input = {1.0f, 2.0f, 3.0f};
std::vector<float> expected_output = {2.0f, 4.0f, 6.0f};
constexpr int kOutputTensorIndex = 3;
memcpy(interpreter_->typed_tensor<float>(0), input.data(), 3 * sizeof(float));
memcpy(interpreter_->typed_tensor<float>(1), input.data(), 3 * sizeof(float));
EXPECT_EQ(
delegates::InterpreterUtils::InvokeWithCPUFallback(interpreter_.get()),
kTfLiteDelegateError);
// Delegation removed, returning to original execution plan.
ASSERT_EQ(interpreter_->execution_plan().size(), 3);
// Check outputs.
TfLiteTensor* tensor = interpreter_->tensor(kOutputTensorIndex);
for (int i = 0; i < 3; ++i) {
EXPECT_EQ(tensor->data.f[i], expected_output[i]) << i;
}
}
TEST_F(TestDelegate, TestFallbackWithMultipleDelegates) {
// First delegate only supports node 0.
// This delegate should support dynamic tensors, otherwise the second won't be
// applied.
delegate_ = std::unique_ptr<SimpleDelegate>(
new SimpleDelegate({0}, kTfLiteDelegateFlagsAllowDynamicTensors));
// Second delegate supports nodes 1 & 2, and makes the graph immutable.
delegate2_ = std::unique_ptr<SimpleDelegate>(new SimpleDelegate(
{1, 2}, kTfLiteDelegateFlagsNone, false /**fail_node_prepare**/,
0 /**min_ops_per_subset**/, true /**fail_node_invoke**/));
// Pre-delegation execution plan should have three nodes.
ASSERT_EQ(interpreter_->execution_plan().size(), 3);
ASSERT_EQ(
interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()),
kTfLiteOk);
ASSERT_EQ(
interpreter_->ModifyGraphWithDelegate(delegate2_->get_tf_lite_delegate()),
kTfLiteOk);
// Should be two delegate nodes.
ASSERT_EQ(interpreter_->execution_plan().size(), 2);
std::vector<float> input = {1.0f, 2.0f, 3.0f};
std::vector<float> expected_output = {2.0f, 4.0f, 6.0f};
constexpr int kOutputTensorIndex = 2;
TfLiteTensor* tensor = interpreter_->tensor(kOutputTensorIndex);
memcpy(interpreter_->typed_tensor<float>(0), input.data(), 3 * sizeof(float));
memcpy(interpreter_->typed_tensor<float>(1), input.data(), 3 * sizeof(float));
EXPECT_EQ(
delegates::InterpreterUtils::InvokeWithCPUFallback(interpreter_.get()),
kTfLiteDelegateError);
// All delegates should be undone.
EXPECT_EQ(interpreter_->execution_plan().size(), 3);
for (int i = 0; i < 3; ++i) {
EXPECT_EQ(tensor->data.f[i], expected_output[i]) << i;
}
}
TEST_P(TestFP16Delegation, DelegateInvokeWithCPUFallback) {
delegate_ = std::unique_ptr<FP16Delegate>(new FP16Delegate(
/**num_delegated_subsets**/ GetParam(), /**fail_node_prepare**/ false,
/**fail_node_invoke**/ true));
ASSERT_EQ(
interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()),
kTfLiteOk);
std::vector<float> input = {3.0f};
std::vector<float> expected_output = {16.0f};
const int input_tensor_idx = interpreter_->inputs()[0];
const int output_tensor_idx = interpreter_->outputs()[0];
memcpy(interpreter_->typed_tensor<float>(input_tensor_idx), input.data(),
sizeof(float));
EXPECT_EQ(
delegates::InterpreterUtils::InvokeWithCPUFallback(interpreter_.get()),
kTfLiteDelegateError);
TfLiteTensor* output_tensor = interpreter_->tensor(output_tensor_idx);
for (int i = 0; i < 1; ++i) {
EXPECT_EQ(output_tensor->data.f[i], expected_output[i]) << i;
}
ASSERT_EQ(interpreter_->execution_plan().size(), 8);
VerifyInvoke();
}
INSTANTIATE_TEST_SUITE_P(TestFP16Delegation, TestFP16Delegation,
::testing::Values(1, 2));
} // anonymous namespace
} // namespace delegates
} // namespace tflite
int main(int argc, char** argv) {
::tflite::LogToStderr();
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

tensorflow/lite/interpreter.h

@@ -40,10 +40,12 @@ limitations under the License.
namespace tflite {
class InterpreterTest;
class TestDelegate;
class InterpreterTest; // Class for friend declarations.
namespace delegates {
class InterpreterUtils; // Class for friend declarations.
namespace test_utils {
class TestDelegate; // Class for friend declarations.
} // namespace test_utils
} // namespace delegates
/// An interpreter for a graph of nodes that input and output from tensors.
@@ -662,8 +664,8 @@ class Interpreter {
};
friend class InterpreterBuilder;
friend class tflite::InterpreterTest;
friend class tflite::TestDelegate;
friend class tflite::delegates::InterpreterUtils;
friend class tflite::delegates::test_utils::TestDelegate;
/// Set the value of an external context.
static void SetExternalContext(struct TfLiteContext* context,