Support partition limitation even when accelerator name is not specified
PiperOrigin-RevId: 296406206
Change-Id: I4bdd9d0e9b578401bb380828c628e14ef5aa711c
commit f781e5eb44
parent 45c98a790e
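
For orientation before the diff, here is a minimal usage sketch of the option this change affects. It is not part of the commit; the option name max_number_delegated_partitions is confirmed by the diff itself, while the headers, the NnApiRunner helper type, and the builder boilerplate are assumptions for illustration only.

#include <memory>

#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"

// Owns the delegate together with the interpreter so the delegate outlives
// the graph that references it (members are destroyed in reverse
// declaration order, interpreter first).
struct NnApiRunner {
  explicit NnApiRunner(const tflite::StatefulNnApiDelegate::Options& options)
      : delegate(options) {}
  tflite::StatefulNnApiDelegate delegate;
  std::unique_ptr<tflite::Interpreter> interpreter;
};

std::unique_ptr<NnApiRunner> BuildRunnerWithPartitionLimit(
    const tflite::FlatBufferModel& model, int max_partitions) {
  tflite::StatefulNnApiDelegate::Options options;
  // Cap the number of NNAPI partitions; the tests in this change expect the
  // partitions with the most nodes to be kept.
  options.max_number_delegated_partitions = max_partitions;
  // accelerator_name is intentionally left unset: with this commit the cap
  // is also enforced when no target accelerator is specified.
  auto runner = std::make_unique<NnApiRunner>(options);

  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder(model, resolver)(&runner->interpreter);
  if (!runner->interpreter ||
      runner->interpreter->ModifyGraphWithDelegate(&runner->delegate) !=
          kTfLiteOk) {
    return nullptr;
  }
  return runner;
}
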
@@ -179,6 +179,7 @@ cc_test(
         ":nnapi_delegate",
         ":nnapi_delegate_mock_test",
         "//tensorflow/lite:framework",
+        "//tensorflow/lite:kernel_api",
         "//tensorflow/lite:minimal_logging",
         "//tensorflow/lite/c:common",
         "//tensorflow/lite/kernels:test_util",
@@ -4185,25 +4185,29 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
       .version = 1,
   };

-  std::vector<int>& nodes_to_delegate = supported_nodes;
-  if (is_accelerator_specified) {
-    std::vector<int> device_supported_nodes;
-    int num_partitions;
-    TfLiteDelegateParams* params_array;
-    // Filtering out nodes not supported by target accelerators
-    TF_LITE_ENSURE_STATUS(GetNodesSupportedByAccelerator(
-        context, delegate, nnapi, supported_nodes, &device_supported_nodes,
-        &num_partitions, &params_array, nnapi_errno));
-
-    TF_LITE_ENSURE_STATUS(LimitDelegatedPartitions(
-        delegate_options.max_number_delegated_partitions,
-        std::vector<TfLiteDelegateParams>(params_array,
-                                          params_array + num_partitions),
-        &device_supported_nodes));
-
-    nodes_to_delegate = device_supported_nodes;
+  std::vector<int> nodes_to_delegate;
+
+  int num_partitions;
+  TfLiteDelegateParams* params_array;
+  if (is_accelerator_specified) {
+    // Filtering out nodes not supported by target accelerators
+    TF_LITE_ENSURE_STATUS(GetNodesSupportedByAccelerator(
+        context, delegate, nnapi, supported_nodes, &nodes_to_delegate,
+        &num_partitions, &params_array, nnapi_errno));
+  } else {
+    nodes_to_delegate = supported_nodes;
+    auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes);
+    TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
+        context, supported_nodes_int_array.get(), &params_array,
+        &num_partitions));
   }

+  TF_LITE_ENSURE_STATUS(
+      LimitDelegatedPartitions(delegate_options.max_number_delegated_partitions,
+                               std::vector<TfLiteDelegateParams>(
+                                   params_array, params_array + num_partitions),
+                               &nodes_to_delegate));
+
   if (nodes_to_delegate.empty()) {
     return kTfLiteOk;
   } else {
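
The hunk above applies the partition cap whether or not an accelerator was named; when no accelerator_name is given, the partitions now come from PreviewDelegatePartitioning instead of GetNodesSupportedByAccelerator. The cap itself lives in LimitDelegatedPartitions, whose body is not part of this hunk; the tests below expect the partitions with the highest node count to survive. A standalone sketch of that selection policy, under the assumption that this is what the helper does (hypothetical function name, not TF Lite's implementation):

#include <algorithm>
#include <vector>

#include "tensorflow/lite/c/common.h"

// Sketch: keep the max_partitions partitions with the most nodes and return
// the union of their node indices. Illustrative only; TF Lite's real
// LimitDelegatedPartitions has a different signature and may differ in detail.
std::vector<int> KeepLargestPartitions(
    std::vector<TfLiteDelegateParams> partitions, int max_partitions) {
  std::vector<int> nodes_to_delegate;
  if (max_partitions <= 0 || partitions.empty()) return nodes_to_delegate;

  // Partitions with more nodes come first.
  std::sort(partitions.begin(), partitions.end(),
            [](const TfLiteDelegateParams& a, const TfLiteDelegateParams& b) {
              return a.nodes_to_replace->size > b.nodes_to_replace->size;
            });

  const int limit =
      std::min(max_partitions, static_cast<int>(partitions.size()));
  for (int i = 0; i < limit; ++i) {
    const TfLiteIntArray* nodes = partitions[i].nodes_to_replace;
    nodes_to_delegate.insert(nodes_to_delegate.end(), nodes->data,
                             nodes->data + nodes->size);
  }
  return nodes_to_delegate;
}
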
@@ -15,6 +15,7 @@ limitations under the License.
 #include <sys/mman.h>

 #include <algorithm>
+#include <array>
 #include <iterator>
 #include <memory>
 #include <numeric>
@@ -23,6 +24,7 @@ limitations under the License.
 #include <vector>

 #include <gtest/gtest.h>
+#include "tensorflow/lite/builtin_ops.h"
 #include "tensorflow/lite/c/common.h"
 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h"
@@ -545,13 +547,39 @@ TEST_F(UnsupportedOperationOnDeviceTest, ShouldCacheModelCompilation) {
 }

 // Model with a chain of no-op (add with zero operations)
+// interleaved with no-op custom nodes.
 class LongIdentityModel : public MultiOpModel, public AcceleratedModel {
  public:
   LongIdentityModel(const std::vector<int>& input_shape, int graph_size,
+                    const std::unordered_set<int>& custom_nodes_indexes,
                     const NnApi* nnapi, const std::string& accelerator_name,
                     int max_nnapi_partitions)
       : MultiOpModel(),
         AcceleratedModel(nnapi, accelerator_name, max_nnapi_partitions) {
+    Init(input_shape, graph_size, custom_nodes_indexes);
+  }
+
+  LongIdentityModel(const std::vector<int>& input_shape, int graph_size,
+                    const std::unordered_set<int>& custom_nodes_indexes,
+                    const NnApi* nnapi, int max_nnapi_partitions)
+      : MultiOpModel(), AcceleratedModel(nnapi, false, max_nnapi_partitions) {
+    Init(input_shape, graph_size, custom_nodes_indexes);
+  }
+
+  void SetInput(std::vector<float> value) { PopulateTensor(input_, value); }
+
+  int CountNnApiPartitions() {
+    return std::count_if(
+        std::begin(interpreter_->execution_plan()),
+        std::end(interpreter_->execution_plan()), [this](const int node_index) {
+          return interpreter_->node_and_registration(node_index)
+                     ->first.delegate != nullptr;
+        });
+  }
+
+ private:
+  void Init(const std::vector<int>& input_shape, int graph_size,
+            const std::unordered_set<int>& custom_nodes_indexes) {
     auto* delegate = GetDelegate();
     this->SetApplyDelegate([delegate](Interpreter* interpreter) {
       interpreter->ModifyGraphWithDelegate(delegate);
@@ -574,11 +602,16 @@ class LongIdentityModel : public MultiOpModel, public AcceleratedModel {
                  {intermediate_outputs[0]});

     for (int i = 0; i < intermediate_outputs.size() - 1; i++) {
-      AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
-                   CreateAddOptions(builder_).Union(),
-                   {intermediate_outputs[i], zero_input_},
-                   {intermediate_outputs[i + 1]});
+      if (custom_nodes_indexes.count(i + 1) == 1) {
+        AddCustomOp("custom_no_op", {}, [this]() { return CustomNoOpNode(); },
+                    {intermediate_outputs[i]}, {intermediate_outputs[i + 1]});
+      } else {
+        AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
+                     CreateAddOptions(builder_).Union(),
+                     {intermediate_outputs[i], zero_input_},
+                     {intermediate_outputs[i + 1]});
+      }
     }

     AddBuiltinOp(
         BuiltinOperator_ADD, BuiltinOptions_AddOptions,
@@ -592,18 +625,42 @@ class LongIdentityModel : public MultiOpModel, public AcceleratedModel {
     PopulateTensor(zero_input_, zero);
   }

-  void SetInput(std::vector<float> value) { PopulateTensor(input_, value); }
-
-  int CountNnApiPartitions() {
-    return std::count_if(
-        std::begin(interpreter_->execution_plan()),
-        std::end(interpreter_->execution_plan()), [this](const int node_index) {
-          return interpreter_->node_and_registration(node_index)
-                     ->first.delegate != nullptr;
-        });
-  }
-
- private:
+  // Return the registration of a custom node simply copying input to output.
+  TfLiteRegistration* CustomNoOpNode() {
+    static TfLiteRegistration no_op = {
+        .init = [](TfLiteContext* context, const char* buffer,
+                   size_t length) -> void* { return nullptr; },
+
+        .free = [](TfLiteContext* context, void* buffer) -> void {},
+
+        .prepare = [](TfLiteContext* context,
+                      TfLiteNode* node) -> TfLiteStatus {
+          if (node->inputs->size != 1 || node->outputs->size != 1) {
+            return kTfLiteError;
+          }
+
+          return kTfLiteOk;
+        },
+
+        .invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
+          auto input_tensor = context->tensors[node->inputs->data[0]];
+          auto output_tensor = context->tensors[node->outputs->data[0]];
+
+          std::copy(input_tensor.data.raw,
+                    input_tensor.data.raw + input_tensor.bytes,
+                    output_tensor.data.raw);
+
+          return kTfLiteOk;
+        },
+
+        .profiling_string = nullptr,
+        .builtin_code = kTfLiteBuiltinDelegate,
+        .custom_name = "NoOpTestDelegate",
+        .version = 1,
+    };
+
+    return &no_op;
+  }
   int input_;
   int zero_input_;
   int output_;
@@ -643,7 +700,8 @@ class DelegatePartitionLimitTest
   // input_shape.
   void Init(int max_nnapi_partitions,
             const std::vector<int>& nnapi_partition_sizes,
-            const std::vector<int>& input_shape) {
+            const std::vector<int>& input_shape,
+            bool specify_accelerator = true) {
     // The graph will have as number of nodes the sum of nodes in the NNAPI
     // partitions plus nnapi_partition_sizes.size() - 1 nodes that will be
     // not supported by NNAPI and will cause the
@@ -658,6 +716,12 @@ class DelegatePartitionLimitTest
       unsupported_ops_idxs.insert(partition_node_idx);
     }

+    if (specify_accelerator) {
+      // Building a model that will contain initially a single partition
+      // and will get then partitioned by checking the operations supported
+      // by the target accelerator.
+      // This because I am not able to know the size of each partition in my
+      // stubbed GetSupportedOperationsForDevices API.
     DelegatePartitionLimitTestNodeFilter()->ConfigureSupportedNodes(
         graph_size_, unsupported_ops_idxs);

@@ -665,13 +729,23 @@ class DelegatePartitionLimitTest
         [](const ANeuralNetworksModel* model,
            const ANeuralNetworksDevice* const* devices, uint32_t num_devices,
            bool* supported_ops) -> int {
-          DelegatePartitionLimitTestNodeFilter()->SetNodeSupport(supported_ops);
+          DelegatePartitionLimitTestNodeFilter()->SetNodeSupport(
+              supported_ops);
           return ANEURALNETWORKS_NO_ERROR;
         });

     model_ = std::make_unique<LongIdentityModel>(
-        input_shape, graph_size_, nnapi_mock_->GetNnApi(),
+        input_shape, graph_size_,
+        /*custom_nodes_indexes=*/std::unordered_set<int>(),
+        nnapi_mock_->GetNnApi(),
         /*accelerator_name=*/"test-device", max_nnapi_partitions);
+    } else {
+      // Building a model containing custom nodes that won't be supported
+      // by the delegate and generate the partitions.
+      model_ = std::make_unique<LongIdentityModel>(
+          input_shape, graph_size_, unsupported_ops_idxs,
+          nnapi_mock_->GetNnApi(), max_nnapi_partitions);
+    }
   }

   std::unique_ptr<LongIdentityModel> model_;
@@ -718,24 +792,44 @@ TEST_F(DelegatePartitionLimitTest,
 }

 TEST_F(DelegatePartitionLimitTest, ShouldDelegatePartitionWithHigherNodeCount) {
+  int kLargestModelSize = 3;
   Init(/*max_nnapi_partitions=*/1,
        /*nnapi_partition_sizes=*/{3, 2},
        /*input_shape=*/{1, 2, 2, 1});

   EXPECT_EQ(model_->CountNnApiPartitions(), 1);
-  EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(), OriginalGraphSize() - 3);
+  EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(),
+            OriginalGraphSize() - kLargestModelSize);
 }

 TEST_F(DelegatePartitionLimitTest,
        ShouldDelegatePartitionsWithHigherNodeCount) {
+  int kLargestModelSize = 5;
+  int kSecondLargestModelSize = 4;
   Init(/*max_nnapi_partitions=*/2,
-       /*nnapi_partition_sizes=*/{1, 5, 2, 4},
+       /*nnapi_partition_sizes=*/
+       {1, kLargestModelSize, 2, kSecondLargestModelSize},
        /*input_shape=*/{1, 2, 2, 1});

   EXPECT_EQ(model_->CountNnApiPartitions(), 2);
   EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(), OriginalGraphSize() - 9);
 }

+TEST_F(DelegatePartitionLimitTest,
+       ShouldLimitPartitionsEvenWithoutAcceleratorNameSpecified) {
+  int kLargestModelSize = 5;
+  int kSecondLargestModelSize = 4;
+  Init(/*max_nnapi_partitions=*/2,
+       /*nnapi_partition_sizes=*/
+       {1, kLargestModelSize, 2, kSecondLargestModelSize},
+       /*input_shape=*/{1, 2, 2, 1}, /*specify_accelerator=*/false);
+
+  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
+  EXPECT_EQ(
+      model_->CountOpsExecutedByCpuKernel(),
+      OriginalGraphSize() - (kLargestModelSize + kSecondLargestModelSize));
+}
+
 }  // namespace
 }  // namespace tflite
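
A quick sanity check of the new test's expectations, using the sizing rule stated in DelegatePartitionLimitTest::Init and assuming OriginalGraphSize() reports the pre-delegation node count: nnapi_partition_sizes = {1, 5, 2, 4} yields a graph of (1 + 5 + 2 + 4) + (4 - 1) = 15 nodes, the 3 extra nodes being the unsupported boundary ops. With max_nnapi_partitions = 2, only the two largest partitions (5 and 4 nodes) stay delegated, so CountNnApiPartitions() == 2 and CountOpsExecutedByCpuKernel() == 15 - 9 = 6, which is exactly OriginalGraphSize() - (kLargestModelSize + kSecondLargestModelSize).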