Support partition limitation even when accelerator name is not specified

PiperOrigin-RevId: 296406206
Change-Id: I4bdd9d0e9b578401bb380828c628e14ef5aa711c
This commit is contained in:
Stefano Galarraga 2020-02-21 03:54:58 -08:00 committed by TensorFlower Gardener
parent 45c98a790e
commit f781e5eb44
3 changed files with 142 additions and 43 deletions

View File

@ -179,6 +179,7 @@ cc_test(
":nnapi_delegate", ":nnapi_delegate",
":nnapi_delegate_mock_test", ":nnapi_delegate_mock_test",
"//tensorflow/lite:framework", "//tensorflow/lite:framework",
"//tensorflow/lite:kernel_api",
"//tensorflow/lite:minimal_logging", "//tensorflow/lite:minimal_logging",
"//tensorflow/lite/c:common", "//tensorflow/lite/c:common",
"//tensorflow/lite/kernels:test_util", "//tensorflow/lite/kernels:test_util",

View File

@ -4185,25 +4185,29 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
.version = 1, .version = 1,
}; };
std::vector<int>& nodes_to_delegate = supported_nodes; std::vector<int> nodes_to_delegate;
int num_partitions;
TfLiteDelegateParams* params_array;
if (is_accelerator_specified) { if (is_accelerator_specified) {
std::vector<int> device_supported_nodes; // Filtering out nodes not supported by target accelerators
int num_partitions;
TfLiteDelegateParams* params_array;
TF_LITE_ENSURE_STATUS(GetNodesSupportedByAccelerator( TF_LITE_ENSURE_STATUS(GetNodesSupportedByAccelerator(
context, delegate, nnapi, supported_nodes, &device_supported_nodes, context, delegate, nnapi, supported_nodes, &nodes_to_delegate,
&num_partitions, &params_array, nnapi_errno)); &num_partitions, &params_array, nnapi_errno));
} else {
TF_LITE_ENSURE_STATUS(LimitDelegatedPartitions( nodes_to_delegate = supported_nodes;
delegate_options.max_number_delegated_partitions, auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes);
std::vector<TfLiteDelegateParams>(params_array, TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
params_array + num_partitions), context, supported_nodes_int_array.get(), &params_array,
&device_supported_nodes)); &num_partitions));
nodes_to_delegate = device_supported_nodes;
} }
TF_LITE_ENSURE_STATUS(
LimitDelegatedPartitions(delegate_options.max_number_delegated_partitions,
std::vector<TfLiteDelegateParams>(
params_array, params_array + num_partitions),
&nodes_to_delegate));
if (nodes_to_delegate.empty()) { if (nodes_to_delegate.empty()) {
return kTfLiteOk; return kTfLiteOk;
} else { } else {

View File

@ -15,6 +15,7 @@ limitations under the License.
#include <sys/mman.h> #include <sys/mman.h>
#include <algorithm> #include <algorithm>
#include <array>
#include <iterator> #include <iterator>
#include <memory> #include <memory>
#include <numeric> #include <numeric>
@ -23,6 +24,7 @@ limitations under the License.
#include <vector> #include <vector>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/c/common.h" #include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h" #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h" #include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h"
@ -545,13 +547,39 @@ TEST_F(UnsupportedOperationOnDeviceTest, ShouldCacheModelCompilation) {
} }
// Model with a chain of no-op (add with zero operations) // Model with a chain of no-op (add with zero operations)
// interleaved with no-op custom nodes.
class LongIdentityModel : public MultiOpModel, public AcceleratedModel { class LongIdentityModel : public MultiOpModel, public AcceleratedModel {
public: public:
LongIdentityModel(const std::vector<int>& input_shape, int graph_size, LongIdentityModel(const std::vector<int>& input_shape, int graph_size,
const std::unordered_set<int>& custom_nodes_indexes,
const NnApi* nnapi, const std::string& accelerator_name, const NnApi* nnapi, const std::string& accelerator_name,
int max_nnapi_partitions) int max_nnapi_partitions)
: MultiOpModel(), : MultiOpModel(),
AcceleratedModel(nnapi, accelerator_name, max_nnapi_partitions) { AcceleratedModel(nnapi, accelerator_name, max_nnapi_partitions) {
Init(input_shape, graph_size, custom_nodes_indexes);
}
LongIdentityModel(const std::vector<int>& input_shape, int graph_size,
const std::unordered_set<int>& custom_nodes_indexes,
const NnApi* nnapi, int max_nnapi_partitions)
: MultiOpModel(), AcceleratedModel(nnapi, false, max_nnapi_partitions) {
Init(input_shape, graph_size, custom_nodes_indexes);
}
void SetInput(std::vector<float> value) { PopulateTensor(input_, value); }
int CountNnApiPartitions() {
return std::count_if(
std::begin(interpreter_->execution_plan()),
std::end(interpreter_->execution_plan()), [this](const int node_index) {
return interpreter_->node_and_registration(node_index)
->first.delegate != nullptr;
});
}
private:
void Init(const std::vector<int>& input_shape, int graph_size,
const std::unordered_set<int>& custom_nodes_indexes) {
auto* delegate = GetDelegate(); auto* delegate = GetDelegate();
this->SetApplyDelegate([delegate](Interpreter* interpreter) { this->SetApplyDelegate([delegate](Interpreter* interpreter) {
interpreter->ModifyGraphWithDelegate(delegate); interpreter->ModifyGraphWithDelegate(delegate);
@ -574,10 +602,15 @@ class LongIdentityModel : public MultiOpModel, public AcceleratedModel {
{intermediate_outputs[0]}); {intermediate_outputs[0]});
for (int i = 0; i < intermediate_outputs.size() - 1; i++) { for (int i = 0; i < intermediate_outputs.size() - 1; i++) {
AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions, if (custom_nodes_indexes.count(i + 1) == 1) {
CreateAddOptions(builder_).Union(), AddCustomOp("custom_no_op", {}, [this]() { return CustomNoOpNode(); },
{intermediate_outputs[i], zero_input_}, {intermediate_outputs[i]}, {intermediate_outputs[i + 1]});
{intermediate_outputs[i + 1]}); } else {
AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
CreateAddOptions(builder_).Union(),
{intermediate_outputs[i], zero_input_},
{intermediate_outputs[i + 1]});
}
} }
AddBuiltinOp( AddBuiltinOp(
@ -592,18 +625,42 @@ class LongIdentityModel : public MultiOpModel, public AcceleratedModel {
PopulateTensor(zero_input_, zero); PopulateTensor(zero_input_, zero);
} }
void SetInput(std::vector<float> value) { PopulateTensor(input_, value); } // Return the registration of a custom node simply copying input to output.
TfLiteRegistration* CustomNoOpNode() {
static TfLiteRegistration no_op = {
.init = [](TfLiteContext* context, const char* buffer,
size_t length) -> void* { return nullptr; },
int CountNnApiPartitions() { .free = [](TfLiteContext* context, void* buffer) -> void {},
return std::count_if(
std::begin(interpreter_->execution_plan()), .prepare = [](TfLiteContext* context,
std::end(interpreter_->execution_plan()), [this](const int node_index) { TfLiteNode* node) -> TfLiteStatus {
return interpreter_->node_and_registration(node_index) if (node->inputs->size != 1 || node->outputs->size != 1) {
->first.delegate != nullptr; return kTfLiteError;
}); }
return kTfLiteOk;
},
.invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
auto input_tensor = context->tensors[node->inputs->data[0]];
auto output_tensor = context->tensors[node->outputs->data[0]];
std::copy(input_tensor.data.raw,
input_tensor.data.raw + input_tensor.bytes,
output_tensor.data.raw);
return kTfLiteOk;
},
.profiling_string = nullptr,
.builtin_code = kTfLiteBuiltinDelegate,
.custom_name = "NoOpTestDelegate",
.version = 1,
};
return &no_op;
} }
private:
int input_; int input_;
int zero_input_; int zero_input_;
int output_; int output_;
@ -643,7 +700,8 @@ class DelegatePartitionLimitTest
// input_shape. // input_shape.
void Init(int max_nnapi_partitions, void Init(int max_nnapi_partitions,
const std::vector<int>& nnapi_partition_sizes, const std::vector<int>& nnapi_partition_sizes,
const std::vector<int>& input_shape) { const std::vector<int>& input_shape,
bool specify_accelerator = true) {
// The graph will have as number of nodes the sum of nodes in the NNAPI // The graph will have as number of nodes the sum of nodes in the NNAPI
// partitions plus nnapi_partition_sizes.size() - 1 nodes that will be // partitions plus nnapi_partition_sizes.size() - 1 nodes that will be
// not supported by NNAPI and will cause the // not supported by NNAPI and will cause the
@ -658,20 +716,36 @@ class DelegatePartitionLimitTest
unsupported_ops_idxs.insert(partition_node_idx); unsupported_ops_idxs.insert(partition_node_idx);
} }
DelegatePartitionLimitTestNodeFilter()->ConfigureSupportedNodes( if (specify_accelerator) {
graph_size_, unsupported_ops_idxs); // Building a model that will contain initially a single partition
// and will then get partitioned by checking the operations supported
// by the target accelerator.
// This is because I am not able to know the size of each partition in my
// stubbed GetSupportedOperationsForDevices API.
DelegatePartitionLimitTestNodeFilter()->ConfigureSupportedNodes(
graph_size_, unsupported_ops_idxs);
nnapi_mock_->StubGetSupportedOperationsForDevicesWith( nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
[](const ANeuralNetworksModel* model, [](const ANeuralNetworksModel* model,
const ANeuralNetworksDevice* const* devices, uint32_t num_devices, const ANeuralNetworksDevice* const* devices, uint32_t num_devices,
bool* supported_ops) -> int { bool* supported_ops) -> int {
DelegatePartitionLimitTestNodeFilter()->SetNodeSupport(supported_ops); DelegatePartitionLimitTestNodeFilter()->SetNodeSupport(
return ANEURALNETWORKS_NO_ERROR; supported_ops);
}); return ANEURALNETWORKS_NO_ERROR;
});
model_ = std::make_unique<LongIdentityModel>( model_ = std::make_unique<LongIdentityModel>(
input_shape, graph_size_, nnapi_mock_->GetNnApi(), input_shape, graph_size_,
/*accelerator_name=*/"test-device", max_nnapi_partitions); /*custom_nodes_indexes=*/std::unordered_set<int>(),
nnapi_mock_->GetNnApi(),
/*accelerator_name=*/"test-device", max_nnapi_partitions);
} else {
// Building a model containing custom nodes that won't be supported
// by the delegate and that will therefore generate the partitions.
model_ = std::make_unique<LongIdentityModel>(
input_shape, graph_size_, unsupported_ops_idxs,
nnapi_mock_->GetNnApi(), max_nnapi_partitions);
}
} }
std::unique_ptr<LongIdentityModel> model_; std::unique_ptr<LongIdentityModel> model_;
@ -718,24 +792,44 @@ TEST_F(DelegatePartitionLimitTest,
} }
TEST_F(DelegatePartitionLimitTest, ShouldDelegatePartitionWithHigherNodeCount) { TEST_F(DelegatePartitionLimitTest, ShouldDelegatePartitionWithHigherNodeCount) {
int kLargestModelSize = 3;
Init(/*max_nnapi_partitions=*/1, Init(/*max_nnapi_partitions=*/1,
/*nnapi_partition_sizes=*/{3, 2}, /*nnapi_partition_sizes=*/{3, 2},
/*input_shape=*/{1, 2, 2, 1}); /*input_shape=*/{1, 2, 2, 1});
EXPECT_EQ(model_->CountNnApiPartitions(), 1); EXPECT_EQ(model_->CountNnApiPartitions(), 1);
EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(), OriginalGraphSize() - 3); EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(),
OriginalGraphSize() - kLargestModelSize);
} }
TEST_F(DelegatePartitionLimitTest, TEST_F(DelegatePartitionLimitTest,
ShouldDelegatePartitionsWithHigherNodeCount) { ShouldDelegatePartitionsWithHigherNodeCount) {
int kLargestModelSize = 5;
int kSecondLargestModelSize = 4;
Init(/*max_nnapi_partitions=*/2, Init(/*max_nnapi_partitions=*/2,
/*nnapi_partition_sizes=*/{1, 5, 2, 4}, /*nnapi_partition_sizes=*/
{1, kLargestModelSize, 2, kSecondLargestModelSize},
/*input_shape=*/{1, 2, 2, 1}); /*input_shape=*/{1, 2, 2, 1});
EXPECT_EQ(model_->CountNnApiPartitions(), 2); EXPECT_EQ(model_->CountNnApiPartitions(), 2);
EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(), OriginalGraphSize() - 9); EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(), OriginalGraphSize() - 9);
} }
TEST_F(DelegatePartitionLimitTest,
ShouldLimitPartitionsEvenWithoutAcceleratorNameSpecified) {
int kLargestModelSize = 5;
int kSecondLargestModelSize = 4;
Init(/*max_nnapi_partitions=*/2,
/*nnapi_partition_sizes=*/
{1, kLargestModelSize, 2, kSecondLargestModelSize},
/*input_shape=*/{1, 2, 2, 1}, /*specify_accelerator=*/false);
EXPECT_EQ(model_->CountNnApiPartitions(), 2);
EXPECT_EQ(
model_->CountOpsExecutedByCpuKernel(),
OriginalGraphSize() - (kLargestModelSize + kSecondLargestModelSize));
}
} // namespace } // namespace
} // namespace tflite } // namespace tflite