From f781e5eb44d3ece0ca82627e7cf4c87ad95e4620 Mon Sep 17 00:00:00 2001
From: Stefano Galarraga
Date: Fri, 21 Feb 2020 03:54:58 -0800
Subject: [PATCH] Support partition limitation even when accelerator name is
 not specified

PiperOrigin-RevId: 296406206
Change-Id: I4bdd9d0e9b578401bb380828c628e14ef5aa711c
---
 tensorflow/lite/delegates/nnapi/BUILD         |   1 +
 .../lite/delegates/nnapi/nnapi_delegate.cc    |  32 ++--
 .../nnapi_delegate_device_selection_test.cc   | 152 ++++++++++++++----
 3 files changed, 142 insertions(+), 43 deletions(-)

diff --git a/tensorflow/lite/delegates/nnapi/BUILD b/tensorflow/lite/delegates/nnapi/BUILD
index ee47ad0e24d..021f74b6ab0 100644
--- a/tensorflow/lite/delegates/nnapi/BUILD
+++ b/tensorflow/lite/delegates/nnapi/BUILD
@@ -179,6 +179,7 @@ cc_test(
         ":nnapi_delegate",
         ":nnapi_delegate_mock_test",
         "//tensorflow/lite:framework",
+        "//tensorflow/lite:kernel_api",
         "//tensorflow/lite:minimal_logging",
         "//tensorflow/lite/c:common",
         "//tensorflow/lite/kernels:test_util",
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
index a3a4babd91f..84b3cfb2f15 100644
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
@@ -4185,25 +4185,29 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
       .version = 1,
   };
 
-  std::vector<int>& nodes_to_delegate = supported_nodes;
+  std::vector<int> nodes_to_delegate;
+
+  int num_partitions;
+  TfLiteDelegateParams* params_array;
   if (is_accelerator_specified) {
-    std::vector<int> device_supported_nodes;
-    int num_partitions;
-    TfLiteDelegateParams* params_array;
-
+    // Filter out nodes not supported by the target accelerators.
     TF_LITE_ENSURE_STATUS(GetNodesSupportedByAccelerator(
-        context, delegate, nnapi, supported_nodes, &device_supported_nodes,
+        context, delegate, nnapi, supported_nodes, &nodes_to_delegate,
         &num_partitions, &params_array, nnapi_errno));
-
-    TF_LITE_ENSURE_STATUS(LimitDelegatedPartitions(
-        delegate_options.max_number_delegated_partitions,
-        std::vector<TfLiteDelegateParams>(params_array,
-                                          params_array + num_partitions),
-        &device_supported_nodes));
-
-    nodes_to_delegate = device_supported_nodes;
+  } else {
+    nodes_to_delegate = supported_nodes;
+    auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes);
+    TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
+        context, supported_nodes_int_array.get(), &params_array,
+        &num_partitions));
   }
 
+  TF_LITE_ENSURE_STATUS(
+      LimitDelegatedPartitions(delegate_options.max_number_delegated_partitions,
+                               std::vector<TfLiteDelegateParams>(
+                                   params_array, params_array + num_partitions),
+                               &nodes_to_delegate));
+
   if (nodes_to_delegate.empty()) {
     return kTfLiteOk;
   } else {
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_device_selection_test.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate_device_selection_test.cc
index bf9e00bee69..d6183e63013 100644
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_device_selection_test.cc
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_device_selection_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 #include
 #include
+#include <unordered_set>
 #include
 #include
 #include
@@ -23,6 +24,7 @@ limitations under the License.
 #include
 #include
+#include "tensorflow/lite/builtin_ops.h"
 #include "tensorflow/lite/c/common.h"
 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h"
@@ -545,13 +547,39 @@ TEST_F(UnsupportedOperationOnDeviceTest, ShouldCacheModelCompilation) {
 }
 
 // Model with a chain of no-op (add with zero operations)
+// interleaved with no-op custom nodes.
 class LongIdentityModel : public MultiOpModel, public AcceleratedModel {
  public:
   LongIdentityModel(const std::vector<int>& input_shape, int graph_size,
+                    const std::unordered_set<int>& custom_nodes_indexes,
                     const NnApi* nnapi, const std::string& accelerator_name,
                     int max_nnapi_partitions)
       : MultiOpModel(),
         AcceleratedModel(nnapi, accelerator_name, max_nnapi_partitions) {
+    Init(input_shape, graph_size, custom_nodes_indexes);
+  }
+
+  LongIdentityModel(const std::vector<int>& input_shape, int graph_size,
+                    const std::unordered_set<int>& custom_nodes_indexes,
+                    const NnApi* nnapi, int max_nnapi_partitions)
+      : MultiOpModel(), AcceleratedModel(nnapi, false, max_nnapi_partitions) {
+    Init(input_shape, graph_size, custom_nodes_indexes);
+  }
+
+  void SetInput(std::vector<float> value) { PopulateTensor(input_, value); }
+
+  int CountNnApiPartitions() {
+    return std::count_if(
+        std::begin(interpreter_->execution_plan()),
+        std::end(interpreter_->execution_plan()), [this](const int node_index) {
+          return interpreter_->node_and_registration(node_index)
+                     ->first.delegate != nullptr;
+        });
+  }
+
+ private:
+  void Init(const std::vector<int>& input_shape, int graph_size,
+            const std::unordered_set<int>& custom_nodes_indexes) {
     auto* delegate = GetDelegate();
     this->SetApplyDelegate([delegate](Interpreter* interpreter) {
       interpreter->ModifyGraphWithDelegate(delegate);
@@ -574,10 +602,15 @@ class LongIdentityModel : public MultiOpModel, public AcceleratedModel {
                  {intermediate_outputs[0]});
 
     for (int i = 0; i < intermediate_outputs.size() - 1; i++) {
-      AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
-                   CreateAddOptions(builder_).Union(),
-                   {intermediate_outputs[i], zero_input_},
-                   {intermediate_outputs[i + 1]});
+      if (custom_nodes_indexes.count(i + 1) == 1) {
+        AddCustomOp("custom_no_op", {}, [this]() { return CustomNoOpNode(); },
+                    {intermediate_outputs[i]}, {intermediate_outputs[i + 1]});
+      } else {
+        AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
+                     CreateAddOptions(builder_).Union(),
+                     {intermediate_outputs[i], zero_input_},
+                     {intermediate_outputs[i + 1]});
+      }
     }
 
     AddBuiltinOp(
@@ -592,18 +625,42 @@ class LongIdentityModel : public MultiOpModel, public AcceleratedModel {
     PopulateTensor(zero_input_, zero);
   }
 
-  void SetInput(std::vector<float> value) { PopulateTensor(input_, value); }
+  // Returns the registration of a custom node that simply copies input to
+  // output.
+  TfLiteRegistration* CustomNoOpNode() {
+    static TfLiteRegistration no_op = {
+        .init = [](TfLiteContext* context, const char* buffer,
+                   size_t length) -> void* { return nullptr; },
 
-  int CountNnApiPartitions() {
-    return std::count_if(
-        std::begin(interpreter_->execution_plan()),
-        std::end(interpreter_->execution_plan()), [this](const int node_index) {
-          return interpreter_->node_and_registration(node_index)
-                     ->first.delegate != nullptr;
-        });
+        .free = [](TfLiteContext* context, void* buffer) -> void {},
+
+        .prepare = [](TfLiteContext* context,
+                      TfLiteNode* node) -> TfLiteStatus {
+          if (node->inputs->size != 1 || node->outputs->size != 1) {
+            return kTfLiteError;
+          }
+
+          return kTfLiteOk;
+        },
+
+        .invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
+          auto input_tensor = context->tensors[node->inputs->data[0]];
+          auto output_tensor = context->tensors[node->outputs->data[0]];
+
+          std::copy(input_tensor.data.raw,
+                    input_tensor.data.raw + input_tensor.bytes,
+                    output_tensor.data.raw);
+
+          return kTfLiteOk;
+        },
+
+        .profiling_string = nullptr,
+        .builtin_code = kTfLiteBuiltinDelegate,
+        .custom_name = "NoOpTestDelegate",
+        .version = 1,
+    };
+
+    return &no_op;
   }
-
- private:
   int input_;
   int zero_input_;
   int output_;
@@ -643,7 +700,8 @@ class DelegatePartitionLimitTest
   // input_shape.
   void Init(int max_nnapi_partitions,
             const std::vector<int>& nnapi_partition_sizes,
-            const std::vector<int>& input_shape) {
+            const std::vector<int>& input_shape,
+            bool specify_accelerator = true) {
     // The graph will have as number of nodes the sum of nodes in the NNAPI
     // partitions plus nnapi_partition_sizes.size() - 1 nodes that will be
    // not supported by NNAPI and will cause the
@@ -658,20 +716,36 @@
       unsupported_ops_idxs.insert(partition_node_idx);
     }
 
-    DelegatePartitionLimitTestNodeFilter()->ConfigureSupportedNodes(
-        graph_size_, unsupported_ops_idxs);
+    if (specify_accelerator) {
+      // Build a model that initially contains a single partition and is then
+      // split into partitions based on the operations supported by the
+      // target accelerator.
+      // This is done because the stubbed GetSupportedOperationsForDevices API
+      // has no way to control the size of each partition.
+      DelegatePartitionLimitTestNodeFilter()->ConfigureSupportedNodes(
+          graph_size_, unsupported_ops_idxs);
 
-    nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
-        [](const ANeuralNetworksModel* model,
-           const ANeuralNetworksDevice* const* devices, uint32_t num_devices,
-           bool* supported_ops) -> int {
-          DelegatePartitionLimitTestNodeFilter()->SetNodeSupport(supported_ops);
-          return ANEURALNETWORKS_NO_ERROR;
-        });
+      nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
+          [](const ANeuralNetworksModel* model,
+             const ANeuralNetworksDevice* const* devices, uint32_t num_devices,
+             bool* supported_ops) -> int {
+            DelegatePartitionLimitTestNodeFilter()->SetNodeSupport(
+                supported_ops);
+            return ANEURALNETWORKS_NO_ERROR;
+          });
 
-    model_ = std::make_unique<LongIdentityModel>(
-        input_shape, graph_size_, nnapi_mock_->GetNnApi(),
-        /*accelerator_name=*/"test-device", max_nnapi_partitions);
+      model_ = std::make_unique<LongIdentityModel>(
+          input_shape, graph_size_,
+          /*custom_nodes_indexes=*/std::unordered_set<int>(),
+          nnapi_mock_->GetNnApi(),
+          /*accelerator_name=*/"test-device", max_nnapi_partitions);
+    } else {
+      // Build a model containing custom nodes that won't be supported by the
+      // delegate and will therefore generate the partitions.
+      model_ = std::make_unique<LongIdentityModel>(
+          input_shape, graph_size_, unsupported_ops_idxs,
+          nnapi_mock_->GetNnApi(), max_nnapi_partitions);
+    }
   }
 
   std::unique_ptr<LongIdentityModel> model_;
@@ -718,24 +792,44 @@ TEST_F(DelegatePartitionLimitTest,
 }
 
 TEST_F(DelegatePartitionLimitTest,
        ShouldDelegatePartitionWithHigherNodeCount) {
+  int kLargestModelSize = 3;
   Init(/*max_nnapi_partitions=*/1,
        /*nnapi_partition_sizes=*/{3, 2},
        /*input_shape=*/{1, 2, 2, 1});
 
   EXPECT_EQ(model_->CountNnApiPartitions(), 1);
-  EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(), OriginalGraphSize() - 3);
+  EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(),
+            OriginalGraphSize() - kLargestModelSize);
 }
 
 TEST_F(DelegatePartitionLimitTest,
        ShouldDelegatePartitionsWithHigherNodeCount) {
+  int kLargestModelSize = 5;
+  int kSecondLargestModelSize = 4;
   Init(/*max_nnapi_partitions=*/2,
-       /*nnapi_partition_sizes=*/{1, 5, 2, 4},
+       /*nnapi_partition_sizes=*/
+       {1, kLargestModelSize, 2, kSecondLargestModelSize},
        /*input_shape=*/{1, 2, 2, 1});
 
   EXPECT_EQ(model_->CountNnApiPartitions(), 2);
   EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(), OriginalGraphSize() - 9);
 }
 
+TEST_F(DelegatePartitionLimitTest,
+       ShouldLimitPartitionsEvenWithoutAcceleratorNameSpecified) {
+  int kLargestModelSize = 5;
+  int kSecondLargestModelSize = 4;
+  Init(/*max_nnapi_partitions=*/2,
+       /*nnapi_partition_sizes=*/
+       {1, kLargestModelSize, 2, kSecondLargestModelSize},
+       /*input_shape=*/{1, 2, 2, 1}, /*specify_accelerator=*/false);
+
+  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
+  EXPECT_EQ(
+      model_->CountOpsExecutedByCpuKernel(),
+      OriginalGraphSize() - (kLargestModelSize + kSecondLargestModelSize));
+}
+
 }  // namespace
 }  // namespace tflite