Fix partitioning bug for multiple-delegate cases

PiperOrigin-RevId: 331184246
Change-Id: I22e1c3d195dcbbf846d7cef95aba8507bdd90b7b
This commit is contained in:
Sachin Joglekar 2020-09-11 11:11:45 -07:00 committed by TensorFlower Gardener
parent 77aba03c2a
commit 8edf47bfcc
7 changed files with 100 additions and 26 deletions

View File

@ -140,7 +140,7 @@ TfLiteStatus ArenaPlanner::PlanAllocations() {
}
// Count references to node input tensors.
for (size_t i = 0; i < graph_info_->num_nodes(); ++i) {
for (size_t i = 0; i < graph_info_->num_execution_nodes(); ++i) {
const TfLiteNode& node = graph_info_->node(i);
TfLiteIntArray* node_inputs = node.inputs;
for (int j = 0; j < node_inputs->size; ++j) {
@ -158,7 +158,7 @@ TfLiteStatus ArenaPlanner::PlanAllocations() {
}
}
// Go through the graph in execution order.
for (size_t i = 0; i < graph_info_->num_nodes(); ++i) {
for (size_t i = 0; i < graph_info_->num_execution_nodes(); ++i) {
const TfLiteNode& node = graph_info_->node(i);
// First queue output tensors for allocation.
@ -197,8 +197,8 @@ TfLiteStatus ArenaPlanner::ExecuteAllocations(int first_node, int last_node) {
dealloc_node_.resize(graph_info_->num_tensors(), kNodeNotAssigned);
allocs_.resize(graph_info_->num_tensors());
// Set allocation and deallocation for temporary tensors.
for (size_t i = first_node;
i <= static_cast<size_t>(last_node) && i < graph_info_->num_nodes();
for (size_t i = first_node; i <= static_cast<size_t>(last_node) &&
i < graph_info_->num_execution_nodes();
++i) {
const TfLiteNode& node = graph_info_->node(i);
TfLiteIntArray* node_temporaries = node.temporaries;

View File

@ -134,7 +134,8 @@ class TestGraphInfo : public GraphInfo {
// Returns a mutable pointer to the tensor at `index` in the test graph.
// `index` is expected to be < num_tensors().
TfLiteTensor* tensor(size_t index) override {
return &graph_->tensors()->at(index);
}
size_t num_nodes() const override { return graph_->nodes().size(); }
// Number of nodes in the execution plan. This test helper executes every
// node it owns, so this equals num_total_nodes().
size_t num_execution_nodes() const override { return graph_->nodes().size(); }
// Total number of nodes known to the graph, including any that would not be
// in the execution plan (none for this test helper).
size_t num_total_nodes() const override { return graph_->nodes().size(); }
// Returns the node at position `index` in execution order; `index` is
// expected to be < num_execution_nodes().
const TfLiteNode& node(size_t index) const override {
return graph_->nodes()[index];
}

View File

@ -167,9 +167,10 @@ class InterpreterInfo : public GraphInfo {
// Returns a mutable pointer to the subgraph tensor at `index`; `index` is
// expected to be < num_tensors().
TfLiteTensor* tensor(size_t index) override {
return &subgraph_->tensors()[index];
}
size_t num_nodes() const override {
// Number of nodes in the subgraph's current execution plan. May be smaller
// than num_total_nodes(), e.g. after delegates replace groups of nodes.
size_t num_execution_nodes() const override {
return subgraph_->execution_plan().size();
}
// Total number of nodes in the subgraph, including nodes that are no longer
// part of the execution plan.
size_t num_total_nodes() const override { return subgraph_->nodes_size(); }
const TfLiteNode& node(size_t index) const override {
int node_index = subgraph_->execution_plan()[index];
return subgraph_->nodes_and_registration()[node_index].first;

View File

@ -526,6 +526,35 @@ TEST_F(TestDelegate, SecondDelegationInvokeFailure) {
}
}
// This test ensures that node indices in multi-delegate application are
// handled correctly by the TFLite partitioning algorithm.
TEST_F(TestDelegate, TwoDelegates_ExecutionPlanIndicesDifferent) {
  // First delegate supports original nodes 0 and 1. After it is applied, the
  // execution plan shrinks to 2 nodes (one delegate node, then node 2).
  delegate_ = std::make_unique<SimpleDelegate>(
      std::vector<int>{0, 1}, kTfLiteDelegateFlagsAllowDynamicTensors);
  // Second delegate supports (original) node index 2. Since the execution
  // plan has only 2 entries at that point, this verifies that the
  // partitioning algorithm refers to original node indices rather than
  // execution-plan indices.
  delegate2_ = std::make_unique<SimpleDelegate>(std::vector<int>{2},
                                                kTfLiteDelegateFlagsNone);

  ASSERT_EQ(interpreter_->execution_plan().size(), 3);
  ASSERT_EQ(
      interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()),
      kTfLiteOk);
  ASSERT_EQ(interpreter_->execution_plan().size(), 2);
  ASSERT_EQ(
      interpreter_->ModifyGraphWithDelegate(delegate2_->get_tf_lite_delegate()),
      kTfLiteOk);
  ASSERT_EQ(interpreter_->execution_plan().size(), 2);

  // Verify that inference still runs end-to-end after both delegations.
  ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk);
}
TEST_F(TestDelegate, StaticDelegateMakesGraphImmutable) {
delegate_ = std::unique_ptr<SimpleDelegate>(new SimpleDelegate({0, 1, 2}));
ASSERT_EQ(

View File

@ -40,7 +40,7 @@ class PartitionGraphIntoIndependentNodeSubsetsImpl {
std::vector<NodeSubset>* node_subsets)
: info_(info),
node_subsets_(node_subsets),
node_type_(info->num_nodes(), NodeSubset::kTfNonPartition) {
node_type_(info_->num_total_nodes(), NodeSubset::kTfNonPartition) {
// Populate the node_type_ map.
for (auto node_index : TfLiteIntArrayView(nodes_to_partition)) {
node_type_[node_index] = NodeSubset::kTfPartition;
@ -54,10 +54,11 @@ class PartitionGraphIntoIndependentNodeSubsetsImpl {
tensor_epochs_.clear();
tensor_epochs_.resize(info_->num_tensors(), kEpochAlwaysReady);
node_epochs_.clear();
node_epochs_.resize(info_->num_nodes(), kEpochNotReady);
node_epochs_.resize(info_->num_execution_nodes(), kEpochNotReady);
// Set computed tensors to be kEpochNotReady (initializer set everything to
// AlwaysReady).
for (int node_index = 0; node_index < info_->num_nodes(); node_index++) {
for (int node_index = 0; node_index < info_->num_execution_nodes();
node_index++) {
const TfLiteNode& node = info_->node(node_index);
for (int output_tensor_index : TfLiteIntArrayView(node.outputs)) {
tensor_epochs_[output_tensor_index] = kEpochNotReady;
@ -112,10 +113,10 @@ class PartitionGraphIntoIndependentNodeSubsetsImpl {
kEpochAlwaysReady = -2
};
// Updates the node `node_index` and returns true if it is assigned to an
// epoch. False is returned if the node is already set to an epoch, its inputs
// are not all assigned to epochs, or if it cannot be assigned to the current
// epoch since the epoch's node_type doesn't match.
// Updates the node at `node_index` in the execution plan and returns true if
// it is assigned to an epoch. False is returned if the node is already set to
// an epoch, its inputs are not all assigned to epochs, or if it cannot be
// assigned to the current epoch since the epoch's node_type doesn't match.
bool UpdateNode(int node_index) {
const TfLiteNode& node = info_->node(node_index);
NodeSubset& current_subset = node_subsets_->back();
@ -132,18 +133,20 @@ class PartitionGraphIntoIndependentNodeSubsetsImpl {
return false;
}
}
int original_node_idx = info_->node_index(node_index);
// When we are starting a new epoch, the first ready node defines
// the type of that epoch.
if (current_subset.type == NodeSubset::kTfUnexplored) {
current_subset.type = node_type_[node_index];
current_subset.type = node_type_[original_node_idx];
}
// The node gets assigned to this epoch if it is the same type as
// the epoch's assigned type. Note, if this is the current ready
// node encountered during this epoch, this condition will be
// automatically true.
if (current_subset.type == node_type_[node_index]) {
if (current_subset.type == node_type_[original_node_idx]) {
node_epochs_[node_index] = current_epoch;
current_subset.nodes.push_back(info_->node_index(node_index));
current_subset.nodes.push_back(original_node_idx);
// All outputs of this node now are assigned to this epoch as
// well.
for (int output_tensor_index : TfLiteIntArrayView(node.outputs)) {
@ -180,7 +183,8 @@ class PartitionGraphIntoIndependentNodeSubsetsImpl {
// loop until no more nodes can be updated.
while (true) {
bool did_something = false;
for (int node_index = 0; node_index < info_->num_nodes(); node_index++) {
for (int node_index = 0; node_index < info_->num_execution_nodes();
node_index++) {
if (UpdateNode(node_index)) {
did_something = true;
}
@ -193,6 +197,9 @@ class PartitionGraphIntoIndependentNodeSubsetsImpl {
const GraphInfo* info_;
// List of node_subsets to populate
std::vector<NodeSubset>* node_subsets_;
// NOTE: This vector contains a place-holder for *all* nodes in the graph, not
// just ones in the execution plan. This is because nodes_to_partition is
// passed in as a list of original node indices & not execution plan indices.
std::vector<NodeSubset::Type> node_type_;
// Maps from tensor index to the epoch in which it is assigned. Also special
// negative values of kEpochNotReady if not assigned, kEpochAlwaysReady if it

View File

@ -34,15 +34,21 @@ class GraphInfo {
// num_tensors().
virtual TfLiteTensor* tensor(size_t index) = 0;
// Total number of nodes in the graph.
virtual size_t num_nodes() const = 0;
// Number of nodes in the current execution plan.
virtual size_t num_execution_nodes() const = 0;
// Returns a node given its index which is expected to be between 0 and
// num_nodes().
// Total number of known nodes, which may include nodes that are no longer in
// the execution plan. This happens in case of applying multiple delegates.
// Should be >= num_execution_nodes().
virtual size_t num_total_nodes() const = 0;
// Returns a node given its index in the execution plan, which is expected to
// be between 0 and num_execution_nodes().
virtual const TfLiteNode& node(size_t index) const = 0;
// Returns an implementation-specific node index which may be different from
// index.
// execution-plan index.
// Expected to be between 0 and num_total_nodes().
virtual size_t node_index(size_t index) const = 0;
// Returns the indices of the input tensors.

View File

@ -13,10 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/graph_info.h"
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "tensorflow/lite/graph_info.h"
#include "tensorflow/lite/testing/util.h"
namespace tflite {
@ -34,6 +34,8 @@ class SimpleTestGraph : public GraphInfo {
public:
explicit SimpleTestGraph(int node_index_offset = 0)
: node_index_offset_(node_index_offset) {
// 'node_index_offset' number of nodes are not present in the execution
// plan. (and hence not considered for partitioning)
for (int i = 0; i < node_index_offset; ++i) AddNode({}, {});
}
@ -44,7 +46,8 @@ class SimpleTestGraph : public GraphInfo {
}
}
size_t num_nodes() const override {
size_t num_total_nodes() const override { return nodes_.size(); }
size_t num_execution_nodes() const override {
return nodes_.size() - node_index_offset_;
}
const TfLiteNode& node(size_t index) const override {
@ -156,7 +159,7 @@ TEST(PartitionTest, Nodes1PartitionNodes0) {
CheckPartitionSubgraphs(generated_subgraphs, {expected_subgraph});
}
TEST(PartitionTest, Nodes1PartitionNodes0WithOffset) {
TEST(PartitionTest, Nodes1PartitionNodes0_WithOffset) {
constexpr int node_index_offset = 17;
SimpleTestGraph graph(node_index_offset);
graph.AddTensors(2);
@ -243,6 +246,33 @@ TEST(PartitionTest, Nodes2PartitionNodes1) {
{expected_subgraph0, expected_subgraph1});
}
// Same as above, but with a node index offset, to ensure correct handling of
// original vs execution-plan node indices.
TEST(PartitionTest, Nodes2PartitionNodes1_WithOffset) {
  constexpr int node_index_offset = 17;
  SimpleTestGraph graph(node_index_offset);
  graph.AddTensors(3);
  graph.AddNode({0}, {1});
  graph.AddNode({1}, {2});
  graph.SetInputsAndOutputs({0}, {2});
  // Only the second real node (original index offset + 1) is partitioned.
  std::vector<int> nodes_to_partition = {node_index_offset + 1};
  std::vector<NodeSubset> generated_subgraphs;
  PartitionGraph(graph, nodes_to_partition, &generated_subgraphs);

  // Node (offset + 0) is not in nodes_to_partition, so the partitioner's
  // node_type_ map leaves it as kTfNonPartition; only the second subset is a
  // delegated partition.
  NodeSubset expected_subgraph0;
  expected_subgraph0.type = NodeSubset::kTfNonPartition;
  expected_subgraph0.nodes = {node_index_offset + 0};
  expected_subgraph0.input_tensors = {0};
  expected_subgraph0.output_tensors = {1};
  NodeSubset expected_subgraph1;
  expected_subgraph1.type = NodeSubset::kTfPartition;
  expected_subgraph1.nodes = {node_index_offset + 1};
  expected_subgraph1.input_tensors = {1};
  expected_subgraph1.output_tensors = {2};
  CheckPartitionSubgraphs(generated_subgraphs,
                          {expected_subgraph0, expected_subgraph1});
}
// Test a 2 node graph where both nodes are fully partitioned.
// Input: tensor(0) -> node(0) -> tensor(1) -> node(1) -> tensor(2),
// nodes_to_partition = [0, 1]