Use a simpler external Cpu backend context class to replace the actual functionalities of the existing ref-counted cpu backend context class.

PiperOrigin-RevId: 259252521
This commit is contained in:
Chao Mei 2019-07-21 21:01:16 -07:00 committed by TensorFlower Gardener
parent 96d0f42d1b
commit b4e5625437
9 changed files with 232 additions and 70 deletions

View File

@ -94,6 +94,16 @@ cc_library(
deps = ["//tensorflow/lite/c:c_api_internal"],
)
cc_library(
name = "external_cpu_backend_context",
srcs = ["external_cpu_backend_context.cc"],
hdrs = ["external_cpu_backend_context.h"],
copts = TFLITE_DEFAULT_COPTS,
deps = [
"//tensorflow/lite/c:c_api_internal",
],
)
cc_library(
name = "graph_info",
hdrs = ["graph_info.h"],
@ -201,6 +211,7 @@ cc_library(
deps = [
":allocation",
":arena_planner",
":external_cpu_backend_context",
":graph_info",
":memory_planner",
":minimal_logging",

View File

@ -0,0 +1,38 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/external_cpu_backend_context.h"
namespace tflite {
namespace {
// Refresh callback installed on the external context: propagates the
// interpreter's recommended thread count (when one has been set, i.e. it is
// not -1) into the wrapped internal backend context, if any.
TfLiteStatus RefreshExternalCpuBackendContext(TfLiteContext* context) {
  auto* const external_context = static_cast<ExternalCpuBackendContext*>(
      context->GetExternalContext(context, kTfLiteCpuBackendContext));
  if (external_context != nullptr &&
      context->recommended_num_threads != -1) {
    auto* const internal_context =
        external_context->internal_backend_context();
    if (internal_context != nullptr) {
      internal_context->set_max_num_threads(context->recommended_num_threads);
    }
  }
  return kTfLiteOk;
}
} // namespace
// Constructs the context with no internal backend attached yet; the internal
// backend is supplied later via set_internal_backend_context() (typically
// lazily, on first use). Registers the refresh hook so thread-count changes
// on the TfLiteContext are forwarded to the internal backend.
ExternalCpuBackendContext::ExternalCpuBackendContext()
    : internal_backend_context_(nullptr) {
  type = kTfLiteCpuBackendContext;
  Refresh = RefreshExternalCpuBackendContext;
}
} // namespace tflite

View File

@ -0,0 +1,110 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXTERNAL_CPU_BACKEND_CONTEXT_H_
#define TENSORFLOW_LITE_EXTERNAL_CPU_BACKEND_CONTEXT_H_
#include <memory>
#include <utility>
#include "tensorflow/lite/c/c_api_internal.h"
namespace tflite {
// This is the base class for TF Lite internal backend contexts (like a
// RUY-based cpu backend context class). A derived internal backend context is
// generally a collection of utilities (i.e. a thread pool etc.) for TF Lite to
// use certain kernel libraries, such as Gemmlowp, RUY, etc., to implement TF
// Lite operators.
// TODO(b/130950871): Make this class an interface-only abstract class.
class TfLiteInternalBackendContext {
 public:
  virtual ~TfLiteInternalBackendContext() {}

  // Returns the maximum number of threads used for parallelizing TfLite
  // computation, or -1 if no thread count has been set yet.
  int max_num_threads() const { return max_num_threads_; }

  // Sets the maximum number of threads. Derived contexts may override this
  // to also reconfigure their thread pools.
  virtual void set_max_num_threads(int max_num_threads) {
    max_num_threads_ = max_num_threads;
  }

 protected:
  TfLiteInternalBackendContext() {}

  // The maximum number of threads used for parallelizing TfLite computation.
  // Initialized to -1 ("unspecified") so that reading it before
  // set_max_num_threads() has been called yields a well-defined value; the
  // member was previously left uninitialized, making such a read undefined
  // behavior.
  int max_num_threads_ = -1;

 private:
  TfLiteInternalBackendContext(const TfLiteInternalBackendContext&) = delete;
  TfLiteInternalBackendContext& operator=(const TfLiteInternalBackendContext&) =
      delete;
};
// This TfLiteExternalContext-derived class is the default
// 'kTfLiteCpuBackendContext'-typed context that's used internally in TF Lite
// framework. The primary purpose of having this class is to allow the same cpu
// backend context to be sharable among a set of TF Lite interpreters so that
// certain system costs are saved, like saving the cost of having multiple
// thread pools in each separate cpu backend context etc..
//
// Note: as of 2019/07/19, such context sharing among a set of interpreters will
// break the execution if these interpreters are invoked simultaneously. It
// works only when these context-sharing interpreters are invoked in a
// serialized way. Here's an example to illustrate the context sharing among 2
// TF Lite interpreters:
//
// TfLiteInternalBackendContext* global_ctxt = new ExternalCpuBackendContext();
// interpreter1 = /*...*/;
// interpreter1->SetExternalContext(kTfLiteCpuBackendContext, global_ctxt);
// interpreter2 = /*...*/;
// interpreter2->SetExternalContext(kTfLiteCpuBackendContext, global_ctxt);
//
// interpreter1->SetNumThreads(2);
// interpreter1->Invoke();
//
// interpreter2->SetNumThreads(4);
// interpreter2->Invoke();
//
// After sharing the context, calling 'SetNumThreads' on any of the
// context-sharing interpreters will have a global impact as it also refreshes
// the #thread info in the global cpu backend context (i.e. 'global_ctxt' above)
// that affects how much parallelism an interpreter invocation will use.
// Therefore, if different numbers of threads are used among different
// interpreters, don't call 'SetNumThreads' consecutively but call it
// separately between each interpreter's invocation as illustrated above.
class ExternalCpuBackendContext : public TfLiteExternalContext {
 public:
  ExternalCpuBackendContext();
  ~ExternalCpuBackendContext() = default;

  // Takes ownership of 'internal_backend_context', replacing any previously
  // attached internal backend context.
  void set_internal_backend_context(
      std::unique_ptr<TfLiteInternalBackendContext> internal_backend_context) {
    internal_backend_context_ = std::move(internal_backend_context);
  }

  // Returns the wrapped internal backend context, or nullptr if one has not
  // been attached yet.
  TfLiteInternalBackendContext* internal_backend_context() const {
    return internal_backend_context_.get();
  }

 private:
  // The wrapped internal backend context. Note it is lazily initialized:
  // it stays null until a caller attaches one via
  // set_internal_backend_context().
  std::unique_ptr<TfLiteInternalBackendContext> internal_backend_context_;

  ExternalCpuBackendContext(const ExternalCpuBackendContext&) = delete;
  ExternalCpuBackendContext& operator=(const ExternalCpuBackendContext&) =
      delete;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_EXTERNAL_CPU_BACKEND_CONTEXT_H_

View File

@ -71,6 +71,12 @@ Interpreter::Interpreter(ErrorReporter* error_reporter)
external_contexts_[i] = nullptr;
}
// This operation is cheap because we allocate the CPU context resources (i.e.
// threads) lazily.
own_external_cpu_backend_context_.reset(new ExternalCpuBackendContext());
external_contexts_[kTfLiteCpuBackendContext] =
own_external_cpu_backend_context_.get();
UseNNAPI(false);
}
@ -78,6 +84,26 @@ Interpreter::~Interpreter() {}
void Interpreter::SetExternalContext(TfLiteExternalContextType type,
                                     TfLiteExternalContext* ctx) {
  // Handing back the context we already own would be a no-op at best and a
  // double-ownership hazard at worst: warn and bail out.
  if (ctx == own_external_cpu_backend_context_.get()) {
    error_reporter_->Report(
        "WARNING: The passed external context is identical to the internally "
        "owned one.");
    return;
  }

  // If the caller is replacing the internally owned cpu backend context,
  // release its resources now. Note: the 'max thread count' info associated
  // with the replaced context is lost here; such info is now determined by
  // the new context 'ctx', which affects how much parallelism a TFLite op
  // will use.
  const bool replacing_owned_cpu_context =
      type == kTfLiteCpuBackendContext &&
      external_contexts_[kTfLiteCpuBackendContext] ==
          own_external_cpu_backend_context_.get();
  if (replacing_owned_cpu_context) {
    own_external_cpu_backend_context_.reset();
  }

  // This ultimately updates 'external_contexts_[type]'.
  primary_subgraph().SetExternalContext(type, ctx);
}

View File

@ -20,6 +20,7 @@ limitations under the License.
#include <complex>
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <vector>
#include "tensorflow/lite/allocation.h"
@ -27,6 +28,7 @@ limitations under the License.
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/core/subgraph.h"
#include "tensorflow/lite/external_cpu_backend_context.h"
#include "tensorflow/lite/memory_planner.h"
#include "tensorflow/lite/stderr_reporter.h"
@ -460,7 +462,9 @@ class Interpreter {
return op_reg.profiling_string(context_, node);
}
/// Set the value of an external context.
// Set the value of an external context. The TFLite interpreter doesn't take
// ownership of the external context 'ctx', and the context should outlive
// the TFLite interpreter.
void SetExternalContext(TfLiteExternalContextType type,
TfLiteExternalContext* ctx);
@ -526,6 +530,13 @@ class Interpreter {
// List of active external contexts.
TfLiteExternalContext* external_contexts_[kTfLiteMaxExternalContexts];
// The default external cpu backend context. After a TFLite interpreter is
// initialized, 'external_contexts_[kTfLiteCpuBackendContext]' is set to point
// to this object. However, if this element value is overwritten via calling
// 'SetExternalContext(kTfLiteCpuBackendContext, ...)', we will reset this to
// nullptr if necessary.
std::unique_ptr<ExternalCpuBackendContext> own_external_cpu_backend_context_;
// Subgraphs
std::vector<std::unique_ptr<Subgraph>> subgraphs_;
};

View File

@ -227,6 +227,7 @@ cc_library(
# gemmlowp_context_ and ruy_context_ members.
"//tensorflow/lite/experimental/ruy:context",
"@gemmlowp",
"//tensorflow/lite:external_cpu_backend_context",
],
)
@ -319,8 +320,8 @@ cc_library(
deps = [
":cpu_backend_context",
":op_macros",
"//tensorflow/lite:external_cpu_backend_context",
"//tensorflow/lite/c:c_api_internal",
"@gemmlowp",
],
)

View File

@ -21,7 +21,8 @@ limitations under the License.
namespace tflite {
CpuBackendContext::CpuBackendContext()
: ruy_context_(new ruy::Context),
: TfLiteInternalBackendContext(),
ruy_context_(new ruy::Context),
gemmlowp_context_(new gemmlowp::GemmContext) {
set_max_num_threads(1);
}

View File

@ -20,13 +20,14 @@ limitations under the License.
#include "public/gemmlowp.h"
#include "tensorflow/lite/experimental/ruy/context.h"
#include "tensorflow/lite/external_cpu_backend_context.h"
namespace tflite {
class CpuBackendContext final {
class CpuBackendContext final : public TfLiteInternalBackendContext {
public:
CpuBackendContext();
~CpuBackendContext();
~CpuBackendContext() override;
ruy::Context* ruy_context() const { return ruy_context_.get(); }
@ -44,10 +45,7 @@ class CpuBackendContext final {
//
// This value also gets propagated to back-ends, where it plays the same
// information-only role.
void set_max_num_threads(int max_num_threads);
// See set_max_num_threads.
int max_num_threads() const { return max_num_threads_; }
void set_max_num_threads(int max_num_threads) override;
private:
// To enable a smooth transition from the current direct usage
@ -59,9 +57,6 @@ class CpuBackendContext final {
const std::unique_ptr<ruy::Context> ruy_context_;
const std::unique_ptr<gemmlowp::GemmContext> gemmlowp_context_;
// See set_max_num_threads.
int max_num_threads_;
CpuBackendContext(const CpuBackendContext&) = delete;
};

View File

@ -17,74 +17,43 @@ limitations under the License.
#include <memory>
#include "tensorflow/lite/c/c_api_internal.h"
#include "tensorflow/lite/external_cpu_backend_context.h"
#include "tensorflow/lite/kernels/cpu_backend_context.h"
#include "tensorflow/lite/kernels/op_macros.h"
namespace tflite {
namespace cpu_backend_support {
namespace {
// TODO(b/130950871) we probably shouldn't be using any reference-counting
// but this is an existing idiom.
struct RefCountedCpuBackendContext : public TfLiteExternalContext {
std::unique_ptr<CpuBackendContext> cpu_backend_context;
int num_references = 0;
};
RefCountedCpuBackendContext* GetCpuBackendContext(TfLiteContext* context) {
return static_cast<RefCountedCpuBackendContext*>(
context->GetExternalContext(context, kTfLiteCpuBackendContext));
}
TfLiteStatus Refresh(TfLiteContext* context) {
auto* refcounted = GetCpuBackendContext(context);
if (refcounted != nullptr) {
refcounted->cpu_backend_context->set_max_num_threads(
context->recommended_num_threads);
}
return kTfLiteOk;
}
} // namespace
void IncrementUsageCounter(TfLiteContext* context) {
RefCountedCpuBackendContext* refcounted = GetCpuBackendContext(context);
if (refcounted == nullptr) {
refcounted = new RefCountedCpuBackendContext;
refcounted->type = kTfLiteCpuBackendContext;
refcounted->Refresh = Refresh;
refcounted->cpu_backend_context.reset(new CpuBackendContext);
if (context->recommended_num_threads != -1) {
refcounted->cpu_backend_context->set_max_num_threads(
context->recommended_num_threads);
}
refcounted->num_references = 0;
context->SetExternalContext(context, kTfLiteCpuBackendContext, refcounted);
}
refcounted->num_references++;
}
void DecrementUsageCounter(TfLiteContext* context) {
RefCountedCpuBackendContext* refcounted = GetCpuBackendContext(context);
if (refcounted == nullptr) {
TF_LITE_FATAL(
"Call to DecrementUsageCounter() not preceded by "
"IncrementUsageCounter()");
}
if (--refcounted->num_references == 0) {
delete refcounted;
context->SetExternalContext(context, kTfLiteCpuBackendContext, nullptr);
}
}
// TODO(b/130950871): Remove all references to the following two no-op
// functions once the new ExternalCpuBackendContext class is checked in.
void IncrementUsageCounter(TfLiteContext* context) {}
void DecrementUsageCounter(TfLiteContext* context) {}
CpuBackendContext* GetFromContext(TfLiteContext* context) {
RefCountedCpuBackendContext* refcounted = GetCpuBackendContext(context);
if (refcounted == nullptr) {
auto* external_context = static_cast<ExternalCpuBackendContext*>(
context->GetExternalContext(context, kTfLiteCpuBackendContext));
if (external_context == nullptr) {
TF_LITE_FATAL(
"Call to GetFromContext() not preceded by IncrementUsageCounter()");
"ExternalCpuBackendContext isn't properly initialized during TFLite "
"interpreter initialization.");
}
return refcounted->cpu_backend_context.get();
auto* cpu_backend_context = static_cast<CpuBackendContext*>(
external_context->internal_backend_context());
if (cpu_backend_context == nullptr) {
// We do the lazy initialization here for the TfLiteInternalBackendContext
// that's wrapped inside ExternalCpuBackendContext.
cpu_backend_context = new CpuBackendContext();
if (context->recommended_num_threads != -1) {
cpu_backend_context->set_max_num_threads(
context->recommended_num_threads);
}
external_context->set_internal_backend_context(
std::unique_ptr<TfLiteInternalBackendContext>(cpu_backend_context));
}
return cpu_backend_context;
}
} // namespace cpu_backend_support