Use a simpler external Cpu backend context class to replace the actual functionalities of the existing ref-counted cpu backend context class.

PiperOrigin-RevId: 259252521
This commit is contained in:
Chao Mei 2019-07-21 21:01:16 -07:00 committed by TensorFlower Gardener
parent 96d0f42d1b
commit b4e5625437
9 changed files with 232 additions and 70 deletions

View File

@ -94,6 +94,16 @@ cc_library(
deps = ["//tensorflow/lite/c:c_api_internal"],
)
cc_library(
name = "external_cpu_backend_context",
srcs = ["external_cpu_backend_context.cc"],
hdrs = ["external_cpu_backend_context.h"],
copts = TFLITE_DEFAULT_COPTS,
deps = [
"//tensorflow/lite/c:c_api_internal",
],
)
cc_library(
name = "graph_info",
hdrs = ["graph_info.h"],
@ -201,6 +211,7 @@ cc_library(
deps = [
":allocation",
":arena_planner",
":external_cpu_backend_context",
":graph_info",
":memory_planner",
":minimal_logging",

View File

@ -0,0 +1,38 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/external_cpu_backend_context.h"
namespace tflite {
namespace {
// Refresh callback installed on the external context: propagates the
// interpreter's recommended thread count (when one has been set, i.e. it is
// not -1) into the wrapped internal backend context, if any.
TfLiteStatus RefreshExternalCpuBackendContext(TfLiteContext* context) {
  auto* const external_context = static_cast<ExternalCpuBackendContext*>(
      context->GetExternalContext(context, kTfLiteCpuBackendContext));
  if (external_context != nullptr &&
      context->recommended_num_threads != -1) {
    auto* const internal_context =
        external_context->internal_backend_context();
    if (internal_context != nullptr) {
      internal_context->set_max_num_threads(context->recommended_num_threads);
    }
  }
  return kTfLiteOk;
}
} // namespace
// Constructs the context with no internal backend attached yet; the internal
// backend is supplied later via set_internal_backend_context() (typically
// lazily, on first use). Registers the refresh hook so thread-count changes
// on the TfLiteContext are forwarded to the internal backend.
ExternalCpuBackendContext::ExternalCpuBackendContext()
    : internal_backend_context_(nullptr) {
  type = kTfLiteCpuBackendContext;
  Refresh = RefreshExternalCpuBackendContext;
}
} // namespace tflite

View File

@ -0,0 +1,110 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXTERNAL_CPU_BACKEND_CONTEXT_H_
#define TENSORFLOW_LITE_EXTERNAL_CPU_BACKEND_CONTEXT_H_
#include <memory>
#include <utility>
#include "tensorflow/lite/c/c_api_internal.h"
namespace tflite {
// This is the base class for TF Lite internal backend contexts (like a
// RUY-based cpu backend context class). A derived internal backend context is
// generally a collection of utilities (i.e. a thread pool etc.) for TF Lite to
// use certain kernel libraries, such as Gemmlowp, RUY, etc., to implement TF
// Lite operators.
// TODO(b/130950871): Make this class an interface-only abstract class.
class TfLiteInternalBackendContext {
 public:
  virtual ~TfLiteInternalBackendContext() {}

  // Returns the maximum number of threads used for parallelizing TfLite
  // computation, or -1 if no thread count has been set yet.
  int max_num_threads() const { return max_num_threads_; }

  // Sets the maximum number of threads. Derived contexts may override this
  // to also reconfigure their thread pools.
  virtual void set_max_num_threads(int max_num_threads) {
    max_num_threads_ = max_num_threads;
  }

 protected:
  TfLiteInternalBackendContext() {}

  // The maximum number of threads used for parallelizing TfLite computation.
  // Initialized to -1 ("unspecified") so that reading it before
  // set_max_num_threads() has been called yields a well-defined value; the
  // member was previously left uninitialized, making such a read undefined
  // behavior.
  int max_num_threads_ = -1;

 private:
  TfLiteInternalBackendContext(const TfLiteInternalBackendContext&) = delete;
  TfLiteInternalBackendContext& operator=(const TfLiteInternalBackendContext&) =
      delete;
};
// This TfLiteExternalContext-derived class is the default
// 'kTfLiteCpuBackendContext'-typed context that's used internally in TF Lite
// framework. The primary purpose of having this class is to allow the same cpu
// backend context to be sharable among a set of TF Lite interpreters so that
// certain system costs are saved, like saving the cost of having multiple
// thread pools in each separate cpu backend context etc..
//
// Note: as of 2019/07/19, such context sharing among a set of interpreters will
// break the execution if these interpreters are invoked simultaneously. It
// works only when these context-sharing interpreters are invoked in a
// serialized way. Here's an example to illustrate the context sharing among 2
// TF Lite interpreters:
//
// TfLiteInternalBackendContext* global_ctxt = new ExternalCpuBackendContext();
// interpreter1 = /*...*/;
// interpreter1->SetExternalContext(kTfLiteCpuBackendContext, global_ctxt);
// interpreter2 = /*...*/;
// interpreter2->SetExternalContext(kTfLiteCpuBackendContext, global_ctxt);
//
// interpreter1->SetNumThreads(2);
// interpreter1->Invoke();
//
// interpreter2->SetNumThreads(4);
// interpreter2->Invoke();
//
// After sharing the context, calling 'SetNumThreads' on any of the
// context-sharing interpreters will have a global impact as it also refreshes
// the #thread info in the global cpu backend context (i.e. 'global_ctxt' above)
// that affects how much parallelism an interpreter invocation will use.
// Therefore, if different numbers of threads are used among different
// interpreters, don't call 'SetNumThreads' consecutively but call it
// separately between each interpreter's invocation as illustrated above.
class ExternalCpuBackendContext : public TfLiteExternalContext {
 public:
  ExternalCpuBackendContext();
  ~ExternalCpuBackendContext() = default;

  // Takes ownership of 'internal_backend_context', replacing any previously
  // attached internal backend context.
  void set_internal_backend_context(
      std::unique_ptr<TfLiteInternalBackendContext> internal_backend_context) {
    internal_backend_context_ = std::move(internal_backend_context);
  }

  // Returns the wrapped internal backend context, or nullptr if one has not
  // been attached yet.
  TfLiteInternalBackendContext* internal_backend_context() const {
    return internal_backend_context_.get();
  }

 private:
  // The wrapped internal backend context. Note it is lazily initialized:
  // it stays null until a caller attaches one via
  // set_internal_backend_context().
  std::unique_ptr<TfLiteInternalBackendContext> internal_backend_context_;

  ExternalCpuBackendContext(const ExternalCpuBackendContext&) = delete;
  ExternalCpuBackendContext& operator=(const ExternalCpuBackendContext&) =
      delete;
};
} // namespace tflite
#endif // TENSORFLOW_LITE_EXTERNAL_CPU_BACKEND_CONTEXT_H_

View File

@ -71,6 +71,12 @@ Interpreter::Interpreter(ErrorReporter* error_reporter)
external_contexts_[i] = nullptr;
}
// This operation is cheap because we allocate the CPU context resources (i.e.
// threads) lazily.
own_external_cpu_backend_context_.reset(new ExternalCpuBackendContext());
external_contexts_[kTfLiteCpuBackendContext] =
own_external_cpu_backend_context_.get();
UseNNAPI(false);
}
@ -78,6 +84,26 @@ Interpreter::~Interpreter() {}
void Interpreter::SetExternalContext(TfLiteExternalContextType type,
                                     TfLiteExternalContext* ctx) {
  // Handing back the context we already own would be a no-op at best and a
  // double-ownership hazard at worst: warn and bail out.
  if (ctx == own_external_cpu_backend_context_.get()) {
    error_reporter_->Report(
        "WARNING: The passed external context is identical to the internally "
        "owned one.");
    return;
  }

  // If the caller is replacing the internally owned cpu backend context,
  // release its resources now. Note: the 'max thread count' info associated
  // with the replaced context is lost here; such info is now determined by
  // the new context 'ctx', which affects how much parallelism a TFLite op
  // will use.
  const bool replacing_owned_cpu_context =
      type == kTfLiteCpuBackendContext &&
      external_contexts_[kTfLiteCpuBackendContext] ==
          own_external_cpu_backend_context_.get();
  if (replacing_owned_cpu_context) {
    own_external_cpu_backend_context_.reset();
  }

  // This ultimately updates 'external_contexts_[type]'.
  primary_subgraph().SetExternalContext(type, ctx);
}

View File

@ -20,6 +20,7 @@ limitations under the License.
#include <complex>
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <vector>
#include "tensorflow/lite/allocation.h"
@ -27,6 +28,7 @@ limitations under the License.
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/core/subgraph.h"
#include "tensorflow/lite/external_cpu_backend_context.h"
#include "tensorflow/lite/memory_planner.h"
#include "tensorflow/lite/stderr_reporter.h"
@ -460,7 +462,9 @@ class Interpreter {
return op_reg.profiling_string(context_, node);
}
/// Set the value of an external context.
// Set the value of an external context. The TFLite interpreter doesn't take
// ownership of the external context 'ctx', and the context should outlive
// the TFLite interpreter.
void SetExternalContext(TfLiteExternalContextType type,
TfLiteExternalContext* ctx);
@ -526,6 +530,13 @@ class Interpreter {
// List of active external contexts.
TfLiteExternalContext* external_contexts_[kTfLiteMaxExternalContexts];
// The default external cpu backend context. After a TFLite interpreter is
// initialized, 'external_contexts_[kTfLiteCpuBackendContext]' is set to point
// to this object. However, if this element value is overwritten via calling
// 'SetExternalContext(kTfLiteCpuBackendContext, ...)', we will reset this to
// nullptr if necessary.
std::unique_ptr<ExternalCpuBackendContext> own_external_cpu_backend_context_;
// Subgraphs
std::vector<std::unique_ptr<Subgraph>> subgraphs_;
};

View File

@ -227,6 +227,7 @@ cc_library(
# gemmlowp_context_ and ruy_context_ members.
"//tensorflow/lite/experimental/ruy:context",
"@gemmlowp",
"//tensorflow/lite:external_cpu_backend_context",
],
)
@ -319,8 +320,8 @@ cc_library(
deps = [
":cpu_backend_context",
":op_macros",
"//tensorflow/lite:external_cpu_backend_context",
"//tensorflow/lite/c:c_api_internal",
"@gemmlowp",
],
)

View File

@ -21,7 +21,8 @@ limitations under the License.
namespace tflite {
CpuBackendContext::CpuBackendContext()
: ruy_context_(new ruy::Context),
: TfLiteInternalBackendContext(),
ruy_context_(new ruy::Context),
gemmlowp_context_(new gemmlowp::GemmContext) {
set_max_num_threads(1);
}

View File

@ -20,13 +20,14 @@ limitations under the License.
#include "public/gemmlowp.h"
#include "tensorflow/lite/experimental/ruy/context.h"
#include "tensorflow/lite/external_cpu_backend_context.h"
namespace tflite {
class CpuBackendContext final {
class CpuBackendContext final : public TfLiteInternalBackendContext {
public:
CpuBackendContext();
~CpuBackendContext();
~CpuBackendContext() override;
ruy::Context* ruy_context() const { return ruy_context_.get(); }
@ -44,10 +45,7 @@ class CpuBackendContext final {
//
// This value also gets propagated to back-ends, where it plays the same
// information-only role.
void set_max_num_threads(int max_num_threads);
// See set_max_num_threads.
int max_num_threads() const { return max_num_threads_; }
void set_max_num_threads(int max_num_threads) override;
private:
// To enable a smooth transition from the current direct usage
@ -59,9 +57,6 @@ class CpuBackendContext final {
const std::unique_ptr<ruy::Context> ruy_context_;
const std::unique_ptr<gemmlowp::GemmContext> gemmlowp_context_;
// See set_max_num_threads.
int max_num_threads_;
CpuBackendContext(const CpuBackendContext&) = delete;
};

View File

@ -17,74 +17,43 @@ limitations under the License.
#include <memory>
#include "tensorflow/lite/c/c_api_internal.h"
#include "tensorflow/lite/external_cpu_backend_context.h"
#include "tensorflow/lite/kernels/cpu_backend_context.h"
#include "tensorflow/lite/kernels/op_macros.h"
namespace tflite {
namespace cpu_backend_support {
namespace {
// TODO(b/130950871) we probably shouldn't be using any reference-counting
// but this is an existing idiom.
struct RefCountedCpuBackendContext : public TfLiteExternalContext {
std::unique_ptr<CpuBackendContext> cpu_backend_context;
int num_references = 0;
};
RefCountedCpuBackendContext* GetCpuBackendContext(TfLiteContext* context) {
return static_cast<RefCountedCpuBackendContext*>(
context->GetExternalContext(context, kTfLiteCpuBackendContext));
}
TfLiteStatus Refresh(TfLiteContext* context) {
auto* refcounted = GetCpuBackendContext(context);
if (refcounted != nullptr) {
refcounted->cpu_backend_context->set_max_num_threads(
context->recommended_num_threads);
}
return kTfLiteOk;
}
} // namespace
void IncrementUsageCounter(TfLiteContext* context) {
RefCountedCpuBackendContext* refcounted = GetCpuBackendContext(context);
if (refcounted == nullptr) {
refcounted = new RefCountedCpuBackendContext;
refcounted->type = kTfLiteCpuBackendContext;
refcounted->Refresh = Refresh;
refcounted->cpu_backend_context.reset(new CpuBackendContext);
if (context->recommended_num_threads != -1) {
refcounted->cpu_backend_context->set_max_num_threads(
context->recommended_num_threads);
}
refcounted->num_references = 0;
context->SetExternalContext(context, kTfLiteCpuBackendContext, refcounted);
}
refcounted->num_references++;
}
void DecrementUsageCounter(TfLiteContext* context) {
RefCountedCpuBackendContext* refcounted = GetCpuBackendContext(context);
if (refcounted == nullptr) {
TF_LITE_FATAL(
"Call to DecrementUsageCounter() not preceded by "
"IncrementUsageCounter()");
}
if (--refcounted->num_references == 0) {
delete refcounted;
context->SetExternalContext(context, kTfLiteCpuBackendContext, nullptr);
}
}
// TODO(b/130950871): Remove all references to the following two no-op
// functions once the new ExternalCpuBackendContext class is checked in.
void IncrementUsageCounter(TfLiteContext* context) {}
void DecrementUsageCounter(TfLiteContext* context) {}
CpuBackendContext* GetFromContext(TfLiteContext* context) {
RefCountedCpuBackendContext* refcounted = GetCpuBackendContext(context);
if (refcounted == nullptr) {
auto* external_context = static_cast<ExternalCpuBackendContext*>(
context->GetExternalContext(context, kTfLiteCpuBackendContext));
if (external_context == nullptr) {
TF_LITE_FATAL(
"Call to GetFromContext() not preceded by IncrementUsageCounter()");
"ExternalCpuBackendContext isn't properly initialized during TFLite "
"interpreter initialization.");
}
return refcounted->cpu_backend_context.get();
auto* cpu_backend_context = static_cast<CpuBackendContext*>(
external_context->internal_backend_context());
if (cpu_backend_context == nullptr) {
// We do the lazy initialization here for the TfLiteInternalBackendContext
// that's wrapped inside ExternalCpuBackendContext.
cpu_backend_context = new CpuBackendContext();
if (context->recommended_num_threads != -1) {
cpu_backend_context->set_max_num_threads(
context->recommended_num_threads);
}
external_context->set_internal_backend_context(
std::unique_ptr<TfLiteInternalBackendContext>(cpu_backend_context));
}
return cpu_backend_context;
}
} // namespace cpu_backend_support