Use a simpler external CPU backend context class to replace the functionality of the existing ref-counted CPU backend context class.
PiperOrigin-RevId: 259252521
commit b4e5625437
parent 96d0f42d1b
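
With this change, a single CPU backend context can be shared across interpreters. Here is a minimal sketch of the intended usage, adapted from the comments in the new external_cpu_backend_context.h below (interpreter construction is elided, and the interpreters must be invoked serially, not simultaneously):

  tflite::ExternalCpuBackendContext shared_cpu_ctx;
  interpreter1->SetExternalContext(kTfLiteCpuBackendContext, &shared_cpu_ctx);
  interpreter2->SetExternalContext(kTfLiteCpuBackendContext, &shared_cpu_ctx);

  interpreter1->SetNumThreads(2);
  interpreter1->Invoke();  // Runs with up to 2 threads.

  interpreter2->SetNumThreads(4);
  interpreter2->Invoke();  // Runs with up to 4 threads.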
tensorflow/lite/BUILD
@@ -94,6 +94,16 @@ cc_library(
     deps = ["//tensorflow/lite/c:c_api_internal"],
 )
 
+cc_library(
+    name = "external_cpu_backend_context",
+    srcs = ["external_cpu_backend_context.cc"],
+    hdrs = ["external_cpu_backend_context.h"],
+    copts = TFLITE_DEFAULT_COPTS,
+    deps = [
+        "//tensorflow/lite/c:c_api_internal",
+    ],
+)
+
 cc_library(
     name = "graph_info",
     hdrs = ["graph_info.h"],
@@ -201,6 +211,7 @@ cc_library(
     deps = [
         ":allocation",
         ":arena_planner",
+        ":external_cpu_backend_context",
         ":graph_info",
         ":memory_planner",
         ":minimal_logging",
tensorflow/lite/external_cpu_backend_context.cc (new file, 38 lines)
@@ -0,0 +1,38 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/external_cpu_backend_context.h"
+
+namespace tflite {
+namespace {
+
+TfLiteStatus RefreshExternalCpuBackendContext(TfLiteContext* context) {
+  auto* const external_context = static_cast<ExternalCpuBackendContext*>(
+      context->GetExternalContext(context, kTfLiteCpuBackendContext));
+  if (external_context && external_context->internal_backend_context() &&
+      context->recommended_num_threads != -1) {
+    external_context->internal_backend_context()->set_max_num_threads(
+        context->recommended_num_threads);
+  }
+  return kTfLiteOk;
+}
+}  // namespace
+
+ExternalCpuBackendContext::ExternalCpuBackendContext()
+    : internal_backend_context_(nullptr) {
+  this->type = kTfLiteCpuBackendContext;
+  this->Refresh = RefreshExternalCpuBackendContext;
+}
+
+}  // namespace tflite
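
The constructor above registers RefreshExternalCpuBackendContext as the TfLiteExternalContext::Refresh callback. Whenever an interpreter's recommended thread count changes, the framework invokes that callback, which propagates the new count into the wrapped internal backend context. A sketch of the assumed framework-side flow (this is an illustration of the mechanism, not code from this commit; 'context' stands for a TfLiteContext owned by an interpreter):

  // Assumed flow, e.g. when an interpreter's thread count is updated:
  context->recommended_num_threads = 4;
  TfLiteExternalContext* ext =
      context->GetExternalContext(context, kTfLiteCpuBackendContext);
  if (ext && ext->Refresh) {
    ext->Refresh(context);  // Calls RefreshExternalCpuBackendContext above.
  }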
tensorflow/lite/external_cpu_backend_context.h (new file, 110 lines)
@@ -0,0 +1,110 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXTERNAL_CPU_BACKEND_CONTEXT_H_
+#define TENSORFLOW_LITE_EXTERNAL_CPU_BACKEND_CONTEXT_H_
+
+#include <memory>
+#include <utility>
+
+#include "tensorflow/lite/c/c_api_internal.h"
+
+namespace tflite {
+
+// This is the base class for TF Lite internal backend contexts (like a
+// RUY-based cpu backend context class). A derived internal backend context
+// is generally a collection of utilities (e.g. a thread pool) for TF Lite
+// to use certain kernel libraries, such as Gemmlowp, RUY, etc., to implement
+// TF Lite operators.
+// TODO(b/130950871): Make this class an interface-only abstract class.
+class TfLiteInternalBackendContext {
+ public:
+  virtual ~TfLiteInternalBackendContext() {}
+
+  int max_num_threads() const { return max_num_threads_; }
+
+  virtual void set_max_num_threads(int max_num_threads) {
+    max_num_threads_ = max_num_threads;
+  }
+
+ protected:
+  TfLiteInternalBackendContext() {}
+
+  // The maximum number of threads used for parallelizing TfLite computation.
+  int max_num_threads_;
+
+ private:
+  TfLiteInternalBackendContext(const TfLiteInternalBackendContext&) = delete;
+  TfLiteInternalBackendContext& operator=(const TfLiteInternalBackendContext&) =
+      delete;
+};
+
+// This TfLiteExternalContext-derived class is the default
+// 'kTfLiteCpuBackendContext'-typed context that's used internally in the TF
+// Lite framework. The primary purpose of this class is to allow the same cpu
+// backend context to be shared among a set of TF Lite interpreters so that
+// certain system costs are saved, like the cost of having multiple thread
+// pools in separate cpu backend contexts.
+//
+// Note: as of 2019/07/19, such context sharing among a set of interpreters
+// will break the execution if these interpreters are invoked simultaneously.
+// It works only when these context-sharing interpreters are invoked in a
+// serialized way. Here's an example to illustrate the context sharing among
+// 2 TF Lite interpreters:
+//
+//  TfLiteExternalContext* global_ctxt = new ExternalCpuBackendContext();
+//  interpreter1 = /*...*/;
+//  interpreter1->SetExternalContext(kTfLiteCpuBackendContext, global_ctxt);
+//  interpreter2 = /*...*/;
+//  interpreter2->SetExternalContext(kTfLiteCpuBackendContext, global_ctxt);
+//
+//  interpreter1->SetNumThreads(2);
+//  interpreter1->Invoke();
+//
+//  interpreter2->SetNumThreads(4);
+//  interpreter2->Invoke();
+//
+// After sharing the context, calling 'SetNumThreads' on any of the
+// context-sharing interpreters has a global impact, as it also refreshes
+// the #thread info in the global cpu backend context (i.e. 'global_ctxt'
+// above) that affects how much parallelism an interpreter invocation will
+// use. Therefore, if different numbers of threads are used among different
+// interpreters, don't call 'SetNumThreads' consecutively; call it
+// separately before each interpreter's invocation, as illustrated above.
+class ExternalCpuBackendContext : public TfLiteExternalContext {
+ public:
+  ExternalCpuBackendContext();
+  ~ExternalCpuBackendContext() {}
+
+  void set_internal_backend_context(
+      std::unique_ptr<TfLiteInternalBackendContext> internal_backend_context) {
+    internal_backend_context_ = std::move(internal_backend_context);
+  }
+
+  TfLiteInternalBackendContext* internal_backend_context() const {
+    return internal_backend_context_.get();
+  }
+
+ private:
+  // Note the actual internal backend context object is lazily initialized.
+  std::unique_ptr<TfLiteInternalBackendContext> internal_backend_context_;
+
+  ExternalCpuBackendContext(const ExternalCpuBackendContext&) = delete;
+  ExternalCpuBackendContext& operator=(const ExternalCpuBackendContext&) =
+      delete;
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_EXTERNAL_CPU_BACKEND_CONTEXT_H_
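
Since TfLiteInternalBackendContext is intended as a base class, a backend can plug its own utilities behind an ExternalCpuBackendContext. A hypothetical sketch under that assumption (MyThreadPoolContext is illustrative, not part of this commit):

  #include <memory>
  #include "tensorflow/lite/external_cpu_backend_context.h"

  class MyThreadPoolContext : public tflite::TfLiteInternalBackendContext {
   public:
    MyThreadPoolContext() { max_num_threads_ = 1; }
    void set_max_num_threads(int max_num_threads) override {
      max_num_threads_ = max_num_threads;
      // A real implementation might resize its thread pool here.
    }
  };

  auto external_ctx = std::make_unique<tflite::ExternalCpuBackendContext>();
  external_ctx->set_internal_backend_context(
      std::make_unique<MyThreadPoolContext>());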
tensorflow/lite/interpreter.cc
@@ -71,6 +71,12 @@ Interpreter::Interpreter(ErrorReporter* error_reporter)
     external_contexts_[i] = nullptr;
   }
 
+  // This operation is cheap because we allocate the CPU context resources
+  // (i.e. threads) lazily.
+  own_external_cpu_backend_context_.reset(new ExternalCpuBackendContext());
+  external_contexts_[kTfLiteCpuBackendContext] =
+      own_external_cpu_backend_context_.get();
+
   UseNNAPI(false);
 }
@@ -78,6 +84,26 @@ Interpreter::~Interpreter() {}
 
 void Interpreter::SetExternalContext(TfLiteExternalContextType type,
                                      TfLiteExternalContext* ctx) {
+  if (ctx == own_external_cpu_backend_context_.get()) {
+    error_reporter_->Report(
+        "WARNING: The passed external context is identical to the internally "
+        "owned one.");
+    return;
+  }
+
+  // We have an internally owned external context of kTfLiteCpuBackendContext.
+  // If it's overwritten here, we will release the resource of the internally
+  // owned external context.
+  // Note: the 'max thread count' info associated with the overwritten context
+  // will be lost here, and such info is now determined by the new context,
+  // thus affecting how much parallelism a TFLite op would have.
+  if (kTfLiteCpuBackendContext == type &&
+      external_contexts_[kTfLiteCpuBackendContext] ==
+          own_external_cpu_backend_context_.get()) {
+    own_external_cpu_backend_context_.reset();
+  }
+
+  // This essentially changes the "external_contexts_[type]".
   primary_subgraph().SetExternalContext(type, ctx);
 }
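
The effect of the logic above: passing the interpreter's own default context back in is rejected with a warning, while installing a caller-owned context releases the internally owned one. A sketch of the resulting caller-side contract (variable names are illustrative):

  // The interpreter does NOT take ownership of 'shared_cpu_ctx'; the context
  // must outlive every interpreter it is registered with.
  auto shared_cpu_ctx = std::make_unique<tflite::ExternalCpuBackendContext>();
  // This releases the interpreter's internally owned default context.
  interpreter->SetExternalContext(kTfLiteCpuBackendContext,
                                  shared_cpu_ctx.get());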
tensorflow/lite/interpreter.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <complex>
 #include <cstdio>
 #include <cstdlib>
+#include <memory>
 #include <vector>
 
 #include "tensorflow/lite/allocation.h"
@@ -27,6 +28,7 @@ limitations under the License.
 #include "tensorflow/lite/core/api/error_reporter.h"
 #include "tensorflow/lite/core/api/profiler.h"
 #include "tensorflow/lite/core/subgraph.h"
+#include "tensorflow/lite/external_cpu_backend_context.h"
 #include "tensorflow/lite/memory_planner.h"
 #include "tensorflow/lite/stderr_reporter.h"
 
@@ -460,7 +462,9 @@ class Interpreter {
     return op_reg.profiling_string(context_, node);
   }
 
-  /// Set the value of an external context.
+  // Set the value of an external context. The TFLite interpreter doesn't
+  // take memory ownership of this external context 'ctx', so the context
+  // should outlive the TFLite interpreter.
   void SetExternalContext(TfLiteExternalContextType type,
                           TfLiteExternalContext* ctx);
 
@@ -526,6 +530,13 @@ class Interpreter {
   // List of active external contexts.
   TfLiteExternalContext* external_contexts_[kTfLiteMaxExternalContexts];
 
+  // The default external cpu backend context. After a TFLite interpreter is
+  // initialized, 'external_contexts_[kTfLiteCpuBackendContext]' is set to
+  // point to this object. However, if this element value is overwritten via
+  // calling 'SetExternalContext(kTfLiteCpuBackendContext, ...)', we reset
+  // this to nullptr if necessary.
+  std::unique_ptr<ExternalCpuBackendContext> own_external_cpu_backend_context_;
+
   // Subgraphs
   std::vector<std::unique_ptr<Subgraph>> subgraphs_;
 };
tensorflow/lite/kernels/BUILD
@@ -227,6 +227,7 @@ cc_library(
         # gemmlowp_context_ and ruy_context_ members.
         "//tensorflow/lite/experimental/ruy:context",
         "@gemmlowp",
+        "//tensorflow/lite:external_cpu_backend_context",
     ],
 )
 
@@ -319,8 +320,8 @@ cc_library(
     deps = [
         ":cpu_backend_context",
        ":op_macros",
+        "//tensorflow/lite:external_cpu_backend_context",
         "//tensorflow/lite/c:c_api_internal",
         "@gemmlowp",
     ],
 )
tensorflow/lite/kernels/cpu_backend_context.cc
@@ -21,7 +21,8 @@ limitations under the License.
 namespace tflite {
 
 CpuBackendContext::CpuBackendContext()
-    : ruy_context_(new ruy::Context),
+    : TfLiteInternalBackendContext(),
+      ruy_context_(new ruy::Context),
       gemmlowp_context_(new gemmlowp::GemmContext) {
   set_max_num_threads(1);
 }
tensorflow/lite/kernels/cpu_backend_context.h
@@ -20,13 +20,14 @@ limitations under the License.
 
 #include "public/gemmlowp.h"
 #include "tensorflow/lite/experimental/ruy/context.h"
+#include "tensorflow/lite/external_cpu_backend_context.h"
 
 namespace tflite {
 
-class CpuBackendContext final {
+class CpuBackendContext final : public TfLiteInternalBackendContext {
  public:
   CpuBackendContext();
-  ~CpuBackendContext();
+  ~CpuBackendContext() override;
 
   ruy::Context* ruy_context() const { return ruy_context_.get(); }
 
@@ -44,10 +45,7 @@ class CpuBackendContext final {
   //
   // This value also gets propagated to back-ends, where it plays the same
   // information-only role.
-  void set_max_num_threads(int max_num_threads);
-
-  // See set_max_num_threads.
-  int max_num_threads() const { return max_num_threads_; }
+  void set_max_num_threads(int max_num_threads) override;
 
  private:
   // To enable a smooth transition from the current direct usage
@@ -59,9 +57,6 @@ class CpuBackendContext final {
   const std::unique_ptr<ruy::Context> ruy_context_;
   const std::unique_ptr<gemmlowp::GemmContext> gemmlowp_context_;
 
-  // See set_max_num_threads.
-  int max_num_threads_;
-
   CpuBackendContext(const CpuBackendContext&) = delete;
 };
tensorflow/lite/kernels/cpu_backend_support.cc
@@ -17,74 +17,43 @@ limitations under the License.
 #include <memory>
 
 #include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/external_cpu_backend_context.h"
 #include "tensorflow/lite/kernels/cpu_backend_context.h"
 #include "tensorflow/lite/kernels/op_macros.h"
 
 namespace tflite {
 namespace cpu_backend_support {
 
-namespace {
-
-// TODO(b/130950871) we probably shouldn't be using any reference-counting
-// but this is an existing idiom.
-struct RefCountedCpuBackendContext : public TfLiteExternalContext {
-  std::unique_ptr<CpuBackendContext> cpu_backend_context;
-  int num_references = 0;
-};
-
-RefCountedCpuBackendContext* GetCpuBackendContext(TfLiteContext* context) {
-  return static_cast<RefCountedCpuBackendContext*>(
-      context->GetExternalContext(context, kTfLiteCpuBackendContext));
-}
-
-TfLiteStatus Refresh(TfLiteContext* context) {
-  auto* refcounted = GetCpuBackendContext(context);
-  if (refcounted != nullptr) {
-    refcounted->cpu_backend_context->set_max_num_threads(
-        context->recommended_num_threads);
-  }
-  return kTfLiteOk;
-}
-
-}  // namespace
-
-void IncrementUsageCounter(TfLiteContext* context) {
-  RefCountedCpuBackendContext* refcounted = GetCpuBackendContext(context);
-  if (refcounted == nullptr) {
-    refcounted = new RefCountedCpuBackendContext;
-    refcounted->type = kTfLiteCpuBackendContext;
-    refcounted->Refresh = Refresh;
-    refcounted->cpu_backend_context.reset(new CpuBackendContext);
-    if (context->recommended_num_threads != -1) {
-      refcounted->cpu_backend_context->set_max_num_threads(
-          context->recommended_num_threads);
-    }
-    refcounted->num_references = 0;
-    context->SetExternalContext(context, kTfLiteCpuBackendContext, refcounted);
-  }
-  refcounted->num_references++;
-}
-
-void DecrementUsageCounter(TfLiteContext* context) {
-  RefCountedCpuBackendContext* refcounted = GetCpuBackendContext(context);
-  if (refcounted == nullptr) {
-    TF_LITE_FATAL(
-        "Call to DecrementUsageCounter() not preceded by "
-        "IncrementUsageCounter()");
-  }
-  if (--refcounted->num_references == 0) {
-    delete refcounted;
-    context->SetExternalContext(context, kTfLiteCpuBackendContext, nullptr);
-  }
-}
+// TODO(b/130950871): Remove all references to the following two no-op
+// functions once the new ExternalCpuBackendContext class is checked in.
+void IncrementUsageCounter(TfLiteContext* context) {}
+void DecrementUsageCounter(TfLiteContext* context) {}
 
 CpuBackendContext* GetFromContext(TfLiteContext* context) {
-  RefCountedCpuBackendContext* refcounted = GetCpuBackendContext(context);
-  if (refcounted == nullptr) {
+  auto* external_context = static_cast<ExternalCpuBackendContext*>(
+      context->GetExternalContext(context, kTfLiteCpuBackendContext));
+
+  if (external_context == nullptr) {
     TF_LITE_FATAL(
-        "Call to GetFromContext() not preceded by IncrementUsageCounter()");
+        "ExternalCpuBackendContext isn't properly initialized during TFLite "
+        "interpreter initialization.");
   }
-  return refcounted->cpu_backend_context.get();
+
+  auto* cpu_backend_context = static_cast<CpuBackendContext*>(
+      external_context->internal_backend_context());
+  if (cpu_backend_context == nullptr) {
+    // We do the lazy initialization here for the TfLiteInternalBackendContext
+    // that's wrapped inside ExternalCpuBackendContext.
+    cpu_backend_context = new CpuBackendContext();
+    if (context->recommended_num_threads != -1) {
+      cpu_backend_context->set_max_num_threads(
+          context->recommended_num_threads);
+    }
+    external_context->set_internal_backend_context(
+        std::unique_ptr<TfLiteInternalBackendContext>(cpu_backend_context));
+  }
+
+  return cpu_backend_context;
 }
 
 }  // namespace cpu_backend_support
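
With the ref-counting gone, a kernel simply asks for the context when it needs one; the first call to GetFromContext lazily creates and installs the CpuBackendContext. A hypothetical kernel Eval illustrating the new calling convention (MyKernelEval is illustrative, not from this commit, and a cpu_backend_support header matching this .cc is assumed):

  #include "tensorflow/lite/c/c_api_internal.h"
  #include "tensorflow/lite/kernels/cpu_backend_context.h"
  #include "tensorflow/lite/kernels/cpu_backend_support.h"

  TfLiteStatus MyKernelEval(TfLiteContext* context, TfLiteNode* node) {
    // Lazily creates the CpuBackendContext on first use; the returned pointer
    // is owned by the ExternalCpuBackendContext, not by the caller.
    tflite::CpuBackendContext* backend_ctx =
        tflite::cpu_backend_support::GetFromContext(context);
    int threads = backend_ctx->max_num_threads();
    (void)threads;  // e.g. would be passed to a GEMM routine.
    return kTfLiteOk;
  }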