/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/cl/api.h"
|
|
|
|
#ifndef CL_DELEGATE_NO_GL
|
|
#define CL_DELEGATE_ALLOW_GL
|
|
#endif
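
// All GL/EGL interop paths below are gated on CL_DELEGATE_ALLOW_GL, which is
// defined unless the build explicitly sets CL_DELEGATE_NO_GL.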

#include <algorithm>
#include <cstring>

#include "absl/memory/memory.h"
#include "absl/types/span.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_errors.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_event.h"
#include "tensorflow/lite/delegates/gpu/cl/environment.h"
#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h"
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
#include "tensorflow/lite/delegates/gpu/cl/precision.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"

#ifdef CL_DELEGATE_ALLOW_GL
#include <EGL/eglext.h>

#include "tensorflow/lite/delegates/gpu/cl/egl_sync.h"
#include "tensorflow/lite/delegates/gpu/cl/gl_interop.h"
#endif

namespace tflite {
namespace gpu {
namespace cl {
namespace {

// Both internal and external defs are identical, therefore nothing to connect
// here.
class NoopTensorTie : public TensorTie {
 public:
  NoopTensorTie(const TensorTieDef& def, TensorObject obj)
      : TensorTie(def), obj_(obj) {}

  static bool IsSupported(const TensorTieDef& def) {
    return def.external_def == def.internal_def;
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    if (!def().external_def.object_def.user_provided) {
      return absl::InvalidArgumentError("Tensor object is readonly.");
    }
    if (!IsValid(def().external_def, obj)) {
      return absl::InvalidArgumentError("Given object is not valid");
    }
    obj_ = obj;
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return obj_; }

  absl::Status CopyToExternalObject() final { return absl::OkStatus(); }

  absl::Status CopyFromExternalObject() final { return absl::OkStatus(); }

 private:
  TensorObject obj_;
};

// Does one-step conversion between internal and external objects.
// It may also allocate external objects if requested.
class DefaultTensorTie : public TensorTie {
 public:
  DefaultTensorTie(const TensorTieDef& def, TensorObject internal_obj)
      : TensorTie(def), internal_obj_(internal_obj) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    auto object_type = def.external_def.object_def.object_type;
#ifdef CL_DELEGATE_ALLOW_GL
    if (def.external_def.object_def.user_provided &&
        GlClBufferCopier::IsSupported(def.external_def.object_def,
                                      def.internal_def.object_def)) {
      return true;
    }
#endif
    return (object_type == ObjectType::OPENCL_BUFFER ||
            object_type == ObjectType::OPENCL_TEXTURE ||
            object_type == ObjectType::CPU_MEMORY) &&
           converter_builder.IsSupported(def.internal_def, def.external_def) &&
           converter_builder.IsSupported(def.external_def, def.internal_def);
  }

  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
                          TensorObjectConverterBuilder* converter_builder,
                          Environment* env, std::unique_ptr<TensorTie>* tie) {
    auto tie_impl = absl::make_unique<DefaultTensorTie>(def, internal_object);
    RETURN_IF_ERROR(tie_impl->Init(converter_builder, env));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status CopyToExternalObject() final {
    if (!converter_to_) {
      return absl::UnavailableError("Conversion is not available");
    }
    return converter_to_->Convert(internal_obj_, GetExternalObject());
  }

  absl::Status CopyFromExternalObject() final {
    if (!converter_from_) {
      return absl::UnavailableError("Conversion is not available");
    }
    return converter_from_->Convert(GetExternalObject(), internal_obj_);
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    if (!def().external_def.object_def.user_provided) {
      return absl::InvalidArgumentError("External object is read-only");
    }
    if (!IsValid(def().external_def, obj)) {
      return absl::InvalidArgumentError("Given object is not valid");
    }
    external_obj_ = obj;
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return external_obj_; }

 private:
  absl::Status Init(TensorObjectConverterBuilder* converter_builder,
                    Environment* env) {
#ifdef CL_DELEGATE_ALLOW_GL
    if (def().external_def.object_def.user_provided &&
        GlClBufferCopier::IsSupported(def().external_def.object_def,
                                      def().internal_def.object_def)) {
      converter_from_ = absl::make_unique<GlClBufferCopier>(
          def().internal_def, def().external_def, env);
    } else {
      RETURN_IF_ERROR(converter_builder->MakeConverter(
          def().external_def, def().internal_def, &converter_from_));
    }
    if (def().external_def.object_def.user_provided &&
        GlClBufferCopier::IsSupported(def().internal_def.object_def,
                                      def().external_def.object_def)) {
      converter_to_ = absl::make_unique<GlClBufferCopier>(
          def().internal_def, def().external_def, env);
    } else {
      RETURN_IF_ERROR(converter_builder->MakeConverter(
          def().internal_def, def().external_def, &converter_to_));
    }
#else
    RETURN_IF_ERROR(converter_builder->MakeConverter(
        def().external_def, def().internal_def, &converter_from_));
    RETURN_IF_ERROR(converter_builder->MakeConverter(
        def().internal_def, def().external_def, &converter_to_));
#endif
    return MaybeAllocateExternalObject(env);
  }

  absl::Status MaybeAllocateExternalObject(Environment* env) {
    const TensorObjectDef& d = def().external_def;
    if (d.object_def.user_provided) {
      return absl::OkStatus();
    }
    switch (d.object_def.object_type) {
      case ObjectType::CPU_MEMORY: {
        size_t bytes_size = NumElements(d) * SizeOf(d.object_def.data_type);
        cpu_memory_.resize(bytes_size);
        external_obj_ = CpuMemory{cpu_memory_.data(), cpu_memory_.size()};
        break;
      }
      case ObjectType::OPENCL_TEXTURE:
      case ObjectType::OPENCL_BUFFER: {
        auto& dims = d.dimensions;
        const BHWC shape(dims.b, dims.h, dims.w, dims.c);
        const TensorDescriptor desc{
            d.object_def.data_type,
            ToTensorStorageType(d.object_def.object_type,
                                d.object_def.data_layout),
            Layout::BHWC};
        RETURN_IF_ERROR(
            AllocateTensorMemory(env->context(), shape, desc, &cl_memory_));
        if (d.object_def.object_type == ObjectType::OPENCL_TEXTURE) {
          external_obj_ = OpenClTexture{cl_memory_.memory()};
        } else {
          external_obj_ = OpenClBuffer{cl_memory_.memory()};
        }
        break;
      }
      default:
        return absl::InternalError("Unexpected object type");
    }
    return absl::OkStatus();
  }

  const TensorObject internal_obj_;
  TensorObject external_obj_;
  CLMemory cl_memory_;
  std::vector<uint8_t> cpu_memory_;
  std::unique_ptr<TensorObjectConverter> converter_to_;
  std::unique_ptr<TensorObjectConverter> converter_from_;
};

// Copies data to an intermediate OpenCL buffer and then does a two-step
// conversion. It drives the following cases where one-step conversion is not
// supported:
//   - CPU BHWC -> CL buffer BHWC -> CL texture DHWC4.
class TwoStepTensorTie : public TensorTie {
 public:
  explicit TwoStepTensorTie(const TensorTieDef& def) : TensorTie(def) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    auto defs = MakeOuterInnerDefs(def);
    return DefaultTensorTie::IsSupported(defs.first, converter_builder) &&
           DefaultTensorTie::IsSupported(defs.second, converter_builder);
  }

  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
                          TensorObjectConverterBuilder* converter_builder,
                          Environment* env, std::unique_ptr<TensorTie>* tie) {
    auto tie_impl = absl::make_unique<TwoStepTensorTie>(def);
    RETURN_IF_ERROR(tie_impl->Init(internal_object, converter_builder, env));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status CopyToExternalObject() final {
    RETURN_IF_ERROR(inner_tie_->CopyToExternalObject());
    return outer_tie_->CopyToExternalObject();
  }

  absl::Status CopyFromExternalObject() final {
    RETURN_IF_ERROR(outer_tie_->CopyFromExternalObject());
    return inner_tie_->CopyFromExternalObject();
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    return outer_tie_->SetExternalObject(obj);
  }

  TensorObject GetExternalObject() final {
    return outer_tie_->GetExternalObject();
  }

 private:
  static std::pair<TensorTieDef, TensorTieDef> MakeOuterInnerDefs(
      const TensorTieDef& def) {
    TensorTieDef outer_def;
    outer_def.external_def = def.external_def;
    outer_def.internal_def = def.external_def;
    outer_def.internal_def.object_def.object_type = ObjectType::OPENCL_BUFFER;
    outer_def.internal_def.object_def.user_provided = true;

    TensorTieDef inner_def;
    inner_def.external_def = outer_def.internal_def;
    inner_def.external_def.object_def.user_provided = false;
    inner_def.internal_def = def.internal_def;
    return std::make_pair(outer_def, inner_def);
  }

  absl::Status Init(TensorObject internal_object,
                    TensorObjectConverterBuilder* converter_builder,
                    Environment* env) {
    auto defs = MakeOuterInnerDefs(def());
    RETURN_IF_ERROR(DefaultTensorTie::New(defs.second, internal_object,
                                          converter_builder, env, &inner_tie_));
    return DefaultTensorTie::New(defs.first, inner_tie_->GetExternalObject(),
                                 converter_builder, env, &outer_tie_);
  }

  std::unique_ptr<TensorTie> inner_tie_;
  std::unique_ptr<TensorTie> outer_tie_;
};

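// Illustration of the split performed by MakeOuterInnerDefs above for the
// case listed in the class comment (user-provided CPU BHWC external object,
// internal texture): the outer tie converts CPU memory to/from an
// intermediate OpenCL buffer that keeps the external layout, and the inner
// tie converts that buffer to/from the internal tensor representation.
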
#ifdef CL_DELEGATE_ALLOW_GL
// Captures GL object into CL context before performing a conversion.
class GlBufferHolder : public TensorTie {
 public:
  GlBufferHolder(const TensorTieDef& def, GlInteropFabric* gl_interop_fabric,
                 Environment* env)
      : TensorTie(def),
        gl_interop_fabric_(gl_interop_fabric),
        environment_(env) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    if (!def.external_def.object_def.user_provided ||
        def.external_def.object_def.object_type != ObjectType::OPENGL_SSBO) {
      return false;
    }
    return DefaultTensorTie::IsSupported(MakeClDef(def), converter_builder);
  }

  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
                          TensorObjectConverterBuilder* converter_builder,
                          GlInteropFabric* gl_interop_fabric, Environment* env,
                          std::unique_ptr<TensorTie>* tie) {
    auto tie_impl =
        absl::make_unique<GlBufferHolder>(def, gl_interop_fabric, env);
    RETURN_IF_ERROR(DefaultTensorTie::New(MakeClDef(def), internal_object,
                                          converter_builder, env,
                                          &tie_impl->tie_));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    auto ssbo = absl::get_if<OpenGlBuffer>(&obj);
    if (!ssbo) {
      return absl::InvalidArgumentError("Missing OpenGL SSBO");
    }
    auto old_ssbo = absl::get_if<OpenGlBuffer>(&external_obj_);
    if (old_ssbo && ssbo->id == old_ssbo->id) {
      return absl::OkStatus();
    }
    if (cl_object_.memory()) {
      gl_interop_fabric_->UnregisterMemory(cl_object_.memory());
    }
    RETURN_IF_ERROR(CreateClMemoryFromGlBuffer(
        ssbo->id, def().access_type, &environment_->context(), &cl_object_));
    external_obj_ = obj;
    RETURN_IF_ERROR(tie_->SetExternalObject(OpenClBuffer{cl_object_.memory()}));
    gl_interop_fabric_->RegisterMemory(cl_object_.memory());
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return external_obj_; }

  absl::Status CopyFromExternalObject() final {
    return tie_->CopyFromExternalObject();
  }

  absl::Status CopyToExternalObject() final {
    return tie_->CopyToExternalObject();
  }

 private:
  static TensorTieDef MakeClDef(const TensorTieDef& def) {
    auto cl_def = def;
    cl_def.external_def.object_def.object_type = ObjectType::OPENCL_BUFFER;
    cl_def.external_def.object_def.user_provided = true;
    return cl_def;
  }

  CLMemory cl_object_;
  GlInteropFabric* gl_interop_fabric_;
  Environment* environment_;
  std::unique_ptr<TensorTie> tie_;
  TensorObject external_obj_;
};
#endif

TensorObject TensorToObj(const Tensor& tensor) {
  if (tensor.GetStorageType() == TensorStorageType::BUFFER) {
    return OpenClBuffer{tensor.GetMemoryPtr()};
  }
  if (tensor.GetStorageType() == TensorStorageType::IMAGE_BUFFER) {
    return OpenClBuffer{tensor.GetMemoryPtrForWriting()};
  }
  return OpenClTexture{tensor.GetMemoryPtr()};
}

// Responsible for creating new tensor objects.
class TensorTieFactory {
 public:
  TensorTieFactory(Environment* env, InferenceContext* context
#ifdef CL_DELEGATE_ALLOW_GL
                   ,
                   GlInteropFabric* gl_interop_fabric
#endif
                   )
      : env_(*env),
        context_(*context),
#ifdef CL_DELEGATE_ALLOW_GL
        gl_interop_fabric_(gl_interop_fabric),
#endif
        converter_builder_(NewConverterBuilder(env)) {
  }

  bool IsSupported(const TensorTieDef& def) const {
    return IsValid(def.external_def.object_def) &&
           (NoopTensorTie::IsSupported(def) ||
            DefaultTensorTie::IsSupported(def, *converter_builder_) ||
#ifdef CL_DELEGATE_ALLOW_GL
            (gl_interop_fabric_ &&
             GlBufferHolder::IsSupported(def, *converter_builder_)) ||
#endif
            TwoStepTensorTie::IsSupported(def, *converter_builder_));
  }

  absl::Status NewTensorTie(const TensorTieDef& def,
                            std::unique_ptr<TensorTie>* tie) {
    TensorObject internal_object = TensorToObj(*context_.GetTensor(def.id));
    auto converter = converter_builder_.get();
    if (NoopTensorTie::IsSupported(def)) {
      *tie = absl::make_unique<NoopTensorTie>(def, internal_object);
      return absl::OkStatus();
    }
    if (DefaultTensorTie::IsSupported(def, *converter)) {
      return DefaultTensorTie::New(def, internal_object, converter, &env_, tie);
    }
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_ && GlBufferHolder::IsSupported(def, *converter)) {
      return GlBufferHolder::New(def, internal_object, converter,
                                 gl_interop_fabric_, &env_, tie);
    }
#endif
    if (TwoStepTensorTie::IsSupported(def, *converter)) {
      return TwoStepTensorTie::New(def, internal_object, converter, &env_, tie);
    }
    return absl::UnimplementedError("Unsupported tensor tie definition.");
  }

 private:
  Environment& env_;
  InferenceContext& context_;
#ifdef CL_DELEGATE_ALLOW_GL
  GlInteropFabric* gl_interop_fabric_;
#endif
  std::unique_ptr<TensorObjectConverterBuilder> converter_builder_;
};

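// Note on TensorTieFactory::NewTensorTie above: candidate ties are tried from
// cheapest to most expensive, namely a no-op tie when the external and
// internal defs already match, then a one-step converter, then (when GL
// interop is enabled) an SSBO holder around the one-step path, and finally
// the two-step path that goes through an intermediate OpenCL buffer.
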
class InferenceRunnerImpl : public InferenceRunner {
 public:
  InferenceRunnerImpl(Environment* environment,
                      std::unique_ptr<InferenceContext> context
#ifdef CL_DELEGATE_ALLOW_GL
                      ,
                      std::unique_ptr<GlInteropFabric> gl_interop_fabric
#endif
                      )
      : queue_(environment->queue()),
        context_(std::move(context))
#ifdef CL_DELEGATE_ALLOW_GL
        ,
        gl_interop_fabric_(std::move(gl_interop_fabric))
#endif
  {
  }

  absl::Status Initialize(const std::vector<TensorTieDef>& inputs,
                          const std::vector<TensorTieDef>& outputs,
                          TensorTieFactory* factory) {
    RETURN_IF_ERROR(LinkTensors(inputs, factory, &inputs_));
    return LinkTensors(outputs, factory, &outputs_);
  }

  std::vector<TensorObjectDef> inputs() const override {
    return GetExternalDefinitions(inputs_);
  }

  std::vector<TensorObjectDef> outputs() const override {
    return GetExternalDefinitions(outputs_);
  }

  absl::Status GetInputObject(int index, TensorObject* object) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    *object = inputs_[index]->GetExternalObject();
    return absl::OkStatus();
  }

  absl::Status GetOutputObject(int index, TensorObject* object) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    *object = outputs_[index]->GetExternalObject();
    return absl::OkStatus();
  }

  absl::Status SetInputObject(int index, TensorObject object) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Input index is out of range");
    }
    return inputs_[index]->SetExternalObject(object);
  }

  absl::Status SetOutputObject(int index, TensorObject object) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Output index is out of range");
    }
    return outputs_[index]->SetExternalObject(object);
  }

  absl::Status Run() override {
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_) {
      RETURN_IF_ERROR(gl_interop_fabric_->Start());
    }
#endif
    for (auto& obj : inputs_) {
      RETURN_IF_ERROR(obj->CopyFromExternalObject());
    }
    RETURN_IF_ERROR(context_->AddToQueue(queue_));
    clFlush(queue_->queue());
    for (auto& obj : outputs_) {
      RETURN_IF_ERROR(obj->CopyToExternalObject());
    }
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_) {
      RETURN_IF_ERROR(gl_interop_fabric_->Finish());
    }
#endif
    return absl::OkStatus();
  }

 private:
  static absl::Status LinkTensors(
      const std::vector<TensorTieDef>& defs, TensorTieFactory* factory,
      std::vector<std::unique_ptr<TensorTie>>* objects) {
    objects->reserve(defs.size());
    for (auto& def : defs) {
      std::unique_ptr<TensorTie> object;
      RETURN_IF_ERROR(factory->NewTensorTie(def, &object));
      objects->push_back(std::move(object));
    }
    return absl::OkStatus();
  }

  static std::vector<TensorObjectDef> GetExternalDefinitions(
      const std::vector<std::unique_ptr<TensorTie>>& objects) {
    std::vector<TensorObjectDef> defs;
    defs.reserve(objects.size());
    for (auto& obj : objects) {
      defs.push_back(obj->def().external_def);
    }
    return defs;
  }

  CLCommandQueue* queue_;
  std::unique_ptr<InferenceContext> context_;
#ifdef CL_DELEGATE_ALLOW_GL
  std::unique_ptr<GlInteropFabric> gl_interop_fabric_;
#endif
  std::vector<std::unique_ptr<TensorTie>> inputs_;
  std::vector<std::unique_ptr<TensorTie>> outputs_;
};

TensorObjectDef TensorToDef(const Tensor& tensor) {
  TensorObjectDef def;
  def.dimensions.b = tensor.Batch();
  def.dimensions.h = tensor.Height();
  def.dimensions.w = tensor.Width();
  def.dimensions.c = tensor.Channels();
  def.object_def.data_layout = ToDataLayout(tensor.GetStorageType());
  def.object_def.data_type = tensor.GetDataType();
  def.object_def.object_type = ToObjectType(tensor.GetStorageType());
  def.object_def.user_provided = false;
  return def;
}

CalculationsPrecision GetPrecision(const Environment& env,
                                   const InferenceOptions& options) {
  CalculationsPrecision precision;
  switch (GetPosition(options, InferencePriority::MAX_PRECISION)) {
    case 1:
      precision = CalculationsPrecision::F32;
      break;
    case 2:
      precision = CalculationsPrecision::F32_F16;
      break;
    case 3:
      precision = CalculationsPrecision::F16;
      break;
    default:
      precision = CalculationsPrecision::F16;
      break;
  }
  // Increase precision if lower precision is not supported.
  if (!env.IsSupported(precision)) {
    precision = CalculationsPrecision::F32_F16;
    if (!env.IsSupported(precision)) {
      precision = CalculationsPrecision::F32;
    }
  }
  return precision;
}

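// Illustration of the mapping above: if the caller ordered priorities as
// {MAX_PRECISION, MIN_LATENCY, MIN_MEMORY_USAGE}, MAX_PRECISION sits at
// position 1 and F32 is requested; with MAX_PRECISION at position 3 (or
// unset) F16 is requested. The requested precision is then promoted until the
// device supports it.
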
TensorStorageType GetStorageTypeFromOptions(const Environment& env,
                                            const InferenceOptions& options) {
  // Fall back to BUFFER, which should be supported by default.
  std::vector<TensorStorageType> preferred_storage_types;
  if (GetRelativeImportance(options, InferencePriority::MIN_LATENCY,
                            InferencePriority::MIN_MEMORY_USAGE) ==
      PriorityImportance::HIGHER) {
    preferred_storage_types = {GetFastestStorageType(env.device().GetInfo()),
                               TensorStorageType::BUFFER};
  } else {
    preferred_storage_types = {
        GetStorageTypeWithMinimalMemoryConsumption(env.device().GetInfo()),
        TensorStorageType::BUFFER};
  }

  for (TensorStorageType storage_type : preferred_storage_types) {
    if (env.IsSupported(storage_type)) {
      return storage_type;
    }
  }
  return TensorStorageType::UNKNOWN;
}

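// In short: when MIN_LATENCY outranks MIN_MEMORY_USAGE, the device's fastest
// storage type is preferred, otherwise the most memory-frugal one; BUFFER is
// the fallback in both cases, and UNKNOWN is returned only if nothing is
// supported.
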
class InferenceBuilderImpl : public InferenceBuilder {
 public:
  explicit InferenceBuilderImpl(Environment* environment)
      : environment_(environment) {}

  absl::Status Initialize(const InferenceOptions& options,
                          const InferenceEnvironmentOptions& env_options,
                          const GraphFloat32& graph) {
    context_ = absl::make_unique<InferenceContext>();
    InferenceContext::CreateInferenceInfo create_info;
    create_info.precision = GetPrecision(*environment_, options);
    create_info.storage_type =
        GetStorageTypeFromOptions(*environment_, options);
    if (options.usage == InferenceUsage::FAST_SINGLE_ANSWER) {
      create_info.hints.Add(ModelHints::kReduceKernelsCount);
      create_info.hints.Add(ModelHints::kFastTuning);
    } else if (options.usage == InferenceUsage::SUSTAINED_SPEED) {
      create_info.hints.Add(ModelHints::kAllowSpecialKernels);
    }
    RETURN_IF_ERROR(context_->InitFromGraph(create_info, graph, environment_));

#ifdef CL_DELEGATE_ALLOW_GL
    if (env_options.IsGlAware() &&
        IsGlSharingSupported(environment_->device())) {
      gl_interop_fabric_ = absl::make_unique<GlInteropFabric>(
          env_options.egl_display, environment_);
    }
    tie_factory_ = absl::make_unique<TensorTieFactory>(
        environment_, context_.get(), gl_interop_fabric_.get());
#else
    tie_factory_ =
        absl::make_unique<TensorTieFactory>(environment_, context_.get());
#endif

    inputs_ = LinkTensors(context_->GetInputIds(), AccessType::READ);
    outputs_ = LinkTensors(context_->GetOutputIds(), AccessType::WRITE);
    return absl::OkStatus();
  }

  absl::Status Initialize(const InferenceEnvironmentOptions& env_options,
                          const std::vector<uint8_t>& serialized_model) {
    context_ = absl::make_unique<InferenceContext>();
    RETURN_IF_ERROR(
        context_->RestoreDeserialized(serialized_model, environment_));

#ifdef CL_DELEGATE_ALLOW_GL
    if (env_options.IsGlAware() &&
        IsGlSharingSupported(environment_->device())) {
      gl_interop_fabric_ = absl::make_unique<GlInteropFabric>(
          env_options.egl_display, environment_);
    }
    tie_factory_ = absl::make_unique<TensorTieFactory>(
        environment_, context_.get(), gl_interop_fabric_.get());
#else
    tie_factory_ =
        absl::make_unique<TensorTieFactory>(environment_, context_.get());
#endif

    inputs_ = LinkTensors(context_->GetInputIds(), AccessType::READ);
    outputs_ = LinkTensors(context_->GetOutputIds(), AccessType::WRITE);
    return absl::OkStatus();
  }

  std::vector<TensorObjectDef> inputs() const override {
    return GetExternalDefinitions(inputs_);
  }

  std::vector<TensorObjectDef> outputs() const override {
    return GetExternalDefinitions(outputs_);
  }

  absl::Status SetInputShape(int index, const Dimensions& dimensions) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    return absl::UnimplementedError("Changing input shapes is not supported");
  }

  absl::Status SetInputObjectDef(int index, ObjectDef new_def) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Input index is out of range");
    }
    auto def = inputs_[index];
    def.external_def.object_def = new_def;
    if (!tie_factory_->IsSupported(def)) {
      return absl::InvalidArgumentError(
          "New input object definition is not supported.");
    }
    inputs_[index] = def;
    return absl::OkStatus();
  }

  absl::Status SetOutputObjectDef(int index, ObjectDef new_def) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Output index is out of range");
    }
    auto def = outputs_[index];
    def.external_def.object_def = new_def;
    if (!tie_factory_->IsSupported(def)) {
      return absl::InvalidArgumentError(
          "New output object definition is not supported.");
    }
    outputs_[index] = def;
    return absl::OkStatus();
  }

  absl::Status Build(std::unique_ptr<InferenceRunner>* runner) override {
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_ && !HasGlObjects()) {
      // Destroy the interop layer when there are no GL objects to avoid
      // extra synchronization cost.
      gl_interop_fabric_.reset(nullptr);
    }
    auto runner_impl = absl::make_unique<InferenceRunnerImpl>(
        environment_, std::move(context_), std::move(gl_interop_fabric_));
#else
    auto runner_impl = absl::make_unique<InferenceRunnerImpl>(
        environment_, std::move(context_));
#endif
    RETURN_IF_ERROR(
        runner_impl->Initialize(inputs_, outputs_, tie_factory_.get()));
    *runner = std::move(runner_impl);
    return absl::OkStatus();
  }

 private:
  // Links internal tensors with external user-facing objects.
  std::vector<TensorTieDef> LinkTensors(const std::vector<ValueId>& ids,
                                        AccessType access) {
    std::vector<TensorTieDef> links;
    links.reserve(ids.size());
    for (const auto& id : ids) {
      TensorObjectDef def = TensorToDef(*context_->GetTensor(id));
      links.push_back({id, access, def, def});
    }
    return links;
  }

  bool HasGlObjects() const {
#ifdef CL_DELEGATE_ALLOW_GL
    auto is_gl = [](ObjectType t) {
      return t == ObjectType::OPENGL_SSBO || t == ObjectType::OPENGL_TEXTURE;
    };
    for (const TensorTieDef& def : inputs_) {
      if (is_gl(def.external_def.object_def.object_type)) {
        return true;
      }
    }
    for (const TensorTieDef& def : outputs_) {
      if (is_gl(def.external_def.object_def.object_type)) {
        return true;
      }
    }
#endif
    return false;
  }

  static std::vector<TensorObjectDef> GetExternalDefinitions(
      const std::vector<TensorTieDef>& links) {
    std::vector<TensorObjectDef> defs;
    defs.reserve(links.size());
    for (auto& desc : links) {
      defs.push_back(desc.external_def);
    }
    return defs;
  }

  std::unique_ptr<InferenceContext> context_;
#ifdef CL_DELEGATE_ALLOW_GL
  std::unique_ptr<GlInteropFabric> gl_interop_fabric_;
#endif
  Environment* environment_;

  std::vector<TensorTieDef> inputs_;
  std::vector<TensorTieDef> outputs_;
  std::unique_ptr<TensorTieFactory> tie_factory_;
};

class InferenceEnvironmentImpl : public InferenceEnvironment {
 public:
  explicit InferenceEnvironmentImpl(const InferenceEnvironmentOptions& options)
      : options_(options) {}

  absl::Status Init() {
    RETURN_IF_ERROR(LoadOpenCL());
    properties_.is_opencl_available = true;

    CLDevice device;
    if (options_.device) {
      cl_platform_id platform;
      RETURN_IF_ERROR(GetDeviceInfo<cl_platform_id>(
          options_.device, CL_DEVICE_PLATFORM, &platform));
      device = CLDevice(options_.device, platform);
    } else {
      RETURN_IF_ERROR(CreateDefaultGPUDevice(&device));
    }

#ifdef CL_DELEGATE_ALLOW_GL
    properties_.is_gl_sharing_supported = IsGlSharingSupported(device);
    properties_.is_gl_to_cl_fast_sync_supported =
        IsClEventFromEglSyncSupported(device);
    properties_.is_cl_to_gl_fast_sync_supported =
        IsEglSyncFromClEventSupported();
#endif

    CLContext context;
    if (options_.context) {
#ifdef CL_DELEGATE_ALLOW_GL
      if (options_.IsGlAware()) {
        return absl::InvalidArgumentError(
            "OpenCL context and EGL parameters are set at the same time.");
      }
#endif
      context = CLContext(options_.context, /* has_ownership = */ false);
    } else {
#ifdef CL_DELEGATE_ALLOW_GL
      if (options_.IsGlAware() && properties_.is_gl_sharing_supported) {
        RETURN_IF_ERROR(CreateCLGLContext(
            device,
            reinterpret_cast<cl_context_properties>(options_.egl_context),
            reinterpret_cast<cl_context_properties>(options_.egl_display),
            &context));
      } else {
        RETURN_IF_ERROR(CreateCLContext(device, &context));
      }
#else
      RETURN_IF_ERROR(CreateCLContext(device, &context));
#endif
    }

    CLCommandQueue queue;
    if (options_.command_queue) {
      queue =
          CLCommandQueue(options_.command_queue, /* has_ownership = */ false);
    } else {
      RETURN_IF_ERROR(CreateCLCommandQueue(device, context, &queue));
    }
    // Profiling queue is used for workgroup size tuning.
    ProfilingCommandQueue profiling_queue;
    RETURN_IF_ERROR(
        CreateProfilingCommandQueue(device, context, &profiling_queue));
    environment_ = Environment(std::move(device), std::move(context),
                               std::move(queue), std::move(profiling_queue));
    return environment_.Init();
  }

  absl::Status BuildSerializedModel(
      const InferenceOptions& options, GraphFloat32 model,
      std::vector<uint8_t>* serialized_model) final {
    if (!IsValid(options)) {
      return absl::InvalidArgumentError("InferenceOptions are invalid.");
    }
    InferenceOptions resolved_options = options;
    ResolveAutoPriority(&resolved_options);
    if (environment_.program_cache() &&
        !options_.serialized_binary_cache.empty()) {
      // Ignore returned error. Cache is discarded.
      environment_.program_cache()
          ->AddSerializedCache(environment_.context(), environment_.device(),
                               options_.serialized_binary_cache)
          .IgnoreError();
    }

    RETURN_IF_ERROR(RunGraphTransforms(&model));
    InferenceContext context;
    InferenceContext::CreateInferenceInfo create_info;
    create_info.precision = GetPrecision(environment_, options);
    create_info.storage_type = GetStorageTypeFromOptions(environment_, options);
    if (options.usage == InferenceUsage::FAST_SINGLE_ANSWER) {
      create_info.hints.Add(ModelHints::kReduceKernelsCount);
      create_info.hints.Add(ModelHints::kFastTuning);
    } else if (options.usage == InferenceUsage::SUSTAINED_SPEED) {
      create_info.hints.Add(ModelHints::kAllowSpecialKernels);
    }
    RETURN_IF_ERROR(context.InitFromGraph(create_info, model, &environment_,
                                          serialized_model));
    return absl::OkStatus();
  }

  absl::Status NewInferenceBuilder(
      const InferenceOptions& options, GraphFloat32 model,
      std::unique_ptr<InferenceBuilder>* builder) final {
    if (!IsValid(options)) {
      return absl::InvalidArgumentError("InferenceOptions are invalid.");
    }
    InferenceOptions resolved_options = options;
    ResolveAutoPriority(&resolved_options);
    if (environment_.program_cache() &&
        !options_.serialized_binary_cache.empty()) {
      // Ignore returned error. Cache is discarded.
      environment_.program_cache()
          ->AddSerializedCache(environment_.context(), environment_.device(),
                               options_.serialized_binary_cache)
          .IgnoreError();
    }

    RETURN_IF_ERROR(RunGraphTransforms(&model));
    auto builder_impl = absl::make_unique<InferenceBuilderImpl>(&environment_);
    RETURN_IF_ERROR(
        builder_impl->Initialize(resolved_options, options_, model));
    *builder = std::move(builder_impl);
    return absl::OkStatus();
  }

  absl::Status NewInferenceBuilder(
      const std::vector<uint8_t>& serialized_model,
      std::unique_ptr<InferenceBuilder>* builder) final {
    if (environment_.program_cache() &&
        !options_.serialized_binary_cache.empty()) {
      // Ignore returned error. Cache is discarded.
      environment_.program_cache()
          ->AddSerializedCache(environment_.context(), environment_.device(),
                               options_.serialized_binary_cache)
          .IgnoreError();
    }

    auto builder_impl = absl::make_unique<InferenceBuilderImpl>(&environment_);
    RETURN_IF_ERROR(builder_impl->Initialize(options_, serialized_model));
    *builder = std::move(builder_impl);
    return absl::OkStatus();
  }

  std::vector<uint8_t> GetSerializedBinaryCache() const final {
    std::vector<uint8_t> data;
    // If there was a problem, data would be empty.
    environment_.program_cache()
        ->GetSerializedCache(environment_.device(), &data)
        .IgnoreError();
    return data;
  }

  const InferenceEnvironmentProperties& properties() const {
    return properties_;
  }

 private:
  const InferenceEnvironmentOptions options_;
  Environment environment_;
  InferenceEnvironmentProperties properties_;
};

}  // namespace

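// Typical usage sketch (illustrative only; assumes the caller already has a
// GraphFloat32 named `graph`, error handling abbreviated):
//
//   InferenceEnvironmentOptions env_options;
//   InferenceEnvironmentProperties properties;
//   std::unique_ptr<InferenceEnvironment> env;
//   RETURN_IF_ERROR(NewInferenceEnvironment(env_options, &env, &properties));
//
//   InferenceOptions options;  // usage/priorities filled in by the caller.
//   std::unique_ptr<InferenceBuilder> builder;
//   RETURN_IF_ERROR(
//       env->NewInferenceBuilder(options, std::move(graph), &builder));
//
//   std::unique_ptr<InferenceRunner> runner;
//   RETURN_IF_ERROR(builder->Build(&runner));
//   RETURN_IF_ERROR(runner->Run());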
absl::Status NewInferenceEnvironment(
    const InferenceEnvironmentOptions& options,
    std::unique_ptr<InferenceEnvironment>* environment,
    InferenceEnvironmentProperties* properties) {
  auto env_impl = absl::make_unique<InferenceEnvironmentImpl>(options);
  absl::Status status = env_impl->Init();
  if (properties) {
    *properties = env_impl->properties();
  }
  RETURN_IF_ERROR(status);
  *environment = std::move(env_impl);
  return absl::OkStatus();
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite