/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/gl_delegate.h"
|
|
|
|
#include <EGL/egl.h>
|
|
#include <GLES3/gl31.h>
|
|
|
|
#include <algorithm>
|
|
#include <cstdint>
|
|
#include <cstring>
|
|
#include <string>
|
|
#include <unordered_set>
|
|
#include <vector>
|
|
|
|
#include "absl/types/span.h"
|
|
#include "tensorflow/lite/builtin_ops.h"
|
|
#include "tensorflow/lite/c/common.h"
|
|
#include "tensorflow/lite/delegates/gpu/common/convert.h"
|
|
#include "tensorflow/lite/delegates/gpu/common/model.h"
|
|
#include "tensorflow/lite/delegates/gpu/common/model_builder.h"
|
|
#include "tensorflow/lite/delegates/gpu/common/model_transformer.h"
|
|
#include "tensorflow/lite/delegates/gpu/common/shape.h"
|
|
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
|
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
|
|
#include "tensorflow/lite/delegates/gpu/common/transformations/model_transformations.h"
|
|
#include "tensorflow/lite/delegates/gpu/gl/api.h"
|
|
#include "tensorflow/lite/delegates/gpu/gl/command_queue.h"
|
|
#include "tensorflow/lite/delegates/gpu/gl/compiler.h"
|
|
#include "tensorflow/lite/delegates/gpu/gl/converters/bhwc_to_phwc4.h"
|
|
#include "tensorflow/lite/delegates/gpu/gl/converters/phwc4_to_bhwc.h"
|
|
#include "tensorflow/lite/delegates/gpu/gl/egl_environment.h"
|
|
#include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
|
|
#include "tensorflow/lite/delegates/gpu/gl/kernels/registry.h"
|
|
#include "tensorflow/lite/delegates/gpu/gl/request_gpu_info.h"
|
|
#include "tensorflow/lite/delegates/gpu/gl/workgroups/best_effort_calculator.h"
|
|
#include "tensorflow/lite/minimal_logging.h"
|
|
|
|
#ifndef TFLITE_GPU_BINARY_RELEASE
|
|
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
|
|
#include "tensorflow/lite/delegates/gpu/gl/metadata_generated.h"
|
|
#include "tensorflow/lite/schema/schema_generated.h"
|
|
#endif // TFLITE_GPU_BINARY_RELEASE
|
|
|
|
namespace tflite {
namespace gpu {
namespace gl {
namespace {

// Forward declarations.
TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate);
TfLiteStatus DelegateCopyFromBufferHandle(
    TfLiteContext* context, TfLiteDelegate* delegate,
    TfLiteBufferHandle buffer_handle,  // ValueId
    TfLiteTensor* tensor);
TfLiteStatus DelegateCopyToBufferHandle(
    TfLiteContext* context, TfLiteDelegate* delegate,
    TfLiteBufferHandle buffer_handle,  // ValueId
    TfLiteTensor* tensor);

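// PHWC4 stores a tensor as HxW planes of 4-channel slices, so a BHWC buffer
// can be reinterpreted as PHWC4 without conversion exactly when every pixel
// already fills whole 4-float slices: either c == 4, or the tensor is a
// 1x1 "vector" whose channel count is a multiple of 4. For example
// (illustrative only), a 1x1x1x8 tensor is two consecutive 4-float slices in
// both layouts, while a 1x2x2x3 tensor needs channel padding and therefore a
// conversion pass.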
inline bool IsPHWC4(const BHWC& shape) {
  return shape.c == 4 || (shape.h == 1 && shape.w == 1 && shape.c % 4 == 0);
}

class Delegate {
  struct ValueRef {
    BHWC shape;
    int tensor_index;
  };

 public:
  explicit Delegate(const TfLiteGpuDelegateOptions* options) {
    if (options) {
      options_ = *options;
    } else {
      // Default options.
      options_.metadata = nullptr;
      options_.compile_options.precision_loss_allowed = 0;
      options_.compile_options.preferred_gl_object_type =
          TFLITE_GL_OBJECT_TYPE_FASTEST;
      options_.compile_options.dynamic_batch_enabled = 0;
      // Also initialize inline_parameters; it is read during compilation and
      // would otherwise be left uninitialized.
      options_.compile_options.inline_parameters = 0;
    }
  }

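  // A TfLiteBufferHandle here is the GPU ValueId of a tensor (see the
  // forward declarations above), so the two copy methods below translate
  // between a CPU-side BHWC float tensor and the PHWC4 GPU buffer registered
  // under that id.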
  absl::Status CopyFromBufferHandle(TfLiteBufferHandle handle,
                                    TfLiteTensor* tensor) {
    ValueRef ref;
    RETURN_IF_ERROR(FindObject(handle, &ref));
    auto buffer = phwc4_objects_.FindBuffer(handle);
    return buffer->MappedRead<float>([&](absl::Span<const float> data) {
      tensor->data_is_stale = false;
      return ConvertFromPHWC4(
          data, ref.shape,
          absl::MakeSpan(tensor->data.f, tensor->bytes / sizeof(float)));
    });
  }

  absl::Status CopyToBufferHandle(TfLiteBufferHandle handle,
                                  TfLiteTensor* tensor) const {
    ValueRef ref;
    RETURN_IF_ERROR(FindObject(handle, &ref));
    auto buffer = phwc4_objects_.FindBuffer(handle);
    return buffer->MappedWrite<float>([&](absl::Span<float> data) {
      return ConvertToPHWC4(
          absl::MakeConstSpan(tensor->data.f, tensor->bytes / sizeof(float)),
          ref.shape, data);
    });
  }

  absl::Status BindBufferToTensor(GLuint ssbo, int tensor_index) {
    int64_t bytes_size;
    RETURN_IF_ERROR(GetSSBOSize(ssbo, &bytes_size));
    return bhwc_objects_.RegisterBuffer(
        tensor_index, GlBuffer(GL_SHADER_STORAGE_BUFFER, ssbo, bytes_size,
                               /* offset = */ 0,
                               /* has_ownership = */ false));
  }

  absl::Status Prepare(TfLiteContext* context,
                       const TfLiteDelegateParams* delegate_params) {
    // Extract TFLite delegate execution plan from the context and convert it
    // into GraphFloat32.
    GraphFloat32 graph;
    RETURN_IF_ERROR(BuildModel(context, delegate_params, &graph));

    // Apply general transformations on the graph.
    NullTransformationReporter reporter;
    ModelTransformer transformer(&graph, &reporter);
    if (!ApplyModelTransformations(&transformer)) {
      return absl::InternalError("Graph transformations failed");
    }

    if (!env_) RETURN_IF_ERROR(EglEnvironment::NewEglEnvironment(&env_));

    // TODO(impjdi): Remove code duplication.
    auto values = graph.values();
    auto find_value = [&](int tensor_index) -> Value* {
      for (auto value : values) {
        if (value->tensor.ref == tensor_index) return value;
      }
      return nullptr;
    };
    tensors_.reserve(values.back()->id + 1);
    for (auto value : values) {
      if (tensors_.size() <= value->id) {
        tensors_.resize(value->id + 1);
      }
      tensors_[value->id] = {value->tensor.shape, 0};
    }

    std::unordered_set<int> tflite_graph_io;  // NOLINT

    // Prepare graph inputs.
    //
    // Note that graph.inputs() cannot be used directly, as the notion of a
    // graph input has a different meaning in the public API and in the
    // GPU-internal API.
    {
      inputs_.clear();
      inputs_.reserve(delegate_params->input_tensors->size);
      for (int i = 0; i < delegate_params->input_tensors->size; ++i) {
        const int tensor_index = delegate_params->input_tensors->data[i];
        auto* tensor = context->tensors + tensor_index;
        if (tensor->allocation_type == TfLiteAllocationType::kTfLiteMmapRo) {
          continue;
        }
        tflite_graph_io.insert(tensor_index);
        const auto* input = find_value(tensor_index);
        if (!input || tensor->type != TfLiteType::kTfLiteFloat32) {
          return absl::NotFoundError("Input tensor is not found in the graph.");
        }

        inputs_.push_back(input->id);
        tensor->buffer_handle = input->id;
        tensor->delegate = &delegate_;
        tensors_[input->id].tensor_index = tensor_index;

        // Create a PHWC4 input buffer.
        // If an externally provided object is already in PHWC4, the
        // conversion step can be skipped. The same buffer is kept in
        // bhwc_objects_ to indicate that an externally provided buffer
        // exists.
        auto external_buffer = bhwc_objects_.FindBuffer(tensor_index);
        GlBuffer buffer;
        if (IsPHWC4(input->tensor.shape) && external_buffer) {
          buffer = external_buffer->MakeRef();
        } else {
          RETURN_IF_ERROR(CreateReadWriteShaderStorageBuffer<float>(
              GetElementsSizeForPHWC4(input->tensor.shape), &buffer));
        }
        RETURN_IF_ERROR(
            phwc4_objects_.RegisterBuffer(input->id, std::move(buffer)));
      }
    }

    // Prepare graph outputs.
    //
    // Note that graph.outputs() cannot be used directly, as the notion of a
    // graph output has a different meaning in the public API and in the
    // GPU-internal API.
    {
      outputs_.clear();
      outputs_.reserve(delegate_params->output_tensors->size);
      for (int i = 0; i < delegate_params->output_tensors->size; ++i) {
        const int tensor_index = delegate_params->output_tensors->data[i];
        auto* tensor = context->tensors + tensor_index;
        tflite_graph_io.insert(tensor_index);
        const auto* output = find_value(tensor_index);
        if (!output || tensor->type != TfLiteType::kTfLiteFloat32) {
          return absl::NotFoundError(
              "Output tensor is not found in the graph.");
        }

        outputs_.push_back(output->id);
        tensor->buffer_handle = output->id;
        tensor->delegate = &delegate_;
        tensors_[output->id].tensor_index = tensor_index;

        // Create a PHWC4 output buffer.
        // If an externally provided object is already in PHWC4, the
        // conversion step can be skipped.
        auto external_buffer = bhwc_objects_.FindBuffer(tensor_index);
        GlBuffer buffer;
        if (IsPHWC4(output->tensor.shape) && external_buffer) {
          buffer = external_buffer->MakeRef();
        } else {
          RETURN_IF_ERROR(CreateReadWriteShaderStorageBuffer<float>(
              GetElementsSizeForPHWC4(output->tensor.shape), &buffer));
        }
        RETURN_IF_ERROR(
            phwc4_objects_.RegisterBuffer(output->id, std::move(buffer)));
      }
    }

    // Create shaders to convert from/to PHWC4.
    RETURN_IF_ERROR(ConverterBhwcToPhwc4::Create(&bhwc_to_phwc4_));
    RETURN_IF_ERROR(ConverterPhwc4ToBhwc::Create(&phwc4_to_bhwc_));

    // Compile model.
    CompilationOptions compile_options;
    compile_options.allow_precision_loss =
        static_cast<bool>(options_.compile_options.precision_loss_allowed);
    compile_options.preferred_obj_type = static_cast<ObjectType>(
        options_.compile_options.preferred_gl_object_type);
    compile_options.ref_obj_type = static_cast<ObjectType>(
        options_.compile_options.preferred_gl_object_type);
    compile_options.dynamic_batch =
        static_cast<bool>(options_.compile_options.dynamic_batch_enabled);
    compile_options.inline_parameters =
        static_cast<bool>(options_.compile_options.inline_parameters);
    auto shaders = NewNodeShaderRegistry();
    GpuInfo gpu_info;
    RETURN_IF_ERROR(RequestGpuInfo(&gpu_info));
    command_queue_ = NewCommandQueue(gpu_info);
    auto workgroups_calculator =
        BestEffortWorkgroupsCalculator(options_.metadata, gpu_info);
    std::unique_ptr<CompiledModel> compiled_model;
    RETURN_IF_ERROR(Compile(compile_options, graph, tflite_graph_io, *shaders,
                            *workgroups_calculator, &compiled_model));

    // Create inference context.
    const RuntimeOptions runtime_options;
    RETURN_IF_ERROR(compiled_model->NewRun(runtime_options, &phwc4_objects_,
                                           command_queue_.get(),
                                           &inference_context_));
    return absl::OkStatus();
  }

  absl::Status Invoke(TfLiteContext* context) {
    const EGLContext egl_context_at_delegate_init = env_->context().context();
    const EGLContext egl_context_at_delegate_invoke = eglGetCurrentContext();
    if (egl_context_at_delegate_init != egl_context_at_delegate_invoke) {
      return absl::FailedPreconditionError(
          "Delegate should run on the same thread where it was initialized.");
    }

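    // All GL objects (buffers, programs) were created against the EGL context
    // that was current during initialization, which is why invocation is
    // restricted to that same context above.
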
    // Push input data from a tensor to GPU.
    for (ValueId id : inputs_) {
      const ValueRef& ref = tensors_[id];
      auto external_object = bhwc_objects_.FindBuffer(ref.tensor_index);
      if (external_object) {
        // Use input from GPU.
        // Conversion is needed only when the external object is not PHWC4.
        if (!IsPHWC4(ref.shape)) {
          RETURN_IF_ERROR(bhwc_to_phwc4_.Convert(
              ref.shape, *external_object, command_queue_.get(),
              phwc4_objects_.FindBuffer(id)));
        }
      } else {
        // Copy from CPU to GPU.
        TfLiteTensor& tensor = context->tensors[ref.tensor_index];
        RETURN_IF_ERROR(CopyToBufferHandle(id, &tensor));
      }
    }

    // Run inference.
    RETURN_IF_ERROR(inference_context_->Reset());
    RETURN_IF_ERROR(inference_context_->Execute());

    // Push output data from GPU to a tensor.
    bool finished_gpu_processing = false;
    for (ValueId id : outputs_) {
      const ValueRef& ref = tensors_[id];
      auto external_object = bhwc_objects_.FindBuffer(ref.tensor_index);
      if (external_object) {
        // Convert data from PHWC4 to BHWC and leave it in the GPU object.
        // Conversion is needed only when the external object is not PHWC4.
        if (!IsPHWC4(ref.shape)) {
          RETURN_IF_ERROR(
              phwc4_to_bhwc_.Convert(ref.shape, *phwc4_objects_.FindBuffer(id),
                                     command_queue_.get(), external_object));
        }
      } else {
        // Wait until all GPU commands are completed. Waiting once here lowers
        // processing latency because the buffer reads below will not stall on
        // data that is not yet ready.
        if (!finished_gpu_processing) {
          RETURN_IF_ERROR(command_queue_->WaitForCompletion());
          finished_gpu_processing = true;
        }
        // Copy from GPU to CPU.
        TfLiteTensor& tensor = context->tensors[ref.tensor_index];
        RETURN_IF_ERROR(CopyFromBufferHandle(id, &tensor));
      }
    }
    return absl::OkStatus();
  }

  TfLiteDelegate* tflite_delegate() { return &delegate_; }

 private:
  absl::Status FindObject(ValueId id, ValueRef* ref) const {
    if (id >= tensors_.size()) {
      return absl::InvalidArgumentError("Invalid buffer id");
    }
    *ref = tensors_[id];
    return absl::OkStatus();
  }

  TfLiteDelegate delegate_ = {
      reinterpret_cast<void*>(this),  // .data_
      DelegatePrepare,                // .Prepare
      DelegateCopyFromBufferHandle,   // .CopyFromBufferHandle
      DelegateCopyToBufferHandle,     // .CopyToBufferHandle
      nullptr,                        // .FreeBufferHandle
      kTfLiteDelegateFlagsNone,       // .flags
  };

  TfLiteGpuDelegateOptions options_;

  std::unique_ptr<EglEnvironment> env_;
  std::vector<ValueRef> tensors_;  // indexed by ValueId
  std::vector<ValueId> inputs_;
  std::vector<ValueId> outputs_;
  ObjectManager phwc4_objects_;
  ObjectManager bhwc_objects_;  // key is tensor_index
  ConverterPhwc4ToBhwc phwc4_to_bhwc_;
  ConverterBhwcToPhwc4 bhwc_to_phwc4_;
  std::unique_ptr<CommandQueue> command_queue_;
  std::unique_ptr<InferenceContext> inference_context_;
};

inline Delegate* GetGpuDelegate(TfLiteNode* node) {
  return reinterpret_cast<Delegate*>(node->user_data);
}

inline Delegate* GetGpuDelegate(TfLiteDelegate* delegate) {
  return reinterpret_cast<Delegate*>(delegate->data_);
}

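// DelegatePrepare registers a single "TfLiteGpuDelegate" kernel for each
// delegated subgraph: .init builds and compiles the GPU program, .prepare
// merely verifies that .init succeeded, and .invoke runs inference.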
TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) {
  const TfLiteRegistration kRegistration = {
      // .init
      [](TfLiteContext* context, const char* buffer, size_t) -> void* {
        const auto* params =
            reinterpret_cast<const TfLiteDelegateParams*>(buffer);
        auto* gpu_delegate = GetGpuDelegate(params->delegate);
        // Everything below should happen in the prepare function call, but
        // TFLite for whatever reason forbids that.
        const auto status = gpu_delegate->Prepare(context, params);
        if (status.ok()) return gpu_delegate;
        TF_LITE_KERNEL_LOG(context, "TfLiteGpuDelegate Prepare: %s",
                           std::string(status.message()).c_str());
        return nullptr;
      },
      // .free
      [](TfLiteContext*, void* buffer) -> void {},
      // .prepare
      [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
        return node->user_data ? kTfLiteOk : kTfLiteError;
      },
      // .invoke
      [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
        const auto status = GetGpuDelegate(node)->Invoke(context);
        if (status.ok()) return kTfLiteOk;
        TF_LITE_KERNEL_LOG(context, "TfLiteGpuDelegate Invoke: %s",
                           std::string(status.message()).c_str());
        return kTfLiteError;
      },
      nullptr,              // .profiling_string
      0,                    // .builtin_code
      "TfLiteGpuDelegate",  // .custom_name
      1,                    // .version
  };
  TfLiteIntArray* ops_to_replace = GetOpsToReplace(context);
  const auto status = context->ReplaceNodeSubsetsWithDelegateKernels(
      context, kRegistration, ops_to_replace, delegate);
  TfLiteIntArrayFree(ops_to_replace);
  return status;
}

TfLiteStatus DelegateCopyFromBufferHandle(TfLiteContext* context,
                                          TfLiteDelegate* delegate,
                                          TfLiteBufferHandle buffer_handle,
                                          TfLiteTensor* tensor) {
  auto* gpu_delegate = GetGpuDelegate(delegate);
  if (!gpu_delegate) return kTfLiteError;
  const auto status = gpu_delegate->CopyFromBufferHandle(buffer_handle, tensor);
  if (status.ok()) return kTfLiteOk;
  TF_LITE_KERNEL_LOG(context, "TfLiteGpuDelegate CopyFromBufferHandle: %s",
                     std::string(status.message()).c_str());
  return kTfLiteError;
}

TfLiteStatus DelegateCopyToBufferHandle(TfLiteContext* context,
                                        TfLiteDelegate* delegate,
                                        TfLiteBufferHandle buffer_handle,
                                        TfLiteTensor* tensor) {
  auto* gpu_delegate = GetGpuDelegate(delegate);
  if (!gpu_delegate) return kTfLiteError;
  const auto status = gpu_delegate->CopyToBufferHandle(buffer_handle, tensor);
  if (status.ok()) return kTfLiteOk;
  TF_LITE_KERNEL_LOG(context, "TfLiteGpuDelegate CopyToBufferHandle: %s",
                     std::string(status.message()).c_str());
  return kTfLiteError;
}

}  // namespace
}  // namespace gl
}  // namespace gpu
}  // namespace tflite

TfLiteGlCompileOptions TfLiteGlCompileOptionsDefault() {
  TfLiteGlCompileOptions options;
  options.precision_loss_allowed = 0;
  options.preferred_gl_object_type = TFLITE_GL_OBJECT_TYPE_FASTEST;
  options.dynamic_batch_enabled = 0;
  options.inline_parameters = 0;
  return options;
}

TfLiteGpuDelegateOptions TfLiteGpuDelegateOptionsDefault() {
  TfLiteGpuDelegateOptions options;
  options.metadata = nullptr;
  options.compile_options = TfLiteGlCompileOptionsDefault();
  return options;
}

TfLiteDelegate* TfLiteGpuDelegateCreate(
    const TfLiteGpuDelegateOptions* options) {
  TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
                       "Created TensorFlow Lite delegate for GPU.");
  auto* gpu_delegate = new tflite::gpu::gl::Delegate(options);
  return gpu_delegate ? gpu_delegate->tflite_delegate() : nullptr;
}

void TfLiteGpuDelegateDelete(TfLiteDelegate* delegate) {
  delete tflite::gpu::gl::GetGpuDelegate(delegate);
}

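// A minimal usage sketch for the C API above, assuming a standard
// tflite::Interpreter has already been built (the `interpreter` variable is
// illustrative, not part of this file):
//
//   TfLiteGpuDelegateOptions options = TfLiteGpuDelegateOptionsDefault();
//   TfLiteDelegate* delegate = TfLiteGpuDelegateCreate(&options);
//   if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
//     // Fall back to CPU execution.
//   }
//   // ... interpreter->Invoke(), read outputs ...
//   // Destroy the interpreter first, then the delegate; the delegate must
//   // outlive any interpreter that uses it.
//   interpreter.reset();
//   TfLiteGpuDelegateDelete(delegate);
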
TfLiteStatus TfLiteGpuDelegateBindBufferToTensor(TfLiteDelegate* delegate,
                                                 GLuint buffer,
                                                 int tensor_index) {
  auto* gpu_delegate = tflite::gpu::gl::GetGpuDelegate(delegate);
  return gpu_delegate &&
                 gpu_delegate->BindBufferToTensor(buffer, tensor_index).ok()
             ? kTfLiteOk
             : kTfLiteError;
}

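// A sketch of binding a user-owned SSBO to an input or output tensor,
// assuming a GL context is current on this thread (`ssbo_size_bytes` and
// `tensor_index` are illustrative):
//
//   GLuint ssbo;
//   glGenBuffers(1, &ssbo);
//   glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
//   glBufferData(GL_SHADER_STORAGE_BUFFER, ssbo_size_bytes, nullptr,
//                GL_STREAM_COPY);
//   glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
//   TfLiteGpuDelegateBindBufferToTensor(delegate, ssbo, tensor_index);
//
// Binding has to happen before the delegate's Prepare runs (i.e. before
// Interpreter::ModifyGraphWithDelegate), because Prepare decides per tensor
// whether to reuse the external buffer or allocate a PHWC4 staging buffer.
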
#ifndef TFLITE_GPU_BINARY_RELEASE
const uint8_t* TfLiteGpuDelegateGetModelMetadata(const void* tflite_model) {
  const auto* model = reinterpret_cast<const tflite::Model*>(tflite_model);
  if (!model || !model->metadata_buffer() || !model->buffers()) return nullptr;
  for (int32_t buffer_index : *model->metadata_buffer()) {
    // Skip out-of-range buffer indices.
    if (buffer_index < 0 ||
        buffer_index >= static_cast<int32_t>(model->buffers()->size())) {
      continue;
    }
    const tflite::Buffer* buffer = model->buffers()->Get(buffer_index);
    if (!buffer || !buffer->data()) continue;
    const uint8_t* data = buffer->data()->data();
    if (!flatbuffers::BufferHasIdentifier(
            data, tflite::gpu::gl::data::FlowMetadataIdentifier())) {
      continue;
    }
    flatbuffers::Verifier verifier(data, buffer->data()->size());
    return tflite::gpu::gl::data::VerifyFlowMetadataBuffer(verifier) ? data
                                                                     : nullptr;
  }
  return nullptr;
}
#endif  // TFLITE_GPU_BINARY_RELEASE