Arguments splitted on Arguments and CLArguments.
PiperOrigin-RevId: 339089579 Change-Id: I9a4875b18195859c9590f79b975afc4f881d97fe
This commit is contained in:
parent
589d9fe124
commit
ae6c9dae43
@ -52,12 +52,8 @@ cc_library(
|
||||
srcs = ["arguments.cc"],
|
||||
hdrs = ["arguments.h"],
|
||||
deps = [
|
||||
":cl_device",
|
||||
":gpu_object",
|
||||
":opencl_wrapper",
|
||||
":serialization_cc_fbs",
|
||||
":tensor_type",
|
||||
":util",
|
||||
"//tensorflow/lite/delegates/gpu/common:access_type",
|
||||
"//tensorflow/lite/delegates/gpu/common:data_type",
|
||||
"//tensorflow/lite/delegates/gpu/common:status",
|
||||
@ -67,27 +63,6 @@ cc_library(
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "arguments_test",
|
||||
srcs = ["arguments_test.cc"],
|
||||
linkstatic = True,
|
||||
tags = tf_gpu_tests_tags() + [
|
||||
"linux",
|
||||
"local",
|
||||
],
|
||||
deps = [
|
||||
":arguments",
|
||||
":buffer",
|
||||
":device_info",
|
||||
":gpu_object",
|
||||
":tensor",
|
||||
":tensor_type",
|
||||
"//tensorflow/lite/delegates/gpu/common:data_type",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@com_google_googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "buffer",
|
||||
srcs = ["buffer.cc"],
|
||||
@ -132,6 +107,45 @@ cc_library(
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cl_arguments",
|
||||
srcs = ["cl_arguments.cc"],
|
||||
hdrs = ["cl_arguments.h"],
|
||||
deps = [
|
||||
":arguments",
|
||||
":cl_context",
|
||||
":device_info",
|
||||
":gpu_object",
|
||||
":tensor",
|
||||
":tensor_type",
|
||||
"//tensorflow/lite/delegates/gpu/common:access_type",
|
||||
"//tensorflow/lite/delegates/gpu/common:data_type",
|
||||
"//tensorflow/lite/delegates/gpu/common:status",
|
||||
"//tensorflow/lite/delegates/gpu/common:types",
|
||||
"//tensorflow/lite/delegates/gpu/common:util",
|
||||
"@com_google_absl//absl/strings",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "cl_arguments_test",
|
||||
srcs = ["cl_arguments_test.cc"],
|
||||
linkstatic = True,
|
||||
tags = tf_gpu_tests_tags() + [
|
||||
"linux",
|
||||
"local",
|
||||
],
|
||||
deps = [
|
||||
":arguments",
|
||||
":buffer",
|
||||
":cl_arguments",
|
||||
":device_info",
|
||||
":gpu_object",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@com_google_googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cl_command_queue",
|
||||
srcs = ["cl_command_queue.cc"],
|
||||
|
@ -16,12 +16,8 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/cl/arguments.h"
|
||||
|
||||
#include "absl/strings/ascii.h"
|
||||
#include "absl/strings/match.h"
|
||||
#include "absl/strings/str_cat.h"
|
||||
#include "absl/strings/str_replace.h"
|
||||
#include "absl/strings/str_split.h"
|
||||
#include "absl/strings/substitute.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
|
||||
namespace tflite {
|
||||
@ -42,75 +38,17 @@ std::string GetNextWord(const std::string& code, size_t first_position) {
|
||||
return code.substr(first_position, pos - first_position);
|
||||
}
|
||||
|
||||
size_t FindEnclosingBracket(const std::string& text, size_t first_pos,
|
||||
char bracket) {
|
||||
const std::map<char, char> brackets = {
|
||||
{'(', ')'},
|
||||
{'{', '}'},
|
||||
{'[', ']'},
|
||||
{'<', '>'},
|
||||
};
|
||||
char b_open = bracket;
|
||||
auto it = brackets.find(b_open);
|
||||
if (it == brackets.end()) {
|
||||
return -1;
|
||||
}
|
||||
char b_close = it->second;
|
||||
size_t pos = first_pos;
|
||||
int opened = 1;
|
||||
int closed = 0;
|
||||
while (opened != closed && pos < text.size()) {
|
||||
if (text[pos] == b_open) {
|
||||
opened++;
|
||||
} else if (text[pos] == b_close) {
|
||||
closed++;
|
||||
bool HasWord(const std::string& word, const std::string& text) {
|
||||
size_t pos = text.find(word);
|
||||
while (pos != std::string::npos) {
|
||||
char prev = pos == 0 ? '.' : text[pos - 1];
|
||||
char next = pos + word.size() < text.size() ? text[pos + word.size()] : '.';
|
||||
if (!IsWordSymbol(prev) & !IsWordSymbol(next)) {
|
||||
return true;
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
if (opened == closed) {
|
||||
return pos;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
absl::Status ParseArgsInsideBrackets(const std::string& text,
|
||||
size_t open_bracket_pos,
|
||||
size_t* close_bracket_pos,
|
||||
std::vector<std::string>* args) {
|
||||
*close_bracket_pos =
|
||||
FindEnclosingBracket(text, open_bracket_pos + 1, text[open_bracket_pos]);
|
||||
if (*close_bracket_pos == -1) {
|
||||
return absl::NotFoundError("Not found enclosing bracket");
|
||||
}
|
||||
std::string str_args = text.substr(open_bracket_pos + 1,
|
||||
*close_bracket_pos - open_bracket_pos - 2);
|
||||
std::vector<absl::string_view> words = absl::StrSplit(str_args, ',');
|
||||
args->reserve(words.size());
|
||||
for (const auto& word : words) {
|
||||
absl::string_view arg = absl::StripAsciiWhitespace(word);
|
||||
if (!arg.empty()) {
|
||||
args->push_back(std::string(arg));
|
||||
}
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
void ReplaceAllWords(const std::string& old_word, const std::string& new_word,
|
||||
std::string* str) {
|
||||
size_t position = str->find(old_word);
|
||||
while (position != std::string::npos) {
|
||||
char prev = position == 0 ? '.' : (*str)[position - 1];
|
||||
char next = position + old_word.size() < str->size()
|
||||
? (*str)[position + old_word.size()]
|
||||
: '.';
|
||||
if (IsWordSymbol(prev) || IsWordSymbol(next)) {
|
||||
position = str->find(old_word, position + 1);
|
||||
continue;
|
||||
}
|
||||
str->replace(position, old_word.size(), new_word);
|
||||
position = str->find(old_word, position + new_word.size());
|
||||
pos = text.find(word, pos + 1);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string RenameArg(const std::vector<std::string>& object_names,
|
||||
@ -127,91 +65,8 @@ std::string RenameArg(const std::vector<std::string>& object_names,
|
||||
return arg_name + postfix;
|
||||
}
|
||||
|
||||
void AppendArgument(const std::string& arg, std::string* args) {
|
||||
if (!args->empty()) {
|
||||
absl::StrAppend(args, ",\n ");
|
||||
}
|
||||
absl::StrAppend(args, arg);
|
||||
}
|
||||
|
||||
std::string GetImageModifier(AccessType access) {
|
||||
switch (access) {
|
||||
case AccessType::READ:
|
||||
return "__read_only";
|
||||
case AccessType::WRITE:
|
||||
return "__write_only";
|
||||
case AccessType::READ_WRITE:
|
||||
return "__read_write";
|
||||
}
|
||||
}
|
||||
|
||||
std::string GetDefaultSamplers(const DeviceInfo& device_info) {
|
||||
std::string result;
|
||||
result +=
|
||||
"__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | "
|
||||
"CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n";
|
||||
if (device_info.IsAdreno3xx()) {
|
||||
// Unfortunately, CLK_ADDRESS_CLAMP is very slow on Adreno3xx and
|
||||
// we can observe huge register overhead when compared to other modes.
|
||||
|
||||
// While using CLK_ADDRESS_NONE with out-of-range image coordinates is
|
||||
// undefined in the OpenCL specification, we have observed that
|
||||
// CLK_ADDRESS_NONE works like CLK_ADDRESS_CLAMP for out-of-range image
|
||||
// coordinates for RGBA F16/F32 textures on Adreno3xx devices. Using
|
||||
// CLK_ADDRESS_NONE is significantly faster than CLK_ADDRESS_CLAMP on Adreno
|
||||
// 3xx.
|
||||
result +=
|
||||
"__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | "
|
||||
"CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n";
|
||||
} else {
|
||||
result +=
|
||||
"__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | "
|
||||
"CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n";
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Static
|
||||
constexpr char Arguments::kArgsPrefix[];
|
||||
|
||||
Arguments::Arguments(Arguments&& args)
|
||||
: int_values_(std::move(args.int_values_)),
|
||||
shared_int4s_data_(std::move(args.shared_int4s_data_)),
|
||||
float_values_(std::move(args.float_values_)),
|
||||
shared_float4s_data_(std::move(args.shared_float4s_data_)),
|
||||
half_values_(std::move(args.half_values_)),
|
||||
shared_half4s_data_(std::move(args.shared_half4s_data_)),
|
||||
buffers_(std::move(args.buffers_)),
|
||||
images2d_(std::move(args.images2d_)),
|
||||
image2d_arrays_(std::move(args.image2d_arrays_)),
|
||||
images3d_(std::move(args.images3d_)),
|
||||
image_buffers_(std::move(args.image_buffers_)),
|
||||
custom_memories_(std::move(args.custom_memories_)),
|
||||
object_refs_(std::move(args.object_refs_)),
|
||||
objects_(std::move(args.objects_)) {}
|
||||
Arguments& Arguments::operator=(Arguments&& args) {
|
||||
if (this != &args) {
|
||||
int_values_ = std::move(args.int_values_);
|
||||
shared_int4s_data_ = std::move(args.shared_int4s_data_);
|
||||
float_values_ = std::move(args.float_values_);
|
||||
shared_float4s_data_ = std::move(args.shared_float4s_data_);
|
||||
half_values_ = std::move(args.half_values_);
|
||||
shared_half4s_data_ = std::move(args.shared_half4s_data_);
|
||||
buffers_ = std::move(args.buffers_);
|
||||
images2d_ = std::move(args.images2d_);
|
||||
image2d_arrays_ = std::move(args.image2d_arrays_);
|
||||
images3d_ = std::move(args.images3d_);
|
||||
image_buffers_ = std::move(args.image_buffers_);
|
||||
custom_memories_ = std::move(args.custom_memories_);
|
||||
object_refs_ = std::move(args.object_refs_);
|
||||
objects_ = std::move(args.objects_);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
void Arguments::AddFloat(const std::string& name, float value) {
|
||||
float_values_[name].value = value;
|
||||
}
|
||||
@ -221,34 +76,6 @@ void Arguments::AddHalf(const std::string& name, half value) {
|
||||
void Arguments::AddInt(const std::string& name, int value) {
|
||||
int_values_[name].value = value;
|
||||
}
|
||||
void Arguments::AddBuffer(const std::string& name,
|
||||
const GPUBufferDescriptor& desc) {
|
||||
buffers_[name] = desc;
|
||||
}
|
||||
void Arguments::AddImage2D(const std::string& name,
|
||||
const GPUImage2DDescriptor& desc) {
|
||||
images2d_[name] = desc;
|
||||
}
|
||||
|
||||
void Arguments::AddImage2DArray(const std::string& name,
|
||||
const GPUImage2DArrayDescriptor& desc) {
|
||||
image2d_arrays_[name] = desc;
|
||||
}
|
||||
|
||||
void Arguments::AddImage3D(const std::string& name,
|
||||
const GPUImage3DDescriptor& desc) {
|
||||
images3d_[name] = desc;
|
||||
}
|
||||
|
||||
void Arguments::AddImageBuffer(const std::string& name,
|
||||
const GPUImageBufferDescriptor& desc) {
|
||||
image_buffers_[name] = desc;
|
||||
}
|
||||
|
||||
void Arguments::AddCustomMemory(const std::string& name,
|
||||
const GPUCustomMemoryDescriptor& desc) {
|
||||
custom_memories_[name] = desc;
|
||||
}
|
||||
|
||||
void Arguments::AddObjectRef(const std::string& name, AccessType access_type,
|
||||
GPUObjectDescriptorPtr&& descriptor_ptr) {
|
||||
@ -259,188 +86,12 @@ void Arguments::AddObjectRef(const std::string& name, AccessType access_type,
|
||||
void Arguments::AddObject(const std::string& name,
|
||||
GPUObjectDescriptorPtr&& descriptor_ptr) {
|
||||
descriptor_ptr->SetAccess(AccessType::READ);
|
||||
objects_[name] = {nullptr, std::move(descriptor_ptr)};
|
||||
}
|
||||
|
||||
void Arguments::AddGPUResources(const std::string& name,
|
||||
const GPUResources& resources) {
|
||||
for (const auto& r : resources.ints) {
|
||||
AddInt(absl::StrCat(name, "_", r));
|
||||
}
|
||||
for (const auto& r : resources.floats) {
|
||||
AddFloat(absl::StrCat(name, "_", r));
|
||||
}
|
||||
for (const auto& r : resources.buffers) {
|
||||
AddBuffer(absl::StrCat(name, "_", r.first), r.second);
|
||||
}
|
||||
for (const auto& r : resources.images2d) {
|
||||
AddImage2D(absl::StrCat(name, "_", r.first), r.second);
|
||||
}
|
||||
for (const auto& r : resources.image2d_arrays) {
|
||||
AddImage2DArray(absl::StrCat(name, "_", r.first), r.second);
|
||||
}
|
||||
for (const auto& r : resources.images3d) {
|
||||
AddImage3D(absl::StrCat(name, "_", r.first), r.second);
|
||||
}
|
||||
for (const auto& r : resources.image_buffers) {
|
||||
AddImageBuffer(absl::StrCat(name, "_", r.first), r.second);
|
||||
}
|
||||
for (const auto& r : resources.custom_memories) {
|
||||
AddCustomMemory(absl::StrCat(name, "_", r.first), r.second);
|
||||
}
|
||||
}
|
||||
|
||||
absl::Status Arguments::SetInt(const std::string& name, int value) {
|
||||
auto it = int_values_.find(name);
|
||||
if (it == int_values_.end()) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No int argument with name - ", name));
|
||||
}
|
||||
it->second.value = value;
|
||||
if (it->second.active) {
|
||||
shared_int4s_data_[it->second.offset] = value;
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status Arguments::SetFloat(const std::string& name, float value) {
|
||||
auto it = float_values_.find(name);
|
||||
if (it == float_values_.end()) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No float argument with name - ", name));
|
||||
}
|
||||
it->second.value = value;
|
||||
if (it->second.active) {
|
||||
shared_float4s_data_[it->second.offset] = value;
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status Arguments::SetHalf(const std::string& name, half value) {
|
||||
auto it = half_values_.find(name);
|
||||
if (it == half_values_.end()) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No half argument with name - ", name));
|
||||
}
|
||||
it->second.value = value;
|
||||
if (it->second.active) {
|
||||
if (it->second.store_as_f32) {
|
||||
shared_float4s_data_[it->second.offset] = value;
|
||||
} else {
|
||||
shared_half4s_data_[it->second.offset] = value;
|
||||
}
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status Arguments::SetImage2D(const std::string& name, cl_mem memory) {
|
||||
auto it = images2d_.find(name);
|
||||
if (it == images2d_.end()) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No image2D argument with name - ", name));
|
||||
}
|
||||
it->second.memory = memory;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status Arguments::SetBuffer(const std::string& name, cl_mem memory) {
|
||||
auto it = buffers_.find(name);
|
||||
if (it == buffers_.end()) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No buffer argument with name - ", name));
|
||||
}
|
||||
it->second.memory = memory;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status Arguments::SetImage2DArray(const std::string& name,
|
||||
cl_mem memory) {
|
||||
auto it = image2d_arrays_.find(name);
|
||||
if (it == image2d_arrays_.end()) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No image2D array argument with name - ", name));
|
||||
}
|
||||
it->second.memory = memory;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status Arguments::SetImage3D(const std::string& name, cl_mem memory) {
|
||||
auto it = images3d_.find(name);
|
||||
if (it == images3d_.end()) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No image3D argument with name - ", name));
|
||||
}
|
||||
it->second.memory = memory;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status Arguments::SetImageBuffer(const std::string& name, cl_mem memory) {
|
||||
auto it = image_buffers_.find(name);
|
||||
if (it == image_buffers_.end()) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No image buffer argument with name - ", name));
|
||||
}
|
||||
it->second.memory = memory;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status Arguments::SetCustomMemory(const std::string& name,
|
||||
cl_mem memory) {
|
||||
auto it = custom_memories_.find(name);
|
||||
if (it == custom_memories_.end()) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No custom memory argument with name - ", name));
|
||||
}
|
||||
it->second.memory = memory;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status Arguments::SetObjectRef(const std::string& name,
|
||||
const GPUObject* object) {
|
||||
auto it = object_refs_.find(name);
|
||||
if (it == object_refs_.end()) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No object ref with name - ", name));
|
||||
}
|
||||
GPUResourcesWithValue resources;
|
||||
RETURN_IF_ERROR(
|
||||
object->GetGPUResources(it->second.descriptor.get(), &resources));
|
||||
return SetGPUResources(name, resources);
|
||||
}
|
||||
|
||||
absl::Status Arguments::SetGPUResources(
|
||||
const std::string& name, const GPUResourcesWithValue& resources) {
|
||||
for (const auto& r : resources.ints) {
|
||||
RETURN_IF_ERROR(SetInt(absl::StrCat(name, "_", r.first), r.second));
|
||||
}
|
||||
for (const auto& r : resources.floats) {
|
||||
RETURN_IF_ERROR(SetFloat(absl::StrCat(name, "_", r.first), r.second));
|
||||
}
|
||||
for (const auto& r : resources.buffers) {
|
||||
RETURN_IF_ERROR(SetBuffer(absl::StrCat(name, "_", r.first), r.second));
|
||||
}
|
||||
for (const auto& r : resources.images2d) {
|
||||
RETURN_IF_ERROR(SetImage2D(absl::StrCat(name, "_", r.first), r.second));
|
||||
}
|
||||
for (const auto& r : resources.image2d_arrays) {
|
||||
RETURN_IF_ERROR(
|
||||
SetImage2DArray(absl::StrCat(name, "_", r.first), r.second));
|
||||
}
|
||||
for (const auto& r : resources.images3d) {
|
||||
RETURN_IF_ERROR(SetImage3D(absl::StrCat(name, "_", r.first), r.second));
|
||||
}
|
||||
for (const auto& r : resources.image_buffers) {
|
||||
RETURN_IF_ERROR(SetImageBuffer(absl::StrCat(name, "_", r.first), r.second));
|
||||
}
|
||||
for (const auto& r : resources.custom_memories) {
|
||||
RETURN_IF_ERROR(
|
||||
SetCustomMemory(absl::StrCat(name, "_", r.first), r.second));
|
||||
}
|
||||
return absl::OkStatus();
|
||||
objects_[name] = {std::move(descriptor_ptr)};
|
||||
}
|
||||
|
||||
void Arguments::RenameArgs(const std::string& postfix,
|
||||
std::string* code) const {
|
||||
static constexpr char kArgsPrefix[] = "args.";
|
||||
size_t next_position = code->find(kArgsPrefix);
|
||||
while (next_position != std::string::npos) {
|
||||
size_t arg_pos = next_position + strlen(kArgsPrefix);
|
||||
@ -460,7 +111,7 @@ absl::Status Arguments::Merge(Arguments&& args, const std::string& postfix) {
|
||||
return absl::InvalidArgumentError(
|
||||
absl::StrCat("Object reference name collision. Name - ", name));
|
||||
}
|
||||
object_refs_[name] = {std::move(v.second.descriptor)};
|
||||
object_refs_[name] = {std::move(v.second)};
|
||||
}
|
||||
for (auto& v : args.objects_) {
|
||||
object_names.push_back(v.first);
|
||||
@ -469,8 +120,7 @@ absl::Status Arguments::Merge(Arguments&& args, const std::string& postfix) {
|
||||
return absl::InvalidArgumentError(
|
||||
absl::StrCat("Object name collision. Name - ", name));
|
||||
}
|
||||
objects_[name] = {std::move(v.second.obj_ptr),
|
||||
std::move(v.second.descriptor)};
|
||||
objects_[name] = {std::move(v.second)};
|
||||
}
|
||||
for (const auto& v : args.int_values_) {
|
||||
AddInt(RenameArg(object_names, postfix, v.first), v.second.value);
|
||||
@ -481,412 +131,26 @@ absl::Status Arguments::Merge(Arguments&& args, const std::string& postfix) {
|
||||
for (const auto& v : args.half_values_) {
|
||||
AddHalf(RenameArg(object_names, postfix, v.first), v.second.value);
|
||||
}
|
||||
for (const auto& v : args.buffers_) {
|
||||
AddBuffer(RenameArg(object_names, postfix, v.first), v.second);
|
||||
}
|
||||
for (const auto& v : args.images2d_) {
|
||||
AddImage2D(RenameArg(object_names, postfix, v.first), v.second);
|
||||
}
|
||||
for (const auto& v : args.image2d_arrays_) {
|
||||
AddImage2DArray(RenameArg(object_names, postfix, v.first), v.second);
|
||||
}
|
||||
for (const auto& v : args.images3d_) {
|
||||
AddImage3D(RenameArg(object_names, postfix, v.first), v.second);
|
||||
}
|
||||
for (const auto& v : args.image_buffers_) {
|
||||
AddImageBuffer(RenameArg(object_names, postfix, v.first), v.second);
|
||||
}
|
||||
for (const auto& v : args.custom_memories_) {
|
||||
AddCustomMemory(RenameArg(object_names, postfix, v.first), v.second);
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status Arguments::TransformToCLCode(
|
||||
const DeviceInfo& device_info,
|
||||
const std::map<std::string, std::string>& linkables, std::string* code) {
|
||||
RETURN_IF_ERROR(AddObjectArgs());
|
||||
RETURN_IF_ERROR(ResolveSelectorsPass(linkables, code));
|
||||
ResolveArgsPass(device_info, code);
|
||||
*code = absl::Substitute(*code, GetListOfArgs());
|
||||
*code = GetDefaultSamplers(device_info) + *code;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
std::string Arguments::GetListOfArgs() {
|
||||
std::string result;
|
||||
for (auto& t : buffers_) {
|
||||
const std::string type_name =
|
||||
t.second.data_type == DataType::FLOAT32 ? "float" : "half";
|
||||
std::string attributes;
|
||||
for (const auto& attr : t.second.attributes) {
|
||||
attributes += absl::StrCat(" __attribute__((", attr, "))");
|
||||
}
|
||||
AppendArgument(
|
||||
absl::StrCat(MemoryTypeToCLType(t.second.memory_type), " ",
|
||||
ToCLDataType(t.second.data_type, t.second.element_size),
|
||||
"* ", t.first, attributes),
|
||||
&result);
|
||||
}
|
||||
for (auto& t : image_buffers_) {
|
||||
AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type),
|
||||
" image1d_buffer_t ", t.first),
|
||||
&result);
|
||||
}
|
||||
for (auto& t : images2d_) {
|
||||
AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type),
|
||||
" image2d_t ", t.first),
|
||||
&result);
|
||||
}
|
||||
for (auto& t : image2d_arrays_) {
|
||||
AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type),
|
||||
" image2d_array_t ", t.first),
|
||||
&result);
|
||||
}
|
||||
for (auto& t : images3d_) {
|
||||
AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type),
|
||||
" image3d_t ", t.first),
|
||||
&result);
|
||||
}
|
||||
for (auto& t : custom_memories_) {
|
||||
AppendArgument(absl::StrCat(t.second.type_name, " ", t.first), &result);
|
||||
}
|
||||
for (int i = 0; i < shared_int4s_data_.size() / 4; ++i) {
|
||||
AppendArgument(absl::StrCat("int4 shared_int4_", i), &result);
|
||||
}
|
||||
for (int i = 0; i < shared_float4s_data_.size() / 4; ++i) {
|
||||
AppendArgument(absl::StrCat("float4 shared_float4_", i), &result);
|
||||
}
|
||||
for (int i = 0; i < shared_half4s_data_.size() / 4; ++i) {
|
||||
AppendArgument(absl::StrCat("half4 shared_half4_", i), &result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
absl::Status Arguments::Bind(cl_kernel kernel, int offset) {
|
||||
for (auto& t : buffers_) {
|
||||
const int error_code =
|
||||
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
|
||||
if (error_code != CL_SUCCESS) {
|
||||
return absl::UnknownError(absl::StrCat(
|
||||
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
|
||||
"(at index - ", offset, ")"));
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
for (auto& t : image_buffers_) {
|
||||
const int error_code =
|
||||
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
|
||||
if (error_code != CL_SUCCESS) {
|
||||
return absl::UnknownError(absl::StrCat(
|
||||
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
|
||||
"(at index - ", offset, ")"));
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
for (auto& t : images2d_) {
|
||||
const int error_code =
|
||||
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
|
||||
if (error_code != CL_SUCCESS) {
|
||||
return absl::UnknownError(absl::StrCat(
|
||||
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
|
||||
"(at index - ", offset, ")"));
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
for (auto& t : image2d_arrays_) {
|
||||
const int error_code =
|
||||
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
|
||||
if (error_code != CL_SUCCESS) {
|
||||
return absl::UnknownError(absl::StrCat(
|
||||
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
|
||||
"(at index - ", offset, ")"));
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
for (auto& t : images3d_) {
|
||||
const int error_code =
|
||||
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
|
||||
if (error_code != CL_SUCCESS) {
|
||||
return absl::UnknownError(absl::StrCat(
|
||||
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
|
||||
"(at index - ", offset, ")"));
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
for (auto& t : custom_memories_) {
|
||||
const int error_code =
|
||||
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
|
||||
if (error_code != CL_SUCCESS) {
|
||||
return absl::UnknownError(absl::StrCat(
|
||||
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
|
||||
"(at index - ", offset, ")"));
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
for (int i = 0; i < shared_int4s_data_.size() / 4; ++i) {
|
||||
const int error_code = clSetKernelArg(kernel, offset, sizeof(int32_t) * 4,
|
||||
&shared_int4s_data_[i * 4]);
|
||||
if (error_code != CL_SUCCESS) {
|
||||
return absl::UnknownError(absl::StrCat(
|
||||
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
|
||||
"(at index - ", offset, ")"));
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
for (int i = 0; i < shared_float4s_data_.size() / 4; ++i) {
|
||||
const int error_code = clSetKernelArg(kernel, offset, sizeof(int32_t) * 4,
|
||||
&shared_float4s_data_[i * 4]);
|
||||
if (error_code != CL_SUCCESS) {
|
||||
return absl::UnknownError(absl::StrCat(
|
||||
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
|
||||
"(at index - ", offset, ")"));
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
for (int i = 0; i < shared_half4s_data_.size() / 4; ++i) {
|
||||
const int error_code = clSetKernelArg(kernel, offset, sizeof(int16_t) * 4,
|
||||
&shared_half4s_data_[i * 4]);
|
||||
if (error_code != CL_SUCCESS) {
|
||||
return absl::UnknownError(absl::StrCat(
|
||||
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
|
||||
"(at index - ", offset, ")"));
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
std::string Arguments::AddActiveArgument(const std::string& arg_name,
|
||||
bool use_f32_for_halfs) {
|
||||
{
|
||||
auto it = int_values_.find(arg_name);
|
||||
if (it != int_values_.end()) {
|
||||
int int_index;
|
||||
if (it->second.active) {
|
||||
int_index = it->second.offset;
|
||||
} else {
|
||||
it->second.active = true;
|
||||
it->second.offset = shared_int4s_data_.size();
|
||||
int_index = it->second.offset;
|
||||
shared_int4s_data_.push_back(it->second.value);
|
||||
}
|
||||
std::string index = std::to_string(int_index / 4);
|
||||
std::string postfixes[4] = {"x", "y", "z", "w"};
|
||||
return "shared_int4_" + index + "." + postfixes[int_index % 4];
|
||||
}
|
||||
}
|
||||
{
|
||||
auto it = float_values_.find(arg_name);
|
||||
if (it != float_values_.end()) {
|
||||
int float_index;
|
||||
if (it->second.active) {
|
||||
float_index = it->second.offset;
|
||||
} else {
|
||||
it->second.active = true;
|
||||
it->second.offset = shared_float4s_data_.size();
|
||||
float_index = it->second.offset;
|
||||
shared_float4s_data_.push_back(it->second.value);
|
||||
}
|
||||
std::string index = std::to_string(float_index / 4);
|
||||
std::string postfixes[4] = {"x", "y", "z", "w"};
|
||||
return "shared_float4_" + index + "." + postfixes[float_index % 4];
|
||||
}
|
||||
}
|
||||
{
|
||||
auto it = half_values_.find(arg_name);
|
||||
if (it != half_values_.end()) {
|
||||
int half_index;
|
||||
if (it->second.active) {
|
||||
half_index = it->second.offset;
|
||||
} else {
|
||||
it->second.active = true;
|
||||
if (use_f32_for_halfs) {
|
||||
it->second.store_as_f32 = true;
|
||||
it->second.offset = shared_float4s_data_.size();
|
||||
shared_float4s_data_.push_back(it->second.value);
|
||||
} else {
|
||||
it->second.offset = shared_half4s_data_.size();
|
||||
shared_half4s_data_.push_back(it->second.value);
|
||||
}
|
||||
half_index = it->second.offset;
|
||||
}
|
||||
std::string index = std::to_string(half_index / 4);
|
||||
std::string postfixes[4] = {"x", "y", "z", "w"};
|
||||
if (it->second.store_as_f32) {
|
||||
return "(half)(shared_float4_" + index + "." +
|
||||
postfixes[half_index % 4] + ")";
|
||||
}
|
||||
return "shared_half4_" + index + "." + postfixes[half_index % 4];
|
||||
}
|
||||
}
|
||||
return arg_name;
|
||||
}
|
||||
|
||||
void Arguments::ResolveArgsPass(const DeviceInfo& device_info,
|
||||
std::string* code) {
|
||||
bool use_f32_for_half_arguments = device_info.IsPowerVR();
|
||||
size_t position = 0;
|
||||
size_t next_position = code->find(kArgsPrefix);
|
||||
while (next_position != std::string::npos) {
|
||||
size_t arg_pos = next_position;
|
||||
next_position += strlen(kArgsPrefix);
|
||||
std::string object_name = GetNextWord(*code, next_position);
|
||||
std::string new_name =
|
||||
AddActiveArgument(object_name, use_f32_for_half_arguments);
|
||||
code->replace(arg_pos, object_name.size() + strlen(kArgsPrefix), new_name);
|
||||
position = arg_pos + new_name.size();
|
||||
next_position = code->find(kArgsPrefix, position);
|
||||
}
|
||||
|
||||
int shared_int4s_aligned_size = AlignByN(shared_int4s_data_.size(), 4);
|
||||
shared_int4s_data_.resize(shared_int4s_aligned_size);
|
||||
int shared_float4s_aligned_size = AlignByN(shared_float4s_data_.size(), 4);
|
||||
shared_float4s_data_.resize(shared_float4s_aligned_size);
|
||||
int shared_half4s_aligned_size = AlignByN(shared_half4s_data_.size(), 4);
|
||||
shared_half4s_data_.resize(shared_half4s_aligned_size);
|
||||
}
|
||||
|
||||
void Arguments::ResolveObjectNames(const std::string& object_name,
|
||||
const std::vector<std::string>& member_names,
|
||||
std::string* code) {
|
||||
for (const auto& member_name : member_names) {
|
||||
const std::string new_name = kArgsPrefix + object_name + "_" + member_name;
|
||||
ReplaceAllWords(member_name, new_name, code);
|
||||
}
|
||||
}
|
||||
|
||||
GPUObjectDescriptor* Arguments::GetObjectDescriptor(
|
||||
const std::string& object_name) const {
|
||||
{
|
||||
auto it = object_refs_.find(object_name);
|
||||
if (it != object_refs_.end()) {
|
||||
return it->second.descriptor.get();
|
||||
}
|
||||
}
|
||||
{
|
||||
auto it = objects_.find(object_name);
|
||||
if (it != objects_.end()) {
|
||||
return it->second.descriptor.get();
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
absl::Status Arguments::ResolveSelector(
|
||||
const std::map<std::string, std::string>& linkables,
|
||||
const std::string& object_name, const std::string& selector,
|
||||
const std::vector<std::string>& args,
|
||||
const std::vector<std::string>& template_args, std::string* result) {
|
||||
const GPUObjectDescriptor* desc_ptr = GetObjectDescriptor(object_name);
|
||||
if (!desc_ptr) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No object with name - ", object_name));
|
||||
}
|
||||
auto names = desc_ptr->GetGPUResources().GetNames();
|
||||
const auto* tensor_desc = dynamic_cast<const TensorDescriptor*>(desc_ptr);
|
||||
if (tensor_desc && selector == "Write") {
|
||||
auto it = linkables.find(object_name);
|
||||
if (it != linkables.end()) {
|
||||
if (desc_ptr->GetAccess() != AccessType::WRITE &&
|
||||
desc_ptr->GetAccess() != AccessType::READ_WRITE) {
|
||||
return absl::FailedPreconditionError(absl::StrCat(
|
||||
"Object with name - ", object_name, " should have Write access."));
|
||||
}
|
||||
std::string value_name, x_coord, y_coord, s_coord;
|
||||
RETURN_IF_ERROR(tensor_desc->GetLinkingContextFromWriteSelector(
|
||||
args, &value_name, &x_coord, &y_coord, &s_coord));
|
||||
// x_coord can have batch size property of link_object
|
||||
ResolveObjectNames(object_name, names, &x_coord);
|
||||
*result = it->second;
|
||||
ReplaceAllWords("in_out_value", value_name, result);
|
||||
ReplaceAllWords("X_COORD", x_coord, result);
|
||||
ReplaceAllWords("Y_COORD", y_coord, result);
|
||||
ReplaceAllWords("S_COORD", s_coord, result);
|
||||
RETURN_IF_ERROR(ResolveSelectorsPass({}, result));
|
||||
}
|
||||
}
|
||||
std::string patch;
|
||||
RETURN_IF_ERROR(
|
||||
desc_ptr->PerformSelector(selector, args, template_args, &patch));
|
||||
ResolveObjectNames(object_name, names, &patch);
|
||||
*result += patch;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status Arguments::ResolveSelectorsPass(
|
||||
const std::map<std::string, std::string>& linkables, std::string* code) {
|
||||
std::string result;
|
||||
size_t position = 0;
|
||||
size_t next_position = code->find(kArgsPrefix);
|
||||
while (next_position != std::string::npos) {
|
||||
size_t arg_pos = next_position;
|
||||
next_position += strlen(kArgsPrefix);
|
||||
std::string object_name = GetNextWord(*code, next_position);
|
||||
char next = (*code)[next_position + object_name.size()];
|
||||
if (next == '.') {
|
||||
next_position += object_name.size() + 1;
|
||||
std::string selector_name = GetNextWord(*code, next_position);
|
||||
next_position += selector_name.size();
|
||||
next = (*code)[next_position];
|
||||
std::vector<std::string> template_args;
|
||||
if (next == '<') {
|
||||
size_t close_bracket_pos;
|
||||
RETURN_IF_ERROR(ParseArgsInsideBrackets(
|
||||
*code, next_position, &close_bracket_pos, &template_args));
|
||||
next_position = close_bracket_pos;
|
||||
next = (*code)[next_position];
|
||||
}
|
||||
if (next != '(') {
|
||||
return absl::NotFoundError(absl::StrCat(
|
||||
"Expected ( after ", object_name, ".", selector_name, " call"));
|
||||
}
|
||||
std::vector<std::string> args;
|
||||
size_t close_bracket_pos;
|
||||
RETURN_IF_ERROR(ParseArgsInsideBrackets(*code, next_position,
|
||||
&close_bracket_pos, &args));
|
||||
for (auto& arg : args) {
|
||||
RETURN_IF_ERROR(ResolveSelectorsPass({}, &arg));
|
||||
}
|
||||
std::string patch;
|
||||
RETURN_IF_ERROR(ResolveSelector(linkables, object_name, selector_name,
|
||||
args, template_args, &patch));
|
||||
code->replace(arg_pos, close_bracket_pos - arg_pos, patch);
|
||||
position = arg_pos + patch.size();
|
||||
} else {
|
||||
position = arg_pos + strlen(kArgsPrefix);
|
||||
}
|
||||
next_position = code->find(kArgsPrefix, position);
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status Arguments::AllocateObjects(CLContext* context) {
|
||||
for (auto& t : objects_) {
|
||||
RETURN_IF_ERROR(
|
||||
t.second.descriptor->CreateGPUObject(context, &t.second.obj_ptr));
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
void Arguments::ReleaseCPURepresentation() {
|
||||
for (auto& t : objects_) {
|
||||
t.second.descriptor->Release();
|
||||
t.second->Release();
|
||||
}
|
||||
}
|
||||
|
||||
absl::Status Arguments::AddObjectArgs() {
|
||||
for (auto& t : objects_) {
|
||||
AddGPUResources(t.first, t.second.descriptor->GetGPUResources());
|
||||
GPUResourcesWithValue resources;
|
||||
RETURN_IF_ERROR(t.second.obj_ptr->GetGPUResources(t.second.descriptor.get(),
|
||||
&resources));
|
||||
RETURN_IF_ERROR(SetGPUResources(t.first, resources));
|
||||
void Arguments::GetActiveArguments(const std::string& args_prefix,
|
||||
const std::string& code) {
|
||||
for (auto& float_val : float_values_) {
|
||||
float_val.second.active = HasWord(args_prefix + float_val.first, code);
|
||||
}
|
||||
for (auto& t : object_refs_) {
|
||||
AddGPUResources(t.first, t.second.descriptor->GetGPUResources());
|
||||
for (auto& int_val : int_values_) {
|
||||
int_val.second.active = HasWord(args_prefix + int_val.first, code);
|
||||
}
|
||||
for (auto& half_val : half_values_) {
|
||||
half_val.second.active = HasWord(args_prefix + half_val.first, code);
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
} // namespace cl
|
||||
|
@ -20,11 +20,8 @@ limitations under the License.
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/access_type.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/types.h"
|
||||
@ -39,12 +36,22 @@ class ArgumentsBinder {
|
||||
virtual absl::Status SetInt(const std::string& name, int value) = 0;
|
||||
virtual absl::Status SetFloat(const std::string& name, float value) = 0;
|
||||
virtual absl::Status SetHalf(const std::string& name, half value) = 0;
|
||||
virtual absl::Status SetObjectRef(const std::string& name,
|
||||
const GPUObject* object) = 0;
|
||||
virtual ~ArgumentsBinder() = default;
|
||||
};
|
||||
|
||||
class Arguments : public ArgumentsBinder {
|
||||
class Arguments {
|
||||
public:
|
||||
Arguments() = default;
|
||||
~Arguments() = default;
|
||||
|
||||
// Move only
|
||||
Arguments(Arguments&& args) = default;
|
||||
Arguments& operator=(Arguments&& args) = default;
|
||||
Arguments(const Arguments&) = delete;
|
||||
Arguments& operator=(const Arguments&) = delete;
|
||||
|
||||
void AddFloat(const std::string& name, float value = 0.0f);
|
||||
void AddHalf(const std::string& name, half value = half(0.0f));
|
||||
void AddInt(const std::string& name, int value = 0);
|
||||
@ -53,82 +60,19 @@ class Arguments : public ArgumentsBinder {
|
||||
void AddObject(const std::string& name,
|
||||
GPUObjectDescriptorPtr&& descriptor_ptr);
|
||||
|
||||
absl::Status SetInt(const std::string& name, int value) override;
|
||||
absl::Status SetFloat(const std::string& name, float value) override;
|
||||
absl::Status SetHalf(const std::string& name, half value) override;
|
||||
absl::Status SetObjectRef(const std::string& name, const GPUObject* object);
|
||||
|
||||
absl::Status Bind(cl_kernel kernel, int offset = 0);
|
||||
|
||||
void RenameArgs(const std::string& postfix, std::string* code) const;
|
||||
absl::Status Merge(Arguments&& args, const std::string& postfix);
|
||||
|
||||
absl::Status AllocateObjects(CLContext* context);
|
||||
void ReleaseCPURepresentation();
|
||||
absl::Status TransformToCLCode(
|
||||
const DeviceInfo& device_info,
|
||||
const std::map<std::string, std::string>& linkables, std::string* code);
|
||||
|
||||
// Move only
|
||||
Arguments(Arguments&& args);
|
||||
Arguments& operator=(Arguments&& args);
|
||||
Arguments(const Arguments&) = delete;
|
||||
Arguments& operator=(const Arguments&) = delete;
|
||||
|
||||
~Arguments() override = default;
|
||||
|
||||
private:
|
||||
friend flatbuffers::Offset<data::Arguments> Encode(
|
||||
const Arguments& args, flatbuffers::FlatBufferBuilder* builder);
|
||||
friend absl::Status Decode(CLContext* context, const data::Arguments* fb_args,
|
||||
Arguments* args);
|
||||
friend absl::Status Decode(const data::Arguments* fb_args, Arguments* args);
|
||||
|
||||
void AddBuffer(const std::string& name, const GPUBufferDescriptor& desc);
|
||||
void AddImage2D(const std::string& name, const GPUImage2DDescriptor& desc);
|
||||
void AddImage2DArray(const std::string& name,
|
||||
const GPUImage2DArrayDescriptor& desc);
|
||||
void AddImage3D(const std::string& name, const GPUImage3DDescriptor& desc);
|
||||
void AddImageBuffer(const std::string& name,
|
||||
const GPUImageBufferDescriptor& desc);
|
||||
void AddCustomMemory(const std::string& name,
|
||||
const GPUCustomMemoryDescriptor& desc);
|
||||
|
||||
absl::Status SetImage2D(const std::string& name, cl_mem memory);
|
||||
absl::Status SetBuffer(const std::string& name, cl_mem memory);
|
||||
absl::Status SetImage2DArray(const std::string& name, cl_mem memory);
|
||||
absl::Status SetImage3D(const std::string& name, cl_mem memory);
|
||||
absl::Status SetImageBuffer(const std::string& name, cl_mem memory);
|
||||
absl::Status SetCustomMemory(const std::string& name, cl_mem memory);
|
||||
|
||||
std::string GetListOfArgs();
|
||||
|
||||
std::string AddActiveArgument(const std::string& arg_name,
|
||||
bool use_f32_for_halfs);
|
||||
void AddGPUResources(const std::string& name, const GPUResources& resources);
|
||||
|
||||
absl::Status SetGPUResources(const std::string& name,
|
||||
const GPUResourcesWithValue& resources);
|
||||
|
||||
absl::Status AddObjectArgs();
|
||||
|
||||
void ResolveArgsPass(const DeviceInfo& device_info, std::string* code);
|
||||
absl::Status ResolveSelectorsPass(
|
||||
const std::map<std::string, std::string>& linkables, std::string* code);
|
||||
|
||||
absl::Status ResolveSelector(
|
||||
const std::map<std::string, std::string>& linkables,
|
||||
const std::string& object_name, const std::string& selector,
|
||||
const std::vector<std::string>& args,
|
||||
const std::vector<std::string>& template_args, std::string* result);
|
||||
|
||||
void ResolveObjectNames(const std::string& object_name,
|
||||
const std::vector<std::string>& member_names,
|
||||
std::string* code);
|
||||
|
||||
GPUObjectDescriptor* GetObjectDescriptor(
|
||||
const std::string& object_name) const;
|
||||
|
||||
static constexpr char kArgsPrefix[] = "args.";
|
||||
friend class CLArguments;
|
||||
void GetActiveArguments(const std::string& args_prefix,
|
||||
const std::string& code);
|
||||
|
||||
struct IntValue {
|
||||
int value;
|
||||
@ -136,12 +80,8 @@ class Arguments : public ArgumentsBinder {
|
||||
// many uniforms generated automatically and not used
|
||||
// to reduce amount of data transferred we adding this optimization
|
||||
bool active = false;
|
||||
|
||||
// offset to shared uniform storage.
|
||||
uint32_t offset = -1;
|
||||
};
|
||||
std::map<std::string, IntValue> int_values_;
|
||||
std::vector<int32_t> shared_int4s_data_;
|
||||
|
||||
struct FloatValue {
|
||||
float value;
|
||||
@ -149,12 +89,8 @@ class Arguments : public ArgumentsBinder {
|
||||
// many uniforms generated automatically and not used
|
||||
// to reduce amount of data transferred we adding this optimization
|
||||
bool active = false;
|
||||
|
||||
// offset to shared uniform storage.
|
||||
uint32_t offset = -1;
|
||||
};
|
||||
std::map<std::string, FloatValue> float_values_;
|
||||
std::vector<float> shared_float4s_data_;
|
||||
|
||||
struct HalfValue {
|
||||
half value;
|
||||
@ -162,33 +98,11 @@ class Arguments : public ArgumentsBinder {
|
||||
// many uniforms generated automatically and not used
|
||||
// to reduce amount of data transferred we adding this optimization
|
||||
bool active = false;
|
||||
|
||||
// some devices have issues with half parameters.
|
||||
bool store_as_f32 = false;
|
||||
|
||||
// offset to shared uniform storage.
|
||||
uint32_t offset = -1;
|
||||
};
|
||||
std::map<std::string, HalfValue> half_values_;
|
||||
std::vector<half> shared_half4s_data_;
|
||||
|
||||
std::map<std::string, GPUBufferDescriptor> buffers_;
|
||||
std::map<std::string, GPUImage2DDescriptor> images2d_;
|
||||
std::map<std::string, GPUImage2DArrayDescriptor> image2d_arrays_;
|
||||
std::map<std::string, GPUImage3DDescriptor> images3d_;
|
||||
std::map<std::string, GPUImageBufferDescriptor> image_buffers_;
|
||||
std::map<std::string, GPUCustomMemoryDescriptor> custom_memories_;
|
||||
|
||||
struct ObjectRefArg {
|
||||
GPUObjectDescriptorPtr descriptor;
|
||||
};
|
||||
std::map<std::string, ObjectRefArg> object_refs_;
|
||||
|
||||
struct ObjectArg {
|
||||
GPUObjectPtr obj_ptr;
|
||||
GPUObjectDescriptorPtr descriptor;
|
||||
};
|
||||
std::map<std::string, ObjectArg> objects_;
|
||||
std::map<std::string, GPUObjectDescriptorPtr> object_refs_;
|
||||
std::map<std::string, GPUObjectDescriptorPtr> objects_;
|
||||
};
|
||||
|
||||
} // namespace cl
|
||||
|
788
tensorflow/lite/delegates/gpu/cl/cl_arguments.cc
Normal file
788
tensorflow/lite/delegates/gpu/cl/cl_arguments.cc
Normal file
@ -0,0 +1,788 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_arguments.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "absl/strings/ascii.h"
|
||||
#include "absl/strings/match.h"
|
||||
#include "absl/strings/str_cat.h"
|
||||
#include "absl/strings/substitute.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/util.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
namespace cl {
|
||||
namespace {
|
||||
bool IsWordSymbol(char symbol) {
|
||||
return absl::ascii_isalnum(symbol) || symbol == '_';
|
||||
}
|
||||
|
||||
void ReplaceAllWords(const std::string& old_word, const std::string& new_word,
|
||||
std::string* str) {
|
||||
size_t position = str->find(old_word);
|
||||
while (position != std::string::npos) {
|
||||
char prev = position == 0 ? '.' : (*str)[position - 1];
|
||||
char next = position + old_word.size() < str->size()
|
||||
? (*str)[position + old_word.size()]
|
||||
: '.';
|
||||
if (IsWordSymbol(prev) || IsWordSymbol(next)) {
|
||||
position = str->find(old_word, position + 1);
|
||||
continue;
|
||||
}
|
||||
str->replace(position, old_word.size(), new_word);
|
||||
position = str->find(old_word, position + new_word.size());
|
||||
}
|
||||
}
|
||||
|
||||
std::string GetNextWord(const std::string& code, size_t first_position) {
|
||||
size_t pos = first_position;
|
||||
char t = code[pos];
|
||||
while (IsWordSymbol(t)) {
|
||||
pos++;
|
||||
t = code[pos];
|
||||
}
|
||||
return code.substr(first_position, pos - first_position);
|
||||
}
|
||||
|
||||
size_t FindEnclosingBracket(const std::string& text, size_t first_pos,
|
||||
char bracket) {
|
||||
const std::map<char, char> brackets = {
|
||||
{'(', ')'},
|
||||
{'{', '}'},
|
||||
{'[', ']'},
|
||||
{'<', '>'},
|
||||
};
|
||||
char b_open = bracket;
|
||||
auto it = brackets.find(b_open);
|
||||
if (it == brackets.end()) {
|
||||
return -1;
|
||||
}
|
||||
char b_close = it->second;
|
||||
size_t pos = first_pos;
|
||||
int opened = 1;
|
||||
int closed = 0;
|
||||
while (opened != closed && pos < text.size()) {
|
||||
if (text[pos] == b_open) {
|
||||
opened++;
|
||||
} else if (text[pos] == b_close) {
|
||||
closed++;
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
if (opened == closed) {
|
||||
return pos;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
absl::Status ParseArgsInsideBrackets(const std::string& text,
|
||||
size_t open_bracket_pos,
|
||||
size_t* close_bracket_pos,
|
||||
std::vector<std::string>* args) {
|
||||
*close_bracket_pos =
|
||||
FindEnclosingBracket(text, open_bracket_pos + 1, text[open_bracket_pos]);
|
||||
if (*close_bracket_pos == -1) {
|
||||
return absl::NotFoundError("Not found enclosing bracket");
|
||||
}
|
||||
std::string str_args = text.substr(open_bracket_pos + 1,
|
||||
*close_bracket_pos - open_bracket_pos - 2);
|
||||
std::vector<absl::string_view> words = absl::StrSplit(str_args, ',');
|
||||
args->reserve(words.size());
|
||||
for (const auto& word : words) {
|
||||
absl::string_view arg = absl::StripAsciiWhitespace(word);
|
||||
if (!arg.empty()) {
|
||||
args->push_back(std::string(arg));
|
||||
}
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
void AppendArgument(const std::string& arg, std::string* args) {
|
||||
if (!args->empty()) {
|
||||
absl::StrAppend(args, ",\n ");
|
||||
}
|
||||
absl::StrAppend(args, arg);
|
||||
}
|
||||
|
||||
std::string GetImageModifier(AccessType access) {
|
||||
switch (access) {
|
||||
case AccessType::READ:
|
||||
return "__read_only";
|
||||
case AccessType::WRITE:
|
||||
return "__write_only";
|
||||
case AccessType::READ_WRITE:
|
||||
return "__read_write";
|
||||
}
|
||||
}
|
||||
|
||||
std::string GetDefaultSamplers(const DeviceInfo& device_info) {
|
||||
std::string result;
|
||||
result +=
|
||||
"__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | "
|
||||
"CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n";
|
||||
if (device_info.IsAdreno3xx()) {
|
||||
// Unfortunately, CLK_ADDRESS_CLAMP is very slow on Adreno3xx and
|
||||
// we can observe huge register overhead when compared to other modes.
|
||||
|
||||
// While using CLK_ADDRESS_NONE with out-of-range image coordinates is
|
||||
// undefined in the OpenCL specification, we have observed that
|
||||
// CLK_ADDRESS_NONE works like CLK_ADDRESS_CLAMP for out-of-range image
|
||||
// coordinates for RGBA F16/F32 textures on Adreno3xx devices. Using
|
||||
// CLK_ADDRESS_NONE is significantly faster than CLK_ADDRESS_CLAMP on Adreno
|
||||
// 3xx.
|
||||
result +=
|
||||
"__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | "
|
||||
"CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n";
|
||||
} else {
|
||||
result +=
|
||||
"__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | "
|
||||
"CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n";
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Static
|
||||
constexpr char CLArguments::kArgsPrefix[];
|
||||
|
||||
absl::Status CLArguments::Init(
|
||||
const DeviceInfo& device_info,
|
||||
const std::map<std::string, std::string>& linkables, CLContext* context,
|
||||
Arguments* args, std::string* code) {
|
||||
RETURN_IF_ERROR(AllocateObjects(*args, context));
|
||||
RETURN_IF_ERROR(AddObjectArgs(args));
|
||||
RETURN_IF_ERROR(ResolveSelectorsPass(*args, linkables, code));
|
||||
object_refs_ = std::move(args->object_refs_);
|
||||
args->GetActiveArguments(kArgsPrefix, *code);
|
||||
const bool use_f32_for_halfs = device_info.IsPowerVR();
|
||||
CopyArguments(*args, use_f32_for_halfs);
|
||||
RETURN_IF_ERROR(SetObjectsResources(*args));
|
||||
RenameArgumentsInCode(code);
|
||||
ResolveArgsPass(code);
|
||||
*code = absl::Substitute(*code, GetListOfArgs());
|
||||
*code = GetDefaultSamplers(device_info) + *code;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status CLArguments::Init(const DeviceInfo& device_info, Arguments* args,
|
||||
CLContext* context) {
|
||||
RETURN_IF_ERROR(AllocateObjects(*args, context));
|
||||
RETURN_IF_ERROR(AddObjectArgs(args));
|
||||
object_refs_ = std::move(args->object_refs_);
|
||||
const bool use_f32_for_halfs = device_info.IsPowerVR();
|
||||
CopyArguments(*args, use_f32_for_halfs);
|
||||
RETURN_IF_ERROR(SetObjectsResources(*args));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status CLArguments::AllocateObjects(const Arguments& args,
|
||||
CLContext* context) {
|
||||
objects_.resize(args.objects_.size());
|
||||
int i = 0;
|
||||
for (auto& t : args.objects_) {
|
||||
RETURN_IF_ERROR(t.second->CreateGPUObject(context, &objects_[i]));
|
||||
i++;
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status CLArguments::AddObjectArgs(Arguments* args) {
|
||||
for (auto& t : args->objects_) {
|
||||
AddGPUResources(t.first, t.second->GetGPUResources(), args);
|
||||
}
|
||||
for (auto& t : args->object_refs_) {
|
||||
AddGPUResources(t.first, t.second->GetGPUResources(), args);
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status CLArguments::SetObjectsResources(const Arguments& args) {
|
||||
int i = 0;
|
||||
for (const auto& t : args.objects_) {
|
||||
GPUResourcesWithValue resources;
|
||||
RETURN_IF_ERROR(objects_[i]->GetGPUResources(t.second.get(), &resources));
|
||||
RETURN_IF_ERROR(SetGPUResources(t.first, resources));
|
||||
i++;
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status CLArguments::ResolveSelectorsPass(
|
||||
const Arguments& args, const std::map<std::string, std::string>& linkables,
|
||||
std::string* code) {
|
||||
std::string result;
|
||||
size_t position = 0;
|
||||
size_t next_position = code->find(kArgsPrefix);
|
||||
while (next_position != std::string::npos) {
|
||||
size_t arg_pos = next_position;
|
||||
next_position += strlen(kArgsPrefix);
|
||||
std::string object_name = GetNextWord(*code, next_position);
|
||||
char next = (*code)[next_position + object_name.size()];
|
||||
if (next == '.') {
|
||||
next_position += object_name.size() + 1;
|
||||
std::string selector_name = GetNextWord(*code, next_position);
|
||||
next_position += selector_name.size();
|
||||
next = (*code)[next_position];
|
||||
std::vector<std::string> template_args;
|
||||
if (next == '<') {
|
||||
size_t close_bracket_pos;
|
||||
RETURN_IF_ERROR(ParseArgsInsideBrackets(
|
||||
*code, next_position, &close_bracket_pos, &template_args));
|
||||
next_position = close_bracket_pos;
|
||||
next = (*code)[next_position];
|
||||
}
|
||||
if (next != '(') {
|
||||
return absl::NotFoundError(absl::StrCat(
|
||||
"Expected ( after ", object_name, ".", selector_name, " call"));
|
||||
}
|
||||
std::vector<std::string> function_args;
|
||||
size_t close_bracket_pos;
|
||||
RETURN_IF_ERROR(ParseArgsInsideBrackets(
|
||||
*code, next_position, &close_bracket_pos, &function_args));
|
||||
for (auto& arg : function_args) {
|
||||
RETURN_IF_ERROR(ResolveSelectorsPass(args, {}, &arg));
|
||||
}
|
||||
std::string patch;
|
||||
RETURN_IF_ERROR(ResolveSelector(args, linkables, object_name,
|
||||
selector_name, function_args,
|
||||
template_args, &patch));
|
||||
code->replace(arg_pos, close_bracket_pos - arg_pos, patch);
|
||||
position = arg_pos + patch.size();
|
||||
} else {
|
||||
position = arg_pos + strlen(kArgsPrefix);
|
||||
}
|
||||
next_position = code->find(kArgsPrefix, position);
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
void CLArguments::ResolveObjectNames(
|
||||
const std::string& object_name,
|
||||
const std::vector<std::string>& member_names, std::string* code) {
|
||||
for (const auto& member_name : member_names) {
|
||||
const std::string new_name = kArgsPrefix + object_name + "_" + member_name;
|
||||
ReplaceAllWords(member_name, new_name, code);
|
||||
}
|
||||
}
|
||||
|
||||
absl::Status CLArguments::ResolveSelector(
|
||||
const Arguments& args, const std::map<std::string, std::string>& linkables,
|
||||
const std::string& object_name, const std::string& selector,
|
||||
const std::vector<std::string>& function_args,
|
||||
const std::vector<std::string>& template_args, std::string* result) {
|
||||
const GPUObjectDescriptor* desc_ptr;
|
||||
auto it_ref = args.object_refs_.find(object_name);
|
||||
auto it_obj = args.objects_.find(object_name);
|
||||
if (it_ref != args.object_refs_.end()) {
|
||||
desc_ptr = it_ref->second.get();
|
||||
} else if (it_obj != args.objects_.end()) {
|
||||
desc_ptr = it_obj->second.get();
|
||||
} else {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No object with name - ", object_name));
|
||||
}
|
||||
auto names = desc_ptr->GetGPUResources().GetNames();
|
||||
const auto* tensor_desc = dynamic_cast<const TensorDescriptor*>(desc_ptr);
|
||||
if (tensor_desc && selector == "Write") {
|
||||
auto it = linkables.find(object_name);
|
||||
if (it != linkables.end()) {
|
||||
if (desc_ptr->GetAccess() != AccessType::WRITE &&
|
||||
desc_ptr->GetAccess() != AccessType::READ_WRITE) {
|
||||
return absl::FailedPreconditionError(absl::StrCat(
|
||||
"Object with name - ", object_name, " should have Write access."));
|
||||
}
|
||||
std::string value_name, x_coord, y_coord, s_coord;
|
||||
RETURN_IF_ERROR(tensor_desc->GetLinkingContextFromWriteSelector(
|
||||
function_args, &value_name, &x_coord, &y_coord, &s_coord));
|
||||
// x_coord can have batch size property of link_object
|
||||
ResolveObjectNames(object_name, names, &x_coord);
|
||||
*result = it->second;
|
||||
ReplaceAllWords("in_out_value", value_name, result);
|
||||
ReplaceAllWords("X_COORD", x_coord, result);
|
||||
ReplaceAllWords("Y_COORD", y_coord, result);
|
||||
ReplaceAllWords("S_COORD", s_coord, result);
|
||||
RETURN_IF_ERROR(ResolveSelectorsPass(args, {}, result));
|
||||
}
|
||||
}
|
||||
std::string patch;
|
||||
RETURN_IF_ERROR(desc_ptr->PerformSelector(selector, function_args,
|
||||
template_args, &patch));
|
||||
ResolveObjectNames(object_name, names, &patch);
|
||||
*result += patch;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
void CLArguments::ResolveArgsPass(std::string* code) {
|
||||
size_t position = 0;
|
||||
size_t next_position = code->find(kArgsPrefix);
|
||||
while (next_position != std::string::npos) {
|
||||
size_t arg_pos = next_position;
|
||||
next_position += strlen(kArgsPrefix);
|
||||
std::string object_name = GetNextWord(*code, next_position);
|
||||
std::string new_name = object_name;
|
||||
code->replace(arg_pos, object_name.size() + strlen(kArgsPrefix), new_name);
|
||||
position = arg_pos + new_name.size();
|
||||
next_position = code->find(kArgsPrefix, position);
|
||||
}
|
||||
}
|
||||
|
||||
void CLArguments::CopyScalarValues(Arguments* args) const {
|
||||
for (const auto& fvalue : float_values_) {
|
||||
args->float_values_[fvalue.first].value = fvalue.second.value;
|
||||
}
|
||||
for (const auto& ivalue : int_values_) {
|
||||
args->int_values_[ivalue.first].value = ivalue.second.value;
|
||||
}
|
||||
for (const auto& hfvalue : half_values_) {
|
||||
args->half_values_[hfvalue.first].value = hfvalue.second.value;
|
||||
}
|
||||
}
|
||||
|
||||
void CLArguments::CopyArguments(const Arguments& args, bool use_f32_for_halfs) {
|
||||
for (const auto& fvalue : args.float_values_) {
|
||||
auto& new_val = float_values_[fvalue.first];
|
||||
new_val.value = fvalue.second.value;
|
||||
new_val.active = fvalue.second.active;
|
||||
if (fvalue.second.active) {
|
||||
new_val.offset = shared_float4s_data_.size();
|
||||
shared_float4s_data_.push_back(new_val.value);
|
||||
}
|
||||
}
|
||||
for (const auto& ivalue : args.int_values_) {
|
||||
auto& new_val = int_values_[ivalue.first];
|
||||
new_val.value = ivalue.second.value;
|
||||
new_val.active = ivalue.second.active;
|
||||
if (ivalue.second.active) {
|
||||
new_val.offset = shared_int4s_data_.size();
|
||||
shared_int4s_data_.push_back(new_val.value);
|
||||
}
|
||||
}
|
||||
for (const auto& hfvalue : args.half_values_) {
|
||||
auto& new_val = half_values_[hfvalue.first];
|
||||
new_val.value = hfvalue.second.value;
|
||||
new_val.active = hfvalue.second.active;
|
||||
if (hfvalue.second.active) {
|
||||
if (use_f32_for_halfs) {
|
||||
new_val.store_as_f32 = true;
|
||||
new_val.offset = shared_float4s_data_.size();
|
||||
shared_float4s_data_.push_back(new_val.value);
|
||||
} else {
|
||||
new_val.store_as_f32 = false;
|
||||
new_val.offset = shared_half4s_data_.size();
|
||||
shared_half4s_data_.push_back(new_val.value);
|
||||
}
|
||||
}
|
||||
}
|
||||
int shared_int4s_aligned_size = AlignByN(shared_int4s_data_.size(), 4);
|
||||
shared_int4s_data_.resize(shared_int4s_aligned_size);
|
||||
int shared_float4s_aligned_size = AlignByN(shared_float4s_data_.size(), 4);
|
||||
shared_float4s_data_.resize(shared_float4s_aligned_size);
|
||||
int shared_half4s_aligned_size = AlignByN(shared_half4s_data_.size(), 4);
|
||||
shared_half4s_data_.resize(shared_half4s_aligned_size);
|
||||
}
|
||||
|
||||
void CLArguments::RenameArgumentsInCode(std::string* code) {
|
||||
const std::string postfixes[4] = {"x", "y", "z", "w"};
|
||||
for (const auto& fvalue : float_values_) {
|
||||
if (fvalue.second.active) {
|
||||
std::string index = std::to_string(fvalue.second.offset / 4);
|
||||
std::string new_name =
|
||||
"shared_float4_" + index + "." + postfixes[fvalue.second.offset % 4];
|
||||
ReplaceAllWords(kArgsPrefix + fvalue.first, new_name, code);
|
||||
}
|
||||
}
|
||||
for (const auto& ivalue : int_values_) {
|
||||
if (ivalue.second.active) {
|
||||
std::string index = std::to_string(ivalue.second.offset / 4);
|
||||
std::string new_name =
|
||||
"shared_int4_" + index + "." + postfixes[ivalue.second.offset % 4];
|
||||
ReplaceAllWords(kArgsPrefix + ivalue.first, new_name, code);
|
||||
}
|
||||
}
|
||||
for (const auto& hfvalue : half_values_) {
|
||||
if (hfvalue.second.active) {
|
||||
std::string index = std::to_string(hfvalue.second.offset / 4);
|
||||
std::string new_name;
|
||||
if (hfvalue.second.store_as_f32) {
|
||||
new_name = "(half)(shared_float4_" + index + "." +
|
||||
postfixes[hfvalue.second.offset % 4] + ")";
|
||||
} else {
|
||||
new_name = "shared_half4_" + index + "." +
|
||||
postfixes[hfvalue.second.offset % 4];
|
||||
}
|
||||
ReplaceAllWords(kArgsPrefix + hfvalue.first, new_name, code);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CLArguments::AddBuffer(const std::string& name,
|
||||
const GPUBufferDescriptor& desc) {
|
||||
buffers_[name] = desc;
|
||||
}
|
||||
void CLArguments::AddImage2D(const std::string& name,
|
||||
const GPUImage2DDescriptor& desc) {
|
||||
images2d_[name] = desc;
|
||||
}
|
||||
|
||||
void CLArguments::AddImage2DArray(const std::string& name,
|
||||
const GPUImage2DArrayDescriptor& desc) {
|
||||
image2d_arrays_[name] = desc;
|
||||
}
|
||||
|
||||
void CLArguments::AddImage3D(const std::string& name,
|
||||
const GPUImage3DDescriptor& desc) {
|
||||
images3d_[name] = desc;
|
||||
}
|
||||
|
||||
void CLArguments::AddImageBuffer(const std::string& name,
|
||||
const GPUImageBufferDescriptor& desc) {
|
||||
image_buffers_[name] = desc;
|
||||
}
|
||||
|
||||
void CLArguments::AddCustomMemory(const std::string& name,
|
||||
const GPUCustomMemoryDescriptor& desc) {
|
||||
custom_memories_[name] = desc;
|
||||
}
|
||||
|
||||
void CLArguments::AddGPUResources(const std::string& name,
|
||||
const GPUResources& resources,
|
||||
Arguments* args) {
|
||||
for (const auto& r : resources.ints) {
|
||||
args->AddInt(absl::StrCat(name, "_", r));
|
||||
}
|
||||
for (const auto& r : resources.floats) {
|
||||
args->AddFloat(absl::StrCat(name, "_", r));
|
||||
}
|
||||
for (const auto& r : resources.buffers) {
|
||||
AddBuffer(absl::StrCat(name, "_", r.first), r.second);
|
||||
}
|
||||
for (const auto& r : resources.images2d) {
|
||||
AddImage2D(absl::StrCat(name, "_", r.first), r.second);
|
||||
}
|
||||
for (const auto& r : resources.image2d_arrays) {
|
||||
AddImage2DArray(absl::StrCat(name, "_", r.first), r.second);
|
||||
}
|
||||
for (const auto& r : resources.images3d) {
|
||||
AddImage3D(absl::StrCat(name, "_", r.first), r.second);
|
||||
}
|
||||
for (const auto& r : resources.image_buffers) {
|
||||
AddImageBuffer(absl::StrCat(name, "_", r.first), r.second);
|
||||
}
|
||||
for (const auto& r : resources.custom_memories) {
|
||||
AddCustomMemory(absl::StrCat(name, "_", r.first), r.second);
|
||||
}
|
||||
}
|
||||
|
||||
absl::Status CLArguments::SetInt(const std::string& name, int value) {
|
||||
auto it = int_values_.find(name);
|
||||
if (it == int_values_.end()) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No int argument with name - ", name));
|
||||
}
|
||||
it->second.value = value;
|
||||
if (it->second.active) {
|
||||
shared_int4s_data_[it->second.offset] = value;
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
absl::Status CLArguments::SetFloat(const std::string& name, float value) {
|
||||
auto it = float_values_.find(name);
|
||||
if (it == float_values_.end()) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No float argument with name - ", name));
|
||||
}
|
||||
it->second.value = value;
|
||||
if (it->second.active) {
|
||||
shared_float4s_data_[it->second.offset] = value;
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status CLArguments::SetHalf(const std::string& name, half value) {
|
||||
auto it = half_values_.find(name);
|
||||
if (it == half_values_.end()) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No half argument with name - ", name));
|
||||
}
|
||||
it->second.value = value;
|
||||
if (it->second.active) {
|
||||
if (it->second.store_as_f32) {
|
||||
shared_float4s_data_[it->second.offset] = value;
|
||||
} else {
|
||||
shared_half4s_data_[it->second.offset] = value;
|
||||
}
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status CLArguments::SetImage2D(const std::string& name, cl_mem memory) {
|
||||
auto it = images2d_.find(name);
|
||||
if (it == images2d_.end()) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No image2D argument with name - ", name));
|
||||
}
|
||||
it->second.memory = memory;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status CLArguments::SetBuffer(const std::string& name, cl_mem memory) {
|
||||
auto it = buffers_.find(name);
|
||||
if (it == buffers_.end()) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No buffer argument with name - ", name));
|
||||
}
|
||||
it->second.memory = memory;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status CLArguments::SetImage2DArray(const std::string& name,
|
||||
cl_mem memory) {
|
||||
auto it = image2d_arrays_.find(name);
|
||||
if (it == image2d_arrays_.end()) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No image2D array argument with name - ", name));
|
||||
}
|
||||
it->second.memory = memory;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status CLArguments::SetImage3D(const std::string& name, cl_mem memory) {
|
||||
auto it = images3d_.find(name);
|
||||
if (it == images3d_.end()) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No image3D argument with name - ", name));
|
||||
}
|
||||
it->second.memory = memory;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status CLArguments::SetImageBuffer(const std::string& name,
|
||||
cl_mem memory) {
|
||||
auto it = image_buffers_.find(name);
|
||||
if (it == image_buffers_.end()) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No image buffer argument with name - ", name));
|
||||
}
|
||||
it->second.memory = memory;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status CLArguments::SetCustomMemory(const std::string& name,
|
||||
cl_mem memory) {
|
||||
auto it = custom_memories_.find(name);
|
||||
if (it == custom_memories_.end()) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No custom memory argument with name - ", name));
|
||||
}
|
||||
it->second.memory = memory;
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status CLArguments::SetObjectRef(const std::string& name,
|
||||
const GPUObject* object) {
|
||||
auto it = object_refs_.find(name);
|
||||
if (it == object_refs_.end()) {
|
||||
return absl::NotFoundError(
|
||||
absl::StrCat("No object ref with name - ", name));
|
||||
}
|
||||
GPUResourcesWithValue resources;
|
||||
RETURN_IF_ERROR(object->GetGPUResources(it->second.get(), &resources));
|
||||
return SetGPUResources(name, resources);
|
||||
}
|
||||
|
||||
absl::Status CLArguments::SetGPUResources(
|
||||
const std::string& name, const GPUResourcesWithValue& resources) {
|
||||
for (const auto& r : resources.ints) {
|
||||
RETURN_IF_ERROR(SetInt(absl::StrCat(name, "_", r.first), r.second));
|
||||
}
|
||||
for (const auto& r : resources.floats) {
|
||||
RETURN_IF_ERROR(SetFloat(absl::StrCat(name, "_", r.first), r.second));
|
||||
}
|
||||
for (const auto& r : resources.buffers) {
|
||||
RETURN_IF_ERROR(SetBuffer(absl::StrCat(name, "_", r.first), r.second));
|
||||
}
|
||||
for (const auto& r : resources.images2d) {
|
||||
RETURN_IF_ERROR(SetImage2D(absl::StrCat(name, "_", r.first), r.second));
|
||||
}
|
||||
for (const auto& r : resources.image2d_arrays) {
|
||||
RETURN_IF_ERROR(
|
||||
SetImage2DArray(absl::StrCat(name, "_", r.first), r.second));
|
||||
}
|
||||
for (const auto& r : resources.images3d) {
|
||||
RETURN_IF_ERROR(SetImage3D(absl::StrCat(name, "_", r.first), r.second));
|
||||
}
|
||||
for (const auto& r : resources.image_buffers) {
|
||||
RETURN_IF_ERROR(SetImageBuffer(absl::StrCat(name, "_", r.first), r.second));
|
||||
}
|
||||
for (const auto& r : resources.custom_memories) {
|
||||
RETURN_IF_ERROR(
|
||||
SetCustomMemory(absl::StrCat(name, "_", r.first), r.second));
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
std::string CLArguments::GetListOfArgs() {
|
||||
std::string result;
|
||||
for (auto& t : buffers_) {
|
||||
const std::string type_name =
|
||||
t.second.data_type == DataType::FLOAT32 ? "float" : "half";
|
||||
std::string attributes;
|
||||
for (const auto& attr : t.second.attributes) {
|
||||
attributes += absl::StrCat(" __attribute__((", attr, "))");
|
||||
}
|
||||
AppendArgument(
|
||||
absl::StrCat(MemoryTypeToCLType(t.second.memory_type), " ",
|
||||
ToCLDataType(t.second.data_type, t.second.element_size),
|
||||
"* ", t.first, attributes),
|
||||
&result);
|
||||
}
|
||||
for (auto& t : image_buffers_) {
|
||||
AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type),
|
||||
" image1d_buffer_t ", t.first),
|
||||
&result);
|
||||
}
|
||||
for (auto& t : images2d_) {
|
||||
AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type),
|
||||
" image2d_t ", t.first),
|
||||
&result);
|
||||
}
|
||||
for (auto& t : image2d_arrays_) {
|
||||
AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type),
|
||||
" image2d_array_t ", t.first),
|
||||
&result);
|
||||
}
|
||||
for (auto& t : images3d_) {
|
||||
AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type),
|
||||
" image3d_t ", t.first),
|
||||
&result);
|
||||
}
|
||||
for (auto& t : custom_memories_) {
|
||||
AppendArgument(absl::StrCat(t.second.type_name, " ", t.first), &result);
|
||||
}
|
||||
for (int i = 0; i < shared_int4s_data_.size() / 4; ++i) {
|
||||
AppendArgument(absl::StrCat("int4 shared_int4_", i), &result);
|
||||
}
|
||||
for (int i = 0; i < shared_float4s_data_.size() / 4; ++i) {
|
||||
AppendArgument(absl::StrCat("float4 shared_float4_", i), &result);
|
||||
}
|
||||
for (int i = 0; i < shared_half4s_data_.size() / 4; ++i) {
|
||||
AppendArgument(absl::StrCat("half4 shared_half4_", i), &result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
absl::Status CLArguments::Bind(cl_kernel kernel, int offset) {
|
||||
for (auto& t : buffers_) {
|
||||
const int error_code =
|
||||
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
|
||||
if (error_code != CL_SUCCESS) {
|
||||
return absl::UnknownError(absl::StrCat(
|
||||
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
|
||||
"(at index - ", offset, ")"));
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
for (auto& t : image_buffers_) {
|
||||
const int error_code =
|
||||
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
|
||||
if (error_code != CL_SUCCESS) {
|
||||
return absl::UnknownError(absl::StrCat(
|
||||
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
|
||||
"(at index - ", offset, ")"));
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
for (auto& t : images2d_) {
|
||||
const int error_code =
|
||||
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
|
||||
if (error_code != CL_SUCCESS) {
|
||||
return absl::UnknownError(absl::StrCat(
|
||||
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
|
||||
"(at index - ", offset, ")"));
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
for (auto& t : image2d_arrays_) {
|
||||
const int error_code =
|
||||
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
|
||||
if (error_code != CL_SUCCESS) {
|
||||
return absl::UnknownError(absl::StrCat(
|
||||
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
|
||||
"(at index - ", offset, ")"));
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
for (auto& t : images3d_) {
|
||||
const int error_code =
|
||||
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
|
||||
if (error_code != CL_SUCCESS) {
|
||||
return absl::UnknownError(absl::StrCat(
|
||||
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
|
||||
"(at index - ", offset, ")"));
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
for (auto& t : custom_memories_) {
|
||||
const int error_code =
|
||||
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
|
||||
if (error_code != CL_SUCCESS) {
|
||||
return absl::UnknownError(absl::StrCat(
|
||||
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
|
||||
"(at index - ", offset, ")"));
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
for (int i = 0; i < shared_int4s_data_.size() / 4; ++i) {
|
||||
const int error_code = clSetKernelArg(kernel, offset, sizeof(int32_t) * 4,
|
||||
&shared_int4s_data_[i * 4]);
|
||||
if (error_code != CL_SUCCESS) {
|
||||
return absl::UnknownError(absl::StrCat(
|
||||
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
|
||||
"(at index - ", offset, ")"));
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
for (int i = 0; i < shared_float4s_data_.size() / 4; ++i) {
|
||||
const int error_code = clSetKernelArg(kernel, offset, sizeof(int32_t) * 4,
|
||||
&shared_float4s_data_[i * 4]);
|
||||
if (error_code != CL_SUCCESS) {
|
||||
return absl::UnknownError(absl::StrCat(
|
||||
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
|
||||
"(at index - ", offset, ")"));
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
for (int i = 0; i < shared_half4s_data_.size() / 4; ++i) {
|
||||
const int error_code = clSetKernelArg(kernel, offset, sizeof(int16_t) * 4,
|
||||
&shared_half4s_data_[i * 4]);
|
||||
if (error_code != CL_SUCCESS) {
|
||||
return absl::UnknownError(absl::StrCat(
|
||||
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
|
||||
"(at index - ", offset, ")"));
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
} // namespace cl
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
168
tensorflow/lite/delegates/gpu/cl/cl_arguments.h
Normal file
168
tensorflow/lite/delegates/gpu/cl/cl_arguments.h
Normal file
@ -0,0 +1,168 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_ARGUMENTS_H_
|
||||
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_ARGUMENTS_H_
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/cl/arguments.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
namespace cl {
|
||||
|
||||
class CLArguments : public ArgumentsBinder {
|
||||
public:
|
||||
CLArguments() = default;
|
||||
|
||||
absl::Status Init(const DeviceInfo& device_info,
|
||||
const std::map<std::string, std::string>& linkables,
|
||||
CLContext* context, Arguments* args, std::string* code);
|
||||
absl::Status Init(const DeviceInfo& device_info, Arguments* args,
|
||||
CLContext* context);
|
||||
|
||||
// Temporary, will be resolved later
|
||||
void MoveObjectRefsIn(Arguments* args) {
|
||||
object_refs_ = std::move(args->object_refs_);
|
||||
}
|
||||
void MoveObjectRefsOut(Arguments* args) {
|
||||
args->object_refs_ = std::move(object_refs_);
|
||||
}
|
||||
void CopyScalarValues(Arguments* args) const;
|
||||
|
||||
// Move only
|
||||
CLArguments(CLArguments&& args) = default;
|
||||
CLArguments& operator=(CLArguments&& args) = default;
|
||||
CLArguments(const CLArguments&) = delete;
|
||||
CLArguments& operator=(const CLArguments&) = delete;
|
||||
|
||||
absl::Status SetInt(const std::string& name, int value) override;
|
||||
absl::Status SetFloat(const std::string& name, float value) override;
|
||||
absl::Status SetHalf(const std::string& name, half value) override;
|
||||
absl::Status SetObjectRef(const std::string& name,
|
||||
const GPUObject* object) override;
|
||||
|
||||
absl::Status Bind(cl_kernel kernel, int offset = 0);
|
||||
|
||||
private:
|
||||
absl::Status AllocateObjects(const Arguments& args, CLContext* context);
|
||||
absl::Status AddObjectArgs(Arguments* args);
|
||||
|
||||
absl::Status ResolveSelectorsPass(
|
||||
const Arguments& args,
|
||||
const std::map<std::string, std::string>& linkables, std::string* code);
|
||||
absl::Status ResolveSelector(
|
||||
const Arguments& args,
|
||||
const std::map<std::string, std::string>& linkables,
|
||||
const std::string& object_name, const std::string& selector,
|
||||
const std::vector<std::string>& function_args,
|
||||
const std::vector<std::string>& template_args, std::string* result);
|
||||
void ResolveObjectNames(const std::string& object_name,
|
||||
const std::vector<std::string>& member_names,
|
||||
std::string* code);
|
||||
void ResolveArgsPass(std::string* code);
|
||||
|
||||
void CopyArguments(const Arguments& args, bool use_f32_for_halfs);
|
||||
void RenameArgumentsInCode(std::string* code);
|
||||
std::string GetListOfArgs();
|
||||
|
||||
void AddBuffer(const std::string& name, const GPUBufferDescriptor& desc);
|
||||
void AddImage2D(const std::string& name, const GPUImage2DDescriptor& desc);
|
||||
void AddImage2DArray(const std::string& name,
|
||||
const GPUImage2DArrayDescriptor& desc);
|
||||
void AddImage3D(const std::string& name, const GPUImage3DDescriptor& desc);
|
||||
void AddImageBuffer(const std::string& name,
|
||||
const GPUImageBufferDescriptor& desc);
|
||||
void AddCustomMemory(const std::string& name,
|
||||
const GPUCustomMemoryDescriptor& desc);
|
||||
void AddGPUResources(const std::string& name, const GPUResources& resources,
|
||||
Arguments* args);
|
||||
absl::Status SetObjectsResources(const Arguments& args);
|
||||
absl::Status SetGPUResources(const std::string& name,
|
||||
const GPUResourcesWithValue& resources);
|
||||
|
||||
absl::Status SetImage2D(const std::string& name, cl_mem memory);
|
||||
absl::Status SetBuffer(const std::string& name, cl_mem memory);
|
||||
absl::Status SetImage2DArray(const std::string& name, cl_mem memory);
|
||||
absl::Status SetImage3D(const std::string& name, cl_mem memory);
|
||||
absl::Status SetImageBuffer(const std::string& name, cl_mem memory);
|
||||
absl::Status SetCustomMemory(const std::string& name, cl_mem memory);
|
||||
|
||||
static constexpr char kArgsPrefix[] = "args.";
|
||||
struct IntValue {
|
||||
int value;
|
||||
|
||||
// many arguments generated automatically and not used
|
||||
// to reduce amount of data transferred we adding this optimization
|
||||
bool active = false;
|
||||
|
||||
// offset to shared storage.
|
||||
uint32_t offset = -1;
|
||||
};
|
||||
std::map<std::string, IntValue> int_values_;
|
||||
std::vector<int32_t> shared_int4s_data_;
|
||||
|
||||
struct FloatValue {
|
||||
float value;
|
||||
|
||||
// many arguments generated automatically and not used
|
||||
// to reduce amount of data transferred we adding this optimization
|
||||
bool active = false;
|
||||
|
||||
// offset to shared storage.
|
||||
uint32_t offset = -1;
|
||||
};
|
||||
std::map<std::string, FloatValue> float_values_;
|
||||
std::vector<float> shared_float4s_data_;
|
||||
|
||||
struct HalfValue {
|
||||
half value;
|
||||
|
||||
// many arguments generated automatically and not used
|
||||
// to reduce amount of data transferred we adding this optimization
|
||||
bool active = false;
|
||||
|
||||
// some devices have issues with half parameters.
|
||||
bool store_as_f32 = false;
|
||||
|
||||
// offset to shared uniform storage.
|
||||
uint32_t offset = -1;
|
||||
};
|
||||
std::map<std::string, HalfValue> half_values_;
|
||||
std::vector<half> shared_half4s_data_;
|
||||
|
||||
std::map<std::string, GPUBufferDescriptor> buffers_;
|
||||
std::map<std::string, GPUImage2DDescriptor> images2d_;
|
||||
std::map<std::string, GPUImage2DArrayDescriptor> image2d_arrays_;
|
||||
std::map<std::string, GPUImage3DDescriptor> images3d_;
|
||||
std::map<std::string, GPUImageBufferDescriptor> image_buffers_;
|
||||
std::map<std::string, GPUCustomMemoryDescriptor> custom_memories_;
|
||||
|
||||
std::map<std::string, GPUObjectDescriptorPtr> object_refs_;
|
||||
std::vector<GPUObjectPtr> objects_;
|
||||
};
|
||||
|
||||
} // namespace cl
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_ARGUMENTS_H_
|
@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/lite/delegates/gpu/cl/arguments.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_arguments.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
@ -20,6 +20,7 @@ limitations under the License.
|
||||
#include <gmock/gmock.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include "absl/strings/match.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/arguments.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
|
||||
@ -27,7 +28,7 @@ limitations under the License.
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
namespace cl {
|
||||
TEST(ArgumentsTest, TestSelectorResolve) {
|
||||
TEST(CLArgumentsTest, TestSelectorResolve) {
|
||||
BufferDescriptor desc;
|
||||
desc.element_type = DataType::FLOAT32;
|
||||
desc.element_size = 4;
|
||||
@ -43,14 +44,15 @@ __kernel void main_function($0) {
|
||||
}
|
||||
})";
|
||||
|
||||
CLArguments cl_args;
|
||||
DeviceInfo device_info;
|
||||
ASSERT_OK(args.TransformToCLCode(device_info, {}, &sample_code));
|
||||
ASSERT_OK(cl_args.Init(device_info, {}, nullptr, &args, &sample_code));
|
||||
EXPECT_TRUE(absl::StrContains(sample_code, "value = weights_buffer[id];"));
|
||||
EXPECT_TRUE(
|
||||
absl::StrContains(sample_code, "__global float4* weights_buffer"));
|
||||
}
|
||||
|
||||
TEST(ArgumentsTest, TestNoSelector) {
|
||||
TEST(CLArgumentsTest, TestNoSelector) {
|
||||
BufferDescriptor desc;
|
||||
desc.element_type = DataType::FLOAT32;
|
||||
desc.element_size = 4;
|
||||
@ -64,16 +66,10 @@ TEST(ArgumentsTest, TestNoSelector) {
|
||||
value = args.weights.UnknownSelector(id);
|
||||
}
|
||||
)";
|
||||
CLArguments cl_args;
|
||||
DeviceInfo device_info;
|
||||
EXPECT_FALSE(args.TransformToCLCode(device_info, {}, &sample_code).ok());
|
||||
}
|
||||
|
||||
TEST(ArgumentsTest, TestRenameArgs) {
|
||||
Arguments linkable_args;
|
||||
linkable_args.AddFloat("alpha", 0.5f);
|
||||
std::string linkable_code = "in_out_value += args.alpha;\n";
|
||||
linkable_args.RenameArgs("_link0", &linkable_code);
|
||||
EXPECT_EQ(linkable_code, "in_out_value += args.alpha_link0;\n");
|
||||
EXPECT_FALSE(
|
||||
cl_args.Init(device_info, {}, nullptr, &args, &sample_code).ok());
|
||||
}
|
||||
|
||||
} // namespace cl
|
@ -199,12 +199,21 @@ absl::Status InferenceContext::InitFromGraph(
|
||||
RETURN_IF_ERROR(Tune(tuning_parameters));
|
||||
|
||||
if (serialized_model) {
|
||||
// Temporary, will be resolved later, now we don't have complete
|
||||
// intermediate representation
|
||||
for (auto& node : nodes_) {
|
||||
node.operation->MoveObjectRefsFromCLToGeneric();
|
||||
node.operation->SyncScalarValues();
|
||||
}
|
||||
flatbuffers::FlatBufferBuilder builder;
|
||||
auto encoded_fb = Encode(*this, &builder);
|
||||
data::FinishInferenceContextBuffer(builder, encoded_fb);
|
||||
serialized_model->resize(builder.GetSize());
|
||||
std::memcpy(serialized_model->data(), builder.GetBufferPointer(),
|
||||
builder.GetSize());
|
||||
for (auto& node : nodes_) {
|
||||
node.operation->MoveObjectRefsFromGenericToCL();
|
||||
}
|
||||
}
|
||||
for (auto& node : nodes_) {
|
||||
node.operation->args_.ReleaseCPURepresentation();
|
||||
@ -220,7 +229,7 @@ absl::Status InferenceContext::RestoreDeserialized(
|
||||
return absl::DataLossError("Deserialization failed.");
|
||||
}
|
||||
auto decoded_fb = data::GetInferenceContext(serialized_model.data());
|
||||
RETURN_IF_ERROR(Decode(&env->context(), decoded_fb, this));
|
||||
RETURN_IF_ERROR(Decode(decoded_fb, this));
|
||||
|
||||
CreationContext creation_context;
|
||||
creation_context.device = env->GetDevicePtr();
|
||||
|
@ -103,8 +103,7 @@ class InferenceContext {
|
||||
friend flatbuffers::Offset<data::InferenceContext> Encode(
|
||||
const InferenceContext& inference,
|
||||
flatbuffers::FlatBufferBuilder* builder);
|
||||
friend absl::Status Decode(CLContext* context,
|
||||
const data::InferenceContext* fb_inference,
|
||||
friend absl::Status Decode(const data::InferenceContext* fb_inference,
|
||||
InferenceContext* inference);
|
||||
|
||||
void CopyInAndOutIds(const GraphFloat32& graph);
|
||||
|
@ -263,6 +263,7 @@ cc_library(
|
||||
":util",
|
||||
"//tensorflow/lite/delegates/gpu:spi",
|
||||
"//tensorflow/lite/delegates/gpu/cl:arguments",
|
||||
"//tensorflow/lite/delegates/gpu/cl:cl_arguments",
|
||||
"//tensorflow/lite/delegates/gpu/cl:cl_command_queue",
|
||||
"//tensorflow/lite/delegates/gpu/cl:cl_errors",
|
||||
"//tensorflow/lite/delegates/gpu/cl:environment",
|
||||
@ -643,6 +644,7 @@ cc_library(
|
||||
":work_group_picking",
|
||||
"//tensorflow/lite/delegates/gpu/cl:arguments",
|
||||
"//tensorflow/lite/delegates/gpu/cl:buffer",
|
||||
"//tensorflow/lite/delegates/gpu/cl:cl_arguments",
|
||||
"//tensorflow/lite/delegates/gpu/cl:cl_command_queue",
|
||||
"//tensorflow/lite/delegates/gpu/cl:cl_context",
|
||||
"//tensorflow/lite/delegates/gpu/cl:cl_device",
|
||||
|
@ -20,6 +20,7 @@ limitations under the License.
|
||||
#include <string>
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/cl/arguments.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_arguments.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_errors.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/util.h"
|
||||
@ -44,8 +45,9 @@ class OpenClConverterImpl : public TensorObjectConverter {
|
||||
absl::Status DispatchKernel(cl_mem buffer_mem, Tensor* tensor) {
|
||||
kernel_.ResetBindingCounter();
|
||||
RETURN_IF_ERROR(kernel_.SetMemoryAuto(buffer_mem));
|
||||
RETURN_IF_ERROR(args_.SetObjectRef("tensor", tensor));
|
||||
RETURN_IF_ERROR(args_.Bind(kernel_.kernel(), kernel_.GetBindingCounter()));
|
||||
RETURN_IF_ERROR(cl_args_.SetObjectRef("tensor", tensor));
|
||||
RETURN_IF_ERROR(
|
||||
cl_args_.Bind(kernel_.kernel(), kernel_.GetBindingCounter()));
|
||||
const int3 grid = int3(tensor->Width() * tensor->Batch(), tensor->Height(),
|
||||
tensor->Slices());
|
||||
const int3 work_group_size = {16, 8, 1};
|
||||
@ -53,7 +55,7 @@ class OpenClConverterImpl : public TensorObjectConverter {
|
||||
return queue_->Dispatch(kernel_, work_groups_count, work_group_size);
|
||||
}
|
||||
|
||||
Arguments args_;
|
||||
CLArguments cl_args_;
|
||||
BHWC shape_;
|
||||
CLKernel kernel_;
|
||||
TensorDescriptor tensor_descriptor_;
|
||||
@ -115,7 +117,8 @@ class TensorToTensorConverter : public OpenClConverterImpl {
|
||||
src_tensor_descriptor_.storage_type = ToTensorStorageType(
|
||||
input_def.object_def.object_type, input_def.object_def.data_layout);
|
||||
src_tensor_descriptor_.data_type = input_def.object_def.data_type;
|
||||
args_.AddObjectRef(
|
||||
Arguments args;
|
||||
args.AddObjectRef(
|
||||
"src_tensor", AccessType::READ,
|
||||
absl::make_unique<TensorDescriptor>(src_tensor_descriptor_));
|
||||
|
||||
@ -123,7 +126,7 @@ class TensorToTensorConverter : public OpenClConverterImpl {
|
||||
dst_tensor_descriptor_.storage_type = ToTensorStorageType(
|
||||
output_def.object_def.object_type, output_def.object_def.data_layout);
|
||||
dst_tensor_descriptor_.data_type = output_def.object_def.data_type;
|
||||
args_.AddObjectRef(
|
||||
args.AddObjectRef(
|
||||
"dst_tensor", AccessType::WRITE,
|
||||
absl::make_unique<TensorDescriptor>(dst_tensor_descriptor_));
|
||||
|
||||
@ -152,8 +155,8 @@ class TensorToTensorConverter : public OpenClConverterImpl {
|
||||
context_ = &environment->context();
|
||||
shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h,
|
||||
input_def.dimensions.w, input_def.dimensions.c);
|
||||
RETURN_IF_ERROR(
|
||||
args_.TransformToCLCode(environment->device().info_, {}, &shader_src));
|
||||
RETURN_IF_ERROR(cl_args_.Init(environment->device().GetInfo(), {}, nullptr,
|
||||
&args, &shader_src));
|
||||
return environment->program_cache()->GetOrCreateCLKernel(
|
||||
shader_src, "tensor_to_tensor", environment->context(),
|
||||
environment->device(), &kernel_);
|
||||
@ -172,9 +175,9 @@ class TensorToTensorConverter : public OpenClConverterImpl {
|
||||
Tensor dst_tensor;
|
||||
RETURN_IF_ERROR(CreateSharedTensor(*context_, out_memory, shape_,
|
||||
dst_tensor_descriptor_, &dst_tensor));
|
||||
RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", &src_tensor));
|
||||
RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", &dst_tensor));
|
||||
RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
|
||||
RETURN_IF_ERROR(cl_args_.SetObjectRef("src_tensor", &src_tensor));
|
||||
RETURN_IF_ERROR(cl_args_.SetObjectRef("dst_tensor", &dst_tensor));
|
||||
RETURN_IF_ERROR(cl_args_.Bind(kernel_.kernel()));
|
||||
const int3 grid = int3(dst_tensor.Width() * dst_tensor.Batch(),
|
||||
dst_tensor.Height(), dst_tensor.Slices());
|
||||
const int3 work_group_size = {16, 8, 1};
|
||||
@ -203,8 +206,9 @@ class TensorToBHWCBufferConverter : public OpenClConverterImpl {
|
||||
tensor_descriptor_.layout = Layout::BHWC;
|
||||
tensor_descriptor_.storage_type = src_tensor_type;
|
||||
tensor_descriptor_.data_type = input_def.object_def.data_type;
|
||||
args_.AddObjectRef("tensor", AccessType::READ,
|
||||
absl::make_unique<TensorDescriptor>(tensor_descriptor_));
|
||||
Arguments args;
|
||||
args.AddObjectRef("tensor", AccessType::READ,
|
||||
absl::make_unique<TensorDescriptor>(tensor_descriptor_));
|
||||
|
||||
const bool need_fp16_support =
|
||||
input_def.object_def.data_type == DataType::FLOAT16 ||
|
||||
@ -244,8 +248,8 @@ class TensorToBHWCBufferConverter : public OpenClConverterImpl {
|
||||
context_ = &environment->context();
|
||||
shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h,
|
||||
input_def.dimensions.w, input_def.dimensions.c);
|
||||
RETURN_IF_ERROR(
|
||||
args_.TransformToCLCode(environment->device().info_, {}, &shader_src));
|
||||
RETURN_IF_ERROR(cl_args_.Init(environment->device().GetInfo(), {}, nullptr,
|
||||
&args, &shader_src));
|
||||
return environment->program_cache()->GetOrCreateCLKernel(
|
||||
shader_src, "tensor_to_bhwc", environment->context(),
|
||||
environment->device(), &kernel_);
|
||||
@ -300,8 +304,9 @@ class BHWCBufferToTensorConverter : public OpenClConverterImpl {
|
||||
tensor_descriptor_.layout = Layout::BHWC;
|
||||
tensor_descriptor_.storage_type = dst_tensor_type;
|
||||
tensor_descriptor_.data_type = output_def.object_def.data_type;
|
||||
args_.AddObjectRef("tensor", AccessType::WRITE,
|
||||
absl::make_unique<TensorDescriptor>(tensor_descriptor_));
|
||||
Arguments args;
|
||||
args.AddObjectRef("tensor", AccessType::WRITE,
|
||||
absl::make_unique<TensorDescriptor>(tensor_descriptor_));
|
||||
|
||||
const bool need_fp16_support =
|
||||
input_def.object_def.data_type == DataType::FLOAT16 ||
|
||||
@ -338,8 +343,8 @@ class BHWCBufferToTensorConverter : public OpenClConverterImpl {
|
||||
context_ = &environment->context();
|
||||
shape_ = BHWC(output_def.dimensions.b, output_def.dimensions.h,
|
||||
output_def.dimensions.w, output_def.dimensions.c);
|
||||
RETURN_IF_ERROR(
|
||||
args_.TransformToCLCode(environment->device().info_, {}, &shader_src));
|
||||
RETURN_IF_ERROR(cl_args_.Init(environment->device().GetInfo(), {}, nullptr,
|
||||
&args, &shader_src));
|
||||
return environment->program_cache()->GetOrCreateCLKernel(
|
||||
shader_src, "bhwc_to_tensor", environment->context(),
|
||||
environment->device(), &kernel_);
|
||||
|
@ -132,12 +132,13 @@ GPUOperation::GPUOperation(GPUOperation&& operation)
|
||||
definition_(std::move(operation.definition_)),
|
||||
src_(std::move(operation.src_)),
|
||||
dst_(std::move(operation.dst_)),
|
||||
kernel_(std::move(operation.kernel_)),
|
||||
grid_dimension_(operation.grid_dimension_),
|
||||
work_group_launch_order_(operation.work_group_launch_order_),
|
||||
grid_size_(operation.grid_size_),
|
||||
src_tensors_names_(std::move(operation.src_tensors_names_)),
|
||||
dst_tensors_names_(std::move(operation.dst_tensors_names_)),
|
||||
kernel_(std::move(operation.kernel_)),
|
||||
cl_args_(std::move(operation.cl_args_)),
|
||||
work_groups_count_(operation.work_groups_count_),
|
||||
linkable_count_(operation.linkable_count_),
|
||||
elementwise_code_(std::move(operation.elementwise_code_)) {}
|
||||
@ -155,12 +156,13 @@ GPUOperation& GPUOperation::operator=(GPUOperation&& operation) {
|
||||
definition_ = std::move(operation.definition_);
|
||||
src_ = std::move(operation.src_);
|
||||
dst_ = std::move(operation.dst_);
|
||||
kernel_ = std::move(operation.kernel_);
|
||||
std::swap(grid_dimension_, operation.grid_dimension_);
|
||||
std::swap(work_group_launch_order_, operation.work_group_launch_order_);
|
||||
std::swap(grid_size_, operation.grid_size_);
|
||||
src_tensors_names_ = std::move(operation.src_tensors_names_);
|
||||
dst_tensors_names_ = std::move(operation.dst_tensors_names_);
|
||||
kernel_ = std::move(operation.kernel_);
|
||||
cl_args_ = std::move(operation.cl_args_);
|
||||
std::swap(work_groups_count_, operation.work_groups_count_);
|
||||
std::swap(linkable_count_, operation.linkable_count_);
|
||||
elementwise_code_ = std::move(operation.elementwise_code_);
|
||||
@ -211,12 +213,12 @@ void GPUOperation::AddDstTensor(const std::string& tensor_name,
|
||||
|
||||
absl::Status GPUOperation::UpdateParams() {
|
||||
for (int i = 0; i < src_tensors_names_.size(); ++i) {
|
||||
RETURN_IF_ERROR(args_.SetObjectRef(src_tensors_names_[i], src_[i]));
|
||||
RETURN_IF_ERROR(cl_args_.SetObjectRef(src_tensors_names_[i], src_[i]));
|
||||
}
|
||||
for (int i = 0; i < dst_tensors_names_.size(); ++i) {
|
||||
RETURN_IF_ERROR(args_.SetObjectRef(dst_tensors_names_[i], dst_[i]));
|
||||
RETURN_IF_ERROR(cl_args_.SetObjectRef(dst_tensors_names_[i], dst_[i]));
|
||||
}
|
||||
RETURN_IF_ERROR(BindArguments(&args_));
|
||||
RETURN_IF_ERROR(BindArguments(&cl_args_));
|
||||
grid_size_ = GetGridSize();
|
||||
work_groups_count_ = GetWorkGroupsCount(
|
||||
grid_dimension_, grid_size_, work_group_size_, work_group_launch_order_);
|
||||
@ -244,15 +246,10 @@ absl::Status GPUOperation::AssembleCode(const DeviceInfo& device_info,
|
||||
|
||||
elementwise_code_ = "{\n" + code_ + "\n}\n" + elementwise_code_;
|
||||
code_ = GetElementWiseCode(definition_, check_src_channels_size_);
|
||||
RETURN_IF_ERROR(args_.AllocateObjects(context));
|
||||
RETURN_IF_ERROR(args_.TransformToCLCode(
|
||||
device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_));
|
||||
} else {
|
||||
RETURN_IF_ERROR(args_.AllocateObjects(context));
|
||||
RETURN_IF_ERROR(args_.TransformToCLCode(
|
||||
device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_));
|
||||
}
|
||||
return absl::OkStatus();
|
||||
return cl_args_.Init(device_info,
|
||||
{{dst_tensors_names_[0], elementwise_code_}}, context,
|
||||
&args_, &code_);
|
||||
}
|
||||
|
||||
absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
|
||||
@ -266,6 +263,8 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
|
||||
|
||||
absl::Status GPUOperation::CompileDeserialized(
|
||||
const CreationContext& creation_context) {
|
||||
RETURN_IF_ERROR(cl_args_.Init(creation_context.GetDeviceInfo(), &args_,
|
||||
creation_context.context));
|
||||
return creation_context.cache->GetOrCreateCLKernel(
|
||||
code_, "main_function", compiler_options_, *creation_context.context,
|
||||
*creation_context.device, &kernel_);
|
||||
@ -299,7 +298,7 @@ absl::Status GPUOperation::Tune(const TuningParameters& params) {
|
||||
GetWorkGroupsCount(grid_dimension_, grid_size_,
|
||||
possible_work_groups[i], work_group_launch_order_);
|
||||
}
|
||||
RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
|
||||
RETURN_IF_ERROR(cl_args_.Bind(kernel_.kernel()));
|
||||
int best_work_group_index;
|
||||
RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex(
|
||||
kernel_, *params.info, work_groups_count, possible_work_groups,
|
||||
|
@ -21,6 +21,7 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/cl/arguments.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_arguments.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
|
||||
@ -120,7 +121,7 @@ class GPUOperation {
|
||||
absl::Status UpdateParams();
|
||||
|
||||
absl::Status AddToQueue(CLCommandQueue* queue) {
|
||||
RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
|
||||
RETURN_IF_ERROR(cl_args_.Bind(kernel_.kernel()));
|
||||
return queue->Dispatch(kernel_, work_groups_count_, work_group_size_);
|
||||
}
|
||||
|
||||
@ -168,11 +169,15 @@ class GPUOperation {
|
||||
// applicable only with elementwise_ = true;
|
||||
bool check_src_channels_size_ = false;
|
||||
|
||||
// Temporary, will be resolved later
|
||||
void MoveObjectRefsFromCLToGeneric() { cl_args_.MoveObjectRefsOut(&args_); }
|
||||
void MoveObjectRefsFromGenericToCL() { cl_args_.MoveObjectRefsIn(&args_); }
|
||||
void SyncScalarValues() { cl_args_.CopyScalarValues(&args_); }
|
||||
|
||||
protected:
|
||||
friend flatbuffers::Offset<data::GPUOperation> Encode(
|
||||
const GPUOperation& op, flatbuffers::FlatBufferBuilder* builder);
|
||||
friend absl::Status Decode(CLContext* context,
|
||||
const data::GPUOperation* fb_op, GPUOperation* op);
|
||||
friend absl::Status Decode(const data::GPUOperation* fb_op, GPUOperation* op);
|
||||
|
||||
virtual absl::Status BindArguments(ArgumentsBinder* args) {
|
||||
return absl::OkStatus();
|
||||
@ -183,7 +188,6 @@ class GPUOperation {
|
||||
OperationDef definition_;
|
||||
std::vector<Tensor*> src_;
|
||||
std::vector<Tensor*> dst_;
|
||||
CLKernel kernel_;
|
||||
int grid_dimension_ = 3; // can be 1, 2 or 3
|
||||
int3 work_group_launch_order_ = int3(0, 1, 2);
|
||||
int3 grid_size_ = int3(0, 0, 0);
|
||||
@ -191,6 +195,8 @@ class GPUOperation {
|
||||
std::vector<std::string> dst_tensors_names_;
|
||||
|
||||
private:
|
||||
CLKernel kernel_;
|
||||
CLArguments cl_args_;
|
||||
int3 work_groups_count_ = int3(0, 0, 0);
|
||||
int linkable_count_ = 0;
|
||||
std::string elementwise_code_; // temporary, used during op construction
|
||||
|
@ -508,30 +508,11 @@ void Decode(const data::TensorDescWithId* fb_desc, TensorDescriptor* desc,
|
||||
*id = fb_desc->id();
|
||||
}
|
||||
|
||||
absl::Status Decode(CLContext* context, const data::Arguments* fb_args,
|
||||
Arguments* args) {
|
||||
args->shared_int4s_data_ = std::vector<int32_t>(
|
||||
fb_args->shared_int4s()->data(),
|
||||
fb_args->shared_int4s()->data() + fb_args->shared_int4s()->size());
|
||||
|
||||
args->shared_float4s_data_ = std::vector<float>(
|
||||
fb_args->shared_float4s()->data(),
|
||||
fb_args->shared_float4s()->data() + fb_args->shared_float4s()->size());
|
||||
|
||||
std::vector<float> tmp = std::vector<float>(
|
||||
fb_args->shared_half4s()->data(),
|
||||
fb_args->shared_half4s()->data() + fb_args->shared_half4s()->size());
|
||||
|
||||
args->shared_half4s_data_.resize(tmp.size());
|
||||
for (int i = 0; i < tmp.size(); ++i) {
|
||||
args->shared_half4s_data_[i] = tmp[i];
|
||||
}
|
||||
|
||||
absl::Status Decode(const data::Arguments* fb_args, Arguments* args) {
|
||||
args->int_values_.clear();
|
||||
for (auto int_values_fb : *fb_args->int_values()) {
|
||||
Arguments::IntValue value;
|
||||
value.value = int_values_fb->value();
|
||||
value.offset = int_values_fb->offset();
|
||||
value.active = int_values_fb->active();
|
||||
std::string name(int_values_fb->name()->c_str(),
|
||||
int_values_fb->name()->size());
|
||||
@ -542,7 +523,6 @@ absl::Status Decode(CLContext* context, const data::Arguments* fb_args,
|
||||
for (auto float_values_fb : *fb_args->float_values()) {
|
||||
Arguments::FloatValue value;
|
||||
value.value = float_values_fb->value();
|
||||
value.offset = float_values_fb->offset();
|
||||
value.active = float_values_fb->active();
|
||||
std::string name(float_values_fb->name()->c_str(),
|
||||
float_values_fb->name()->size());
|
||||
@ -553,9 +533,7 @@ absl::Status Decode(CLContext* context, const data::Arguments* fb_args,
|
||||
for (auto half_values_fb : *fb_args->half_values()) {
|
||||
Arguments::HalfValue value;
|
||||
value.value = half_values_fb->value();
|
||||
value.offset = half_values_fb->offset();
|
||||
value.active = half_values_fb->active();
|
||||
value.store_as_f32 = half_values_fb->store_as_f32();
|
||||
std::string name(half_values_fb->name()->c_str(),
|
||||
half_values_fb->name()->size());
|
||||
args->half_values_[name] = value;
|
||||
@ -635,9 +613,6 @@ absl::Status Decode(CLContext* context, const data::Arguments* fb_args,
|
||||
args->AddObjectRef(key, access_type,
|
||||
absl::make_unique<TensorDescriptor>(std::move(desc)));
|
||||
}
|
||||
|
||||
RETURN_IF_ERROR(args->AllocateObjects(context));
|
||||
RETURN_IF_ERROR(args->AddObjectArgs());
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
@ -649,7 +624,6 @@ flatbuffers::Offset<data::Arguments> Encode(
|
||||
data::IntValueBuilder value_builder(*builder);
|
||||
value_builder.add_name(name_fb);
|
||||
value_builder.add_value(value.second.value);
|
||||
value_builder.add_offset(value.second.offset);
|
||||
value_builder.add_active(value.second.active);
|
||||
int_values_fb.push_back(value_builder.Finish());
|
||||
}
|
||||
@ -660,7 +634,6 @@ flatbuffers::Offset<data::Arguments> Encode(
|
||||
data::FloatValueBuilder value_builder(*builder);
|
||||
value_builder.add_name(name_fb);
|
||||
value_builder.add_value(value.second.value);
|
||||
value_builder.add_offset(value.second.offset);
|
||||
value_builder.add_active(value.second.active);
|
||||
float_values_fb.push_back(value_builder.Finish());
|
||||
}
|
||||
@ -671,9 +644,7 @@ flatbuffers::Offset<data::Arguments> Encode(
|
||||
data::HalfValueBuilder value_builder(*builder);
|
||||
value_builder.add_name(name_fb);
|
||||
value_builder.add_value(value.second.value);
|
||||
value_builder.add_offset(value.second.offset);
|
||||
value_builder.add_active(value.second.active);
|
||||
value_builder.add_store_as_f32(value.second.store_as_f32);
|
||||
half_values_fb.push_back(value_builder.Finish());
|
||||
}
|
||||
|
||||
@ -681,7 +652,7 @@ flatbuffers::Offset<data::Arguments> Encode(
|
||||
buffer_objs_fb;
|
||||
for (auto& value : args.objects_) {
|
||||
const auto* buffer_desc =
|
||||
dynamic_cast<const BufferDescriptor*>(value.second.descriptor.get());
|
||||
dynamic_cast<const BufferDescriptor*>(value.second.get());
|
||||
if (!buffer_desc) continue;
|
||||
auto desc_fb = Encode(*buffer_desc, builder);
|
||||
auto key_fb = builder->CreateString(value.first);
|
||||
@ -694,7 +665,7 @@ flatbuffers::Offset<data::Arguments> Encode(
|
||||
texture2d_objs_fb;
|
||||
for (auto& value : args.objects_) {
|
||||
const auto* texture_desc =
|
||||
dynamic_cast<const Texture2DDescriptor*>(value.second.descriptor.get());
|
||||
dynamic_cast<const Texture2DDescriptor*>(value.second.get());
|
||||
if (!texture_desc) continue;
|
||||
auto desc_fb = Encode(*texture_desc, builder);
|
||||
auto key_fb = builder->CreateString(value.first);
|
||||
@ -706,8 +677,8 @@ flatbuffers::Offset<data::Arguments> Encode(
|
||||
std::vector<flatbuffers::Offset<data::TensorLinearDescriptorMapValue>>
|
||||
tensor_linear_objs_fb;
|
||||
for (auto& value : args.objects_) {
|
||||
const auto* tensor_desc = dynamic_cast<const TensorLinearDescriptor*>(
|
||||
value.second.descriptor.get());
|
||||
const auto* tensor_desc =
|
||||
dynamic_cast<const TensorLinearDescriptor*>(value.second.get());
|
||||
if (!tensor_desc) continue;
|
||||
auto desc_fb = Encode(*tensor_desc, builder);
|
||||
auto key_fb = builder->CreateString(value.first);
|
||||
@ -720,7 +691,7 @@ flatbuffers::Offset<data::Arguments> Encode(
|
||||
tensor_objs_fb;
|
||||
for (auto& value : args.objects_) {
|
||||
const auto* tensor_desc =
|
||||
dynamic_cast<const TensorDescriptor*>(value.second.descriptor.get());
|
||||
dynamic_cast<const TensorDescriptor*>(value.second.get());
|
||||
if (!tensor_desc) continue;
|
||||
auto desc_fb = Encode(*tensor_desc, builder);
|
||||
auto key_fb = builder->CreateString(value.first);
|
||||
@ -734,7 +705,7 @@ flatbuffers::Offset<data::Arguments> Encode(
|
||||
buffer_refs_fb;
|
||||
for (auto& value : args.object_refs_) {
|
||||
const auto* buffer_desc =
|
||||
dynamic_cast<const BufferDescriptor*>(value.second.descriptor.get());
|
||||
dynamic_cast<const BufferDescriptor*>(value.second.get());
|
||||
if (!buffer_desc) continue;
|
||||
auto desc_fb = Encode(*buffer_desc, builder);
|
||||
auto key_fb = builder->CreateString(value.first);
|
||||
@ -747,7 +718,7 @@ flatbuffers::Offset<data::Arguments> Encode(
|
||||
texture2d_refs_fb;
|
||||
for (auto& value : args.object_refs_) {
|
||||
const auto* texture_desc =
|
||||
dynamic_cast<const Texture2DDescriptor*>(value.second.descriptor.get());
|
||||
dynamic_cast<const Texture2DDescriptor*>(value.second.get());
|
||||
if (!texture_desc) continue;
|
||||
auto desc_fb = Encode(*texture_desc, builder);
|
||||
auto key_fb = builder->CreateString(value.first);
|
||||
@ -759,8 +730,8 @@ flatbuffers::Offset<data::Arguments> Encode(
|
||||
std::vector<flatbuffers::Offset<data::TensorLinearDescriptorMapValue>>
|
||||
tensor_linear_refs_fb;
|
||||
for (auto& value : args.object_refs_) {
|
||||
const auto* tensor_desc = dynamic_cast<const TensorLinearDescriptor*>(
|
||||
value.second.descriptor.get());
|
||||
const auto* tensor_desc =
|
||||
dynamic_cast<const TensorLinearDescriptor*>(value.second.get());
|
||||
if (!tensor_desc) continue;
|
||||
auto desc_fb = Encode(*tensor_desc, builder);
|
||||
auto key_fb = builder->CreateString(value.first);
|
||||
@ -773,7 +744,7 @@ flatbuffers::Offset<data::Arguments> Encode(
|
||||
tensor_refs_fb;
|
||||
for (auto& value : args.object_refs_) {
|
||||
const auto* tensor_desc =
|
||||
dynamic_cast<const TensorDescriptor*>(value.second.descriptor.get());
|
||||
dynamic_cast<const TensorDescriptor*>(value.second.get());
|
||||
if (!tensor_desc) continue;
|
||||
auto desc_fb = Encode(*tensor_desc, builder);
|
||||
auto key_fb = builder->CreateString(value.first);
|
||||
@ -783,14 +754,6 @@ flatbuffers::Offset<data::Arguments> Encode(
|
||||
tensor_refs_fb.push_back(ten_map_builder.Finish());
|
||||
}
|
||||
|
||||
auto shared_int4s_data_fb = builder->CreateVector(args.shared_int4s_data_);
|
||||
auto shared_float4s_data_fb =
|
||||
builder->CreateVector(args.shared_float4s_data_);
|
||||
std::vector<float> tmp(args.shared_half4s_data_.size());
|
||||
for (int i = 0; i < tmp.size(); ++i) {
|
||||
tmp[i] = args.shared_half4s_data_[i];
|
||||
}
|
||||
auto shared_half4s_data_fb = builder->CreateVector(tmp);
|
||||
auto int_values_fb_vec = builder->CreateVector(int_values_fb);
|
||||
auto float_values_fb_vec = builder->CreateVector(float_values_fb);
|
||||
auto half_values_fb_vec = builder->CreateVector(half_values_fb);
|
||||
@ -803,9 +766,6 @@ flatbuffers::Offset<data::Arguments> Encode(
|
||||
auto tensor_linear_refs_fb_vec = builder->CreateVector(tensor_linear_refs_fb);
|
||||
auto tensor_refs_fb_vec = builder->CreateVector(tensor_refs_fb);
|
||||
data::ArgumentsBuilder arguments_builder(*builder);
|
||||
arguments_builder.add_shared_int4s(shared_int4s_data_fb);
|
||||
arguments_builder.add_shared_float4s(shared_float4s_data_fb);
|
||||
arguments_builder.add_shared_half4s(shared_half4s_data_fb);
|
||||
arguments_builder.add_int_values(int_values_fb_vec);
|
||||
arguments_builder.add_float_values(float_values_fb_vec);
|
||||
arguments_builder.add_half_values(half_values_fb_vec);
|
||||
@ -820,9 +780,8 @@ flatbuffers::Offset<data::Arguments> Encode(
|
||||
return arguments_builder.Finish();
|
||||
}
|
||||
|
||||
absl::Status Decode(CLContext* context, const data::GPUOperation* fb_op,
|
||||
GPUOperation* op) {
|
||||
RETURN_IF_ERROR(Decode(context, fb_op->arguments(), &op->args_));
|
||||
absl::Status Decode(const data::GPUOperation* fb_op, GPUOperation* op) {
|
||||
RETURN_IF_ERROR(Decode(fb_op->arguments(), &op->args_));
|
||||
op->code_ = std::string(fb_op->code()->c_str(), fb_op->code()->size());
|
||||
op->work_group_size_.x = fb_op->work_group_size()->x();
|
||||
op->work_group_size_.y = fb_op->work_group_size()->y();
|
||||
@ -935,10 +894,9 @@ flatbuffers::Offset<data::CLNode> Encode(
|
||||
return node_builder.Finish();
|
||||
}
|
||||
|
||||
absl::Status Decode(CLContext* context, const data::CLNode* fb_node,
|
||||
CLNode* node) {
|
||||
absl::Status Decode(const data::CLNode* fb_node, CLNode* node) {
|
||||
GPUOperation op;
|
||||
RETURN_IF_ERROR(Decode(context, fb_node->gpu_op(), &op));
|
||||
RETURN_IF_ERROR(Decode(fb_node->gpu_op(), &op));
|
||||
node->operation = absl::make_unique<GPUOperation>(std::move(op));
|
||||
for (auto in_fb : *fb_node->input_ids()) {
|
||||
node->inputs.push_back(in_fb);
|
||||
@ -1006,8 +964,7 @@ flatbuffers::Offset<data::InferenceContext> Encode(
|
||||
return inf_builder.Finish();
|
||||
}
|
||||
|
||||
absl::Status Decode(CLContext* context,
|
||||
const data::InferenceContext* fb_inference,
|
||||
absl::Status Decode(const data::InferenceContext* fb_inference,
|
||||
InferenceContext* inference) {
|
||||
inference->need_flush_ = fb_inference->need_flush();
|
||||
inference->flush_periodically_ = fb_inference->flush_periodically();
|
||||
@ -1019,7 +976,7 @@ absl::Status Decode(CLContext* context,
|
||||
inference->nodes_.resize(fb_inference->nodes()->size());
|
||||
int counter = 0;
|
||||
for (auto node_fb : *fb_inference->nodes()) {
|
||||
RETURN_IF_ERROR(Decode(context, node_fb, &inference->nodes_[counter]));
|
||||
RETURN_IF_ERROR(Decode(node_fb, &inference->nodes_[counter]));
|
||||
counter++;
|
||||
}
|
||||
|
||||
|
@ -36,22 +36,18 @@ table IntValue {
|
||||
name:string;
|
||||
value:int32;
|
||||
active:bool;
|
||||
offset:uint32;
|
||||
}
|
||||
|
||||
table FloatValue {
|
||||
name:string;
|
||||
value:float;
|
||||
active:bool;
|
||||
offset:uint32;
|
||||
}
|
||||
|
||||
table HalfValue {
|
||||
name:string;
|
||||
value:float;
|
||||
active:bool;
|
||||
store_as_f32:bool;
|
||||
offset:uint32;
|
||||
}
|
||||
|
||||
enum AccessType : byte {
|
||||
@ -172,13 +168,8 @@ table TensorDescriptorMapValue {
|
||||
|
||||
table Arguments {
|
||||
int_values:[IntValue];
|
||||
shared_int4s:[int32];
|
||||
|
||||
float_values:[FloatValue];
|
||||
shared_float4s:[float];
|
||||
|
||||
half_values:[HalfValue];
|
||||
shared_half4s:[float];
|
||||
|
||||
buffer_refs:[BufferDescriptorMapValue];
|
||||
texture2d_refs:[Texture2DDescriptorMapValue];
|
||||
|
Loading…
Reference in New Issue
Block a user