Arguments splitted on Arguments and CLArguments.

PiperOrigin-RevId: 339089579
Change-Id: I9a4875b18195859c9590f79b975afc4f881d97fe
This commit is contained in:
Raman Sarokin 2020-10-26 12:10:53 -07:00 committed by TensorFlower Gardener
parent 589d9fe124
commit ae6c9dae43
14 changed files with 1121 additions and 1009 deletions

View File

@ -52,12 +52,8 @@ cc_library(
srcs = ["arguments.cc"],
hdrs = ["arguments.h"],
deps = [
":cl_device",
":gpu_object",
":opencl_wrapper",
":serialization_cc_fbs",
":tensor_type",
":util",
"//tensorflow/lite/delegates/gpu/common:access_type",
"//tensorflow/lite/delegates/gpu/common:data_type",
"//tensorflow/lite/delegates/gpu/common:status",
@ -67,27 +63,6 @@ cc_library(
],
)
cc_test(
name = "arguments_test",
srcs = ["arguments_test.cc"],
linkstatic = True,
tags = tf_gpu_tests_tags() + [
"linux",
"local",
],
deps = [
":arguments",
":buffer",
":device_info",
":gpu_object",
":tensor",
":tensor_type",
"//tensorflow/lite/delegates/gpu/common:data_type",
"@com_google_absl//absl/strings",
"@com_google_googletest//:gtest_main",
],
)
cc_library(
name = "buffer",
srcs = ["buffer.cc"],
@ -132,6 +107,45 @@ cc_library(
],
)
cc_library(
name = "cl_arguments",
srcs = ["cl_arguments.cc"],
hdrs = ["cl_arguments.h"],
deps = [
":arguments",
":cl_context",
":device_info",
":gpu_object",
":tensor",
":tensor_type",
"//tensorflow/lite/delegates/gpu/common:access_type",
"//tensorflow/lite/delegates/gpu/common:data_type",
"//tensorflow/lite/delegates/gpu/common:status",
"//tensorflow/lite/delegates/gpu/common:types",
"//tensorflow/lite/delegates/gpu/common:util",
"@com_google_absl//absl/strings",
],
)
cc_test(
name = "cl_arguments_test",
srcs = ["cl_arguments_test.cc"],
linkstatic = True,
tags = tf_gpu_tests_tags() + [
"linux",
"local",
],
deps = [
":arguments",
":buffer",
":cl_arguments",
":device_info",
":gpu_object",
"@com_google_absl//absl/strings",
"@com_google_googletest//:gtest_main",
],
)
cc_library(
name = "cl_command_queue",
srcs = ["cl_command_queue.cc"],

View File

@ -16,12 +16,8 @@ limitations under the License.
#include "tensorflow/lite/delegates/gpu/cl/arguments.h"
#include "absl/strings/ascii.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_replace.h"
#include "absl/strings/str_split.h"
#include "absl/strings/substitute.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
namespace tflite {
@ -42,75 +38,17 @@ std::string GetNextWord(const std::string& code, size_t first_position) {
return code.substr(first_position, pos - first_position);
}
size_t FindEnclosingBracket(const std::string& text, size_t first_pos,
char bracket) {
const std::map<char, char> brackets = {
{'(', ')'},
{'{', '}'},
{'[', ']'},
{'<', '>'},
};
char b_open = bracket;
auto it = brackets.find(b_open);
if (it == brackets.end()) {
return -1;
}
char b_close = it->second;
size_t pos = first_pos;
int opened = 1;
int closed = 0;
while (opened != closed && pos < text.size()) {
if (text[pos] == b_open) {
opened++;
} else if (text[pos] == b_close) {
closed++;
bool HasWord(const std::string& word, const std::string& text) {
size_t pos = text.find(word);
while (pos != std::string::npos) {
char prev = pos == 0 ? '.' : text[pos - 1];
char next = pos + word.size() < text.size() ? text[pos + word.size()] : '.';
if (!IsWordSymbol(prev) & !IsWordSymbol(next)) {
return true;
}
pos++;
}
if (opened == closed) {
return pos;
} else {
return -1;
}
}
absl::Status ParseArgsInsideBrackets(const std::string& text,
size_t open_bracket_pos,
size_t* close_bracket_pos,
std::vector<std::string>* args) {
*close_bracket_pos =
FindEnclosingBracket(text, open_bracket_pos + 1, text[open_bracket_pos]);
if (*close_bracket_pos == -1) {
return absl::NotFoundError("Not found enclosing bracket");
}
std::string str_args = text.substr(open_bracket_pos + 1,
*close_bracket_pos - open_bracket_pos - 2);
std::vector<absl::string_view> words = absl::StrSplit(str_args, ',');
args->reserve(words.size());
for (const auto& word : words) {
absl::string_view arg = absl::StripAsciiWhitespace(word);
if (!arg.empty()) {
args->push_back(std::string(arg));
}
}
return absl::OkStatus();
}
void ReplaceAllWords(const std::string& old_word, const std::string& new_word,
std::string* str) {
size_t position = str->find(old_word);
while (position != std::string::npos) {
char prev = position == 0 ? '.' : (*str)[position - 1];
char next = position + old_word.size() < str->size()
? (*str)[position + old_word.size()]
: '.';
if (IsWordSymbol(prev) || IsWordSymbol(next)) {
position = str->find(old_word, position + 1);
continue;
}
str->replace(position, old_word.size(), new_word);
position = str->find(old_word, position + new_word.size());
pos = text.find(word, pos + 1);
}
return false;
}
std::string RenameArg(const std::vector<std::string>& object_names,
@ -127,91 +65,8 @@ std::string RenameArg(const std::vector<std::string>& object_names,
return arg_name + postfix;
}
void AppendArgument(const std::string& arg, std::string* args) {
if (!args->empty()) {
absl::StrAppend(args, ",\n ");
}
absl::StrAppend(args, arg);
}
std::string GetImageModifier(AccessType access) {
switch (access) {
case AccessType::READ:
return "__read_only";
case AccessType::WRITE:
return "__write_only";
case AccessType::READ_WRITE:
return "__read_write";
}
}
std::string GetDefaultSamplers(const DeviceInfo& device_info) {
std::string result;
result +=
"__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | "
"CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n";
if (device_info.IsAdreno3xx()) {
// Unfortunately, CLK_ADDRESS_CLAMP is very slow on Adreno3xx and
// we can observe huge register overhead when compared to other modes.
// While using CLK_ADDRESS_NONE with out-of-range image coordinates is
// undefined in the OpenCL specification, we have observed that
// CLK_ADDRESS_NONE works like CLK_ADDRESS_CLAMP for out-of-range image
// coordinates for RGBA F16/F32 textures on Adreno3xx devices. Using
// CLK_ADDRESS_NONE is significantly faster than CLK_ADDRESS_CLAMP on Adreno
// 3xx.
result +=
"__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | "
"CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n";
} else {
result +=
"__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | "
"CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n";
}
return result;
}
} // namespace
// Static
constexpr char Arguments::kArgsPrefix[];
Arguments::Arguments(Arguments&& args)
: int_values_(std::move(args.int_values_)),
shared_int4s_data_(std::move(args.shared_int4s_data_)),
float_values_(std::move(args.float_values_)),
shared_float4s_data_(std::move(args.shared_float4s_data_)),
half_values_(std::move(args.half_values_)),
shared_half4s_data_(std::move(args.shared_half4s_data_)),
buffers_(std::move(args.buffers_)),
images2d_(std::move(args.images2d_)),
image2d_arrays_(std::move(args.image2d_arrays_)),
images3d_(std::move(args.images3d_)),
image_buffers_(std::move(args.image_buffers_)),
custom_memories_(std::move(args.custom_memories_)),
object_refs_(std::move(args.object_refs_)),
objects_(std::move(args.objects_)) {}
Arguments& Arguments::operator=(Arguments&& args) {
if (this != &args) {
int_values_ = std::move(args.int_values_);
shared_int4s_data_ = std::move(args.shared_int4s_data_);
float_values_ = std::move(args.float_values_);
shared_float4s_data_ = std::move(args.shared_float4s_data_);
half_values_ = std::move(args.half_values_);
shared_half4s_data_ = std::move(args.shared_half4s_data_);
buffers_ = std::move(args.buffers_);
images2d_ = std::move(args.images2d_);
image2d_arrays_ = std::move(args.image2d_arrays_);
images3d_ = std::move(args.images3d_);
image_buffers_ = std::move(args.image_buffers_);
custom_memories_ = std::move(args.custom_memories_);
object_refs_ = std::move(args.object_refs_);
objects_ = std::move(args.objects_);
}
return *this;
}
void Arguments::AddFloat(const std::string& name, float value) {
float_values_[name].value = value;
}
@ -221,34 +76,6 @@ void Arguments::AddHalf(const std::string& name, half value) {
void Arguments::AddInt(const std::string& name, int value) {
int_values_[name].value = value;
}
void Arguments::AddBuffer(const std::string& name,
const GPUBufferDescriptor& desc) {
buffers_[name] = desc;
}
void Arguments::AddImage2D(const std::string& name,
const GPUImage2DDescriptor& desc) {
images2d_[name] = desc;
}
void Arguments::AddImage2DArray(const std::string& name,
const GPUImage2DArrayDescriptor& desc) {
image2d_arrays_[name] = desc;
}
void Arguments::AddImage3D(const std::string& name,
const GPUImage3DDescriptor& desc) {
images3d_[name] = desc;
}
void Arguments::AddImageBuffer(const std::string& name,
const GPUImageBufferDescriptor& desc) {
image_buffers_[name] = desc;
}
void Arguments::AddCustomMemory(const std::string& name,
const GPUCustomMemoryDescriptor& desc) {
custom_memories_[name] = desc;
}
void Arguments::AddObjectRef(const std::string& name, AccessType access_type,
GPUObjectDescriptorPtr&& descriptor_ptr) {
@ -259,188 +86,12 @@ void Arguments::AddObjectRef(const std::string& name, AccessType access_type,
void Arguments::AddObject(const std::string& name,
GPUObjectDescriptorPtr&& descriptor_ptr) {
descriptor_ptr->SetAccess(AccessType::READ);
objects_[name] = {nullptr, std::move(descriptor_ptr)};
}
void Arguments::AddGPUResources(const std::string& name,
const GPUResources& resources) {
for (const auto& r : resources.ints) {
AddInt(absl::StrCat(name, "_", r));
}
for (const auto& r : resources.floats) {
AddFloat(absl::StrCat(name, "_", r));
}
for (const auto& r : resources.buffers) {
AddBuffer(absl::StrCat(name, "_", r.first), r.second);
}
for (const auto& r : resources.images2d) {
AddImage2D(absl::StrCat(name, "_", r.first), r.second);
}
for (const auto& r : resources.image2d_arrays) {
AddImage2DArray(absl::StrCat(name, "_", r.first), r.second);
}
for (const auto& r : resources.images3d) {
AddImage3D(absl::StrCat(name, "_", r.first), r.second);
}
for (const auto& r : resources.image_buffers) {
AddImageBuffer(absl::StrCat(name, "_", r.first), r.second);
}
for (const auto& r : resources.custom_memories) {
AddCustomMemory(absl::StrCat(name, "_", r.first), r.second);
}
}
absl::Status Arguments::SetInt(const std::string& name, int value) {
auto it = int_values_.find(name);
if (it == int_values_.end()) {
return absl::NotFoundError(
absl::StrCat("No int argument with name - ", name));
}
it->second.value = value;
if (it->second.active) {
shared_int4s_data_[it->second.offset] = value;
}
return absl::OkStatus();
}
absl::Status Arguments::SetFloat(const std::string& name, float value) {
auto it = float_values_.find(name);
if (it == float_values_.end()) {
return absl::NotFoundError(
absl::StrCat("No float argument with name - ", name));
}
it->second.value = value;
if (it->second.active) {
shared_float4s_data_[it->second.offset] = value;
}
return absl::OkStatus();
}
absl::Status Arguments::SetHalf(const std::string& name, half value) {
auto it = half_values_.find(name);
if (it == half_values_.end()) {
return absl::NotFoundError(
absl::StrCat("No half argument with name - ", name));
}
it->second.value = value;
if (it->second.active) {
if (it->second.store_as_f32) {
shared_float4s_data_[it->second.offset] = value;
} else {
shared_half4s_data_[it->second.offset] = value;
}
}
return absl::OkStatus();
}
absl::Status Arguments::SetImage2D(const std::string& name, cl_mem memory) {
auto it = images2d_.find(name);
if (it == images2d_.end()) {
return absl::NotFoundError(
absl::StrCat("No image2D argument with name - ", name));
}
it->second.memory = memory;
return absl::OkStatus();
}
absl::Status Arguments::SetBuffer(const std::string& name, cl_mem memory) {
auto it = buffers_.find(name);
if (it == buffers_.end()) {
return absl::NotFoundError(
absl::StrCat("No buffer argument with name - ", name));
}
it->second.memory = memory;
return absl::OkStatus();
}
absl::Status Arguments::SetImage2DArray(const std::string& name,
cl_mem memory) {
auto it = image2d_arrays_.find(name);
if (it == image2d_arrays_.end()) {
return absl::NotFoundError(
absl::StrCat("No image2D array argument with name - ", name));
}
it->second.memory = memory;
return absl::OkStatus();
}
absl::Status Arguments::SetImage3D(const std::string& name, cl_mem memory) {
auto it = images3d_.find(name);
if (it == images3d_.end()) {
return absl::NotFoundError(
absl::StrCat("No image3D argument with name - ", name));
}
it->second.memory = memory;
return absl::OkStatus();
}
absl::Status Arguments::SetImageBuffer(const std::string& name, cl_mem memory) {
auto it = image_buffers_.find(name);
if (it == image_buffers_.end()) {
return absl::NotFoundError(
absl::StrCat("No image buffer argument with name - ", name));
}
it->second.memory = memory;
return absl::OkStatus();
}
absl::Status Arguments::SetCustomMemory(const std::string& name,
cl_mem memory) {
auto it = custom_memories_.find(name);
if (it == custom_memories_.end()) {
return absl::NotFoundError(
absl::StrCat("No custom memory argument with name - ", name));
}
it->second.memory = memory;
return absl::OkStatus();
}
absl::Status Arguments::SetObjectRef(const std::string& name,
const GPUObject* object) {
auto it = object_refs_.find(name);
if (it == object_refs_.end()) {
return absl::NotFoundError(
absl::StrCat("No object ref with name - ", name));
}
GPUResourcesWithValue resources;
RETURN_IF_ERROR(
object->GetGPUResources(it->second.descriptor.get(), &resources));
return SetGPUResources(name, resources);
}
absl::Status Arguments::SetGPUResources(
const std::string& name, const GPUResourcesWithValue& resources) {
for (const auto& r : resources.ints) {
RETURN_IF_ERROR(SetInt(absl::StrCat(name, "_", r.first), r.second));
}
for (const auto& r : resources.floats) {
RETURN_IF_ERROR(SetFloat(absl::StrCat(name, "_", r.first), r.second));
}
for (const auto& r : resources.buffers) {
RETURN_IF_ERROR(SetBuffer(absl::StrCat(name, "_", r.first), r.second));
}
for (const auto& r : resources.images2d) {
RETURN_IF_ERROR(SetImage2D(absl::StrCat(name, "_", r.first), r.second));
}
for (const auto& r : resources.image2d_arrays) {
RETURN_IF_ERROR(
SetImage2DArray(absl::StrCat(name, "_", r.first), r.second));
}
for (const auto& r : resources.images3d) {
RETURN_IF_ERROR(SetImage3D(absl::StrCat(name, "_", r.first), r.second));
}
for (const auto& r : resources.image_buffers) {
RETURN_IF_ERROR(SetImageBuffer(absl::StrCat(name, "_", r.first), r.second));
}
for (const auto& r : resources.custom_memories) {
RETURN_IF_ERROR(
SetCustomMemory(absl::StrCat(name, "_", r.first), r.second));
}
return absl::OkStatus();
objects_[name] = {std::move(descriptor_ptr)};
}
void Arguments::RenameArgs(const std::string& postfix,
std::string* code) const {
static constexpr char kArgsPrefix[] = "args.";
size_t next_position = code->find(kArgsPrefix);
while (next_position != std::string::npos) {
size_t arg_pos = next_position + strlen(kArgsPrefix);
@ -460,7 +111,7 @@ absl::Status Arguments::Merge(Arguments&& args, const std::string& postfix) {
return absl::InvalidArgumentError(
absl::StrCat("Object reference name collision. Name - ", name));
}
object_refs_[name] = {std::move(v.second.descriptor)};
object_refs_[name] = {std::move(v.second)};
}
for (auto& v : args.objects_) {
object_names.push_back(v.first);
@ -469,8 +120,7 @@ absl::Status Arguments::Merge(Arguments&& args, const std::string& postfix) {
return absl::InvalidArgumentError(
absl::StrCat("Object name collision. Name - ", name));
}
objects_[name] = {std::move(v.second.obj_ptr),
std::move(v.second.descriptor)};
objects_[name] = {std::move(v.second)};
}
for (const auto& v : args.int_values_) {
AddInt(RenameArg(object_names, postfix, v.first), v.second.value);
@ -481,412 +131,26 @@ absl::Status Arguments::Merge(Arguments&& args, const std::string& postfix) {
for (const auto& v : args.half_values_) {
AddHalf(RenameArg(object_names, postfix, v.first), v.second.value);
}
for (const auto& v : args.buffers_) {
AddBuffer(RenameArg(object_names, postfix, v.first), v.second);
}
for (const auto& v : args.images2d_) {
AddImage2D(RenameArg(object_names, postfix, v.first), v.second);
}
for (const auto& v : args.image2d_arrays_) {
AddImage2DArray(RenameArg(object_names, postfix, v.first), v.second);
}
for (const auto& v : args.images3d_) {
AddImage3D(RenameArg(object_names, postfix, v.first), v.second);
}
for (const auto& v : args.image_buffers_) {
AddImageBuffer(RenameArg(object_names, postfix, v.first), v.second);
}
for (const auto& v : args.custom_memories_) {
AddCustomMemory(RenameArg(object_names, postfix, v.first), v.second);
}
return absl::OkStatus();
}
absl::Status Arguments::TransformToCLCode(
const DeviceInfo& device_info,
const std::map<std::string, std::string>& linkables, std::string* code) {
RETURN_IF_ERROR(AddObjectArgs());
RETURN_IF_ERROR(ResolveSelectorsPass(linkables, code));
ResolveArgsPass(device_info, code);
*code = absl::Substitute(*code, GetListOfArgs());
*code = GetDefaultSamplers(device_info) + *code;
return absl::OkStatus();
}
std::string Arguments::GetListOfArgs() {
std::string result;
for (auto& t : buffers_) {
const std::string type_name =
t.second.data_type == DataType::FLOAT32 ? "float" : "half";
std::string attributes;
for (const auto& attr : t.second.attributes) {
attributes += absl::StrCat(" __attribute__((", attr, "))");
}
AppendArgument(
absl::StrCat(MemoryTypeToCLType(t.second.memory_type), " ",
ToCLDataType(t.second.data_type, t.second.element_size),
"* ", t.first, attributes),
&result);
}
for (auto& t : image_buffers_) {
AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type),
" image1d_buffer_t ", t.first),
&result);
}
for (auto& t : images2d_) {
AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type),
" image2d_t ", t.first),
&result);
}
for (auto& t : image2d_arrays_) {
AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type),
" image2d_array_t ", t.first),
&result);
}
for (auto& t : images3d_) {
AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type),
" image3d_t ", t.first),
&result);
}
for (auto& t : custom_memories_) {
AppendArgument(absl::StrCat(t.second.type_name, " ", t.first), &result);
}
for (int i = 0; i < shared_int4s_data_.size() / 4; ++i) {
AppendArgument(absl::StrCat("int4 shared_int4_", i), &result);
}
for (int i = 0; i < shared_float4s_data_.size() / 4; ++i) {
AppendArgument(absl::StrCat("float4 shared_float4_", i), &result);
}
for (int i = 0; i < shared_half4s_data_.size() / 4; ++i) {
AppendArgument(absl::StrCat("half4 shared_half4_", i), &result);
}
return result;
}
absl::Status Arguments::Bind(cl_kernel kernel, int offset) {
for (auto& t : buffers_) {
const int error_code =
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
if (error_code != CL_SUCCESS) {
return absl::UnknownError(absl::StrCat(
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
"(at index - ", offset, ")"));
}
offset++;
}
for (auto& t : image_buffers_) {
const int error_code =
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
if (error_code != CL_SUCCESS) {
return absl::UnknownError(absl::StrCat(
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
"(at index - ", offset, ")"));
}
offset++;
}
for (auto& t : images2d_) {
const int error_code =
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
if (error_code != CL_SUCCESS) {
return absl::UnknownError(absl::StrCat(
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
"(at index - ", offset, ")"));
}
offset++;
}
for (auto& t : image2d_arrays_) {
const int error_code =
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
if (error_code != CL_SUCCESS) {
return absl::UnknownError(absl::StrCat(
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
"(at index - ", offset, ")"));
}
offset++;
}
for (auto& t : images3d_) {
const int error_code =
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
if (error_code != CL_SUCCESS) {
return absl::UnknownError(absl::StrCat(
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
"(at index - ", offset, ")"));
}
offset++;
}
for (auto& t : custom_memories_) {
const int error_code =
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
if (error_code != CL_SUCCESS) {
return absl::UnknownError(absl::StrCat(
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
"(at index - ", offset, ")"));
}
offset++;
}
for (int i = 0; i < shared_int4s_data_.size() / 4; ++i) {
const int error_code = clSetKernelArg(kernel, offset, sizeof(int32_t) * 4,
&shared_int4s_data_[i * 4]);
if (error_code != CL_SUCCESS) {
return absl::UnknownError(absl::StrCat(
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
"(at index - ", offset, ")"));
}
offset++;
}
for (int i = 0; i < shared_float4s_data_.size() / 4; ++i) {
const int error_code = clSetKernelArg(kernel, offset, sizeof(int32_t) * 4,
&shared_float4s_data_[i * 4]);
if (error_code != CL_SUCCESS) {
return absl::UnknownError(absl::StrCat(
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
"(at index - ", offset, ")"));
}
offset++;
}
for (int i = 0; i < shared_half4s_data_.size() / 4; ++i) {
const int error_code = clSetKernelArg(kernel, offset, sizeof(int16_t) * 4,
&shared_half4s_data_[i * 4]);
if (error_code != CL_SUCCESS) {
return absl::UnknownError(absl::StrCat(
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
"(at index - ", offset, ")"));
}
offset++;
}
return absl::OkStatus();
}
std::string Arguments::AddActiveArgument(const std::string& arg_name,
bool use_f32_for_halfs) {
{
auto it = int_values_.find(arg_name);
if (it != int_values_.end()) {
int int_index;
if (it->second.active) {
int_index = it->second.offset;
} else {
it->second.active = true;
it->second.offset = shared_int4s_data_.size();
int_index = it->second.offset;
shared_int4s_data_.push_back(it->second.value);
}
std::string index = std::to_string(int_index / 4);
std::string postfixes[4] = {"x", "y", "z", "w"};
return "shared_int4_" + index + "." + postfixes[int_index % 4];
}
}
{
auto it = float_values_.find(arg_name);
if (it != float_values_.end()) {
int float_index;
if (it->second.active) {
float_index = it->second.offset;
} else {
it->second.active = true;
it->second.offset = shared_float4s_data_.size();
float_index = it->second.offset;
shared_float4s_data_.push_back(it->second.value);
}
std::string index = std::to_string(float_index / 4);
std::string postfixes[4] = {"x", "y", "z", "w"};
return "shared_float4_" + index + "." + postfixes[float_index % 4];
}
}
{
auto it = half_values_.find(arg_name);
if (it != half_values_.end()) {
int half_index;
if (it->second.active) {
half_index = it->second.offset;
} else {
it->second.active = true;
if (use_f32_for_halfs) {
it->second.store_as_f32 = true;
it->second.offset = shared_float4s_data_.size();
shared_float4s_data_.push_back(it->second.value);
} else {
it->second.offset = shared_half4s_data_.size();
shared_half4s_data_.push_back(it->second.value);
}
half_index = it->second.offset;
}
std::string index = std::to_string(half_index / 4);
std::string postfixes[4] = {"x", "y", "z", "w"};
if (it->second.store_as_f32) {
return "(half)(shared_float4_" + index + "." +
postfixes[half_index % 4] + ")";
}
return "shared_half4_" + index + "." + postfixes[half_index % 4];
}
}
return arg_name;
}
void Arguments::ResolveArgsPass(const DeviceInfo& device_info,
std::string* code) {
bool use_f32_for_half_arguments = device_info.IsPowerVR();
size_t position = 0;
size_t next_position = code->find(kArgsPrefix);
while (next_position != std::string::npos) {
size_t arg_pos = next_position;
next_position += strlen(kArgsPrefix);
std::string object_name = GetNextWord(*code, next_position);
std::string new_name =
AddActiveArgument(object_name, use_f32_for_half_arguments);
code->replace(arg_pos, object_name.size() + strlen(kArgsPrefix), new_name);
position = arg_pos + new_name.size();
next_position = code->find(kArgsPrefix, position);
}
int shared_int4s_aligned_size = AlignByN(shared_int4s_data_.size(), 4);
shared_int4s_data_.resize(shared_int4s_aligned_size);
int shared_float4s_aligned_size = AlignByN(shared_float4s_data_.size(), 4);
shared_float4s_data_.resize(shared_float4s_aligned_size);
int shared_half4s_aligned_size = AlignByN(shared_half4s_data_.size(), 4);
shared_half4s_data_.resize(shared_half4s_aligned_size);
}
void Arguments::ResolveObjectNames(const std::string& object_name,
const std::vector<std::string>& member_names,
std::string* code) {
for (const auto& member_name : member_names) {
const std::string new_name = kArgsPrefix + object_name + "_" + member_name;
ReplaceAllWords(member_name, new_name, code);
}
}
GPUObjectDescriptor* Arguments::GetObjectDescriptor(
const std::string& object_name) const {
{
auto it = object_refs_.find(object_name);
if (it != object_refs_.end()) {
return it->second.descriptor.get();
}
}
{
auto it = objects_.find(object_name);
if (it != objects_.end()) {
return it->second.descriptor.get();
}
}
return nullptr;
}
absl::Status Arguments::ResolveSelector(
const std::map<std::string, std::string>& linkables,
const std::string& object_name, const std::string& selector,
const std::vector<std::string>& args,
const std::vector<std::string>& template_args, std::string* result) {
const GPUObjectDescriptor* desc_ptr = GetObjectDescriptor(object_name);
if (!desc_ptr) {
return absl::NotFoundError(
absl::StrCat("No object with name - ", object_name));
}
auto names = desc_ptr->GetGPUResources().GetNames();
const auto* tensor_desc = dynamic_cast<const TensorDescriptor*>(desc_ptr);
if (tensor_desc && selector == "Write") {
auto it = linkables.find(object_name);
if (it != linkables.end()) {
if (desc_ptr->GetAccess() != AccessType::WRITE &&
desc_ptr->GetAccess() != AccessType::READ_WRITE) {
return absl::FailedPreconditionError(absl::StrCat(
"Object with name - ", object_name, " should have Write access."));
}
std::string value_name, x_coord, y_coord, s_coord;
RETURN_IF_ERROR(tensor_desc->GetLinkingContextFromWriteSelector(
args, &value_name, &x_coord, &y_coord, &s_coord));
// x_coord can have batch size property of link_object
ResolveObjectNames(object_name, names, &x_coord);
*result = it->second;
ReplaceAllWords("in_out_value", value_name, result);
ReplaceAllWords("X_COORD", x_coord, result);
ReplaceAllWords("Y_COORD", y_coord, result);
ReplaceAllWords("S_COORD", s_coord, result);
RETURN_IF_ERROR(ResolveSelectorsPass({}, result));
}
}
std::string patch;
RETURN_IF_ERROR(
desc_ptr->PerformSelector(selector, args, template_args, &patch));
ResolveObjectNames(object_name, names, &patch);
*result += patch;
return absl::OkStatus();
}
absl::Status Arguments::ResolveSelectorsPass(
const std::map<std::string, std::string>& linkables, std::string* code) {
std::string result;
size_t position = 0;
size_t next_position = code->find(kArgsPrefix);
while (next_position != std::string::npos) {
size_t arg_pos = next_position;
next_position += strlen(kArgsPrefix);
std::string object_name = GetNextWord(*code, next_position);
char next = (*code)[next_position + object_name.size()];
if (next == '.') {
next_position += object_name.size() + 1;
std::string selector_name = GetNextWord(*code, next_position);
next_position += selector_name.size();
next = (*code)[next_position];
std::vector<std::string> template_args;
if (next == '<') {
size_t close_bracket_pos;
RETURN_IF_ERROR(ParseArgsInsideBrackets(
*code, next_position, &close_bracket_pos, &template_args));
next_position = close_bracket_pos;
next = (*code)[next_position];
}
if (next != '(') {
return absl::NotFoundError(absl::StrCat(
"Expected ( after ", object_name, ".", selector_name, " call"));
}
std::vector<std::string> args;
size_t close_bracket_pos;
RETURN_IF_ERROR(ParseArgsInsideBrackets(*code, next_position,
&close_bracket_pos, &args));
for (auto& arg : args) {
RETURN_IF_ERROR(ResolveSelectorsPass({}, &arg));
}
std::string patch;
RETURN_IF_ERROR(ResolveSelector(linkables, object_name, selector_name,
args, template_args, &patch));
code->replace(arg_pos, close_bracket_pos - arg_pos, patch);
position = arg_pos + patch.size();
} else {
position = arg_pos + strlen(kArgsPrefix);
}
next_position = code->find(kArgsPrefix, position);
}
return absl::OkStatus();
}
absl::Status Arguments::AllocateObjects(CLContext* context) {
for (auto& t : objects_) {
RETURN_IF_ERROR(
t.second.descriptor->CreateGPUObject(context, &t.second.obj_ptr));
}
return absl::OkStatus();
}
void Arguments::ReleaseCPURepresentation() {
for (auto& t : objects_) {
t.second.descriptor->Release();
t.second->Release();
}
}
absl::Status Arguments::AddObjectArgs() {
for (auto& t : objects_) {
AddGPUResources(t.first, t.second.descriptor->GetGPUResources());
GPUResourcesWithValue resources;
RETURN_IF_ERROR(t.second.obj_ptr->GetGPUResources(t.second.descriptor.get(),
&resources));
RETURN_IF_ERROR(SetGPUResources(t.first, resources));
void Arguments::GetActiveArguments(const std::string& args_prefix,
const std::string& code) {
for (auto& float_val : float_values_) {
float_val.second.active = HasWord(args_prefix + float_val.first, code);
}
for (auto& t : object_refs_) {
AddGPUResources(t.first, t.second.descriptor->GetGPUResources());
for (auto& int_val : int_values_) {
int_val.second.active = HasWord(args_prefix + int_val.first, code);
}
for (auto& half_val : half_values_) {
half_val.second.active = HasWord(args_prefix + half_val.first, code);
}
return absl::OkStatus();
}
} // namespace cl

View File

@ -20,11 +20,8 @@ limitations under the License.
#include <string>
#include <vector>
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
#include "tensorflow/lite/delegates/gpu/cl/util.h"
#include "tensorflow/lite/delegates/gpu/common/access_type.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
@ -39,12 +36,22 @@ class ArgumentsBinder {
virtual absl::Status SetInt(const std::string& name, int value) = 0;
virtual absl::Status SetFloat(const std::string& name, float value) = 0;
virtual absl::Status SetHalf(const std::string& name, half value) = 0;
virtual absl::Status SetObjectRef(const std::string& name,
const GPUObject* object) = 0;
virtual ~ArgumentsBinder() = default;
};
class Arguments : public ArgumentsBinder {
class Arguments {
public:
Arguments() = default;
~Arguments() = default;
// Move only
Arguments(Arguments&& args) = default;
Arguments& operator=(Arguments&& args) = default;
Arguments(const Arguments&) = delete;
Arguments& operator=(const Arguments&) = delete;
void AddFloat(const std::string& name, float value = 0.0f);
void AddHalf(const std::string& name, half value = half(0.0f));
void AddInt(const std::string& name, int value = 0);
@ -53,82 +60,19 @@ class Arguments : public ArgumentsBinder {
void AddObject(const std::string& name,
GPUObjectDescriptorPtr&& descriptor_ptr);
absl::Status SetInt(const std::string& name, int value) override;
absl::Status SetFloat(const std::string& name, float value) override;
absl::Status SetHalf(const std::string& name, half value) override;
absl::Status SetObjectRef(const std::string& name, const GPUObject* object);
absl::Status Bind(cl_kernel kernel, int offset = 0);
void RenameArgs(const std::string& postfix, std::string* code) const;
absl::Status Merge(Arguments&& args, const std::string& postfix);
absl::Status AllocateObjects(CLContext* context);
void ReleaseCPURepresentation();
absl::Status TransformToCLCode(
const DeviceInfo& device_info,
const std::map<std::string, std::string>& linkables, std::string* code);
// Move only
Arguments(Arguments&& args);
Arguments& operator=(Arguments&& args);
Arguments(const Arguments&) = delete;
Arguments& operator=(const Arguments&) = delete;
~Arguments() override = default;
private:
friend flatbuffers::Offset<data::Arguments> Encode(
const Arguments& args, flatbuffers::FlatBufferBuilder* builder);
friend absl::Status Decode(CLContext* context, const data::Arguments* fb_args,
Arguments* args);
friend absl::Status Decode(const data::Arguments* fb_args, Arguments* args);
void AddBuffer(const std::string& name, const GPUBufferDescriptor& desc);
void AddImage2D(const std::string& name, const GPUImage2DDescriptor& desc);
void AddImage2DArray(const std::string& name,
const GPUImage2DArrayDescriptor& desc);
void AddImage3D(const std::string& name, const GPUImage3DDescriptor& desc);
void AddImageBuffer(const std::string& name,
const GPUImageBufferDescriptor& desc);
void AddCustomMemory(const std::string& name,
const GPUCustomMemoryDescriptor& desc);
absl::Status SetImage2D(const std::string& name, cl_mem memory);
absl::Status SetBuffer(const std::string& name, cl_mem memory);
absl::Status SetImage2DArray(const std::string& name, cl_mem memory);
absl::Status SetImage3D(const std::string& name, cl_mem memory);
absl::Status SetImageBuffer(const std::string& name, cl_mem memory);
absl::Status SetCustomMemory(const std::string& name, cl_mem memory);
std::string GetListOfArgs();
std::string AddActiveArgument(const std::string& arg_name,
bool use_f32_for_halfs);
void AddGPUResources(const std::string& name, const GPUResources& resources);
absl::Status SetGPUResources(const std::string& name,
const GPUResourcesWithValue& resources);
absl::Status AddObjectArgs();
void ResolveArgsPass(const DeviceInfo& device_info, std::string* code);
absl::Status ResolveSelectorsPass(
const std::map<std::string, std::string>& linkables, std::string* code);
absl::Status ResolveSelector(
const std::map<std::string, std::string>& linkables,
const std::string& object_name, const std::string& selector,
const std::vector<std::string>& args,
const std::vector<std::string>& template_args, std::string* result);
void ResolveObjectNames(const std::string& object_name,
const std::vector<std::string>& member_names,
std::string* code);
GPUObjectDescriptor* GetObjectDescriptor(
const std::string& object_name) const;
static constexpr char kArgsPrefix[] = "args.";
friend class CLArguments;
void GetActiveArguments(const std::string& args_prefix,
const std::string& code);
struct IntValue {
int value;
@ -136,12 +80,8 @@ class Arguments : public ArgumentsBinder {
// many uniforms generated automatically and not used
// to reduce amount of data transferred we adding this optimization
bool active = false;
// offset to shared uniform storage.
uint32_t offset = -1;
};
std::map<std::string, IntValue> int_values_;
std::vector<int32_t> shared_int4s_data_;
struct FloatValue {
float value;
@ -149,12 +89,8 @@ class Arguments : public ArgumentsBinder {
// many uniforms generated automatically and not used
// to reduce amount of data transferred we adding this optimization
bool active = false;
// offset to shared uniform storage.
uint32_t offset = -1;
};
std::map<std::string, FloatValue> float_values_;
std::vector<float> shared_float4s_data_;
struct HalfValue {
half value;
@ -162,33 +98,11 @@ class Arguments : public ArgumentsBinder {
// many uniforms generated automatically and not used
// to reduce amount of data transferred we adding this optimization
bool active = false;
// some devices have issues with half parameters.
bool store_as_f32 = false;
// offset to shared uniform storage.
uint32_t offset = -1;
};
std::map<std::string, HalfValue> half_values_;
std::vector<half> shared_half4s_data_;
std::map<std::string, GPUBufferDescriptor> buffers_;
std::map<std::string, GPUImage2DDescriptor> images2d_;
std::map<std::string, GPUImage2DArrayDescriptor> image2d_arrays_;
std::map<std::string, GPUImage3DDescriptor> images3d_;
std::map<std::string, GPUImageBufferDescriptor> image_buffers_;
std::map<std::string, GPUCustomMemoryDescriptor> custom_memories_;
struct ObjectRefArg {
GPUObjectDescriptorPtr descriptor;
};
std::map<std::string, ObjectRefArg> object_refs_;
struct ObjectArg {
GPUObjectPtr obj_ptr;
GPUObjectDescriptorPtr descriptor;
};
std::map<std::string, ObjectArg> objects_;
std::map<std::string, GPUObjectDescriptorPtr> object_refs_;
std::map<std::string, GPUObjectDescriptorPtr> objects_;
};
} // namespace cl

View File

@ -0,0 +1,788 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/gpu/cl/cl_arguments.h"
#include <string>
#include "absl/strings/ascii.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/substitute.h"
#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
#include "tensorflow/lite/delegates/gpu/common/util.h"
namespace tflite {
namespace gpu {
namespace cl {
namespace {
bool IsWordSymbol(char symbol) {
return absl::ascii_isalnum(symbol) || symbol == '_';
}
void ReplaceAllWords(const std::string& old_word, const std::string& new_word,
std::string* str) {
size_t position = str->find(old_word);
while (position != std::string::npos) {
char prev = position == 0 ? '.' : (*str)[position - 1];
char next = position + old_word.size() < str->size()
? (*str)[position + old_word.size()]
: '.';
if (IsWordSymbol(prev) || IsWordSymbol(next)) {
position = str->find(old_word, position + 1);
continue;
}
str->replace(position, old_word.size(), new_word);
position = str->find(old_word, position + new_word.size());
}
}
std::string GetNextWord(const std::string& code, size_t first_position) {
size_t pos = first_position;
char t = code[pos];
while (IsWordSymbol(t)) {
pos++;
t = code[pos];
}
return code.substr(first_position, pos - first_position);
}
size_t FindEnclosingBracket(const std::string& text, size_t first_pos,
char bracket) {
const std::map<char, char> brackets = {
{'(', ')'},
{'{', '}'},
{'[', ']'},
{'<', '>'},
};
char b_open = bracket;
auto it = brackets.find(b_open);
if (it == brackets.end()) {
return -1;
}
char b_close = it->second;
size_t pos = first_pos;
int opened = 1;
int closed = 0;
while (opened != closed && pos < text.size()) {
if (text[pos] == b_open) {
opened++;
} else if (text[pos] == b_close) {
closed++;
}
pos++;
}
if (opened == closed) {
return pos;
} else {
return -1;
}
}
absl::Status ParseArgsInsideBrackets(const std::string& text,
size_t open_bracket_pos,
size_t* close_bracket_pos,
std::vector<std::string>* args) {
*close_bracket_pos =
FindEnclosingBracket(text, open_bracket_pos + 1, text[open_bracket_pos]);
if (*close_bracket_pos == -1) {
return absl::NotFoundError("Not found enclosing bracket");
}
std::string str_args = text.substr(open_bracket_pos + 1,
*close_bracket_pos - open_bracket_pos - 2);
std::vector<absl::string_view> words = absl::StrSplit(str_args, ',');
args->reserve(words.size());
for (const auto& word : words) {
absl::string_view arg = absl::StripAsciiWhitespace(word);
if (!arg.empty()) {
args->push_back(std::string(arg));
}
}
return absl::OkStatus();
}
void AppendArgument(const std::string& arg, std::string* args) {
if (!args->empty()) {
absl::StrAppend(args, ",\n ");
}
absl::StrAppend(args, arg);
}
std::string GetImageModifier(AccessType access) {
switch (access) {
case AccessType::READ:
return "__read_only";
case AccessType::WRITE:
return "__write_only";
case AccessType::READ_WRITE:
return "__read_write";
}
}
std::string GetDefaultSamplers(const DeviceInfo& device_info) {
std::string result;
result +=
"__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | "
"CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n";
if (device_info.IsAdreno3xx()) {
// Unfortunately, CLK_ADDRESS_CLAMP is very slow on Adreno3xx and
// we can observe huge register overhead when compared to other modes.
// While using CLK_ADDRESS_NONE with out-of-range image coordinates is
// undefined in the OpenCL specification, we have observed that
// CLK_ADDRESS_NONE works like CLK_ADDRESS_CLAMP for out-of-range image
// coordinates for RGBA F16/F32 textures on Adreno3xx devices. Using
// CLK_ADDRESS_NONE is significantly faster than CLK_ADDRESS_CLAMP on Adreno
// 3xx.
result +=
"__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | "
"CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n";
} else {
result +=
"__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | "
"CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n";
}
return result;
}
} // namespace
// Static
constexpr char CLArguments::kArgsPrefix[];
absl::Status CLArguments::Init(
const DeviceInfo& device_info,
const std::map<std::string, std::string>& linkables, CLContext* context,
Arguments* args, std::string* code) {
RETURN_IF_ERROR(AllocateObjects(*args, context));
RETURN_IF_ERROR(AddObjectArgs(args));
RETURN_IF_ERROR(ResolveSelectorsPass(*args, linkables, code));
object_refs_ = std::move(args->object_refs_);
args->GetActiveArguments(kArgsPrefix, *code);
const bool use_f32_for_halfs = device_info.IsPowerVR();
CopyArguments(*args, use_f32_for_halfs);
RETURN_IF_ERROR(SetObjectsResources(*args));
RenameArgumentsInCode(code);
ResolveArgsPass(code);
*code = absl::Substitute(*code, GetListOfArgs());
*code = GetDefaultSamplers(device_info) + *code;
return absl::OkStatus();
}
absl::Status CLArguments::Init(const DeviceInfo& device_info, Arguments* args,
CLContext* context) {
RETURN_IF_ERROR(AllocateObjects(*args, context));
RETURN_IF_ERROR(AddObjectArgs(args));
object_refs_ = std::move(args->object_refs_);
const bool use_f32_for_halfs = device_info.IsPowerVR();
CopyArguments(*args, use_f32_for_halfs);
RETURN_IF_ERROR(SetObjectsResources(*args));
return absl::OkStatus();
}
absl::Status CLArguments::AllocateObjects(const Arguments& args,
CLContext* context) {
objects_.resize(args.objects_.size());
int i = 0;
for (auto& t : args.objects_) {
RETURN_IF_ERROR(t.second->CreateGPUObject(context, &objects_[i]));
i++;
}
return absl::OkStatus();
}
absl::Status CLArguments::AddObjectArgs(Arguments* args) {
for (auto& t : args->objects_) {
AddGPUResources(t.first, t.second->GetGPUResources(), args);
}
for (auto& t : args->object_refs_) {
AddGPUResources(t.first, t.second->GetGPUResources(), args);
}
return absl::OkStatus();
}
absl::Status CLArguments::SetObjectsResources(const Arguments& args) {
int i = 0;
for (const auto& t : args.objects_) {
GPUResourcesWithValue resources;
RETURN_IF_ERROR(objects_[i]->GetGPUResources(t.second.get(), &resources));
RETURN_IF_ERROR(SetGPUResources(t.first, resources));
i++;
}
return absl::OkStatus();
}
absl::Status CLArguments::ResolveSelectorsPass(
const Arguments& args, const std::map<std::string, std::string>& linkables,
std::string* code) {
std::string result;
size_t position = 0;
size_t next_position = code->find(kArgsPrefix);
while (next_position != std::string::npos) {
size_t arg_pos = next_position;
next_position += strlen(kArgsPrefix);
std::string object_name = GetNextWord(*code, next_position);
char next = (*code)[next_position + object_name.size()];
if (next == '.') {
next_position += object_name.size() + 1;
std::string selector_name = GetNextWord(*code, next_position);
next_position += selector_name.size();
next = (*code)[next_position];
std::vector<std::string> template_args;
if (next == '<') {
size_t close_bracket_pos;
RETURN_IF_ERROR(ParseArgsInsideBrackets(
*code, next_position, &close_bracket_pos, &template_args));
next_position = close_bracket_pos;
next = (*code)[next_position];
}
if (next != '(') {
return absl::NotFoundError(absl::StrCat(
"Expected ( after ", object_name, ".", selector_name, " call"));
}
std::vector<std::string> function_args;
size_t close_bracket_pos;
RETURN_IF_ERROR(ParseArgsInsideBrackets(
*code, next_position, &close_bracket_pos, &function_args));
for (auto& arg : function_args) {
RETURN_IF_ERROR(ResolveSelectorsPass(args, {}, &arg));
}
std::string patch;
RETURN_IF_ERROR(ResolveSelector(args, linkables, object_name,
selector_name, function_args,
template_args, &patch));
code->replace(arg_pos, close_bracket_pos - arg_pos, patch);
position = arg_pos + patch.size();
} else {
position = arg_pos + strlen(kArgsPrefix);
}
next_position = code->find(kArgsPrefix, position);
}
return absl::OkStatus();
}
void CLArguments::ResolveObjectNames(
const std::string& object_name,
const std::vector<std::string>& member_names, std::string* code) {
for (const auto& member_name : member_names) {
const std::string new_name = kArgsPrefix + object_name + "_" + member_name;
ReplaceAllWords(member_name, new_name, code);
}
}
absl::Status CLArguments::ResolveSelector(
const Arguments& args, const std::map<std::string, std::string>& linkables,
const std::string& object_name, const std::string& selector,
const std::vector<std::string>& function_args,
const std::vector<std::string>& template_args, std::string* result) {
const GPUObjectDescriptor* desc_ptr;
auto it_ref = args.object_refs_.find(object_name);
auto it_obj = args.objects_.find(object_name);
if (it_ref != args.object_refs_.end()) {
desc_ptr = it_ref->second.get();
} else if (it_obj != args.objects_.end()) {
desc_ptr = it_obj->second.get();
} else {
return absl::NotFoundError(
absl::StrCat("No object with name - ", object_name));
}
auto names = desc_ptr->GetGPUResources().GetNames();
const auto* tensor_desc = dynamic_cast<const TensorDescriptor*>(desc_ptr);
if (tensor_desc && selector == "Write") {
auto it = linkables.find(object_name);
if (it != linkables.end()) {
if (desc_ptr->GetAccess() != AccessType::WRITE &&
desc_ptr->GetAccess() != AccessType::READ_WRITE) {
return absl::FailedPreconditionError(absl::StrCat(
"Object with name - ", object_name, " should have Write access."));
}
std::string value_name, x_coord, y_coord, s_coord;
RETURN_IF_ERROR(tensor_desc->GetLinkingContextFromWriteSelector(
function_args, &value_name, &x_coord, &y_coord, &s_coord));
// x_coord can have batch size property of link_object
ResolveObjectNames(object_name, names, &x_coord);
*result = it->second;
ReplaceAllWords("in_out_value", value_name, result);
ReplaceAllWords("X_COORD", x_coord, result);
ReplaceAllWords("Y_COORD", y_coord, result);
ReplaceAllWords("S_COORD", s_coord, result);
RETURN_IF_ERROR(ResolveSelectorsPass(args, {}, result));
}
}
std::string patch;
RETURN_IF_ERROR(desc_ptr->PerformSelector(selector, function_args,
template_args, &patch));
ResolveObjectNames(object_name, names, &patch);
*result += patch;
return absl::OkStatus();
}
void CLArguments::ResolveArgsPass(std::string* code) {
size_t position = 0;
size_t next_position = code->find(kArgsPrefix);
while (next_position != std::string::npos) {
size_t arg_pos = next_position;
next_position += strlen(kArgsPrefix);
std::string object_name = GetNextWord(*code, next_position);
std::string new_name = object_name;
code->replace(arg_pos, object_name.size() + strlen(kArgsPrefix), new_name);
position = arg_pos + new_name.size();
next_position = code->find(kArgsPrefix, position);
}
}
void CLArguments::CopyScalarValues(Arguments* args) const {
for (const auto& fvalue : float_values_) {
args->float_values_[fvalue.first].value = fvalue.second.value;
}
for (const auto& ivalue : int_values_) {
args->int_values_[ivalue.first].value = ivalue.second.value;
}
for (const auto& hfvalue : half_values_) {
args->half_values_[hfvalue.first].value = hfvalue.second.value;
}
}
void CLArguments::CopyArguments(const Arguments& args, bool use_f32_for_halfs) {
for (const auto& fvalue : args.float_values_) {
auto& new_val = float_values_[fvalue.first];
new_val.value = fvalue.second.value;
new_val.active = fvalue.second.active;
if (fvalue.second.active) {
new_val.offset = shared_float4s_data_.size();
shared_float4s_data_.push_back(new_val.value);
}
}
for (const auto& ivalue : args.int_values_) {
auto& new_val = int_values_[ivalue.first];
new_val.value = ivalue.second.value;
new_val.active = ivalue.second.active;
if (ivalue.second.active) {
new_val.offset = shared_int4s_data_.size();
shared_int4s_data_.push_back(new_val.value);
}
}
for (const auto& hfvalue : args.half_values_) {
auto& new_val = half_values_[hfvalue.first];
new_val.value = hfvalue.second.value;
new_val.active = hfvalue.second.active;
if (hfvalue.second.active) {
if (use_f32_for_halfs) {
new_val.store_as_f32 = true;
new_val.offset = shared_float4s_data_.size();
shared_float4s_data_.push_back(new_val.value);
} else {
new_val.store_as_f32 = false;
new_val.offset = shared_half4s_data_.size();
shared_half4s_data_.push_back(new_val.value);
}
}
}
int shared_int4s_aligned_size = AlignByN(shared_int4s_data_.size(), 4);
shared_int4s_data_.resize(shared_int4s_aligned_size);
int shared_float4s_aligned_size = AlignByN(shared_float4s_data_.size(), 4);
shared_float4s_data_.resize(shared_float4s_aligned_size);
int shared_half4s_aligned_size = AlignByN(shared_half4s_data_.size(), 4);
shared_half4s_data_.resize(shared_half4s_aligned_size);
}
void CLArguments::RenameArgumentsInCode(std::string* code) {
const std::string postfixes[4] = {"x", "y", "z", "w"};
for (const auto& fvalue : float_values_) {
if (fvalue.second.active) {
std::string index = std::to_string(fvalue.second.offset / 4);
std::string new_name =
"shared_float4_" + index + "." + postfixes[fvalue.second.offset % 4];
ReplaceAllWords(kArgsPrefix + fvalue.first, new_name, code);
}
}
for (const auto& ivalue : int_values_) {
if (ivalue.second.active) {
std::string index = std::to_string(ivalue.second.offset / 4);
std::string new_name =
"shared_int4_" + index + "." + postfixes[ivalue.second.offset % 4];
ReplaceAllWords(kArgsPrefix + ivalue.first, new_name, code);
}
}
for (const auto& hfvalue : half_values_) {
if (hfvalue.second.active) {
std::string index = std::to_string(hfvalue.second.offset / 4);
std::string new_name;
if (hfvalue.second.store_as_f32) {
new_name = "(half)(shared_float4_" + index + "." +
postfixes[hfvalue.second.offset % 4] + ")";
} else {
new_name = "shared_half4_" + index + "." +
postfixes[hfvalue.second.offset % 4];
}
ReplaceAllWords(kArgsPrefix + hfvalue.first, new_name, code);
}
}
}
void CLArguments::AddBuffer(const std::string& name,
const GPUBufferDescriptor& desc) {
buffers_[name] = desc;
}
void CLArguments::AddImage2D(const std::string& name,
const GPUImage2DDescriptor& desc) {
images2d_[name] = desc;
}
void CLArguments::AddImage2DArray(const std::string& name,
const GPUImage2DArrayDescriptor& desc) {
image2d_arrays_[name] = desc;
}
void CLArguments::AddImage3D(const std::string& name,
const GPUImage3DDescriptor& desc) {
images3d_[name] = desc;
}
void CLArguments::AddImageBuffer(const std::string& name,
const GPUImageBufferDescriptor& desc) {
image_buffers_[name] = desc;
}
void CLArguments::AddCustomMemory(const std::string& name,
const GPUCustomMemoryDescriptor& desc) {
custom_memories_[name] = desc;
}
void CLArguments::AddGPUResources(const std::string& name,
const GPUResources& resources,
Arguments* args) {
for (const auto& r : resources.ints) {
args->AddInt(absl::StrCat(name, "_", r));
}
for (const auto& r : resources.floats) {
args->AddFloat(absl::StrCat(name, "_", r));
}
for (const auto& r : resources.buffers) {
AddBuffer(absl::StrCat(name, "_", r.first), r.second);
}
for (const auto& r : resources.images2d) {
AddImage2D(absl::StrCat(name, "_", r.first), r.second);
}
for (const auto& r : resources.image2d_arrays) {
AddImage2DArray(absl::StrCat(name, "_", r.first), r.second);
}
for (const auto& r : resources.images3d) {
AddImage3D(absl::StrCat(name, "_", r.first), r.second);
}
for (const auto& r : resources.image_buffers) {
AddImageBuffer(absl::StrCat(name, "_", r.first), r.second);
}
for (const auto& r : resources.custom_memories) {
AddCustomMemory(absl::StrCat(name, "_", r.first), r.second);
}
}
absl::Status CLArguments::SetInt(const std::string& name, int value) {
auto it = int_values_.find(name);
if (it == int_values_.end()) {
return absl::NotFoundError(
absl::StrCat("No int argument with name - ", name));
}
it->second.value = value;
if (it->second.active) {
shared_int4s_data_[it->second.offset] = value;
}
return absl::OkStatus();
}
absl::Status CLArguments::SetFloat(const std::string& name, float value) {
auto it = float_values_.find(name);
if (it == float_values_.end()) {
return absl::NotFoundError(
absl::StrCat("No float argument with name - ", name));
}
it->second.value = value;
if (it->second.active) {
shared_float4s_data_[it->second.offset] = value;
}
return absl::OkStatus();
}
absl::Status CLArguments::SetHalf(const std::string& name, half value) {
auto it = half_values_.find(name);
if (it == half_values_.end()) {
return absl::NotFoundError(
absl::StrCat("No half argument with name - ", name));
}
it->second.value = value;
if (it->second.active) {
if (it->second.store_as_f32) {
shared_float4s_data_[it->second.offset] = value;
} else {
shared_half4s_data_[it->second.offset] = value;
}
}
return absl::OkStatus();
}
absl::Status CLArguments::SetImage2D(const std::string& name, cl_mem memory) {
auto it = images2d_.find(name);
if (it == images2d_.end()) {
return absl::NotFoundError(
absl::StrCat("No image2D argument with name - ", name));
}
it->second.memory = memory;
return absl::OkStatus();
}
absl::Status CLArguments::SetBuffer(const std::string& name, cl_mem memory) {
auto it = buffers_.find(name);
if (it == buffers_.end()) {
return absl::NotFoundError(
absl::StrCat("No buffer argument with name - ", name));
}
it->second.memory = memory;
return absl::OkStatus();
}
absl::Status CLArguments::SetImage2DArray(const std::string& name,
cl_mem memory) {
auto it = image2d_arrays_.find(name);
if (it == image2d_arrays_.end()) {
return absl::NotFoundError(
absl::StrCat("No image2D array argument with name - ", name));
}
it->second.memory = memory;
return absl::OkStatus();
}
absl::Status CLArguments::SetImage3D(const std::string& name, cl_mem memory) {
auto it = images3d_.find(name);
if (it == images3d_.end()) {
return absl::NotFoundError(
absl::StrCat("No image3D argument with name - ", name));
}
it->second.memory = memory;
return absl::OkStatus();
}
absl::Status CLArguments::SetImageBuffer(const std::string& name,
cl_mem memory) {
auto it = image_buffers_.find(name);
if (it == image_buffers_.end()) {
return absl::NotFoundError(
absl::StrCat("No image buffer argument with name - ", name));
}
it->second.memory = memory;
return absl::OkStatus();
}
absl::Status CLArguments::SetCustomMemory(const std::string& name,
cl_mem memory) {
auto it = custom_memories_.find(name);
if (it == custom_memories_.end()) {
return absl::NotFoundError(
absl::StrCat("No custom memory argument with name - ", name));
}
it->second.memory = memory;
return absl::OkStatus();
}
absl::Status CLArguments::SetObjectRef(const std::string& name,
const GPUObject* object) {
auto it = object_refs_.find(name);
if (it == object_refs_.end()) {
return absl::NotFoundError(
absl::StrCat("No object ref with name - ", name));
}
GPUResourcesWithValue resources;
RETURN_IF_ERROR(object->GetGPUResources(it->second.get(), &resources));
return SetGPUResources(name, resources);
}
absl::Status CLArguments::SetGPUResources(
const std::string& name, const GPUResourcesWithValue& resources) {
for (const auto& r : resources.ints) {
RETURN_IF_ERROR(SetInt(absl::StrCat(name, "_", r.first), r.second));
}
for (const auto& r : resources.floats) {
RETURN_IF_ERROR(SetFloat(absl::StrCat(name, "_", r.first), r.second));
}
for (const auto& r : resources.buffers) {
RETURN_IF_ERROR(SetBuffer(absl::StrCat(name, "_", r.first), r.second));
}
for (const auto& r : resources.images2d) {
RETURN_IF_ERROR(SetImage2D(absl::StrCat(name, "_", r.first), r.second));
}
for (const auto& r : resources.image2d_arrays) {
RETURN_IF_ERROR(
SetImage2DArray(absl::StrCat(name, "_", r.first), r.second));
}
for (const auto& r : resources.images3d) {
RETURN_IF_ERROR(SetImage3D(absl::StrCat(name, "_", r.first), r.second));
}
for (const auto& r : resources.image_buffers) {
RETURN_IF_ERROR(SetImageBuffer(absl::StrCat(name, "_", r.first), r.second));
}
for (const auto& r : resources.custom_memories) {
RETURN_IF_ERROR(
SetCustomMemory(absl::StrCat(name, "_", r.first), r.second));
}
return absl::OkStatus();
}
std::string CLArguments::GetListOfArgs() {
std::string result;
for (auto& t : buffers_) {
const std::string type_name =
t.second.data_type == DataType::FLOAT32 ? "float" : "half";
std::string attributes;
for (const auto& attr : t.second.attributes) {
attributes += absl::StrCat(" __attribute__((", attr, "))");
}
AppendArgument(
absl::StrCat(MemoryTypeToCLType(t.second.memory_type), " ",
ToCLDataType(t.second.data_type, t.second.element_size),
"* ", t.first, attributes),
&result);
}
for (auto& t : image_buffers_) {
AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type),
" image1d_buffer_t ", t.first),
&result);
}
for (auto& t : images2d_) {
AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type),
" image2d_t ", t.first),
&result);
}
for (auto& t : image2d_arrays_) {
AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type),
" image2d_array_t ", t.first),
&result);
}
for (auto& t : images3d_) {
AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type),
" image3d_t ", t.first),
&result);
}
for (auto& t : custom_memories_) {
AppendArgument(absl::StrCat(t.second.type_name, " ", t.first), &result);
}
for (int i = 0; i < shared_int4s_data_.size() / 4; ++i) {
AppendArgument(absl::StrCat("int4 shared_int4_", i), &result);
}
for (int i = 0; i < shared_float4s_data_.size() / 4; ++i) {
AppendArgument(absl::StrCat("float4 shared_float4_", i), &result);
}
for (int i = 0; i < shared_half4s_data_.size() / 4; ++i) {
AppendArgument(absl::StrCat("half4 shared_half4_", i), &result);
}
return result;
}
absl::Status CLArguments::Bind(cl_kernel kernel, int offset) {
for (auto& t : buffers_) {
const int error_code =
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
if (error_code != CL_SUCCESS) {
return absl::UnknownError(absl::StrCat(
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
"(at index - ", offset, ")"));
}
offset++;
}
for (auto& t : image_buffers_) {
const int error_code =
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
if (error_code != CL_SUCCESS) {
return absl::UnknownError(absl::StrCat(
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
"(at index - ", offset, ")"));
}
offset++;
}
for (auto& t : images2d_) {
const int error_code =
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
if (error_code != CL_SUCCESS) {
return absl::UnknownError(absl::StrCat(
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
"(at index - ", offset, ")"));
}
offset++;
}
for (auto& t : image2d_arrays_) {
const int error_code =
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
if (error_code != CL_SUCCESS) {
return absl::UnknownError(absl::StrCat(
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
"(at index - ", offset, ")"));
}
offset++;
}
for (auto& t : images3d_) {
const int error_code =
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
if (error_code != CL_SUCCESS) {
return absl::UnknownError(absl::StrCat(
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
"(at index - ", offset, ")"));
}
offset++;
}
for (auto& t : custom_memories_) {
const int error_code =
clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
if (error_code != CL_SUCCESS) {
return absl::UnknownError(absl::StrCat(
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
"(at index - ", offset, ")"));
}
offset++;
}
for (int i = 0; i < shared_int4s_data_.size() / 4; ++i) {
const int error_code = clSetKernelArg(kernel, offset, sizeof(int32_t) * 4,
&shared_int4s_data_[i * 4]);
if (error_code != CL_SUCCESS) {
return absl::UnknownError(absl::StrCat(
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
"(at index - ", offset, ")"));
}
offset++;
}
for (int i = 0; i < shared_float4s_data_.size() / 4; ++i) {
const int error_code = clSetKernelArg(kernel, offset, sizeof(int32_t) * 4,
&shared_float4s_data_[i * 4]);
if (error_code != CL_SUCCESS) {
return absl::UnknownError(absl::StrCat(
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
"(at index - ", offset, ")"));
}
offset++;
}
for (int i = 0; i < shared_half4s_data_.size() / 4; ++i) {
const int error_code = clSetKernelArg(kernel, offset, sizeof(int16_t) * 4,
&shared_half4s_data_[i * 4]);
if (error_code != CL_SUCCESS) {
return absl::UnknownError(absl::StrCat(
"Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
"(at index - ", offset, ")"));
}
offset++;
}
return absl::OkStatus();
}
} // namespace cl
} // namespace gpu
} // namespace tflite

View File

@ -0,0 +1,168 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_ARGUMENTS_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_ARGUMENTS_H_
#include <map>
#include <string>
#include <vector>
#include "tensorflow/lite/delegates/gpu/cl/arguments.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
namespace tflite {
namespace gpu {
namespace cl {
class CLArguments : public ArgumentsBinder {
public:
CLArguments() = default;
absl::Status Init(const DeviceInfo& device_info,
const std::map<std::string, std::string>& linkables,
CLContext* context, Arguments* args, std::string* code);
absl::Status Init(const DeviceInfo& device_info, Arguments* args,
CLContext* context);
// Temporary, will be resolved later
void MoveObjectRefsIn(Arguments* args) {
object_refs_ = std::move(args->object_refs_);
}
void MoveObjectRefsOut(Arguments* args) {
args->object_refs_ = std::move(object_refs_);
}
void CopyScalarValues(Arguments* args) const;
// Move only
CLArguments(CLArguments&& args) = default;
CLArguments& operator=(CLArguments&& args) = default;
CLArguments(const CLArguments&) = delete;
CLArguments& operator=(const CLArguments&) = delete;
absl::Status SetInt(const std::string& name, int value) override;
absl::Status SetFloat(const std::string& name, float value) override;
absl::Status SetHalf(const std::string& name, half value) override;
absl::Status SetObjectRef(const std::string& name,
const GPUObject* object) override;
absl::Status Bind(cl_kernel kernel, int offset = 0);
private:
absl::Status AllocateObjects(const Arguments& args, CLContext* context);
absl::Status AddObjectArgs(Arguments* args);
absl::Status ResolveSelectorsPass(
const Arguments& args,
const std::map<std::string, std::string>& linkables, std::string* code);
absl::Status ResolveSelector(
const Arguments& args,
const std::map<std::string, std::string>& linkables,
const std::string& object_name, const std::string& selector,
const std::vector<std::string>& function_args,
const std::vector<std::string>& template_args, std::string* result);
void ResolveObjectNames(const std::string& object_name,
const std::vector<std::string>& member_names,
std::string* code);
void ResolveArgsPass(std::string* code);
void CopyArguments(const Arguments& args, bool use_f32_for_halfs);
void RenameArgumentsInCode(std::string* code);
std::string GetListOfArgs();
void AddBuffer(const std::string& name, const GPUBufferDescriptor& desc);
void AddImage2D(const std::string& name, const GPUImage2DDescriptor& desc);
void AddImage2DArray(const std::string& name,
const GPUImage2DArrayDescriptor& desc);
void AddImage3D(const std::string& name, const GPUImage3DDescriptor& desc);
void AddImageBuffer(const std::string& name,
const GPUImageBufferDescriptor& desc);
void AddCustomMemory(const std::string& name,
const GPUCustomMemoryDescriptor& desc);
void AddGPUResources(const std::string& name, const GPUResources& resources,
Arguments* args);
absl::Status SetObjectsResources(const Arguments& args);
absl::Status SetGPUResources(const std::string& name,
const GPUResourcesWithValue& resources);
absl::Status SetImage2D(const std::string& name, cl_mem memory);
absl::Status SetBuffer(const std::string& name, cl_mem memory);
absl::Status SetImage2DArray(const std::string& name, cl_mem memory);
absl::Status SetImage3D(const std::string& name, cl_mem memory);
absl::Status SetImageBuffer(const std::string& name, cl_mem memory);
absl::Status SetCustomMemory(const std::string& name, cl_mem memory);
static constexpr char kArgsPrefix[] = "args.";
struct IntValue {
int value;
// many arguments generated automatically and not used
// to reduce amount of data transferred we adding this optimization
bool active = false;
// offset to shared storage.
uint32_t offset = -1;
};
std::map<std::string, IntValue> int_values_;
std::vector<int32_t> shared_int4s_data_;
struct FloatValue {
float value;
// many arguments generated automatically and not used
// to reduce amount of data transferred we adding this optimization
bool active = false;
// offset to shared storage.
uint32_t offset = -1;
};
std::map<std::string, FloatValue> float_values_;
std::vector<float> shared_float4s_data_;
struct HalfValue {
half value;
// many arguments generated automatically and not used
// to reduce amount of data transferred we adding this optimization
bool active = false;
// some devices have issues with half parameters.
bool store_as_f32 = false;
// offset to shared uniform storage.
uint32_t offset = -1;
};
std::map<std::string, HalfValue> half_values_;
std::vector<half> shared_half4s_data_;
std::map<std::string, GPUBufferDescriptor> buffers_;
std::map<std::string, GPUImage2DDescriptor> images2d_;
std::map<std::string, GPUImage2DArrayDescriptor> image2d_arrays_;
std::map<std::string, GPUImage3DDescriptor> images3d_;
std::map<std::string, GPUImageBufferDescriptor> image_buffers_;
std::map<std::string, GPUCustomMemoryDescriptor> custom_memories_;
std::map<std::string, GPUObjectDescriptorPtr> object_refs_;
std::vector<GPUObjectPtr> objects_;
};
} // namespace cl
} // namespace gpu
} // namespace tflite
#endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_ARGUMENTS_H_

View File

@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/gpu/cl/arguments.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_arguments.h"
#include <cstdint>
#include <string>
@ -20,6 +20,7 @@ limitations under the License.
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "absl/strings/match.h"
#include "tensorflow/lite/delegates/gpu/cl/arguments.h"
#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
@ -27,7 +28,7 @@ limitations under the License.
namespace tflite {
namespace gpu {
namespace cl {
TEST(ArgumentsTest, TestSelectorResolve) {
TEST(CLArgumentsTest, TestSelectorResolve) {
BufferDescriptor desc;
desc.element_type = DataType::FLOAT32;
desc.element_size = 4;
@ -43,14 +44,15 @@ __kernel void main_function($0) {
}
})";
CLArguments cl_args;
DeviceInfo device_info;
ASSERT_OK(args.TransformToCLCode(device_info, {}, &sample_code));
ASSERT_OK(cl_args.Init(device_info, {}, nullptr, &args, &sample_code));
EXPECT_TRUE(absl::StrContains(sample_code, "value = weights_buffer[id];"));
EXPECT_TRUE(
absl::StrContains(sample_code, "__global float4* weights_buffer"));
}
TEST(ArgumentsTest, TestNoSelector) {
TEST(CLArgumentsTest, TestNoSelector) {
BufferDescriptor desc;
desc.element_type = DataType::FLOAT32;
desc.element_size = 4;
@ -64,16 +66,10 @@ TEST(ArgumentsTest, TestNoSelector) {
value = args.weights.UnknownSelector(id);
}
)";
CLArguments cl_args;
DeviceInfo device_info;
EXPECT_FALSE(args.TransformToCLCode(device_info, {}, &sample_code).ok());
}
TEST(ArgumentsTest, TestRenameArgs) {
Arguments linkable_args;
linkable_args.AddFloat("alpha", 0.5f);
std::string linkable_code = "in_out_value += args.alpha;\n";
linkable_args.RenameArgs("_link0", &linkable_code);
EXPECT_EQ(linkable_code, "in_out_value += args.alpha_link0;\n");
EXPECT_FALSE(
cl_args.Init(device_info, {}, nullptr, &args, &sample_code).ok());
}
} // namespace cl

View File

@ -199,12 +199,21 @@ absl::Status InferenceContext::InitFromGraph(
RETURN_IF_ERROR(Tune(tuning_parameters));
if (serialized_model) {
// Temporary, will be resolved later, now we don't have complete
// intermediate representation
for (auto& node : nodes_) {
node.operation->MoveObjectRefsFromCLToGeneric();
node.operation->SyncScalarValues();
}
flatbuffers::FlatBufferBuilder builder;
auto encoded_fb = Encode(*this, &builder);
data::FinishInferenceContextBuffer(builder, encoded_fb);
serialized_model->resize(builder.GetSize());
std::memcpy(serialized_model->data(), builder.GetBufferPointer(),
builder.GetSize());
for (auto& node : nodes_) {
node.operation->MoveObjectRefsFromGenericToCL();
}
}
for (auto& node : nodes_) {
node.operation->args_.ReleaseCPURepresentation();
@ -220,7 +229,7 @@ absl::Status InferenceContext::RestoreDeserialized(
return absl::DataLossError("Deserialization failed.");
}
auto decoded_fb = data::GetInferenceContext(serialized_model.data());
RETURN_IF_ERROR(Decode(&env->context(), decoded_fb, this));
RETURN_IF_ERROR(Decode(decoded_fb, this));
CreationContext creation_context;
creation_context.device = env->GetDevicePtr();

View File

@ -103,8 +103,7 @@ class InferenceContext {
friend flatbuffers::Offset<data::InferenceContext> Encode(
const InferenceContext& inference,
flatbuffers::FlatBufferBuilder* builder);
friend absl::Status Decode(CLContext* context,
const data::InferenceContext* fb_inference,
friend absl::Status Decode(const data::InferenceContext* fb_inference,
InferenceContext* inference);
void CopyInAndOutIds(const GraphFloat32& graph);

View File

@ -263,6 +263,7 @@ cc_library(
":util",
"//tensorflow/lite/delegates/gpu:spi",
"//tensorflow/lite/delegates/gpu/cl:arguments",
"//tensorflow/lite/delegates/gpu/cl:cl_arguments",
"//tensorflow/lite/delegates/gpu/cl:cl_command_queue",
"//tensorflow/lite/delegates/gpu/cl:cl_errors",
"//tensorflow/lite/delegates/gpu/cl:environment",
@ -643,6 +644,7 @@ cc_library(
":work_group_picking",
"//tensorflow/lite/delegates/gpu/cl:arguments",
"//tensorflow/lite/delegates/gpu/cl:buffer",
"//tensorflow/lite/delegates/gpu/cl:cl_arguments",
"//tensorflow/lite/delegates/gpu/cl:cl_command_queue",
"//tensorflow/lite/delegates/gpu/cl:cl_context",
"//tensorflow/lite/delegates/gpu/cl:cl_device",

View File

@ -20,6 +20,7 @@ limitations under the License.
#include <string>
#include "tensorflow/lite/delegates/gpu/cl/arguments.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_arguments.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_errors.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/util.h"
@ -44,8 +45,9 @@ class OpenClConverterImpl : public TensorObjectConverter {
absl::Status DispatchKernel(cl_mem buffer_mem, Tensor* tensor) {
kernel_.ResetBindingCounter();
RETURN_IF_ERROR(kernel_.SetMemoryAuto(buffer_mem));
RETURN_IF_ERROR(args_.SetObjectRef("tensor", tensor));
RETURN_IF_ERROR(args_.Bind(kernel_.kernel(), kernel_.GetBindingCounter()));
RETURN_IF_ERROR(cl_args_.SetObjectRef("tensor", tensor));
RETURN_IF_ERROR(
cl_args_.Bind(kernel_.kernel(), kernel_.GetBindingCounter()));
const int3 grid = int3(tensor->Width() * tensor->Batch(), tensor->Height(),
tensor->Slices());
const int3 work_group_size = {16, 8, 1};
@ -53,7 +55,7 @@ class OpenClConverterImpl : public TensorObjectConverter {
return queue_->Dispatch(kernel_, work_groups_count, work_group_size);
}
Arguments args_;
CLArguments cl_args_;
BHWC shape_;
CLKernel kernel_;
TensorDescriptor tensor_descriptor_;
@ -115,7 +117,8 @@ class TensorToTensorConverter : public OpenClConverterImpl {
src_tensor_descriptor_.storage_type = ToTensorStorageType(
input_def.object_def.object_type, input_def.object_def.data_layout);
src_tensor_descriptor_.data_type = input_def.object_def.data_type;
args_.AddObjectRef(
Arguments args;
args.AddObjectRef(
"src_tensor", AccessType::READ,
absl::make_unique<TensorDescriptor>(src_tensor_descriptor_));
@ -123,7 +126,7 @@ class TensorToTensorConverter : public OpenClConverterImpl {
dst_tensor_descriptor_.storage_type = ToTensorStorageType(
output_def.object_def.object_type, output_def.object_def.data_layout);
dst_tensor_descriptor_.data_type = output_def.object_def.data_type;
args_.AddObjectRef(
args.AddObjectRef(
"dst_tensor", AccessType::WRITE,
absl::make_unique<TensorDescriptor>(dst_tensor_descriptor_));
@ -152,8 +155,8 @@ class TensorToTensorConverter : public OpenClConverterImpl {
context_ = &environment->context();
shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h,
input_def.dimensions.w, input_def.dimensions.c);
RETURN_IF_ERROR(
args_.TransformToCLCode(environment->device().info_, {}, &shader_src));
RETURN_IF_ERROR(cl_args_.Init(environment->device().GetInfo(), {}, nullptr,
&args, &shader_src));
return environment->program_cache()->GetOrCreateCLKernel(
shader_src, "tensor_to_tensor", environment->context(),
environment->device(), &kernel_);
@ -172,9 +175,9 @@ class TensorToTensorConverter : public OpenClConverterImpl {
Tensor dst_tensor;
RETURN_IF_ERROR(CreateSharedTensor(*context_, out_memory, shape_,
dst_tensor_descriptor_, &dst_tensor));
RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", &src_tensor));
RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", &dst_tensor));
RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
RETURN_IF_ERROR(cl_args_.SetObjectRef("src_tensor", &src_tensor));
RETURN_IF_ERROR(cl_args_.SetObjectRef("dst_tensor", &dst_tensor));
RETURN_IF_ERROR(cl_args_.Bind(kernel_.kernel()));
const int3 grid = int3(dst_tensor.Width() * dst_tensor.Batch(),
dst_tensor.Height(), dst_tensor.Slices());
const int3 work_group_size = {16, 8, 1};
@ -203,8 +206,9 @@ class TensorToBHWCBufferConverter : public OpenClConverterImpl {
tensor_descriptor_.layout = Layout::BHWC;
tensor_descriptor_.storage_type = src_tensor_type;
tensor_descriptor_.data_type = input_def.object_def.data_type;
args_.AddObjectRef("tensor", AccessType::READ,
absl::make_unique<TensorDescriptor>(tensor_descriptor_));
Arguments args;
args.AddObjectRef("tensor", AccessType::READ,
absl::make_unique<TensorDescriptor>(tensor_descriptor_));
const bool need_fp16_support =
input_def.object_def.data_type == DataType::FLOAT16 ||
@ -244,8 +248,8 @@ class TensorToBHWCBufferConverter : public OpenClConverterImpl {
context_ = &environment->context();
shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h,
input_def.dimensions.w, input_def.dimensions.c);
RETURN_IF_ERROR(
args_.TransformToCLCode(environment->device().info_, {}, &shader_src));
RETURN_IF_ERROR(cl_args_.Init(environment->device().GetInfo(), {}, nullptr,
&args, &shader_src));
return environment->program_cache()->GetOrCreateCLKernel(
shader_src, "tensor_to_bhwc", environment->context(),
environment->device(), &kernel_);
@ -300,8 +304,9 @@ class BHWCBufferToTensorConverter : public OpenClConverterImpl {
tensor_descriptor_.layout = Layout::BHWC;
tensor_descriptor_.storage_type = dst_tensor_type;
tensor_descriptor_.data_type = output_def.object_def.data_type;
args_.AddObjectRef("tensor", AccessType::WRITE,
absl::make_unique<TensorDescriptor>(tensor_descriptor_));
Arguments args;
args.AddObjectRef("tensor", AccessType::WRITE,
absl::make_unique<TensorDescriptor>(tensor_descriptor_));
const bool need_fp16_support =
input_def.object_def.data_type == DataType::FLOAT16 ||
@ -338,8 +343,8 @@ class BHWCBufferToTensorConverter : public OpenClConverterImpl {
context_ = &environment->context();
shape_ = BHWC(output_def.dimensions.b, output_def.dimensions.h,
output_def.dimensions.w, output_def.dimensions.c);
RETURN_IF_ERROR(
args_.TransformToCLCode(environment->device().info_, {}, &shader_src));
RETURN_IF_ERROR(cl_args_.Init(environment->device().GetInfo(), {}, nullptr,
&args, &shader_src));
return environment->program_cache()->GetOrCreateCLKernel(
shader_src, "bhwc_to_tensor", environment->context(),
environment->device(), &kernel_);

View File

@ -132,12 +132,13 @@ GPUOperation::GPUOperation(GPUOperation&& operation)
definition_(std::move(operation.definition_)),
src_(std::move(operation.src_)),
dst_(std::move(operation.dst_)),
kernel_(std::move(operation.kernel_)),
grid_dimension_(operation.grid_dimension_),
work_group_launch_order_(operation.work_group_launch_order_),
grid_size_(operation.grid_size_),
src_tensors_names_(std::move(operation.src_tensors_names_)),
dst_tensors_names_(std::move(operation.dst_tensors_names_)),
kernel_(std::move(operation.kernel_)),
cl_args_(std::move(operation.cl_args_)),
work_groups_count_(operation.work_groups_count_),
linkable_count_(operation.linkable_count_),
elementwise_code_(std::move(operation.elementwise_code_)) {}
@ -155,12 +156,13 @@ GPUOperation& GPUOperation::operator=(GPUOperation&& operation) {
definition_ = std::move(operation.definition_);
src_ = std::move(operation.src_);
dst_ = std::move(operation.dst_);
kernel_ = std::move(operation.kernel_);
std::swap(grid_dimension_, operation.grid_dimension_);
std::swap(work_group_launch_order_, operation.work_group_launch_order_);
std::swap(grid_size_, operation.grid_size_);
src_tensors_names_ = std::move(operation.src_tensors_names_);
dst_tensors_names_ = std::move(operation.dst_tensors_names_);
kernel_ = std::move(operation.kernel_);
cl_args_ = std::move(operation.cl_args_);
std::swap(work_groups_count_, operation.work_groups_count_);
std::swap(linkable_count_, operation.linkable_count_);
elementwise_code_ = std::move(operation.elementwise_code_);
@ -211,12 +213,12 @@ void GPUOperation::AddDstTensor(const std::string& tensor_name,
absl::Status GPUOperation::UpdateParams() {
for (int i = 0; i < src_tensors_names_.size(); ++i) {
RETURN_IF_ERROR(args_.SetObjectRef(src_tensors_names_[i], src_[i]));
RETURN_IF_ERROR(cl_args_.SetObjectRef(src_tensors_names_[i], src_[i]));
}
for (int i = 0; i < dst_tensors_names_.size(); ++i) {
RETURN_IF_ERROR(args_.SetObjectRef(dst_tensors_names_[i], dst_[i]));
RETURN_IF_ERROR(cl_args_.SetObjectRef(dst_tensors_names_[i], dst_[i]));
}
RETURN_IF_ERROR(BindArguments(&args_));
RETURN_IF_ERROR(BindArguments(&cl_args_));
grid_size_ = GetGridSize();
work_groups_count_ = GetWorkGroupsCount(
grid_dimension_, grid_size_, work_group_size_, work_group_launch_order_);
@ -244,15 +246,10 @@ absl::Status GPUOperation::AssembleCode(const DeviceInfo& device_info,
elementwise_code_ = "{\n" + code_ + "\n}\n" + elementwise_code_;
code_ = GetElementWiseCode(definition_, check_src_channels_size_);
RETURN_IF_ERROR(args_.AllocateObjects(context));
RETURN_IF_ERROR(args_.TransformToCLCode(
device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_));
} else {
RETURN_IF_ERROR(args_.AllocateObjects(context));
RETURN_IF_ERROR(args_.TransformToCLCode(
device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_));
}
return absl::OkStatus();
return cl_args_.Init(device_info,
{{dst_tensors_names_[0], elementwise_code_}}, context,
&args_, &code_);
}
absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
@ -266,6 +263,8 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
absl::Status GPUOperation::CompileDeserialized(
const CreationContext& creation_context) {
RETURN_IF_ERROR(cl_args_.Init(creation_context.GetDeviceInfo(), &args_,
creation_context.context));
return creation_context.cache->GetOrCreateCLKernel(
code_, "main_function", compiler_options_, *creation_context.context,
*creation_context.device, &kernel_);
@ -299,7 +298,7 @@ absl::Status GPUOperation::Tune(const TuningParameters& params) {
GetWorkGroupsCount(grid_dimension_, grid_size_,
possible_work_groups[i], work_group_launch_order_);
}
RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
RETURN_IF_ERROR(cl_args_.Bind(kernel_.kernel()));
int best_work_group_index;
RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex(
kernel_, *params.info, work_groups_count, possible_work_groups,

View File

@ -21,6 +21,7 @@ limitations under the License.
#include "tensorflow/lite/delegates/gpu/cl/arguments.h"
#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_arguments.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
@ -120,7 +121,7 @@ class GPUOperation {
absl::Status UpdateParams();
absl::Status AddToQueue(CLCommandQueue* queue) {
RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
RETURN_IF_ERROR(cl_args_.Bind(kernel_.kernel()));
return queue->Dispatch(kernel_, work_groups_count_, work_group_size_);
}
@ -168,11 +169,15 @@ class GPUOperation {
// applicable only with elementwise_ = true;
bool check_src_channels_size_ = false;
// Temporary, will be resolved later
void MoveObjectRefsFromCLToGeneric() { cl_args_.MoveObjectRefsOut(&args_); }
void MoveObjectRefsFromGenericToCL() { cl_args_.MoveObjectRefsIn(&args_); }
void SyncScalarValues() { cl_args_.CopyScalarValues(&args_); }
protected:
friend flatbuffers::Offset<data::GPUOperation> Encode(
const GPUOperation& op, flatbuffers::FlatBufferBuilder* builder);
friend absl::Status Decode(CLContext* context,
const data::GPUOperation* fb_op, GPUOperation* op);
friend absl::Status Decode(const data::GPUOperation* fb_op, GPUOperation* op);
virtual absl::Status BindArguments(ArgumentsBinder* args) {
return absl::OkStatus();
@ -183,7 +188,6 @@ class GPUOperation {
OperationDef definition_;
std::vector<Tensor*> src_;
std::vector<Tensor*> dst_;
CLKernel kernel_;
int grid_dimension_ = 3; // can be 1, 2 or 3
int3 work_group_launch_order_ = int3(0, 1, 2);
int3 grid_size_ = int3(0, 0, 0);
@ -191,6 +195,8 @@ class GPUOperation {
std::vector<std::string> dst_tensors_names_;
private:
CLKernel kernel_;
CLArguments cl_args_;
int3 work_groups_count_ = int3(0, 0, 0);
int linkable_count_ = 0;
std::string elementwise_code_; // temporary, used during op construction

View File

@ -508,30 +508,11 @@ void Decode(const data::TensorDescWithId* fb_desc, TensorDescriptor* desc,
*id = fb_desc->id();
}
absl::Status Decode(CLContext* context, const data::Arguments* fb_args,
Arguments* args) {
args->shared_int4s_data_ = std::vector<int32_t>(
fb_args->shared_int4s()->data(),
fb_args->shared_int4s()->data() + fb_args->shared_int4s()->size());
args->shared_float4s_data_ = std::vector<float>(
fb_args->shared_float4s()->data(),
fb_args->shared_float4s()->data() + fb_args->shared_float4s()->size());
std::vector<float> tmp = std::vector<float>(
fb_args->shared_half4s()->data(),
fb_args->shared_half4s()->data() + fb_args->shared_half4s()->size());
args->shared_half4s_data_.resize(tmp.size());
for (int i = 0; i < tmp.size(); ++i) {
args->shared_half4s_data_[i] = tmp[i];
}
absl::Status Decode(const data::Arguments* fb_args, Arguments* args) {
args->int_values_.clear();
for (auto int_values_fb : *fb_args->int_values()) {
Arguments::IntValue value;
value.value = int_values_fb->value();
value.offset = int_values_fb->offset();
value.active = int_values_fb->active();
std::string name(int_values_fb->name()->c_str(),
int_values_fb->name()->size());
@ -542,7 +523,6 @@ absl::Status Decode(CLContext* context, const data::Arguments* fb_args,
for (auto float_values_fb : *fb_args->float_values()) {
Arguments::FloatValue value;
value.value = float_values_fb->value();
value.offset = float_values_fb->offset();
value.active = float_values_fb->active();
std::string name(float_values_fb->name()->c_str(),
float_values_fb->name()->size());
@ -553,9 +533,7 @@ absl::Status Decode(CLContext* context, const data::Arguments* fb_args,
for (auto half_values_fb : *fb_args->half_values()) {
Arguments::HalfValue value;
value.value = half_values_fb->value();
value.offset = half_values_fb->offset();
value.active = half_values_fb->active();
value.store_as_f32 = half_values_fb->store_as_f32();
std::string name(half_values_fb->name()->c_str(),
half_values_fb->name()->size());
args->half_values_[name] = value;
@ -635,9 +613,6 @@ absl::Status Decode(CLContext* context, const data::Arguments* fb_args,
args->AddObjectRef(key, access_type,
absl::make_unique<TensorDescriptor>(std::move(desc)));
}
RETURN_IF_ERROR(args->AllocateObjects(context));
RETURN_IF_ERROR(args->AddObjectArgs());
return absl::OkStatus();
}
@ -649,7 +624,6 @@ flatbuffers::Offset<data::Arguments> Encode(
data::IntValueBuilder value_builder(*builder);
value_builder.add_name(name_fb);
value_builder.add_value(value.second.value);
value_builder.add_offset(value.second.offset);
value_builder.add_active(value.second.active);
int_values_fb.push_back(value_builder.Finish());
}
@ -660,7 +634,6 @@ flatbuffers::Offset<data::Arguments> Encode(
data::FloatValueBuilder value_builder(*builder);
value_builder.add_name(name_fb);
value_builder.add_value(value.second.value);
value_builder.add_offset(value.second.offset);
value_builder.add_active(value.second.active);
float_values_fb.push_back(value_builder.Finish());
}
@ -671,9 +644,7 @@ flatbuffers::Offset<data::Arguments> Encode(
data::HalfValueBuilder value_builder(*builder);
value_builder.add_name(name_fb);
value_builder.add_value(value.second.value);
value_builder.add_offset(value.second.offset);
value_builder.add_active(value.second.active);
value_builder.add_store_as_f32(value.second.store_as_f32);
half_values_fb.push_back(value_builder.Finish());
}
@ -681,7 +652,7 @@ flatbuffers::Offset<data::Arguments> Encode(
buffer_objs_fb;
for (auto& value : args.objects_) {
const auto* buffer_desc =
dynamic_cast<const BufferDescriptor*>(value.second.descriptor.get());
dynamic_cast<const BufferDescriptor*>(value.second.get());
if (!buffer_desc) continue;
auto desc_fb = Encode(*buffer_desc, builder);
auto key_fb = builder->CreateString(value.first);
@ -694,7 +665,7 @@ flatbuffers::Offset<data::Arguments> Encode(
texture2d_objs_fb;
for (auto& value : args.objects_) {
const auto* texture_desc =
dynamic_cast<const Texture2DDescriptor*>(value.second.descriptor.get());
dynamic_cast<const Texture2DDescriptor*>(value.second.get());
if (!texture_desc) continue;
auto desc_fb = Encode(*texture_desc, builder);
auto key_fb = builder->CreateString(value.first);
@ -706,8 +677,8 @@ flatbuffers::Offset<data::Arguments> Encode(
std::vector<flatbuffers::Offset<data::TensorLinearDescriptorMapValue>>
tensor_linear_objs_fb;
for (auto& value : args.objects_) {
const auto* tensor_desc = dynamic_cast<const TensorLinearDescriptor*>(
value.second.descriptor.get());
const auto* tensor_desc =
dynamic_cast<const TensorLinearDescriptor*>(value.second.get());
if (!tensor_desc) continue;
auto desc_fb = Encode(*tensor_desc, builder);
auto key_fb = builder->CreateString(value.first);
@ -720,7 +691,7 @@ flatbuffers::Offset<data::Arguments> Encode(
tensor_objs_fb;
for (auto& value : args.objects_) {
const auto* tensor_desc =
dynamic_cast<const TensorDescriptor*>(value.second.descriptor.get());
dynamic_cast<const TensorDescriptor*>(value.second.get());
if (!tensor_desc) continue;
auto desc_fb = Encode(*tensor_desc, builder);
auto key_fb = builder->CreateString(value.first);
@ -734,7 +705,7 @@ flatbuffers::Offset<data::Arguments> Encode(
buffer_refs_fb;
for (auto& value : args.object_refs_) {
const auto* buffer_desc =
dynamic_cast<const BufferDescriptor*>(value.second.descriptor.get());
dynamic_cast<const BufferDescriptor*>(value.second.get());
if (!buffer_desc) continue;
auto desc_fb = Encode(*buffer_desc, builder);
auto key_fb = builder->CreateString(value.first);
@ -747,7 +718,7 @@ flatbuffers::Offset<data::Arguments> Encode(
texture2d_refs_fb;
for (auto& value : args.object_refs_) {
const auto* texture_desc =
dynamic_cast<const Texture2DDescriptor*>(value.second.descriptor.get());
dynamic_cast<const Texture2DDescriptor*>(value.second.get());
if (!texture_desc) continue;
auto desc_fb = Encode(*texture_desc, builder);
auto key_fb = builder->CreateString(value.first);
@ -759,8 +730,8 @@ flatbuffers::Offset<data::Arguments> Encode(
std::vector<flatbuffers::Offset<data::TensorLinearDescriptorMapValue>>
tensor_linear_refs_fb;
for (auto& value : args.object_refs_) {
const auto* tensor_desc = dynamic_cast<const TensorLinearDescriptor*>(
value.second.descriptor.get());
const auto* tensor_desc =
dynamic_cast<const TensorLinearDescriptor*>(value.second.get());
if (!tensor_desc) continue;
auto desc_fb = Encode(*tensor_desc, builder);
auto key_fb = builder->CreateString(value.first);
@ -773,7 +744,7 @@ flatbuffers::Offset<data::Arguments> Encode(
tensor_refs_fb;
for (auto& value : args.object_refs_) {
const auto* tensor_desc =
dynamic_cast<const TensorDescriptor*>(value.second.descriptor.get());
dynamic_cast<const TensorDescriptor*>(value.second.get());
if (!tensor_desc) continue;
auto desc_fb = Encode(*tensor_desc, builder);
auto key_fb = builder->CreateString(value.first);
@ -783,14 +754,6 @@ flatbuffers::Offset<data::Arguments> Encode(
tensor_refs_fb.push_back(ten_map_builder.Finish());
}
auto shared_int4s_data_fb = builder->CreateVector(args.shared_int4s_data_);
auto shared_float4s_data_fb =
builder->CreateVector(args.shared_float4s_data_);
std::vector<float> tmp(args.shared_half4s_data_.size());
for (int i = 0; i < tmp.size(); ++i) {
tmp[i] = args.shared_half4s_data_[i];
}
auto shared_half4s_data_fb = builder->CreateVector(tmp);
auto int_values_fb_vec = builder->CreateVector(int_values_fb);
auto float_values_fb_vec = builder->CreateVector(float_values_fb);
auto half_values_fb_vec = builder->CreateVector(half_values_fb);
@ -803,9 +766,6 @@ flatbuffers::Offset<data::Arguments> Encode(
auto tensor_linear_refs_fb_vec = builder->CreateVector(tensor_linear_refs_fb);
auto tensor_refs_fb_vec = builder->CreateVector(tensor_refs_fb);
data::ArgumentsBuilder arguments_builder(*builder);
arguments_builder.add_shared_int4s(shared_int4s_data_fb);
arguments_builder.add_shared_float4s(shared_float4s_data_fb);
arguments_builder.add_shared_half4s(shared_half4s_data_fb);
arguments_builder.add_int_values(int_values_fb_vec);
arguments_builder.add_float_values(float_values_fb_vec);
arguments_builder.add_half_values(half_values_fb_vec);
@ -820,9 +780,8 @@ flatbuffers::Offset<data::Arguments> Encode(
return arguments_builder.Finish();
}
absl::Status Decode(CLContext* context, const data::GPUOperation* fb_op,
GPUOperation* op) {
RETURN_IF_ERROR(Decode(context, fb_op->arguments(), &op->args_));
absl::Status Decode(const data::GPUOperation* fb_op, GPUOperation* op) {
RETURN_IF_ERROR(Decode(fb_op->arguments(), &op->args_));
op->code_ = std::string(fb_op->code()->c_str(), fb_op->code()->size());
op->work_group_size_.x = fb_op->work_group_size()->x();
op->work_group_size_.y = fb_op->work_group_size()->y();
@ -935,10 +894,9 @@ flatbuffers::Offset<data::CLNode> Encode(
return node_builder.Finish();
}
absl::Status Decode(CLContext* context, const data::CLNode* fb_node,
CLNode* node) {
absl::Status Decode(const data::CLNode* fb_node, CLNode* node) {
GPUOperation op;
RETURN_IF_ERROR(Decode(context, fb_node->gpu_op(), &op));
RETURN_IF_ERROR(Decode(fb_node->gpu_op(), &op));
node->operation = absl::make_unique<GPUOperation>(std::move(op));
for (auto in_fb : *fb_node->input_ids()) {
node->inputs.push_back(in_fb);
@ -1006,8 +964,7 @@ flatbuffers::Offset<data::InferenceContext> Encode(
return inf_builder.Finish();
}
absl::Status Decode(CLContext* context,
const data::InferenceContext* fb_inference,
absl::Status Decode(const data::InferenceContext* fb_inference,
InferenceContext* inference) {
inference->need_flush_ = fb_inference->need_flush();
inference->flush_periodically_ = fb_inference->flush_periodically();
@ -1019,7 +976,7 @@ absl::Status Decode(CLContext* context,
inference->nodes_.resize(fb_inference->nodes()->size());
int counter = 0;
for (auto node_fb : *fb_inference->nodes()) {
RETURN_IF_ERROR(Decode(context, node_fb, &inference->nodes_[counter]));
RETURN_IF_ERROR(Decode(node_fb, &inference->nodes_[counter]));
counter++;
}

View File

@ -36,22 +36,18 @@ table IntValue {
name:string;
value:int32;
active:bool;
offset:uint32;
}
table FloatValue {
name:string;
value:float;
active:bool;
offset:uint32;
}
table HalfValue {
name:string;
value:float;
active:bool;
store_as_f32:bool;
offset:uint32;
}
enum AccessType : byte {
@ -172,13 +168,8 @@ table TensorDescriptorMapValue {
table Arguments {
int_values:[IntValue];
shared_int4s:[int32];
float_values:[FloatValue];
shared_float4s:[float];
half_values:[HalfValue];
shared_half4s:[float];
buffer_refs:[BufferDescriptorMapValue];
texture2d_refs:[Texture2DDescriptorMapValue];