diff --git a/tensorflow/lite/delegates/gpu/cl/BUILD b/tensorflow/lite/delegates/gpu/cl/BUILD index 63171348b74..b3ea6925118 100644 --- a/tensorflow/lite/delegates/gpu/cl/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/BUILD @@ -52,12 +52,8 @@ cc_library( srcs = ["arguments.cc"], hdrs = ["arguments.h"], deps = [ - ":cl_device", ":gpu_object", - ":opencl_wrapper", ":serialization_cc_fbs", - ":tensor_type", - ":util", "//tensorflow/lite/delegates/gpu/common:access_type", "//tensorflow/lite/delegates/gpu/common:data_type", "//tensorflow/lite/delegates/gpu/common:status", @@ -67,27 +63,6 @@ cc_library( ], ) -cc_test( - name = "arguments_test", - srcs = ["arguments_test.cc"], - linkstatic = True, - tags = tf_gpu_tests_tags() + [ - "linux", - "local", - ], - deps = [ - ":arguments", - ":buffer", - ":device_info", - ":gpu_object", - ":tensor", - ":tensor_type", - "//tensorflow/lite/delegates/gpu/common:data_type", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest_main", - ], -) - cc_library( name = "buffer", srcs = ["buffer.cc"], @@ -132,6 +107,45 @@ cc_library( ], ) +cc_library( + name = "cl_arguments", + srcs = ["cl_arguments.cc"], + hdrs = ["cl_arguments.h"], + deps = [ + ":arguments", + ":cl_context", + ":device_info", + ":gpu_object", + ":tensor", + ":tensor_type", + "//tensorflow/lite/delegates/gpu/common:access_type", + "//tensorflow/lite/delegates/gpu/common:data_type", + "//tensorflow/lite/delegates/gpu/common:status", + "//tensorflow/lite/delegates/gpu/common:types", + "//tensorflow/lite/delegates/gpu/common:util", + "@com_google_absl//absl/strings", + ], +) + +cc_test( + name = "cl_arguments_test", + srcs = ["cl_arguments_test.cc"], + linkstatic = True, + tags = tf_gpu_tests_tags() + [ + "linux", + "local", + ], + deps = [ + ":arguments", + ":buffer", + ":cl_arguments", + ":device_info", + ":gpu_object", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest_main", + ], +) + cc_library( name = "cl_command_queue", srcs = ["cl_command_queue.cc"], diff --git a/tensorflow/lite/delegates/gpu/cl/arguments.cc b/tensorflow/lite/delegates/gpu/cl/arguments.cc index 7c5e635816e..f5e58f87ba8 100644 --- a/tensorflow/lite/delegates/gpu/cl/arguments.cc +++ b/tensorflow/lite/delegates/gpu/cl/arguments.cc @@ -16,12 +16,8 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/cl/arguments.h" #include "absl/strings/ascii.h" +#include "absl/strings/match.h" #include "absl/strings/str_cat.h" -#include "absl/strings/str_replace.h" -#include "absl/strings/str_split.h" -#include "absl/strings/substitute.h" -#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" -#include "tensorflow/lite/delegates/gpu/common/data_type.h" #include "tensorflow/lite/delegates/gpu/common/status.h" namespace tflite { @@ -42,75 +38,17 @@ std::string GetNextWord(const std::string& code, size_t first_position) { return code.substr(first_position, pos - first_position); } -size_t FindEnclosingBracket(const std::string& text, size_t first_pos, - char bracket) { - const std::map brackets = { - {'(', ')'}, - {'{', '}'}, - {'[', ']'}, - {'<', '>'}, - }; - char b_open = bracket; - auto it = brackets.find(b_open); - if (it == brackets.end()) { - return -1; - } - char b_close = it->second; - size_t pos = first_pos; - int opened = 1; - int closed = 0; - while (opened != closed && pos < text.size()) { - if (text[pos] == b_open) { - opened++; - } else if (text[pos] == b_close) { - closed++; +bool HasWord(const std::string& word, const std::string& text) { + size_t pos = text.find(word); + while (pos != std::string::npos) { + char prev = pos == 0 ? '.' : text[pos - 1]; + char next = pos + word.size() < text.size() ? text[pos + word.size()] : '.'; + if (!IsWordSymbol(prev) & !IsWordSymbol(next)) { + return true; } - pos++; - } - if (opened == closed) { - return pos; - } else { - return -1; - } -} - -absl::Status ParseArgsInsideBrackets(const std::string& text, - size_t open_bracket_pos, - size_t* close_bracket_pos, - std::vector* args) { - *close_bracket_pos = - FindEnclosingBracket(text, open_bracket_pos + 1, text[open_bracket_pos]); - if (*close_bracket_pos == -1) { - return absl::NotFoundError("Not found enclosing bracket"); - } - std::string str_args = text.substr(open_bracket_pos + 1, - *close_bracket_pos - open_bracket_pos - 2); - std::vector words = absl::StrSplit(str_args, ','); - args->reserve(words.size()); - for (const auto& word : words) { - absl::string_view arg = absl::StripAsciiWhitespace(word); - if (!arg.empty()) { - args->push_back(std::string(arg)); - } - } - return absl::OkStatus(); -} - -void ReplaceAllWords(const std::string& old_word, const std::string& new_word, - std::string* str) { - size_t position = str->find(old_word); - while (position != std::string::npos) { - char prev = position == 0 ? '.' : (*str)[position - 1]; - char next = position + old_word.size() < str->size() - ? (*str)[position + old_word.size()] - : '.'; - if (IsWordSymbol(prev) || IsWordSymbol(next)) { - position = str->find(old_word, position + 1); - continue; - } - str->replace(position, old_word.size(), new_word); - position = str->find(old_word, position + new_word.size()); + pos = text.find(word, pos + 1); } + return false; } std::string RenameArg(const std::vector& object_names, @@ -127,91 +65,8 @@ std::string RenameArg(const std::vector& object_names, return arg_name + postfix; } -void AppendArgument(const std::string& arg, std::string* args) { - if (!args->empty()) { - absl::StrAppend(args, ",\n "); - } - absl::StrAppend(args, arg); -} - -std::string GetImageModifier(AccessType access) { - switch (access) { - case AccessType::READ: - return "__read_only"; - case AccessType::WRITE: - return "__write_only"; - case AccessType::READ_WRITE: - return "__read_write"; - } -} - -std::string GetDefaultSamplers(const DeviceInfo& device_info) { - std::string result; - result += - "__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | " - "CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n"; - if (device_info.IsAdreno3xx()) { - // Unfortunately, CLK_ADDRESS_CLAMP is very slow on Adreno3xx and - // we can observe huge register overhead when compared to other modes. - - // While using CLK_ADDRESS_NONE with out-of-range image coordinates is - // undefined in the OpenCL specification, we have observed that - // CLK_ADDRESS_NONE works like CLK_ADDRESS_CLAMP for out-of-range image - // coordinates for RGBA F16/F32 textures on Adreno3xx devices. Using - // CLK_ADDRESS_NONE is significantly faster than CLK_ADDRESS_CLAMP on Adreno - // 3xx. - result += - "__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | " - "CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n"; - } else { - result += - "__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | " - "CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n"; - } - - return result; -} - } // namespace -// Static -constexpr char Arguments::kArgsPrefix[]; - -Arguments::Arguments(Arguments&& args) - : int_values_(std::move(args.int_values_)), - shared_int4s_data_(std::move(args.shared_int4s_data_)), - float_values_(std::move(args.float_values_)), - shared_float4s_data_(std::move(args.shared_float4s_data_)), - half_values_(std::move(args.half_values_)), - shared_half4s_data_(std::move(args.shared_half4s_data_)), - buffers_(std::move(args.buffers_)), - images2d_(std::move(args.images2d_)), - image2d_arrays_(std::move(args.image2d_arrays_)), - images3d_(std::move(args.images3d_)), - image_buffers_(std::move(args.image_buffers_)), - custom_memories_(std::move(args.custom_memories_)), - object_refs_(std::move(args.object_refs_)), - objects_(std::move(args.objects_)) {} -Arguments& Arguments::operator=(Arguments&& args) { - if (this != &args) { - int_values_ = std::move(args.int_values_); - shared_int4s_data_ = std::move(args.shared_int4s_data_); - float_values_ = std::move(args.float_values_); - shared_float4s_data_ = std::move(args.shared_float4s_data_); - half_values_ = std::move(args.half_values_); - shared_half4s_data_ = std::move(args.shared_half4s_data_); - buffers_ = std::move(args.buffers_); - images2d_ = std::move(args.images2d_); - image2d_arrays_ = std::move(args.image2d_arrays_); - images3d_ = std::move(args.images3d_); - image_buffers_ = std::move(args.image_buffers_); - custom_memories_ = std::move(args.custom_memories_); - object_refs_ = std::move(args.object_refs_); - objects_ = std::move(args.objects_); - } - return *this; -} - void Arguments::AddFloat(const std::string& name, float value) { float_values_[name].value = value; } @@ -221,34 +76,6 @@ void Arguments::AddHalf(const std::string& name, half value) { void Arguments::AddInt(const std::string& name, int value) { int_values_[name].value = value; } -void Arguments::AddBuffer(const std::string& name, - const GPUBufferDescriptor& desc) { - buffers_[name] = desc; -} -void Arguments::AddImage2D(const std::string& name, - const GPUImage2DDescriptor& desc) { - images2d_[name] = desc; -} - -void Arguments::AddImage2DArray(const std::string& name, - const GPUImage2DArrayDescriptor& desc) { - image2d_arrays_[name] = desc; -} - -void Arguments::AddImage3D(const std::string& name, - const GPUImage3DDescriptor& desc) { - images3d_[name] = desc; -} - -void Arguments::AddImageBuffer(const std::string& name, - const GPUImageBufferDescriptor& desc) { - image_buffers_[name] = desc; -} - -void Arguments::AddCustomMemory(const std::string& name, - const GPUCustomMemoryDescriptor& desc) { - custom_memories_[name] = desc; -} void Arguments::AddObjectRef(const std::string& name, AccessType access_type, GPUObjectDescriptorPtr&& descriptor_ptr) { @@ -259,188 +86,12 @@ void Arguments::AddObjectRef(const std::string& name, AccessType access_type, void Arguments::AddObject(const std::string& name, GPUObjectDescriptorPtr&& descriptor_ptr) { descriptor_ptr->SetAccess(AccessType::READ); - objects_[name] = {nullptr, std::move(descriptor_ptr)}; -} - -void Arguments::AddGPUResources(const std::string& name, - const GPUResources& resources) { - for (const auto& r : resources.ints) { - AddInt(absl::StrCat(name, "_", r)); - } - for (const auto& r : resources.floats) { - AddFloat(absl::StrCat(name, "_", r)); - } - for (const auto& r : resources.buffers) { - AddBuffer(absl::StrCat(name, "_", r.first), r.second); - } - for (const auto& r : resources.images2d) { - AddImage2D(absl::StrCat(name, "_", r.first), r.second); - } - for (const auto& r : resources.image2d_arrays) { - AddImage2DArray(absl::StrCat(name, "_", r.first), r.second); - } - for (const auto& r : resources.images3d) { - AddImage3D(absl::StrCat(name, "_", r.first), r.second); - } - for (const auto& r : resources.image_buffers) { - AddImageBuffer(absl::StrCat(name, "_", r.first), r.second); - } - for (const auto& r : resources.custom_memories) { - AddCustomMemory(absl::StrCat(name, "_", r.first), r.second); - } -} - -absl::Status Arguments::SetInt(const std::string& name, int value) { - auto it = int_values_.find(name); - if (it == int_values_.end()) { - return absl::NotFoundError( - absl::StrCat("No int argument with name - ", name)); - } - it->second.value = value; - if (it->second.active) { - shared_int4s_data_[it->second.offset] = value; - } - return absl::OkStatus(); -} - -absl::Status Arguments::SetFloat(const std::string& name, float value) { - auto it = float_values_.find(name); - if (it == float_values_.end()) { - return absl::NotFoundError( - absl::StrCat("No float argument with name - ", name)); - } - it->second.value = value; - if (it->second.active) { - shared_float4s_data_[it->second.offset] = value; - } - return absl::OkStatus(); -} - -absl::Status Arguments::SetHalf(const std::string& name, half value) { - auto it = half_values_.find(name); - if (it == half_values_.end()) { - return absl::NotFoundError( - absl::StrCat("No half argument with name - ", name)); - } - it->second.value = value; - if (it->second.active) { - if (it->second.store_as_f32) { - shared_float4s_data_[it->second.offset] = value; - } else { - shared_half4s_data_[it->second.offset] = value; - } - } - return absl::OkStatus(); -} - -absl::Status Arguments::SetImage2D(const std::string& name, cl_mem memory) { - auto it = images2d_.find(name); - if (it == images2d_.end()) { - return absl::NotFoundError( - absl::StrCat("No image2D argument with name - ", name)); - } - it->second.memory = memory; - return absl::OkStatus(); -} - -absl::Status Arguments::SetBuffer(const std::string& name, cl_mem memory) { - auto it = buffers_.find(name); - if (it == buffers_.end()) { - return absl::NotFoundError( - absl::StrCat("No buffer argument with name - ", name)); - } - it->second.memory = memory; - return absl::OkStatus(); -} - -absl::Status Arguments::SetImage2DArray(const std::string& name, - cl_mem memory) { - auto it = image2d_arrays_.find(name); - if (it == image2d_arrays_.end()) { - return absl::NotFoundError( - absl::StrCat("No image2D array argument with name - ", name)); - } - it->second.memory = memory; - return absl::OkStatus(); -} - -absl::Status Arguments::SetImage3D(const std::string& name, cl_mem memory) { - auto it = images3d_.find(name); - if (it == images3d_.end()) { - return absl::NotFoundError( - absl::StrCat("No image3D argument with name - ", name)); - } - it->second.memory = memory; - return absl::OkStatus(); -} - -absl::Status Arguments::SetImageBuffer(const std::string& name, cl_mem memory) { - auto it = image_buffers_.find(name); - if (it == image_buffers_.end()) { - return absl::NotFoundError( - absl::StrCat("No image buffer argument with name - ", name)); - } - it->second.memory = memory; - return absl::OkStatus(); -} - -absl::Status Arguments::SetCustomMemory(const std::string& name, - cl_mem memory) { - auto it = custom_memories_.find(name); - if (it == custom_memories_.end()) { - return absl::NotFoundError( - absl::StrCat("No custom memory argument with name - ", name)); - } - it->second.memory = memory; - return absl::OkStatus(); -} - -absl::Status Arguments::SetObjectRef(const std::string& name, - const GPUObject* object) { - auto it = object_refs_.find(name); - if (it == object_refs_.end()) { - return absl::NotFoundError( - absl::StrCat("No object ref with name - ", name)); - } - GPUResourcesWithValue resources; - RETURN_IF_ERROR( - object->GetGPUResources(it->second.descriptor.get(), &resources)); - return SetGPUResources(name, resources); -} - -absl::Status Arguments::SetGPUResources( - const std::string& name, const GPUResourcesWithValue& resources) { - for (const auto& r : resources.ints) { - RETURN_IF_ERROR(SetInt(absl::StrCat(name, "_", r.first), r.second)); - } - for (const auto& r : resources.floats) { - RETURN_IF_ERROR(SetFloat(absl::StrCat(name, "_", r.first), r.second)); - } - for (const auto& r : resources.buffers) { - RETURN_IF_ERROR(SetBuffer(absl::StrCat(name, "_", r.first), r.second)); - } - for (const auto& r : resources.images2d) { - RETURN_IF_ERROR(SetImage2D(absl::StrCat(name, "_", r.first), r.second)); - } - for (const auto& r : resources.image2d_arrays) { - RETURN_IF_ERROR( - SetImage2DArray(absl::StrCat(name, "_", r.first), r.second)); - } - for (const auto& r : resources.images3d) { - RETURN_IF_ERROR(SetImage3D(absl::StrCat(name, "_", r.first), r.second)); - } - for (const auto& r : resources.image_buffers) { - RETURN_IF_ERROR(SetImageBuffer(absl::StrCat(name, "_", r.first), r.second)); - } - for (const auto& r : resources.custom_memories) { - RETURN_IF_ERROR( - SetCustomMemory(absl::StrCat(name, "_", r.first), r.second)); - } - return absl::OkStatus(); + objects_[name] = {std::move(descriptor_ptr)}; } void Arguments::RenameArgs(const std::string& postfix, std::string* code) const { + static constexpr char kArgsPrefix[] = "args."; size_t next_position = code->find(kArgsPrefix); while (next_position != std::string::npos) { size_t arg_pos = next_position + strlen(kArgsPrefix); @@ -460,7 +111,7 @@ absl::Status Arguments::Merge(Arguments&& args, const std::string& postfix) { return absl::InvalidArgumentError( absl::StrCat("Object reference name collision. Name - ", name)); } - object_refs_[name] = {std::move(v.second.descriptor)}; + object_refs_[name] = {std::move(v.second)}; } for (auto& v : args.objects_) { object_names.push_back(v.first); @@ -469,8 +120,7 @@ absl::Status Arguments::Merge(Arguments&& args, const std::string& postfix) { return absl::InvalidArgumentError( absl::StrCat("Object name collision. Name - ", name)); } - objects_[name] = {std::move(v.second.obj_ptr), - std::move(v.second.descriptor)}; + objects_[name] = {std::move(v.second)}; } for (const auto& v : args.int_values_) { AddInt(RenameArg(object_names, postfix, v.first), v.second.value); @@ -481,412 +131,26 @@ absl::Status Arguments::Merge(Arguments&& args, const std::string& postfix) { for (const auto& v : args.half_values_) { AddHalf(RenameArg(object_names, postfix, v.first), v.second.value); } - for (const auto& v : args.buffers_) { - AddBuffer(RenameArg(object_names, postfix, v.first), v.second); - } - for (const auto& v : args.images2d_) { - AddImage2D(RenameArg(object_names, postfix, v.first), v.second); - } - for (const auto& v : args.image2d_arrays_) { - AddImage2DArray(RenameArg(object_names, postfix, v.first), v.second); - } - for (const auto& v : args.images3d_) { - AddImage3D(RenameArg(object_names, postfix, v.first), v.second); - } - for (const auto& v : args.image_buffers_) { - AddImageBuffer(RenameArg(object_names, postfix, v.first), v.second); - } - for (const auto& v : args.custom_memories_) { - AddCustomMemory(RenameArg(object_names, postfix, v.first), v.second); - } - return absl::OkStatus(); -} - -absl::Status Arguments::TransformToCLCode( - const DeviceInfo& device_info, - const std::map& linkables, std::string* code) { - RETURN_IF_ERROR(AddObjectArgs()); - RETURN_IF_ERROR(ResolveSelectorsPass(linkables, code)); - ResolveArgsPass(device_info, code); - *code = absl::Substitute(*code, GetListOfArgs()); - *code = GetDefaultSamplers(device_info) + *code; - return absl::OkStatus(); -} - -std::string Arguments::GetListOfArgs() { - std::string result; - for (auto& t : buffers_) { - const std::string type_name = - t.second.data_type == DataType::FLOAT32 ? "float" : "half"; - std::string attributes; - for (const auto& attr : t.second.attributes) { - attributes += absl::StrCat(" __attribute__((", attr, "))"); - } - AppendArgument( - absl::StrCat(MemoryTypeToCLType(t.second.memory_type), " ", - ToCLDataType(t.second.data_type, t.second.element_size), - "* ", t.first, attributes), - &result); - } - for (auto& t : image_buffers_) { - AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type), - " image1d_buffer_t ", t.first), - &result); - } - for (auto& t : images2d_) { - AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type), - " image2d_t ", t.first), - &result); - } - for (auto& t : image2d_arrays_) { - AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type), - " image2d_array_t ", t.first), - &result); - } - for (auto& t : images3d_) { - AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type), - " image3d_t ", t.first), - &result); - } - for (auto& t : custom_memories_) { - AppendArgument(absl::StrCat(t.second.type_name, " ", t.first), &result); - } - for (int i = 0; i < shared_int4s_data_.size() / 4; ++i) { - AppendArgument(absl::StrCat("int4 shared_int4_", i), &result); - } - for (int i = 0; i < shared_float4s_data_.size() / 4; ++i) { - AppendArgument(absl::StrCat("float4 shared_float4_", i), &result); - } - for (int i = 0; i < shared_half4s_data_.size() / 4; ++i) { - AppendArgument(absl::StrCat("half4 shared_half4_", i), &result); - } - return result; -} - -absl::Status Arguments::Bind(cl_kernel kernel, int offset) { - for (auto& t : buffers_) { - const int error_code = - clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); - if (error_code != CL_SUCCESS) { - return absl::UnknownError(absl::StrCat( - "Failed to set kernel arguments - ", CLErrorCodeToString(error_code), - "(at index - ", offset, ")")); - } - offset++; - } - for (auto& t : image_buffers_) { - const int error_code = - clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); - if (error_code != CL_SUCCESS) { - return absl::UnknownError(absl::StrCat( - "Failed to set kernel arguments - ", CLErrorCodeToString(error_code), - "(at index - ", offset, ")")); - } - offset++; - } - for (auto& t : images2d_) { - const int error_code = - clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); - if (error_code != CL_SUCCESS) { - return absl::UnknownError(absl::StrCat( - "Failed to set kernel arguments - ", CLErrorCodeToString(error_code), - "(at index - ", offset, ")")); - } - offset++; - } - for (auto& t : image2d_arrays_) { - const int error_code = - clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); - if (error_code != CL_SUCCESS) { - return absl::UnknownError(absl::StrCat( - "Failed to set kernel arguments - ", CLErrorCodeToString(error_code), - "(at index - ", offset, ")")); - } - offset++; - } - for (auto& t : images3d_) { - const int error_code = - clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); - if (error_code != CL_SUCCESS) { - return absl::UnknownError(absl::StrCat( - "Failed to set kernel arguments - ", CLErrorCodeToString(error_code), - "(at index - ", offset, ")")); - } - offset++; - } - for (auto& t : custom_memories_) { - const int error_code = - clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); - if (error_code != CL_SUCCESS) { - return absl::UnknownError(absl::StrCat( - "Failed to set kernel arguments - ", CLErrorCodeToString(error_code), - "(at index - ", offset, ")")); - } - offset++; - } - for (int i = 0; i < shared_int4s_data_.size() / 4; ++i) { - const int error_code = clSetKernelArg(kernel, offset, sizeof(int32_t) * 4, - &shared_int4s_data_[i * 4]); - if (error_code != CL_SUCCESS) { - return absl::UnknownError(absl::StrCat( - "Failed to set kernel arguments - ", CLErrorCodeToString(error_code), - "(at index - ", offset, ")")); - } - offset++; - } - for (int i = 0; i < shared_float4s_data_.size() / 4; ++i) { - const int error_code = clSetKernelArg(kernel, offset, sizeof(int32_t) * 4, - &shared_float4s_data_[i * 4]); - if (error_code != CL_SUCCESS) { - return absl::UnknownError(absl::StrCat( - "Failed to set kernel arguments - ", CLErrorCodeToString(error_code), - "(at index - ", offset, ")")); - } - offset++; - } - for (int i = 0; i < shared_half4s_data_.size() / 4; ++i) { - const int error_code = clSetKernelArg(kernel, offset, sizeof(int16_t) * 4, - &shared_half4s_data_[i * 4]); - if (error_code != CL_SUCCESS) { - return absl::UnknownError(absl::StrCat( - "Failed to set kernel arguments - ", CLErrorCodeToString(error_code), - "(at index - ", offset, ")")); - } - offset++; - } - return absl::OkStatus(); -} - -std::string Arguments::AddActiveArgument(const std::string& arg_name, - bool use_f32_for_halfs) { - { - auto it = int_values_.find(arg_name); - if (it != int_values_.end()) { - int int_index; - if (it->second.active) { - int_index = it->second.offset; - } else { - it->second.active = true; - it->second.offset = shared_int4s_data_.size(); - int_index = it->second.offset; - shared_int4s_data_.push_back(it->second.value); - } - std::string index = std::to_string(int_index / 4); - std::string postfixes[4] = {"x", "y", "z", "w"}; - return "shared_int4_" + index + "." + postfixes[int_index % 4]; - } - } - { - auto it = float_values_.find(arg_name); - if (it != float_values_.end()) { - int float_index; - if (it->second.active) { - float_index = it->second.offset; - } else { - it->second.active = true; - it->second.offset = shared_float4s_data_.size(); - float_index = it->second.offset; - shared_float4s_data_.push_back(it->second.value); - } - std::string index = std::to_string(float_index / 4); - std::string postfixes[4] = {"x", "y", "z", "w"}; - return "shared_float4_" + index + "." + postfixes[float_index % 4]; - } - } - { - auto it = half_values_.find(arg_name); - if (it != half_values_.end()) { - int half_index; - if (it->second.active) { - half_index = it->second.offset; - } else { - it->second.active = true; - if (use_f32_for_halfs) { - it->second.store_as_f32 = true; - it->second.offset = shared_float4s_data_.size(); - shared_float4s_data_.push_back(it->second.value); - } else { - it->second.offset = shared_half4s_data_.size(); - shared_half4s_data_.push_back(it->second.value); - } - half_index = it->second.offset; - } - std::string index = std::to_string(half_index / 4); - std::string postfixes[4] = {"x", "y", "z", "w"}; - if (it->second.store_as_f32) { - return "(half)(shared_float4_" + index + "." + - postfixes[half_index % 4] + ")"; - } - return "shared_half4_" + index + "." + postfixes[half_index % 4]; - } - } - return arg_name; -} - -void Arguments::ResolveArgsPass(const DeviceInfo& device_info, - std::string* code) { - bool use_f32_for_half_arguments = device_info.IsPowerVR(); - size_t position = 0; - size_t next_position = code->find(kArgsPrefix); - while (next_position != std::string::npos) { - size_t arg_pos = next_position; - next_position += strlen(kArgsPrefix); - std::string object_name = GetNextWord(*code, next_position); - std::string new_name = - AddActiveArgument(object_name, use_f32_for_half_arguments); - code->replace(arg_pos, object_name.size() + strlen(kArgsPrefix), new_name); - position = arg_pos + new_name.size(); - next_position = code->find(kArgsPrefix, position); - } - - int shared_int4s_aligned_size = AlignByN(shared_int4s_data_.size(), 4); - shared_int4s_data_.resize(shared_int4s_aligned_size); - int shared_float4s_aligned_size = AlignByN(shared_float4s_data_.size(), 4); - shared_float4s_data_.resize(shared_float4s_aligned_size); - int shared_half4s_aligned_size = AlignByN(shared_half4s_data_.size(), 4); - shared_half4s_data_.resize(shared_half4s_aligned_size); -} - -void Arguments::ResolveObjectNames(const std::string& object_name, - const std::vector& member_names, - std::string* code) { - for (const auto& member_name : member_names) { - const std::string new_name = kArgsPrefix + object_name + "_" + member_name; - ReplaceAllWords(member_name, new_name, code); - } -} - -GPUObjectDescriptor* Arguments::GetObjectDescriptor( - const std::string& object_name) const { - { - auto it = object_refs_.find(object_name); - if (it != object_refs_.end()) { - return it->second.descriptor.get(); - } - } - { - auto it = objects_.find(object_name); - if (it != objects_.end()) { - return it->second.descriptor.get(); - } - } - return nullptr; -} - -absl::Status Arguments::ResolveSelector( - const std::map& linkables, - const std::string& object_name, const std::string& selector, - const std::vector& args, - const std::vector& template_args, std::string* result) { - const GPUObjectDescriptor* desc_ptr = GetObjectDescriptor(object_name); - if (!desc_ptr) { - return absl::NotFoundError( - absl::StrCat("No object with name - ", object_name)); - } - auto names = desc_ptr->GetGPUResources().GetNames(); - const auto* tensor_desc = dynamic_cast(desc_ptr); - if (tensor_desc && selector == "Write") { - auto it = linkables.find(object_name); - if (it != linkables.end()) { - if (desc_ptr->GetAccess() != AccessType::WRITE && - desc_ptr->GetAccess() != AccessType::READ_WRITE) { - return absl::FailedPreconditionError(absl::StrCat( - "Object with name - ", object_name, " should have Write access.")); - } - std::string value_name, x_coord, y_coord, s_coord; - RETURN_IF_ERROR(tensor_desc->GetLinkingContextFromWriteSelector( - args, &value_name, &x_coord, &y_coord, &s_coord)); - // x_coord can have batch size property of link_object - ResolveObjectNames(object_name, names, &x_coord); - *result = it->second; - ReplaceAllWords("in_out_value", value_name, result); - ReplaceAllWords("X_COORD", x_coord, result); - ReplaceAllWords("Y_COORD", y_coord, result); - ReplaceAllWords("S_COORD", s_coord, result); - RETURN_IF_ERROR(ResolveSelectorsPass({}, result)); - } - } - std::string patch; - RETURN_IF_ERROR( - desc_ptr->PerformSelector(selector, args, template_args, &patch)); - ResolveObjectNames(object_name, names, &patch); - *result += patch; - return absl::OkStatus(); -} - -absl::Status Arguments::ResolveSelectorsPass( - const std::map& linkables, std::string* code) { - std::string result; - size_t position = 0; - size_t next_position = code->find(kArgsPrefix); - while (next_position != std::string::npos) { - size_t arg_pos = next_position; - next_position += strlen(kArgsPrefix); - std::string object_name = GetNextWord(*code, next_position); - char next = (*code)[next_position + object_name.size()]; - if (next == '.') { - next_position += object_name.size() + 1; - std::string selector_name = GetNextWord(*code, next_position); - next_position += selector_name.size(); - next = (*code)[next_position]; - std::vector template_args; - if (next == '<') { - size_t close_bracket_pos; - RETURN_IF_ERROR(ParseArgsInsideBrackets( - *code, next_position, &close_bracket_pos, &template_args)); - next_position = close_bracket_pos; - next = (*code)[next_position]; - } - if (next != '(') { - return absl::NotFoundError(absl::StrCat( - "Expected ( after ", object_name, ".", selector_name, " call")); - } - std::vector args; - size_t close_bracket_pos; - RETURN_IF_ERROR(ParseArgsInsideBrackets(*code, next_position, - &close_bracket_pos, &args)); - for (auto& arg : args) { - RETURN_IF_ERROR(ResolveSelectorsPass({}, &arg)); - } - std::string patch; - RETURN_IF_ERROR(ResolveSelector(linkables, object_name, selector_name, - args, template_args, &patch)); - code->replace(arg_pos, close_bracket_pos - arg_pos, patch); - position = arg_pos + patch.size(); - } else { - position = arg_pos + strlen(kArgsPrefix); - } - next_position = code->find(kArgsPrefix, position); - } - return absl::OkStatus(); -} - -absl::Status Arguments::AllocateObjects(CLContext* context) { - for (auto& t : objects_) { - RETURN_IF_ERROR( - t.second.descriptor->CreateGPUObject(context, &t.second.obj_ptr)); - } return absl::OkStatus(); } void Arguments::ReleaseCPURepresentation() { for (auto& t : objects_) { - t.second.descriptor->Release(); + t.second->Release(); } } -absl::Status Arguments::AddObjectArgs() { - for (auto& t : objects_) { - AddGPUResources(t.first, t.second.descriptor->GetGPUResources()); - GPUResourcesWithValue resources; - RETURN_IF_ERROR(t.second.obj_ptr->GetGPUResources(t.second.descriptor.get(), - &resources)); - RETURN_IF_ERROR(SetGPUResources(t.first, resources)); +void Arguments::GetActiveArguments(const std::string& args_prefix, + const std::string& code) { + for (auto& float_val : float_values_) { + float_val.second.active = HasWord(args_prefix + float_val.first, code); } - for (auto& t : object_refs_) { - AddGPUResources(t.first, t.second.descriptor->GetGPUResources()); + for (auto& int_val : int_values_) { + int_val.second.active = HasWord(args_prefix + int_val.first, code); + } + for (auto& half_val : half_values_) { + half_val.second.active = HasWord(args_prefix + half_val.first, code); } - return absl::OkStatus(); } } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/arguments.h b/tensorflow/lite/delegates/gpu/cl/arguments.h index a5435c4fc2f..3c4671212ec 100644 --- a/tensorflow/lite/delegates/gpu/cl/arguments.h +++ b/tensorflow/lite/delegates/gpu/cl/arguments.h @@ -20,11 +20,8 @@ limitations under the License. #include #include -#include "tensorflow/lite/delegates/gpu/cl/cl_device.h" #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h" -#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h" #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h" -#include "tensorflow/lite/delegates/gpu/cl/util.h" #include "tensorflow/lite/delegates/gpu/common/access_type.h" #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/common/types.h" @@ -39,12 +36,22 @@ class ArgumentsBinder { virtual absl::Status SetInt(const std::string& name, int value) = 0; virtual absl::Status SetFloat(const std::string& name, float value) = 0; virtual absl::Status SetHalf(const std::string& name, half value) = 0; + virtual absl::Status SetObjectRef(const std::string& name, + const GPUObject* object) = 0; virtual ~ArgumentsBinder() = default; }; -class Arguments : public ArgumentsBinder { +class Arguments { public: Arguments() = default; + ~Arguments() = default; + + // Move only + Arguments(Arguments&& args) = default; + Arguments& operator=(Arguments&& args) = default; + Arguments(const Arguments&) = delete; + Arguments& operator=(const Arguments&) = delete; + void AddFloat(const std::string& name, float value = 0.0f); void AddHalf(const std::string& name, half value = half(0.0f)); void AddInt(const std::string& name, int value = 0); @@ -53,82 +60,19 @@ class Arguments : public ArgumentsBinder { void AddObject(const std::string& name, GPUObjectDescriptorPtr&& descriptor_ptr); - absl::Status SetInt(const std::string& name, int value) override; - absl::Status SetFloat(const std::string& name, float value) override; - absl::Status SetHalf(const std::string& name, half value) override; - absl::Status SetObjectRef(const std::string& name, const GPUObject* object); - - absl::Status Bind(cl_kernel kernel, int offset = 0); - void RenameArgs(const std::string& postfix, std::string* code) const; absl::Status Merge(Arguments&& args, const std::string& postfix); - absl::Status AllocateObjects(CLContext* context); void ReleaseCPURepresentation(); - absl::Status TransformToCLCode( - const DeviceInfo& device_info, - const std::map& linkables, std::string* code); - - // Move only - Arguments(Arguments&& args); - Arguments& operator=(Arguments&& args); - Arguments(const Arguments&) = delete; - Arguments& operator=(const Arguments&) = delete; - - ~Arguments() override = default; private: friend flatbuffers::Offset Encode( const Arguments& args, flatbuffers::FlatBufferBuilder* builder); - friend absl::Status Decode(CLContext* context, const data::Arguments* fb_args, - Arguments* args); + friend absl::Status Decode(const data::Arguments* fb_args, Arguments* args); - void AddBuffer(const std::string& name, const GPUBufferDescriptor& desc); - void AddImage2D(const std::string& name, const GPUImage2DDescriptor& desc); - void AddImage2DArray(const std::string& name, - const GPUImage2DArrayDescriptor& desc); - void AddImage3D(const std::string& name, const GPUImage3DDescriptor& desc); - void AddImageBuffer(const std::string& name, - const GPUImageBufferDescriptor& desc); - void AddCustomMemory(const std::string& name, - const GPUCustomMemoryDescriptor& desc); - - absl::Status SetImage2D(const std::string& name, cl_mem memory); - absl::Status SetBuffer(const std::string& name, cl_mem memory); - absl::Status SetImage2DArray(const std::string& name, cl_mem memory); - absl::Status SetImage3D(const std::string& name, cl_mem memory); - absl::Status SetImageBuffer(const std::string& name, cl_mem memory); - absl::Status SetCustomMemory(const std::string& name, cl_mem memory); - - std::string GetListOfArgs(); - - std::string AddActiveArgument(const std::string& arg_name, - bool use_f32_for_halfs); - void AddGPUResources(const std::string& name, const GPUResources& resources); - - absl::Status SetGPUResources(const std::string& name, - const GPUResourcesWithValue& resources); - - absl::Status AddObjectArgs(); - - void ResolveArgsPass(const DeviceInfo& device_info, std::string* code); - absl::Status ResolveSelectorsPass( - const std::map& linkables, std::string* code); - - absl::Status ResolveSelector( - const std::map& linkables, - const std::string& object_name, const std::string& selector, - const std::vector& args, - const std::vector& template_args, std::string* result); - - void ResolveObjectNames(const std::string& object_name, - const std::vector& member_names, - std::string* code); - - GPUObjectDescriptor* GetObjectDescriptor( - const std::string& object_name) const; - - static constexpr char kArgsPrefix[] = "args."; + friend class CLArguments; + void GetActiveArguments(const std::string& args_prefix, + const std::string& code); struct IntValue { int value; @@ -136,12 +80,8 @@ class Arguments : public ArgumentsBinder { // many uniforms generated automatically and not used // to reduce amount of data transferred we adding this optimization bool active = false; - - // offset to shared uniform storage. - uint32_t offset = -1; }; std::map int_values_; - std::vector shared_int4s_data_; struct FloatValue { float value; @@ -149,12 +89,8 @@ class Arguments : public ArgumentsBinder { // many uniforms generated automatically and not used // to reduce amount of data transferred we adding this optimization bool active = false; - - // offset to shared uniform storage. - uint32_t offset = -1; }; std::map float_values_; - std::vector shared_float4s_data_; struct HalfValue { half value; @@ -162,33 +98,11 @@ class Arguments : public ArgumentsBinder { // many uniforms generated automatically and not used // to reduce amount of data transferred we adding this optimization bool active = false; - - // some devices have issues with half parameters. - bool store_as_f32 = false; - - // offset to shared uniform storage. - uint32_t offset = -1; }; std::map half_values_; - std::vector shared_half4s_data_; - std::map buffers_; - std::map images2d_; - std::map image2d_arrays_; - std::map images3d_; - std::map image_buffers_; - std::map custom_memories_; - - struct ObjectRefArg { - GPUObjectDescriptorPtr descriptor; - }; - std::map object_refs_; - - struct ObjectArg { - GPUObjectPtr obj_ptr; - GPUObjectDescriptorPtr descriptor; - }; - std::map objects_; + std::map object_refs_; + std::map objects_; }; } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/cl_arguments.cc b/tensorflow/lite/delegates/gpu/cl/cl_arguments.cc new file mode 100644 index 00000000000..68d65a589e5 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/cl/cl_arguments.cc @@ -0,0 +1,788 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/delegates/gpu/cl/cl_arguments.h" + +#include + +#include "absl/strings/ascii.h" +#include "absl/strings/match.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/substitute.h" +#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" +#include "tensorflow/lite/delegates/gpu/common/util.h" + +namespace tflite { +namespace gpu { +namespace cl { +namespace { +bool IsWordSymbol(char symbol) { + return absl::ascii_isalnum(symbol) || symbol == '_'; +} + +void ReplaceAllWords(const std::string& old_word, const std::string& new_word, + std::string* str) { + size_t position = str->find(old_word); + while (position != std::string::npos) { + char prev = position == 0 ? '.' : (*str)[position - 1]; + char next = position + old_word.size() < str->size() + ? (*str)[position + old_word.size()] + : '.'; + if (IsWordSymbol(prev) || IsWordSymbol(next)) { + position = str->find(old_word, position + 1); + continue; + } + str->replace(position, old_word.size(), new_word); + position = str->find(old_word, position + new_word.size()); + } +} + +std::string GetNextWord(const std::string& code, size_t first_position) { + size_t pos = first_position; + char t = code[pos]; + while (IsWordSymbol(t)) { + pos++; + t = code[pos]; + } + return code.substr(first_position, pos - first_position); +} + +size_t FindEnclosingBracket(const std::string& text, size_t first_pos, + char bracket) { + const std::map brackets = { + {'(', ')'}, + {'{', '}'}, + {'[', ']'}, + {'<', '>'}, + }; + char b_open = bracket; + auto it = brackets.find(b_open); + if (it == brackets.end()) { + return -1; + } + char b_close = it->second; + size_t pos = first_pos; + int opened = 1; + int closed = 0; + while (opened != closed && pos < text.size()) { + if (text[pos] == b_open) { + opened++; + } else if (text[pos] == b_close) { + closed++; + } + pos++; + } + if (opened == closed) { + return pos; + } else { + return -1; + } +} + +absl::Status ParseArgsInsideBrackets(const std::string& text, + size_t open_bracket_pos, + size_t* close_bracket_pos, + std::vector* args) { + *close_bracket_pos = + FindEnclosingBracket(text, open_bracket_pos + 1, text[open_bracket_pos]); + if (*close_bracket_pos == -1) { + return absl::NotFoundError("Not found enclosing bracket"); + } + std::string str_args = text.substr(open_bracket_pos + 1, + *close_bracket_pos - open_bracket_pos - 2); + std::vector words = absl::StrSplit(str_args, ','); + args->reserve(words.size()); + for (const auto& word : words) { + absl::string_view arg = absl::StripAsciiWhitespace(word); + if (!arg.empty()) { + args->push_back(std::string(arg)); + } + } + return absl::OkStatus(); +} + +void AppendArgument(const std::string& arg, std::string* args) { + if (!args->empty()) { + absl::StrAppend(args, ",\n "); + } + absl::StrAppend(args, arg); +} + +std::string GetImageModifier(AccessType access) { + switch (access) { + case AccessType::READ: + return "__read_only"; + case AccessType::WRITE: + return "__write_only"; + case AccessType::READ_WRITE: + return "__read_write"; + } +} + +std::string GetDefaultSamplers(const DeviceInfo& device_info) { + std::string result; + result += + "__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | " + "CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n"; + if (device_info.IsAdreno3xx()) { + // Unfortunately, CLK_ADDRESS_CLAMP is very slow on Adreno3xx and + // we can observe huge register overhead when compared to other modes. + + // While using CLK_ADDRESS_NONE with out-of-range image coordinates is + // undefined in the OpenCL specification, we have observed that + // CLK_ADDRESS_NONE works like CLK_ADDRESS_CLAMP for out-of-range image + // coordinates for RGBA F16/F32 textures on Adreno3xx devices. Using + // CLK_ADDRESS_NONE is significantly faster than CLK_ADDRESS_CLAMP on Adreno + // 3xx. + result += + "__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | " + "CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n"; + } else { + result += + "__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | " + "CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n"; + } + + return result; +} +} // namespace + +// Static +constexpr char CLArguments::kArgsPrefix[]; + +absl::Status CLArguments::Init( + const DeviceInfo& device_info, + const std::map& linkables, CLContext* context, + Arguments* args, std::string* code) { + RETURN_IF_ERROR(AllocateObjects(*args, context)); + RETURN_IF_ERROR(AddObjectArgs(args)); + RETURN_IF_ERROR(ResolveSelectorsPass(*args, linkables, code)); + object_refs_ = std::move(args->object_refs_); + args->GetActiveArguments(kArgsPrefix, *code); + const bool use_f32_for_halfs = device_info.IsPowerVR(); + CopyArguments(*args, use_f32_for_halfs); + RETURN_IF_ERROR(SetObjectsResources(*args)); + RenameArgumentsInCode(code); + ResolveArgsPass(code); + *code = absl::Substitute(*code, GetListOfArgs()); + *code = GetDefaultSamplers(device_info) + *code; + return absl::OkStatus(); +} + +absl::Status CLArguments::Init(const DeviceInfo& device_info, Arguments* args, + CLContext* context) { + RETURN_IF_ERROR(AllocateObjects(*args, context)); + RETURN_IF_ERROR(AddObjectArgs(args)); + object_refs_ = std::move(args->object_refs_); + const bool use_f32_for_halfs = device_info.IsPowerVR(); + CopyArguments(*args, use_f32_for_halfs); + RETURN_IF_ERROR(SetObjectsResources(*args)); + return absl::OkStatus(); +} + +absl::Status CLArguments::AllocateObjects(const Arguments& args, + CLContext* context) { + objects_.resize(args.objects_.size()); + int i = 0; + for (auto& t : args.objects_) { + RETURN_IF_ERROR(t.second->CreateGPUObject(context, &objects_[i])); + i++; + } + return absl::OkStatus(); +} + +absl::Status CLArguments::AddObjectArgs(Arguments* args) { + for (auto& t : args->objects_) { + AddGPUResources(t.first, t.second->GetGPUResources(), args); + } + for (auto& t : args->object_refs_) { + AddGPUResources(t.first, t.second->GetGPUResources(), args); + } + return absl::OkStatus(); +} + +absl::Status CLArguments::SetObjectsResources(const Arguments& args) { + int i = 0; + for (const auto& t : args.objects_) { + GPUResourcesWithValue resources; + RETURN_IF_ERROR(objects_[i]->GetGPUResources(t.second.get(), &resources)); + RETURN_IF_ERROR(SetGPUResources(t.first, resources)); + i++; + } + return absl::OkStatus(); +} + +absl::Status CLArguments::ResolveSelectorsPass( + const Arguments& args, const std::map& linkables, + std::string* code) { + std::string result; + size_t position = 0; + size_t next_position = code->find(kArgsPrefix); + while (next_position != std::string::npos) { + size_t arg_pos = next_position; + next_position += strlen(kArgsPrefix); + std::string object_name = GetNextWord(*code, next_position); + char next = (*code)[next_position + object_name.size()]; + if (next == '.') { + next_position += object_name.size() + 1; + std::string selector_name = GetNextWord(*code, next_position); + next_position += selector_name.size(); + next = (*code)[next_position]; + std::vector template_args; + if (next == '<') { + size_t close_bracket_pos; + RETURN_IF_ERROR(ParseArgsInsideBrackets( + *code, next_position, &close_bracket_pos, &template_args)); + next_position = close_bracket_pos; + next = (*code)[next_position]; + } + if (next != '(') { + return absl::NotFoundError(absl::StrCat( + "Expected ( after ", object_name, ".", selector_name, " call")); + } + std::vector function_args; + size_t close_bracket_pos; + RETURN_IF_ERROR(ParseArgsInsideBrackets( + *code, next_position, &close_bracket_pos, &function_args)); + for (auto& arg : function_args) { + RETURN_IF_ERROR(ResolveSelectorsPass(args, {}, &arg)); + } + std::string patch; + RETURN_IF_ERROR(ResolveSelector(args, linkables, object_name, + selector_name, function_args, + template_args, &patch)); + code->replace(arg_pos, close_bracket_pos - arg_pos, patch); + position = arg_pos + patch.size(); + } else { + position = arg_pos + strlen(kArgsPrefix); + } + next_position = code->find(kArgsPrefix, position); + } + return absl::OkStatus(); +} + +void CLArguments::ResolveObjectNames( + const std::string& object_name, + const std::vector& member_names, std::string* code) { + for (const auto& member_name : member_names) { + const std::string new_name = kArgsPrefix + object_name + "_" + member_name; + ReplaceAllWords(member_name, new_name, code); + } +} + +absl::Status CLArguments::ResolveSelector( + const Arguments& args, const std::map& linkables, + const std::string& object_name, const std::string& selector, + const std::vector& function_args, + const std::vector& template_args, std::string* result) { + const GPUObjectDescriptor* desc_ptr; + auto it_ref = args.object_refs_.find(object_name); + auto it_obj = args.objects_.find(object_name); + if (it_ref != args.object_refs_.end()) { + desc_ptr = it_ref->second.get(); + } else if (it_obj != args.objects_.end()) { + desc_ptr = it_obj->second.get(); + } else { + return absl::NotFoundError( + absl::StrCat("No object with name - ", object_name)); + } + auto names = desc_ptr->GetGPUResources().GetNames(); + const auto* tensor_desc = dynamic_cast(desc_ptr); + if (tensor_desc && selector == "Write") { + auto it = linkables.find(object_name); + if (it != linkables.end()) { + if (desc_ptr->GetAccess() != AccessType::WRITE && + desc_ptr->GetAccess() != AccessType::READ_WRITE) { + return absl::FailedPreconditionError(absl::StrCat( + "Object with name - ", object_name, " should have Write access.")); + } + std::string value_name, x_coord, y_coord, s_coord; + RETURN_IF_ERROR(tensor_desc->GetLinkingContextFromWriteSelector( + function_args, &value_name, &x_coord, &y_coord, &s_coord)); + // x_coord can have batch size property of link_object + ResolveObjectNames(object_name, names, &x_coord); + *result = it->second; + ReplaceAllWords("in_out_value", value_name, result); + ReplaceAllWords("X_COORD", x_coord, result); + ReplaceAllWords("Y_COORD", y_coord, result); + ReplaceAllWords("S_COORD", s_coord, result); + RETURN_IF_ERROR(ResolveSelectorsPass(args, {}, result)); + } + } + std::string patch; + RETURN_IF_ERROR(desc_ptr->PerformSelector(selector, function_args, + template_args, &patch)); + ResolveObjectNames(object_name, names, &patch); + *result += patch; + return absl::OkStatus(); +} + +void CLArguments::ResolveArgsPass(std::string* code) { + size_t position = 0; + size_t next_position = code->find(kArgsPrefix); + while (next_position != std::string::npos) { + size_t arg_pos = next_position; + next_position += strlen(kArgsPrefix); + std::string object_name = GetNextWord(*code, next_position); + std::string new_name = object_name; + code->replace(arg_pos, object_name.size() + strlen(kArgsPrefix), new_name); + position = arg_pos + new_name.size(); + next_position = code->find(kArgsPrefix, position); + } +} + +void CLArguments::CopyScalarValues(Arguments* args) const { + for (const auto& fvalue : float_values_) { + args->float_values_[fvalue.first].value = fvalue.second.value; + } + for (const auto& ivalue : int_values_) { + args->int_values_[ivalue.first].value = ivalue.second.value; + } + for (const auto& hfvalue : half_values_) { + args->half_values_[hfvalue.first].value = hfvalue.second.value; + } +} + +void CLArguments::CopyArguments(const Arguments& args, bool use_f32_for_halfs) { + for (const auto& fvalue : args.float_values_) { + auto& new_val = float_values_[fvalue.first]; + new_val.value = fvalue.second.value; + new_val.active = fvalue.second.active; + if (fvalue.second.active) { + new_val.offset = shared_float4s_data_.size(); + shared_float4s_data_.push_back(new_val.value); + } + } + for (const auto& ivalue : args.int_values_) { + auto& new_val = int_values_[ivalue.first]; + new_val.value = ivalue.second.value; + new_val.active = ivalue.second.active; + if (ivalue.second.active) { + new_val.offset = shared_int4s_data_.size(); + shared_int4s_data_.push_back(new_val.value); + } + } + for (const auto& hfvalue : args.half_values_) { + auto& new_val = half_values_[hfvalue.first]; + new_val.value = hfvalue.second.value; + new_val.active = hfvalue.second.active; + if (hfvalue.second.active) { + if (use_f32_for_halfs) { + new_val.store_as_f32 = true; + new_val.offset = shared_float4s_data_.size(); + shared_float4s_data_.push_back(new_val.value); + } else { + new_val.store_as_f32 = false; + new_val.offset = shared_half4s_data_.size(); + shared_half4s_data_.push_back(new_val.value); + } + } + } + int shared_int4s_aligned_size = AlignByN(shared_int4s_data_.size(), 4); + shared_int4s_data_.resize(shared_int4s_aligned_size); + int shared_float4s_aligned_size = AlignByN(shared_float4s_data_.size(), 4); + shared_float4s_data_.resize(shared_float4s_aligned_size); + int shared_half4s_aligned_size = AlignByN(shared_half4s_data_.size(), 4); + shared_half4s_data_.resize(shared_half4s_aligned_size); +} + +void CLArguments::RenameArgumentsInCode(std::string* code) { + const std::string postfixes[4] = {"x", "y", "z", "w"}; + for (const auto& fvalue : float_values_) { + if (fvalue.second.active) { + std::string index = std::to_string(fvalue.second.offset / 4); + std::string new_name = + "shared_float4_" + index + "." + postfixes[fvalue.second.offset % 4]; + ReplaceAllWords(kArgsPrefix + fvalue.first, new_name, code); + } + } + for (const auto& ivalue : int_values_) { + if (ivalue.second.active) { + std::string index = std::to_string(ivalue.second.offset / 4); + std::string new_name = + "shared_int4_" + index + "." + postfixes[ivalue.second.offset % 4]; + ReplaceAllWords(kArgsPrefix + ivalue.first, new_name, code); + } + } + for (const auto& hfvalue : half_values_) { + if (hfvalue.second.active) { + std::string index = std::to_string(hfvalue.second.offset / 4); + std::string new_name; + if (hfvalue.second.store_as_f32) { + new_name = "(half)(shared_float4_" + index + "." + + postfixes[hfvalue.second.offset % 4] + ")"; + } else { + new_name = "shared_half4_" + index + "." + + postfixes[hfvalue.second.offset % 4]; + } + ReplaceAllWords(kArgsPrefix + hfvalue.first, new_name, code); + } + } +} + +void CLArguments::AddBuffer(const std::string& name, + const GPUBufferDescriptor& desc) { + buffers_[name] = desc; +} +void CLArguments::AddImage2D(const std::string& name, + const GPUImage2DDescriptor& desc) { + images2d_[name] = desc; +} + +void CLArguments::AddImage2DArray(const std::string& name, + const GPUImage2DArrayDescriptor& desc) { + image2d_arrays_[name] = desc; +} + +void CLArguments::AddImage3D(const std::string& name, + const GPUImage3DDescriptor& desc) { + images3d_[name] = desc; +} + +void CLArguments::AddImageBuffer(const std::string& name, + const GPUImageBufferDescriptor& desc) { + image_buffers_[name] = desc; +} + +void CLArguments::AddCustomMemory(const std::string& name, + const GPUCustomMemoryDescriptor& desc) { + custom_memories_[name] = desc; +} + +void CLArguments::AddGPUResources(const std::string& name, + const GPUResources& resources, + Arguments* args) { + for (const auto& r : resources.ints) { + args->AddInt(absl::StrCat(name, "_", r)); + } + for (const auto& r : resources.floats) { + args->AddFloat(absl::StrCat(name, "_", r)); + } + for (const auto& r : resources.buffers) { + AddBuffer(absl::StrCat(name, "_", r.first), r.second); + } + for (const auto& r : resources.images2d) { + AddImage2D(absl::StrCat(name, "_", r.first), r.second); + } + for (const auto& r : resources.image2d_arrays) { + AddImage2DArray(absl::StrCat(name, "_", r.first), r.second); + } + for (const auto& r : resources.images3d) { + AddImage3D(absl::StrCat(name, "_", r.first), r.second); + } + for (const auto& r : resources.image_buffers) { + AddImageBuffer(absl::StrCat(name, "_", r.first), r.second); + } + for (const auto& r : resources.custom_memories) { + AddCustomMemory(absl::StrCat(name, "_", r.first), r.second); + } +} + +absl::Status CLArguments::SetInt(const std::string& name, int value) { + auto it = int_values_.find(name); + if (it == int_values_.end()) { + return absl::NotFoundError( + absl::StrCat("No int argument with name - ", name)); + } + it->second.value = value; + if (it->second.active) { + shared_int4s_data_[it->second.offset] = value; + } + return absl::OkStatus(); +} +absl::Status CLArguments::SetFloat(const std::string& name, float value) { + auto it = float_values_.find(name); + if (it == float_values_.end()) { + return absl::NotFoundError( + absl::StrCat("No float argument with name - ", name)); + } + it->second.value = value; + if (it->second.active) { + shared_float4s_data_[it->second.offset] = value; + } + return absl::OkStatus(); +} + +absl::Status CLArguments::SetHalf(const std::string& name, half value) { + auto it = half_values_.find(name); + if (it == half_values_.end()) { + return absl::NotFoundError( + absl::StrCat("No half argument with name - ", name)); + } + it->second.value = value; + if (it->second.active) { + if (it->second.store_as_f32) { + shared_float4s_data_[it->second.offset] = value; + } else { + shared_half4s_data_[it->second.offset] = value; + } + } + return absl::OkStatus(); +} + +absl::Status CLArguments::SetImage2D(const std::string& name, cl_mem memory) { + auto it = images2d_.find(name); + if (it == images2d_.end()) { + return absl::NotFoundError( + absl::StrCat("No image2D argument with name - ", name)); + } + it->second.memory = memory; + return absl::OkStatus(); +} + +absl::Status CLArguments::SetBuffer(const std::string& name, cl_mem memory) { + auto it = buffers_.find(name); + if (it == buffers_.end()) { + return absl::NotFoundError( + absl::StrCat("No buffer argument with name - ", name)); + } + it->second.memory = memory; + return absl::OkStatus(); +} + +absl::Status CLArguments::SetImage2DArray(const std::string& name, + cl_mem memory) { + auto it = image2d_arrays_.find(name); + if (it == image2d_arrays_.end()) { + return absl::NotFoundError( + absl::StrCat("No image2D array argument with name - ", name)); + } + it->second.memory = memory; + return absl::OkStatus(); +} + +absl::Status CLArguments::SetImage3D(const std::string& name, cl_mem memory) { + auto it = images3d_.find(name); + if (it == images3d_.end()) { + return absl::NotFoundError( + absl::StrCat("No image3D argument with name - ", name)); + } + it->second.memory = memory; + return absl::OkStatus(); +} + +absl::Status CLArguments::SetImageBuffer(const std::string& name, + cl_mem memory) { + auto it = image_buffers_.find(name); + if (it == image_buffers_.end()) { + return absl::NotFoundError( + absl::StrCat("No image buffer argument with name - ", name)); + } + it->second.memory = memory; + return absl::OkStatus(); +} + +absl::Status CLArguments::SetCustomMemory(const std::string& name, + cl_mem memory) { + auto it = custom_memories_.find(name); + if (it == custom_memories_.end()) { + return absl::NotFoundError( + absl::StrCat("No custom memory argument with name - ", name)); + } + it->second.memory = memory; + return absl::OkStatus(); +} + +absl::Status CLArguments::SetObjectRef(const std::string& name, + const GPUObject* object) { + auto it = object_refs_.find(name); + if (it == object_refs_.end()) { + return absl::NotFoundError( + absl::StrCat("No object ref with name - ", name)); + } + GPUResourcesWithValue resources; + RETURN_IF_ERROR(object->GetGPUResources(it->second.get(), &resources)); + return SetGPUResources(name, resources); +} + +absl::Status CLArguments::SetGPUResources( + const std::string& name, const GPUResourcesWithValue& resources) { + for (const auto& r : resources.ints) { + RETURN_IF_ERROR(SetInt(absl::StrCat(name, "_", r.first), r.second)); + } + for (const auto& r : resources.floats) { + RETURN_IF_ERROR(SetFloat(absl::StrCat(name, "_", r.first), r.second)); + } + for (const auto& r : resources.buffers) { + RETURN_IF_ERROR(SetBuffer(absl::StrCat(name, "_", r.first), r.second)); + } + for (const auto& r : resources.images2d) { + RETURN_IF_ERROR(SetImage2D(absl::StrCat(name, "_", r.first), r.second)); + } + for (const auto& r : resources.image2d_arrays) { + RETURN_IF_ERROR( + SetImage2DArray(absl::StrCat(name, "_", r.first), r.second)); + } + for (const auto& r : resources.images3d) { + RETURN_IF_ERROR(SetImage3D(absl::StrCat(name, "_", r.first), r.second)); + } + for (const auto& r : resources.image_buffers) { + RETURN_IF_ERROR(SetImageBuffer(absl::StrCat(name, "_", r.first), r.second)); + } + for (const auto& r : resources.custom_memories) { + RETURN_IF_ERROR( + SetCustomMemory(absl::StrCat(name, "_", r.first), r.second)); + } + return absl::OkStatus(); +} + +std::string CLArguments::GetListOfArgs() { + std::string result; + for (auto& t : buffers_) { + const std::string type_name = + t.second.data_type == DataType::FLOAT32 ? "float" : "half"; + std::string attributes; + for (const auto& attr : t.second.attributes) { + attributes += absl::StrCat(" __attribute__((", attr, "))"); + } + AppendArgument( + absl::StrCat(MemoryTypeToCLType(t.second.memory_type), " ", + ToCLDataType(t.second.data_type, t.second.element_size), + "* ", t.first, attributes), + &result); + } + for (auto& t : image_buffers_) { + AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type), + " image1d_buffer_t ", t.first), + &result); + } + for (auto& t : images2d_) { + AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type), + " image2d_t ", t.first), + &result); + } + for (auto& t : image2d_arrays_) { + AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type), + " image2d_array_t ", t.first), + &result); + } + for (auto& t : images3d_) { + AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type), + " image3d_t ", t.first), + &result); + } + for (auto& t : custom_memories_) { + AppendArgument(absl::StrCat(t.second.type_name, " ", t.first), &result); + } + for (int i = 0; i < shared_int4s_data_.size() / 4; ++i) { + AppendArgument(absl::StrCat("int4 shared_int4_", i), &result); + } + for (int i = 0; i < shared_float4s_data_.size() / 4; ++i) { + AppendArgument(absl::StrCat("float4 shared_float4_", i), &result); + } + for (int i = 0; i < shared_half4s_data_.size() / 4; ++i) { + AppendArgument(absl::StrCat("half4 shared_half4_", i), &result); + } + return result; +} + +absl::Status CLArguments::Bind(cl_kernel kernel, int offset) { + for (auto& t : buffers_) { + const int error_code = + clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); + if (error_code != CL_SUCCESS) { + return absl::UnknownError(absl::StrCat( + "Failed to set kernel arguments - ", CLErrorCodeToString(error_code), + "(at index - ", offset, ")")); + } + offset++; + } + for (auto& t : image_buffers_) { + const int error_code = + clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); + if (error_code != CL_SUCCESS) { + return absl::UnknownError(absl::StrCat( + "Failed to set kernel arguments - ", CLErrorCodeToString(error_code), + "(at index - ", offset, ")")); + } + offset++; + } + for (auto& t : images2d_) { + const int error_code = + clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); + if (error_code != CL_SUCCESS) { + return absl::UnknownError(absl::StrCat( + "Failed to set kernel arguments - ", CLErrorCodeToString(error_code), + "(at index - ", offset, ")")); + } + offset++; + } + for (auto& t : image2d_arrays_) { + const int error_code = + clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); + if (error_code != CL_SUCCESS) { + return absl::UnknownError(absl::StrCat( + "Failed to set kernel arguments - ", CLErrorCodeToString(error_code), + "(at index - ", offset, ")")); + } + offset++; + } + for (auto& t : images3d_) { + const int error_code = + clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); + if (error_code != CL_SUCCESS) { + return absl::UnknownError(absl::StrCat( + "Failed to set kernel arguments - ", CLErrorCodeToString(error_code), + "(at index - ", offset, ")")); + } + offset++; + } + for (auto& t : custom_memories_) { + const int error_code = + clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); + if (error_code != CL_SUCCESS) { + return absl::UnknownError(absl::StrCat( + "Failed to set kernel arguments - ", CLErrorCodeToString(error_code), + "(at index - ", offset, ")")); + } + offset++; + } + for (int i = 0; i < shared_int4s_data_.size() / 4; ++i) { + const int error_code = clSetKernelArg(kernel, offset, sizeof(int32_t) * 4, + &shared_int4s_data_[i * 4]); + if (error_code != CL_SUCCESS) { + return absl::UnknownError(absl::StrCat( + "Failed to set kernel arguments - ", CLErrorCodeToString(error_code), + "(at index - ", offset, ")")); + } + offset++; + } + for (int i = 0; i < shared_float4s_data_.size() / 4; ++i) { + const int error_code = clSetKernelArg(kernel, offset, sizeof(int32_t) * 4, + &shared_float4s_data_[i * 4]); + if (error_code != CL_SUCCESS) { + return absl::UnknownError(absl::StrCat( + "Failed to set kernel arguments - ", CLErrorCodeToString(error_code), + "(at index - ", offset, ")")); + } + offset++; + } + for (int i = 0; i < shared_half4s_data_.size() / 4; ++i) { + const int error_code = clSetKernelArg(kernel, offset, sizeof(int16_t) * 4, + &shared_half4s_data_[i * 4]); + if (error_code != CL_SUCCESS) { + return absl::UnknownError(absl::StrCat( + "Failed to set kernel arguments - ", CLErrorCodeToString(error_code), + "(at index - ", offset, ")")); + } + offset++; + } + return absl::OkStatus(); +} + +} // namespace cl +} // namespace gpu +} // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/cl/cl_arguments.h b/tensorflow/lite/delegates/gpu/cl/cl_arguments.h new file mode 100644 index 00000000000..ce8d913ea1c --- /dev/null +++ b/tensorflow/lite/delegates/gpu/cl/cl_arguments.h @@ -0,0 +1,168 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_ARGUMENTS_H_ +#define TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_ARGUMENTS_H_ + +#include +#include +#include + +#include "tensorflow/lite/delegates/gpu/cl/arguments.h" +#include "tensorflow/lite/delegates/gpu/cl/cl_context.h" +#include "tensorflow/lite/delegates/gpu/cl/device_info.h" +#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h" +#include "tensorflow/lite/delegates/gpu/common/status.h" + +namespace tflite { +namespace gpu { +namespace cl { + +class CLArguments : public ArgumentsBinder { + public: + CLArguments() = default; + + absl::Status Init(const DeviceInfo& device_info, + const std::map& linkables, + CLContext* context, Arguments* args, std::string* code); + absl::Status Init(const DeviceInfo& device_info, Arguments* args, + CLContext* context); + + // Temporary, will be resolved later + void MoveObjectRefsIn(Arguments* args) { + object_refs_ = std::move(args->object_refs_); + } + void MoveObjectRefsOut(Arguments* args) { + args->object_refs_ = std::move(object_refs_); + } + void CopyScalarValues(Arguments* args) const; + + // Move only + CLArguments(CLArguments&& args) = default; + CLArguments& operator=(CLArguments&& args) = default; + CLArguments(const CLArguments&) = delete; + CLArguments& operator=(const CLArguments&) = delete; + + absl::Status SetInt(const std::string& name, int value) override; + absl::Status SetFloat(const std::string& name, float value) override; + absl::Status SetHalf(const std::string& name, half value) override; + absl::Status SetObjectRef(const std::string& name, + const GPUObject* object) override; + + absl::Status Bind(cl_kernel kernel, int offset = 0); + + private: + absl::Status AllocateObjects(const Arguments& args, CLContext* context); + absl::Status AddObjectArgs(Arguments* args); + + absl::Status ResolveSelectorsPass( + const Arguments& args, + const std::map& linkables, std::string* code); + absl::Status ResolveSelector( + const Arguments& args, + const std::map& linkables, + const std::string& object_name, const std::string& selector, + const std::vector& function_args, + const std::vector& template_args, std::string* result); + void ResolveObjectNames(const std::string& object_name, + const std::vector& member_names, + std::string* code); + void ResolveArgsPass(std::string* code); + + void CopyArguments(const Arguments& args, bool use_f32_for_halfs); + void RenameArgumentsInCode(std::string* code); + std::string GetListOfArgs(); + + void AddBuffer(const std::string& name, const GPUBufferDescriptor& desc); + void AddImage2D(const std::string& name, const GPUImage2DDescriptor& desc); + void AddImage2DArray(const std::string& name, + const GPUImage2DArrayDescriptor& desc); + void AddImage3D(const std::string& name, const GPUImage3DDescriptor& desc); + void AddImageBuffer(const std::string& name, + const GPUImageBufferDescriptor& desc); + void AddCustomMemory(const std::string& name, + const GPUCustomMemoryDescriptor& desc); + void AddGPUResources(const std::string& name, const GPUResources& resources, + Arguments* args); + absl::Status SetObjectsResources(const Arguments& args); + absl::Status SetGPUResources(const std::string& name, + const GPUResourcesWithValue& resources); + + absl::Status SetImage2D(const std::string& name, cl_mem memory); + absl::Status SetBuffer(const std::string& name, cl_mem memory); + absl::Status SetImage2DArray(const std::string& name, cl_mem memory); + absl::Status SetImage3D(const std::string& name, cl_mem memory); + absl::Status SetImageBuffer(const std::string& name, cl_mem memory); + absl::Status SetCustomMemory(const std::string& name, cl_mem memory); + + static constexpr char kArgsPrefix[] = "args."; + struct IntValue { + int value; + + // many arguments generated automatically and not used + // to reduce amount of data transferred we adding this optimization + bool active = false; + + // offset to shared storage. + uint32_t offset = -1; + }; + std::map int_values_; + std::vector shared_int4s_data_; + + struct FloatValue { + float value; + + // many arguments generated automatically and not used + // to reduce amount of data transferred we adding this optimization + bool active = false; + + // offset to shared storage. + uint32_t offset = -1; + }; + std::map float_values_; + std::vector shared_float4s_data_; + + struct HalfValue { + half value; + + // many arguments generated automatically and not used + // to reduce amount of data transferred we adding this optimization + bool active = false; + + // some devices have issues with half parameters. + bool store_as_f32 = false; + + // offset to shared uniform storage. + uint32_t offset = -1; + }; + std::map half_values_; + std::vector shared_half4s_data_; + + std::map buffers_; + std::map images2d_; + std::map image2d_arrays_; + std::map images3d_; + std::map image_buffers_; + std::map custom_memories_; + + std::map object_refs_; + std::vector objects_; +}; + +} // namespace cl +} // namespace gpu +} // namespace tflite + +#endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_ARGUMENTS_H_ diff --git a/tensorflow/lite/delegates/gpu/cl/arguments_test.cc b/tensorflow/lite/delegates/gpu/cl/cl_arguments_test.cc similarity index 80% rename from tensorflow/lite/delegates/gpu/cl/arguments_test.cc rename to tensorflow/lite/delegates/gpu/cl/cl_arguments_test.cc index 722ca5b1827..adaf30b7909 100644 --- a/tensorflow/lite/delegates/gpu/cl/arguments_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/cl_arguments_test.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/lite/delegates/gpu/cl/arguments.h" +#include "tensorflow/lite/delegates/gpu/cl/cl_arguments.h" #include #include @@ -20,6 +20,7 @@ limitations under the License. #include #include #include "absl/strings/match.h" +#include "tensorflow/lite/delegates/gpu/cl/arguments.h" #include "tensorflow/lite/delegates/gpu/cl/buffer.h" #include "tensorflow/lite/delegates/gpu/cl/device_info.h" #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h" @@ -27,7 +28,7 @@ limitations under the License. namespace tflite { namespace gpu { namespace cl { -TEST(ArgumentsTest, TestSelectorResolve) { +TEST(CLArgumentsTest, TestSelectorResolve) { BufferDescriptor desc; desc.element_type = DataType::FLOAT32; desc.element_size = 4; @@ -43,14 +44,15 @@ __kernel void main_function($0) { } })"; + CLArguments cl_args; DeviceInfo device_info; - ASSERT_OK(args.TransformToCLCode(device_info, {}, &sample_code)); + ASSERT_OK(cl_args.Init(device_info, {}, nullptr, &args, &sample_code)); EXPECT_TRUE(absl::StrContains(sample_code, "value = weights_buffer[id];")); EXPECT_TRUE( absl::StrContains(sample_code, "__global float4* weights_buffer")); } -TEST(ArgumentsTest, TestNoSelector) { +TEST(CLArgumentsTest, TestNoSelector) { BufferDescriptor desc; desc.element_type = DataType::FLOAT32; desc.element_size = 4; @@ -64,16 +66,10 @@ TEST(ArgumentsTest, TestNoSelector) { value = args.weights.UnknownSelector(id); } )"; + CLArguments cl_args; DeviceInfo device_info; - EXPECT_FALSE(args.TransformToCLCode(device_info, {}, &sample_code).ok()); -} - -TEST(ArgumentsTest, TestRenameArgs) { - Arguments linkable_args; - linkable_args.AddFloat("alpha", 0.5f); - std::string linkable_code = "in_out_value += args.alpha;\n"; - linkable_args.RenameArgs("_link0", &linkable_code); - EXPECT_EQ(linkable_code, "in_out_value += args.alpha_link0;\n"); + EXPECT_FALSE( + cl_args.Init(device_info, {}, nullptr, &args, &sample_code).ok()); } } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.cc b/tensorflow/lite/delegates/gpu/cl/inference_context.cc index ca0c0319f54..add7671a2e4 100644 --- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc +++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc @@ -199,12 +199,21 @@ absl::Status InferenceContext::InitFromGraph( RETURN_IF_ERROR(Tune(tuning_parameters)); if (serialized_model) { + // Temporary, will be resolved later, now we don't have complete + // intermediate representation + for (auto& node : nodes_) { + node.operation->MoveObjectRefsFromCLToGeneric(); + node.operation->SyncScalarValues(); + } flatbuffers::FlatBufferBuilder builder; auto encoded_fb = Encode(*this, &builder); data::FinishInferenceContextBuffer(builder, encoded_fb); serialized_model->resize(builder.GetSize()); std::memcpy(serialized_model->data(), builder.GetBufferPointer(), builder.GetSize()); + for (auto& node : nodes_) { + node.operation->MoveObjectRefsFromGenericToCL(); + } } for (auto& node : nodes_) { node.operation->args_.ReleaseCPURepresentation(); @@ -220,7 +229,7 @@ absl::Status InferenceContext::RestoreDeserialized( return absl::DataLossError("Deserialization failed."); } auto decoded_fb = data::GetInferenceContext(serialized_model.data()); - RETURN_IF_ERROR(Decode(&env->context(), decoded_fb, this)); + RETURN_IF_ERROR(Decode(decoded_fb, this)); CreationContext creation_context; creation_context.device = env->GetDevicePtr(); diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.h b/tensorflow/lite/delegates/gpu/cl/inference_context.h index ec8055ebcde..8e2aa964f74 100644 --- a/tensorflow/lite/delegates/gpu/cl/inference_context.h +++ b/tensorflow/lite/delegates/gpu/cl/inference_context.h @@ -103,8 +103,7 @@ class InferenceContext { friend flatbuffers::Offset Encode( const InferenceContext& inference, flatbuffers::FlatBufferBuilder* builder); - friend absl::Status Decode(CLContext* context, - const data::InferenceContext* fb_inference, + friend absl::Status Decode(const data::InferenceContext* fb_inference, InferenceContext* inference); void CopyInAndOutIds(const GraphFloat32& graph); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD index d7e7c7dd498..a1deeada102 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD @@ -263,6 +263,7 @@ cc_library( ":util", "//tensorflow/lite/delegates/gpu:spi", "//tensorflow/lite/delegates/gpu/cl:arguments", + "//tensorflow/lite/delegates/gpu/cl:cl_arguments", "//tensorflow/lite/delegates/gpu/cl:cl_command_queue", "//tensorflow/lite/delegates/gpu/cl:cl_errors", "//tensorflow/lite/delegates/gpu/cl:environment", @@ -643,6 +644,7 @@ cc_library( ":work_group_picking", "//tensorflow/lite/delegates/gpu/cl:arguments", "//tensorflow/lite/delegates/gpu/cl:buffer", + "//tensorflow/lite/delegates/gpu/cl:cl_arguments", "//tensorflow/lite/delegates/gpu/cl:cl_command_queue", "//tensorflow/lite/delegates/gpu/cl:cl_context", "//tensorflow/lite/delegates/gpu/cl:cl_device", diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc b/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc index 77ac946637d..8087df56a05 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc @@ -20,6 +20,7 @@ limitations under the License. #include #include "tensorflow/lite/delegates/gpu/cl/arguments.h" +#include "tensorflow/lite/delegates/gpu/cl/cl_arguments.h" #include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h" #include "tensorflow/lite/delegates/gpu/cl/cl_errors.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/util.h" @@ -44,8 +45,9 @@ class OpenClConverterImpl : public TensorObjectConverter { absl::Status DispatchKernel(cl_mem buffer_mem, Tensor* tensor) { kernel_.ResetBindingCounter(); RETURN_IF_ERROR(kernel_.SetMemoryAuto(buffer_mem)); - RETURN_IF_ERROR(args_.SetObjectRef("tensor", tensor)); - RETURN_IF_ERROR(args_.Bind(kernel_.kernel(), kernel_.GetBindingCounter())); + RETURN_IF_ERROR(cl_args_.SetObjectRef("tensor", tensor)); + RETURN_IF_ERROR( + cl_args_.Bind(kernel_.kernel(), kernel_.GetBindingCounter())); const int3 grid = int3(tensor->Width() * tensor->Batch(), tensor->Height(), tensor->Slices()); const int3 work_group_size = {16, 8, 1}; @@ -53,7 +55,7 @@ class OpenClConverterImpl : public TensorObjectConverter { return queue_->Dispatch(kernel_, work_groups_count, work_group_size); } - Arguments args_; + CLArguments cl_args_; BHWC shape_; CLKernel kernel_; TensorDescriptor tensor_descriptor_; @@ -115,7 +117,8 @@ class TensorToTensorConverter : public OpenClConverterImpl { src_tensor_descriptor_.storage_type = ToTensorStorageType( input_def.object_def.object_type, input_def.object_def.data_layout); src_tensor_descriptor_.data_type = input_def.object_def.data_type; - args_.AddObjectRef( + Arguments args; + args.AddObjectRef( "src_tensor", AccessType::READ, absl::make_unique(src_tensor_descriptor_)); @@ -123,7 +126,7 @@ class TensorToTensorConverter : public OpenClConverterImpl { dst_tensor_descriptor_.storage_type = ToTensorStorageType( output_def.object_def.object_type, output_def.object_def.data_layout); dst_tensor_descriptor_.data_type = output_def.object_def.data_type; - args_.AddObjectRef( + args.AddObjectRef( "dst_tensor", AccessType::WRITE, absl::make_unique(dst_tensor_descriptor_)); @@ -152,8 +155,8 @@ class TensorToTensorConverter : public OpenClConverterImpl { context_ = &environment->context(); shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h, input_def.dimensions.w, input_def.dimensions.c); - RETURN_IF_ERROR( - args_.TransformToCLCode(environment->device().info_, {}, &shader_src)); + RETURN_IF_ERROR(cl_args_.Init(environment->device().GetInfo(), {}, nullptr, + &args, &shader_src)); return environment->program_cache()->GetOrCreateCLKernel( shader_src, "tensor_to_tensor", environment->context(), environment->device(), &kernel_); @@ -172,9 +175,9 @@ class TensorToTensorConverter : public OpenClConverterImpl { Tensor dst_tensor; RETURN_IF_ERROR(CreateSharedTensor(*context_, out_memory, shape_, dst_tensor_descriptor_, &dst_tensor)); - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", &src_tensor)); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", &dst_tensor)); - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); + RETURN_IF_ERROR(cl_args_.SetObjectRef("src_tensor", &src_tensor)); + RETURN_IF_ERROR(cl_args_.SetObjectRef("dst_tensor", &dst_tensor)); + RETURN_IF_ERROR(cl_args_.Bind(kernel_.kernel())); const int3 grid = int3(dst_tensor.Width() * dst_tensor.Batch(), dst_tensor.Height(), dst_tensor.Slices()); const int3 work_group_size = {16, 8, 1}; @@ -203,8 +206,9 @@ class TensorToBHWCBufferConverter : public OpenClConverterImpl { tensor_descriptor_.layout = Layout::BHWC; tensor_descriptor_.storage_type = src_tensor_type; tensor_descriptor_.data_type = input_def.object_def.data_type; - args_.AddObjectRef("tensor", AccessType::READ, - absl::make_unique(tensor_descriptor_)); + Arguments args; + args.AddObjectRef("tensor", AccessType::READ, + absl::make_unique(tensor_descriptor_)); const bool need_fp16_support = input_def.object_def.data_type == DataType::FLOAT16 || @@ -244,8 +248,8 @@ class TensorToBHWCBufferConverter : public OpenClConverterImpl { context_ = &environment->context(); shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h, input_def.dimensions.w, input_def.dimensions.c); - RETURN_IF_ERROR( - args_.TransformToCLCode(environment->device().info_, {}, &shader_src)); + RETURN_IF_ERROR(cl_args_.Init(environment->device().GetInfo(), {}, nullptr, + &args, &shader_src)); return environment->program_cache()->GetOrCreateCLKernel( shader_src, "tensor_to_bhwc", environment->context(), environment->device(), &kernel_); @@ -300,8 +304,9 @@ class BHWCBufferToTensorConverter : public OpenClConverterImpl { tensor_descriptor_.layout = Layout::BHWC; tensor_descriptor_.storage_type = dst_tensor_type; tensor_descriptor_.data_type = output_def.object_def.data_type; - args_.AddObjectRef("tensor", AccessType::WRITE, - absl::make_unique(tensor_descriptor_)); + Arguments args; + args.AddObjectRef("tensor", AccessType::WRITE, + absl::make_unique(tensor_descriptor_)); const bool need_fp16_support = input_def.object_def.data_type == DataType::FLOAT16 || @@ -338,8 +343,8 @@ class BHWCBufferToTensorConverter : public OpenClConverterImpl { context_ = &environment->context(); shape_ = BHWC(output_def.dimensions.b, output_def.dimensions.h, output_def.dimensions.w, output_def.dimensions.c); - RETURN_IF_ERROR( - args_.TransformToCLCode(environment->device().info_, {}, &shader_src)); + RETURN_IF_ERROR(cl_args_.Init(environment->device().GetInfo(), {}, nullptr, + &args, &shader_src)); return environment->program_cache()->GetOrCreateCLKernel( shader_src, "bhwc_to_tensor", environment->context(), environment->device(), &kernel_); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc index b39f03af846..9d6bc59f716 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc @@ -132,12 +132,13 @@ GPUOperation::GPUOperation(GPUOperation&& operation) definition_(std::move(operation.definition_)), src_(std::move(operation.src_)), dst_(std::move(operation.dst_)), - kernel_(std::move(operation.kernel_)), grid_dimension_(operation.grid_dimension_), work_group_launch_order_(operation.work_group_launch_order_), grid_size_(operation.grid_size_), src_tensors_names_(std::move(operation.src_tensors_names_)), dst_tensors_names_(std::move(operation.dst_tensors_names_)), + kernel_(std::move(operation.kernel_)), + cl_args_(std::move(operation.cl_args_)), work_groups_count_(operation.work_groups_count_), linkable_count_(operation.linkable_count_), elementwise_code_(std::move(operation.elementwise_code_)) {} @@ -155,12 +156,13 @@ GPUOperation& GPUOperation::operator=(GPUOperation&& operation) { definition_ = std::move(operation.definition_); src_ = std::move(operation.src_); dst_ = std::move(operation.dst_); - kernel_ = std::move(operation.kernel_); std::swap(grid_dimension_, operation.grid_dimension_); std::swap(work_group_launch_order_, operation.work_group_launch_order_); std::swap(grid_size_, operation.grid_size_); src_tensors_names_ = std::move(operation.src_tensors_names_); dst_tensors_names_ = std::move(operation.dst_tensors_names_); + kernel_ = std::move(operation.kernel_); + cl_args_ = std::move(operation.cl_args_); std::swap(work_groups_count_, operation.work_groups_count_); std::swap(linkable_count_, operation.linkable_count_); elementwise_code_ = std::move(operation.elementwise_code_); @@ -211,12 +213,12 @@ void GPUOperation::AddDstTensor(const std::string& tensor_name, absl::Status GPUOperation::UpdateParams() { for (int i = 0; i < src_tensors_names_.size(); ++i) { - RETURN_IF_ERROR(args_.SetObjectRef(src_tensors_names_[i], src_[i])); + RETURN_IF_ERROR(cl_args_.SetObjectRef(src_tensors_names_[i], src_[i])); } for (int i = 0; i < dst_tensors_names_.size(); ++i) { - RETURN_IF_ERROR(args_.SetObjectRef(dst_tensors_names_[i], dst_[i])); + RETURN_IF_ERROR(cl_args_.SetObjectRef(dst_tensors_names_[i], dst_[i])); } - RETURN_IF_ERROR(BindArguments(&args_)); + RETURN_IF_ERROR(BindArguments(&cl_args_)); grid_size_ = GetGridSize(); work_groups_count_ = GetWorkGroupsCount( grid_dimension_, grid_size_, work_group_size_, work_group_launch_order_); @@ -244,15 +246,10 @@ absl::Status GPUOperation::AssembleCode(const DeviceInfo& device_info, elementwise_code_ = "{\n" + code_ + "\n}\n" + elementwise_code_; code_ = GetElementWiseCode(definition_, check_src_channels_size_); - RETURN_IF_ERROR(args_.AllocateObjects(context)); - RETURN_IF_ERROR(args_.TransformToCLCode( - device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_)); - } else { - RETURN_IF_ERROR(args_.AllocateObjects(context)); - RETURN_IF_ERROR(args_.TransformToCLCode( - device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_)); } - return absl::OkStatus(); + return cl_args_.Init(device_info, + {{dst_tensors_names_[0], elementwise_code_}}, context, + &args_, &code_); } absl::Status GPUOperation::Compile(const CreationContext& creation_context) { @@ -266,6 +263,8 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) { absl::Status GPUOperation::CompileDeserialized( const CreationContext& creation_context) { + RETURN_IF_ERROR(cl_args_.Init(creation_context.GetDeviceInfo(), &args_, + creation_context.context)); return creation_context.cache->GetOrCreateCLKernel( code_, "main_function", compiler_options_, *creation_context.context, *creation_context.device, &kernel_); @@ -299,7 +298,7 @@ absl::Status GPUOperation::Tune(const TuningParameters& params) { GetWorkGroupsCount(grid_dimension_, grid_size_, possible_work_groups[i], work_group_launch_order_); } - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); + RETURN_IF_ERROR(cl_args_.Bind(kernel_.kernel())); int best_work_group_index; RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex( kernel_, *params.info, work_groups_count, possible_work_groups, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h index 57d8690c54e..844e45f28b2 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/cl/arguments.h" #include "tensorflow/lite/delegates/gpu/cl/buffer.h" +#include "tensorflow/lite/delegates/gpu/cl/cl_arguments.h" #include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h" #include "tensorflow/lite/delegates/gpu/cl/cl_context.h" #include "tensorflow/lite/delegates/gpu/cl/cl_device.h" @@ -120,7 +121,7 @@ class GPUOperation { absl::Status UpdateParams(); absl::Status AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); + RETURN_IF_ERROR(cl_args_.Bind(kernel_.kernel())); return queue->Dispatch(kernel_, work_groups_count_, work_group_size_); } @@ -168,11 +169,15 @@ class GPUOperation { // applicable only with elementwise_ = true; bool check_src_channels_size_ = false; + // Temporary, will be resolved later + void MoveObjectRefsFromCLToGeneric() { cl_args_.MoveObjectRefsOut(&args_); } + void MoveObjectRefsFromGenericToCL() { cl_args_.MoveObjectRefsIn(&args_); } + void SyncScalarValues() { cl_args_.CopyScalarValues(&args_); } + protected: friend flatbuffers::Offset Encode( const GPUOperation& op, flatbuffers::FlatBufferBuilder* builder); - friend absl::Status Decode(CLContext* context, - const data::GPUOperation* fb_op, GPUOperation* op); + friend absl::Status Decode(const data::GPUOperation* fb_op, GPUOperation* op); virtual absl::Status BindArguments(ArgumentsBinder* args) { return absl::OkStatus(); @@ -183,7 +188,6 @@ class GPUOperation { OperationDef definition_; std::vector src_; std::vector dst_; - CLKernel kernel_; int grid_dimension_ = 3; // can be 1, 2 or 3 int3 work_group_launch_order_ = int3(0, 1, 2); int3 grid_size_ = int3(0, 0, 0); @@ -191,6 +195,8 @@ class GPUOperation { std::vector dst_tensors_names_; private: + CLKernel kernel_; + CLArguments cl_args_; int3 work_groups_count_ = int3(0, 0, 0); int linkable_count_ = 0; std::string elementwise_code_; // temporary, used during op construction diff --git a/tensorflow/lite/delegates/gpu/cl/serialization.cc b/tensorflow/lite/delegates/gpu/cl/serialization.cc index 3b52fc40bdf..f9ee6959a50 100644 --- a/tensorflow/lite/delegates/gpu/cl/serialization.cc +++ b/tensorflow/lite/delegates/gpu/cl/serialization.cc @@ -508,30 +508,11 @@ void Decode(const data::TensorDescWithId* fb_desc, TensorDescriptor* desc, *id = fb_desc->id(); } -absl::Status Decode(CLContext* context, const data::Arguments* fb_args, - Arguments* args) { - args->shared_int4s_data_ = std::vector( - fb_args->shared_int4s()->data(), - fb_args->shared_int4s()->data() + fb_args->shared_int4s()->size()); - - args->shared_float4s_data_ = std::vector( - fb_args->shared_float4s()->data(), - fb_args->shared_float4s()->data() + fb_args->shared_float4s()->size()); - - std::vector tmp = std::vector( - fb_args->shared_half4s()->data(), - fb_args->shared_half4s()->data() + fb_args->shared_half4s()->size()); - - args->shared_half4s_data_.resize(tmp.size()); - for (int i = 0; i < tmp.size(); ++i) { - args->shared_half4s_data_[i] = tmp[i]; - } - +absl::Status Decode(const data::Arguments* fb_args, Arguments* args) { args->int_values_.clear(); for (auto int_values_fb : *fb_args->int_values()) { Arguments::IntValue value; value.value = int_values_fb->value(); - value.offset = int_values_fb->offset(); value.active = int_values_fb->active(); std::string name(int_values_fb->name()->c_str(), int_values_fb->name()->size()); @@ -542,7 +523,6 @@ absl::Status Decode(CLContext* context, const data::Arguments* fb_args, for (auto float_values_fb : *fb_args->float_values()) { Arguments::FloatValue value; value.value = float_values_fb->value(); - value.offset = float_values_fb->offset(); value.active = float_values_fb->active(); std::string name(float_values_fb->name()->c_str(), float_values_fb->name()->size()); @@ -553,9 +533,7 @@ absl::Status Decode(CLContext* context, const data::Arguments* fb_args, for (auto half_values_fb : *fb_args->half_values()) { Arguments::HalfValue value; value.value = half_values_fb->value(); - value.offset = half_values_fb->offset(); value.active = half_values_fb->active(); - value.store_as_f32 = half_values_fb->store_as_f32(); std::string name(half_values_fb->name()->c_str(), half_values_fb->name()->size()); args->half_values_[name] = value; @@ -635,9 +613,6 @@ absl::Status Decode(CLContext* context, const data::Arguments* fb_args, args->AddObjectRef(key, access_type, absl::make_unique(std::move(desc))); } - - RETURN_IF_ERROR(args->AllocateObjects(context)); - RETURN_IF_ERROR(args->AddObjectArgs()); return absl::OkStatus(); } @@ -649,7 +624,6 @@ flatbuffers::Offset Encode( data::IntValueBuilder value_builder(*builder); value_builder.add_name(name_fb); value_builder.add_value(value.second.value); - value_builder.add_offset(value.second.offset); value_builder.add_active(value.second.active); int_values_fb.push_back(value_builder.Finish()); } @@ -660,7 +634,6 @@ flatbuffers::Offset Encode( data::FloatValueBuilder value_builder(*builder); value_builder.add_name(name_fb); value_builder.add_value(value.second.value); - value_builder.add_offset(value.second.offset); value_builder.add_active(value.second.active); float_values_fb.push_back(value_builder.Finish()); } @@ -671,9 +644,7 @@ flatbuffers::Offset Encode( data::HalfValueBuilder value_builder(*builder); value_builder.add_name(name_fb); value_builder.add_value(value.second.value); - value_builder.add_offset(value.second.offset); value_builder.add_active(value.second.active); - value_builder.add_store_as_f32(value.second.store_as_f32); half_values_fb.push_back(value_builder.Finish()); } @@ -681,7 +652,7 @@ flatbuffers::Offset Encode( buffer_objs_fb; for (auto& value : args.objects_) { const auto* buffer_desc = - dynamic_cast(value.second.descriptor.get()); + dynamic_cast(value.second.get()); if (!buffer_desc) continue; auto desc_fb = Encode(*buffer_desc, builder); auto key_fb = builder->CreateString(value.first); @@ -694,7 +665,7 @@ flatbuffers::Offset Encode( texture2d_objs_fb; for (auto& value : args.objects_) { const auto* texture_desc = - dynamic_cast(value.second.descriptor.get()); + dynamic_cast(value.second.get()); if (!texture_desc) continue; auto desc_fb = Encode(*texture_desc, builder); auto key_fb = builder->CreateString(value.first); @@ -706,8 +677,8 @@ flatbuffers::Offset Encode( std::vector> tensor_linear_objs_fb; for (auto& value : args.objects_) { - const auto* tensor_desc = dynamic_cast( - value.second.descriptor.get()); + const auto* tensor_desc = + dynamic_cast(value.second.get()); if (!tensor_desc) continue; auto desc_fb = Encode(*tensor_desc, builder); auto key_fb = builder->CreateString(value.first); @@ -720,7 +691,7 @@ flatbuffers::Offset Encode( tensor_objs_fb; for (auto& value : args.objects_) { const auto* tensor_desc = - dynamic_cast(value.second.descriptor.get()); + dynamic_cast(value.second.get()); if (!tensor_desc) continue; auto desc_fb = Encode(*tensor_desc, builder); auto key_fb = builder->CreateString(value.first); @@ -734,7 +705,7 @@ flatbuffers::Offset Encode( buffer_refs_fb; for (auto& value : args.object_refs_) { const auto* buffer_desc = - dynamic_cast(value.second.descriptor.get()); + dynamic_cast(value.second.get()); if (!buffer_desc) continue; auto desc_fb = Encode(*buffer_desc, builder); auto key_fb = builder->CreateString(value.first); @@ -747,7 +718,7 @@ flatbuffers::Offset Encode( texture2d_refs_fb; for (auto& value : args.object_refs_) { const auto* texture_desc = - dynamic_cast(value.second.descriptor.get()); + dynamic_cast(value.second.get()); if (!texture_desc) continue; auto desc_fb = Encode(*texture_desc, builder); auto key_fb = builder->CreateString(value.first); @@ -759,8 +730,8 @@ flatbuffers::Offset Encode( std::vector> tensor_linear_refs_fb; for (auto& value : args.object_refs_) { - const auto* tensor_desc = dynamic_cast( - value.second.descriptor.get()); + const auto* tensor_desc = + dynamic_cast(value.second.get()); if (!tensor_desc) continue; auto desc_fb = Encode(*tensor_desc, builder); auto key_fb = builder->CreateString(value.first); @@ -773,7 +744,7 @@ flatbuffers::Offset Encode( tensor_refs_fb; for (auto& value : args.object_refs_) { const auto* tensor_desc = - dynamic_cast(value.second.descriptor.get()); + dynamic_cast(value.second.get()); if (!tensor_desc) continue; auto desc_fb = Encode(*tensor_desc, builder); auto key_fb = builder->CreateString(value.first); @@ -783,14 +754,6 @@ flatbuffers::Offset Encode( tensor_refs_fb.push_back(ten_map_builder.Finish()); } - auto shared_int4s_data_fb = builder->CreateVector(args.shared_int4s_data_); - auto shared_float4s_data_fb = - builder->CreateVector(args.shared_float4s_data_); - std::vector tmp(args.shared_half4s_data_.size()); - for (int i = 0; i < tmp.size(); ++i) { - tmp[i] = args.shared_half4s_data_[i]; - } - auto shared_half4s_data_fb = builder->CreateVector(tmp); auto int_values_fb_vec = builder->CreateVector(int_values_fb); auto float_values_fb_vec = builder->CreateVector(float_values_fb); auto half_values_fb_vec = builder->CreateVector(half_values_fb); @@ -803,9 +766,6 @@ flatbuffers::Offset Encode( auto tensor_linear_refs_fb_vec = builder->CreateVector(tensor_linear_refs_fb); auto tensor_refs_fb_vec = builder->CreateVector(tensor_refs_fb); data::ArgumentsBuilder arguments_builder(*builder); - arguments_builder.add_shared_int4s(shared_int4s_data_fb); - arguments_builder.add_shared_float4s(shared_float4s_data_fb); - arguments_builder.add_shared_half4s(shared_half4s_data_fb); arguments_builder.add_int_values(int_values_fb_vec); arguments_builder.add_float_values(float_values_fb_vec); arguments_builder.add_half_values(half_values_fb_vec); @@ -820,9 +780,8 @@ flatbuffers::Offset Encode( return arguments_builder.Finish(); } -absl::Status Decode(CLContext* context, const data::GPUOperation* fb_op, - GPUOperation* op) { - RETURN_IF_ERROR(Decode(context, fb_op->arguments(), &op->args_)); +absl::Status Decode(const data::GPUOperation* fb_op, GPUOperation* op) { + RETURN_IF_ERROR(Decode(fb_op->arguments(), &op->args_)); op->code_ = std::string(fb_op->code()->c_str(), fb_op->code()->size()); op->work_group_size_.x = fb_op->work_group_size()->x(); op->work_group_size_.y = fb_op->work_group_size()->y(); @@ -935,10 +894,9 @@ flatbuffers::Offset Encode( return node_builder.Finish(); } -absl::Status Decode(CLContext* context, const data::CLNode* fb_node, - CLNode* node) { +absl::Status Decode(const data::CLNode* fb_node, CLNode* node) { GPUOperation op; - RETURN_IF_ERROR(Decode(context, fb_node->gpu_op(), &op)); + RETURN_IF_ERROR(Decode(fb_node->gpu_op(), &op)); node->operation = absl::make_unique(std::move(op)); for (auto in_fb : *fb_node->input_ids()) { node->inputs.push_back(in_fb); @@ -1006,8 +964,7 @@ flatbuffers::Offset Encode( return inf_builder.Finish(); } -absl::Status Decode(CLContext* context, - const data::InferenceContext* fb_inference, +absl::Status Decode(const data::InferenceContext* fb_inference, InferenceContext* inference) { inference->need_flush_ = fb_inference->need_flush(); inference->flush_periodically_ = fb_inference->flush_periodically(); @@ -1019,7 +976,7 @@ absl::Status Decode(CLContext* context, inference->nodes_.resize(fb_inference->nodes()->size()); int counter = 0; for (auto node_fb : *fb_inference->nodes()) { - RETURN_IF_ERROR(Decode(context, node_fb, &inference->nodes_[counter])); + RETURN_IF_ERROR(Decode(node_fb, &inference->nodes_[counter])); counter++; } diff --git a/tensorflow/lite/delegates/gpu/cl/serialization.fbs b/tensorflow/lite/delegates/gpu/cl/serialization.fbs index 0c0d2241b5a..13b9df69b36 100644 --- a/tensorflow/lite/delegates/gpu/cl/serialization.fbs +++ b/tensorflow/lite/delegates/gpu/cl/serialization.fbs @@ -36,22 +36,18 @@ table IntValue { name:string; value:int32; active:bool; - offset:uint32; } table FloatValue { name:string; value:float; active:bool; - offset:uint32; } table HalfValue { name:string; value:float; active:bool; - store_as_f32:bool; - offset:uint32; } enum AccessType : byte { @@ -172,13 +168,8 @@ table TensorDescriptorMapValue { table Arguments { int_values:[IntValue]; - shared_int4s:[int32]; - float_values:[FloatValue]; - shared_float4s:[float]; - half_values:[HalfValue]; - shared_half4s:[float]; buffer_refs:[BufferDescriptorMapValue]; texture2d_refs:[Texture2DDescriptorMapValue];