STT-tensorflow/tensorflow/lite/delegates/gpu/cl/texture2d.h
Raman Sarokin ff9594c4f7 Added support of int types to Texture2DDescriptor.
PiperOrigin-RevId: 333550534
Change-Id: Ic580aa6fd19ec9fa20563c2018f8833a03ac18bc
2020-09-24 11:12:07 -07:00

162 lines
6.0 KiB
C++

/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_TEXTURE2D_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_TEXTURE2D_H_
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include "absl/strings/str_cat.h"
#include "absl/types/span.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
#include "tensorflow/lite/delegates/gpu/cl/util.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
namespace tflite {
namespace gpu {
namespace cl {
// Descriptor of a 2D texture GPU object. It carries the element type and
// (optionally) the size and raw contents, and implements the
// GPUObjectDescriptor interface declared in gpu_object.h.
// Copyable and movable.
struct Texture2DDescriptor : public GPUObjectDescriptor {
  // Element type of the texture. Defaults to UNKNOWN so that a
  // default-constructed descriptor never holds an indeterminate value; callers
  // are expected to set it before use.
  DataType element_type = DataType::UNKNOWN;

  // Used with INT data types: if normalized, the kernel reads the data as
  // float.
  bool normalized = false;

  // Can be FLOAT32 or FLOAT16; used only when normalized == true. Defaults to
  // UNKNOWN for the same reason as element_type.
  DataType normalized_type = DataType::UNKNOWN;

  // Optional.
  int2 size = int2(0, 0);
  std::vector<uint8_t> data;

  Texture2DDescriptor() = default;
  Texture2DDescriptor(const Texture2DDescriptor&) = default;
  Texture2DDescriptor& operator=(const Texture2DDescriptor&) = default;
  Texture2DDescriptor(Texture2DDescriptor&& desc);
  Texture2DDescriptor& operator=(Texture2DDescriptor&& desc);

  // GPUObjectDescriptor override. Handles the selector named `selector` with
  // the given arguments and writes the outcome to `result`.
  // NOTE(review): the set of supported selectors lives in the .cc file.
  absl::Status PerformSelector(const std::string& selector,
                               const std::vector<std::string>& args,
                               const std::vector<std::string>& template_args,
                               std::string* result) const override;

  // GPUObjectDescriptor override.
  GPUResources GetGPUResources() const override;

  // Helper for the read selector; presumably invoked from PerformSelector --
  // confirm in the .cc file.
  absl::Status PerformReadSelector(const std::vector<std::string>& args,
                                   std::string* result) const;

  // GPUObjectDescriptor override. Creates the GPU object described by this
  // descriptor in `context` and returns it through `result`.
  absl::Status CreateGPUObject(CLContext* context,
                               GPUObjectPtr* result) const override;

  // GPUObjectDescriptor override.
  void Release() override;
};
// Texture2D represents formatted GPU data storage.
// Texture2D is movable but not copyable.
class Texture2D : public GPUObject {
 public:
  // Creates an empty object (null handle, zero size). Exists only so that
  // Texture2D can be used as a class member.
  Texture2D() {}

  // Wraps the OpenCL image `texture` together with its dimensions and channel
  // type. The destructor calls Release() on this object.
  Texture2D(cl_mem texture, int width, int height, cl_channel_type type);

  // Move only
  Texture2D(Texture2D&& texture);
  Texture2D& operator=(Texture2D&& texture);
  Texture2D(const Texture2D&) = delete;
  Texture2D& operator=(const Texture2D&) = delete;

  virtual ~Texture2D() { Release(); }

  // Returns the underlying OpenCL memory handle (nullptr for an empty object).
  cl_mem GetMemoryPtr() const { return texture_; }

  // Writes data to a texture. Data should point to a region that
  // has exact width * height * sizeof(pixel) bytes.
  template <typename T>
  absl::Status WriteData(CLCommandQueue* queue, const absl::Span<T> data);

  // Reads data from Texture2D into CPU memory.
  template <typename T>
  absl::Status ReadData(CLCommandQueue* queue, std::vector<T>* result) const;

  // GPUObject override.
  absl::Status GetGPUResources(const GPUObjectDescriptor* obj_ptr,
                               GPUResourcesWithValue* resources) const override;

  // Initializes this object from `desc` using `context`.
  // NOTE(review): behaviour when this object already holds a texture is
  // defined in the .cc file.
  absl::Status CreateFromTexture2DDescriptor(const Texture2DDescriptor& desc,
                                             CLContext* context);

 private:
  void Release();

  cl_mem texture_ = nullptr;
  // Zero-initialized so that a default-constructed Texture2D has a
  // well-defined (empty) state instead of indeterminate values that
  // WriteData/ReadData would otherwise read.
  int width_ = 0;
  int height_ = 0;
  cl_channel_type channel_type_ = 0;
};
// Shared-ownership pointer to a Texture2D.
using Texture2DPtr = std::shared_ptr<Texture2D>;
// Creates a new 4-channel 2D texture with f32 elements.
// NOTE(review): the texture is presumably returned through `result` on
// success -- confirm in the .cc file.
absl::Status CreateTexture2DRGBA32F(int width, int height, CLContext* context,
Texture2D* result);
// Creates a new 4-channel 2D texture with f16 elements.
// NOTE(review): the texture is presumably returned through `result` on
// success -- confirm in the .cc file.
absl::Status CreateTexture2DRGBA16F(int width, int height, CLContext* context,
Texture2D* result);
// Same as above, but the element type is taken from `type`.
// NOTE(review): the set of supported DataType values is defined in the .cc
// file.
absl::Status CreateTexture2DRGBA(DataType type, int width, int height,
CLContext* context, Texture2D* result);
// Overload that additionally takes a `data` pointer.
// NOTE(review): `data` presumably supplies the initial texture contents;
// the required size and layout should be confirmed in the .cc file.
absl::Status CreateTexture2DRGBA(DataType type, int width, int height,
void* data, CLContext* context,
Texture2D* result);
// Uploads `data` into the texture through `queue`.
//
// The texture is treated as width_ x height_ pixels with 4 elements per pixel,
// each element being ChannelTypeToSizeInBytes(channel_type_) bytes. `data`
// must cover exactly that many bytes, and sizeof(T) must be a multiple of the
// element size.
//
// Returns InvalidArgumentError when the element size is not positive, when T
// is not compatible with the element size, or when the span size does not
// match the texture size; otherwise returns the status of the enqueue call.
template <typename T>
absl::Status Texture2D::WriteData(CLCommandQueue* queue,
                                  const absl::Span<T> data) {
  const int element_size = ChannelTypeToSizeInBytes(channel_type_);
  // A non-positive element size would make the modulo below a division by
  // zero (undefined behaviour), so reject it up front.
  if (element_size <= 0) {
    return absl::InvalidArgumentError(
        "Texture has no valid channel type; element size is unknown.");
  }
  const size_t element_bytes = static_cast<size_t>(element_size);
  if (sizeof(T) % element_bytes != 0) {
    return absl::InvalidArgumentError(
        "Template type T has not suitable element type for created texture.");
  }
  // Do the size arithmetic in size_t: this avoids a signed/unsigned comparison
  // and cannot overflow int for large textures.
  const size_t texture_bytes = 4 * static_cast<size_t>(width_) *
                               static_cast<size_t>(height_) * element_bytes;
  if (texture_bytes != data.size() * sizeof(T)) {
    return absl::InvalidArgumentError(
        "absl::Span<T> data size is different from texture allocated size.");
  }
  RETURN_IF_ERROR(queue->EnqueueWriteImage(texture_, int3(width_, height_, 1),
                                           data.data()));
  return absl::OkStatus();
}
// Downloads the whole texture into `result` through `queue`.
//
// T must be exactly as wide as one texture element
// (ChannelTypeToSizeInBytes(channel_type_) bytes); otherwise
// InvalidArgumentError is returned and `result` is left untouched. On the
// success path `result` is resized to width_ * height_ * 4 elements and the
// status of the enqueue call is returned.
template <typename T>
absl::Status Texture2D::ReadData(CLCommandQueue* queue,
                                 std::vector<T>* result) const {
  const int bytes_per_element = ChannelTypeToSizeInBytes(channel_type_);
  const bool format_matches = (sizeof(T) == bytes_per_element);
  if (!format_matches) {
    return absl::InvalidArgumentError("Pixel format is different.");
  }

  // Four elements are stored per pixel.
  const int total_elements = 4 * height_ * width_;
  result->resize(total_elements);

  const int3 region(width_, height_, 1);
  return queue->EnqueueReadImage(texture_, region, result->data());
}
} // namespace cl
} // namespace gpu
} // namespace tflite
#endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_TEXTURE2D_H_