From 87ae6fb8ee46524761e9d163e1863811a4afec2d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Wed, 22 May 2019 16:27:51 -0700
Subject: [PATCH] Implemented: GlPersistentBuffer, GlShaderSync

PiperOrigin-RevId: 249544610
---
 tensorflow/lite/delegates/gpu/gl/BUILD        |  2 +
 tensorflow/lite/delegates/gpu/gl/gl_buffer.cc | 63 +++++++++++++++++++
 tensorflow/lite/delegates/gpu/gl/gl_buffer.h  | 32 ++++++++++
 tensorflow/lite/delegates/gpu/gl/gl_sync.cc   | 45 +++++++++++++
 tensorflow/lite/delegates/gpu/gl/gl_sync.h    | 31 ++++++++-
 5 files changed, 171 insertions(+), 2 deletions(-)

diff --git a/tensorflow/lite/delegates/gpu/gl/BUILD b/tensorflow/lite/delegates/gpu/gl/BUILD
index 9a47a35c1ae..519f1b245be 100644
--- a/tensorflow/lite/delegates/gpu/gl/BUILD
+++ b/tensorflow/lite/delegates/gpu/gl/BUILD
@@ -260,8 +260,10 @@ cc_library(
     srcs = ["gl_sync.cc"],
     hdrs = ["gl_sync.h"],
     deps = [
+        ":gl_buffer",
         ":gl_call",
         ":gl_errors",
+        ":gl_program",
         ":portable",
         "//tensorflow/lite/delegates/gpu/common:status",
     ],
diff --git a/tensorflow/lite/delegates/gpu/gl/gl_buffer.cc b/tensorflow/lite/delegates/gpu/gl/gl_buffer.cc
index 6e5e8afa364..86c3c59639f 100644
--- a/tensorflow/lite/delegates/gpu/gl/gl_buffer.cc
+++ b/tensorflow/lite/delegates/gpu/gl/gl_buffer.cc
@@ -84,6 +84,69 @@ GlBuffer GlBuffer::MakeRef() {
                   /* has_ownership = */ false);
 }
 
+// Wraps a GL buffer together with `data`, the CPU pointer it is mapped to.
+GlPersistentBuffer::GlPersistentBuffer(GLenum target, GLuint id,
+                                       size_t bytes_size, size_t offset,
+                                       bool has_ownership, void* data)
+    : GlBuffer(target, id, bytes_size, offset, has_ownership), data_(data) {}
+
+// Creates a buffer with GL_INVALID_INDEX id and no CPU pointer.
+GlPersistentBuffer::GlPersistentBuffer()
+    : GlPersistentBuffer(GL_INVALID_ENUM, GL_INVALID_INDEX, 0, 0, false,
+                         nullptr) {}
+
+GlPersistentBuffer::GlPersistentBuffer(GlPersistentBuffer&& buffer)
+    : GlBuffer(std::move(buffer)), data_(buffer.data_) {
+  // The mapping now belongs to *this. Clear it in the moved-from object so
+  // that its destructor does not try to unmap it.
+  buffer.data_ = nullptr;
+}
+
+GlPersistentBuffer& GlPersistentBuffer::operator=(GlPersistentBuffer&& buffer) {
+  if (this != &buffer) {
+    data_ = buffer.data_;
+    // Same as in the move constructor: only one object may hold the mapping.
+    buffer.data_ = nullptr;
+    GlBuffer::operator=(std::move(buffer));
+  }
+  return *this;
+}
+
+// Unmaps the buffer if this object holds a CPU pointer to it.
+GlPersistentBuffer::~GlPersistentBuffer() {
+  if (!data_) return;
+  gl_buffer_internal::BufferBinder binder(GL_SHADER_STORAGE_BUFFER, id());
+  glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
+}
+
+// Allocates `size` bytes of buffer storage through glBufferStorageEXT, maps it
+// for reading and writing and stores the result in `gl_buffer`. Returns
+// UnavailableError if the glBufferStorageEXT entry point cannot be resolved.
+Status CreatePersistentBuffer(size_t size, GlPersistentBuffer* gl_buffer) {
+  PFNGLBUFFERSTORAGEEXTPROC glBufferStorageEXT =
+      reinterpret_cast<PFNGLBUFFERSTORAGEEXTPROC>(
+          eglGetProcAddress("glBufferStorageEXT"));
+  if (!glBufferStorageEXT) {
+    return UnavailableError("glBufferStorageEXT is not supported");
+  }
+  // The same bits serve as storage flags and as map access: a mapping is
+  // coherent only if GL_MAP_COHERENT_BIT_EXT is requested at map time as well.
+  const GLbitfield access = GL_MAP_COHERENT_BIT_EXT | GL_MAP_READ_BIT |
+                            GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT_EXT;
+  gl_buffer_internal::BufferId id;
+  gl_buffer_internal::BufferBinder binder(GL_SHADER_STORAGE_BUFFER, id.id());
+  RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glBufferStorageEXT,
+                                     GL_SHADER_STORAGE_BUFFER, size, nullptr,
+                                     access));
+  void* data = nullptr;
+  RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glMapBufferRange, &data,
+                                     GL_SHADER_STORAGE_BUFFER, 0, size,
+                                     access));
+  *gl_buffer = GlPersistentBuffer{
+      GL_SHADER_STORAGE_BUFFER, id.Release(), size, 0, true, data};
+  return OkStatus();
+}
+
 }  // namespace gl
 }  // namespace gpu
 }  // namespace tflite
diff --git a/tensorflow/lite/delegates/gpu/gl/gl_buffer.h b/tensorflow/lite/delegates/gpu/gl/gl_buffer.h
index 5897499598c..019022a5baa 100644
--- a/tensorflow/lite/delegates/gpu/gl/gl_buffer.h
+++ b/tensorflow/lite/delegates/gpu/gl/gl_buffer.h
@@ -141,6 +141,38 @@ Status AppendFromBuffer(const GlBuffer& buffer, std::vector<T>* data) {
       absl::MakeSpan(data->data() + data->size() - num_elements, num_elements));
 }
 
+// Persistent buffer provides CPU pointer to the buffer that is valid all the
+// time. A user should properly synchronize the access to the buffer on CPU and
+// GPU sides.
+class GlPersistentBuffer : public GlBuffer {
+ public:
+  // `data` is the CPU pointer the buffer is mapped to, or nullptr if the buffer
+  // is not mapped.
+  GlPersistentBuffer(GLenum target, GLuint id, size_t bytes_size, size_t offset,
+                     bool has_ownership, void* data);
+  GlPersistentBuffer();
+
+  // Move-only. Moving transfers the CPU pointer and leaves the moved-from
+  // object without one.
+  GlPersistentBuffer(GlPersistentBuffer&& buffer);
+  GlPersistentBuffer& operator=(GlPersistentBuffer&& buffer);
+  GlPersistentBuffer(const GlPersistentBuffer&) = delete;
+  GlPersistentBuffer& operator=(const GlPersistentBuffer&) = delete;
+
+  // Unmaps the buffer if it is mapped.
+  ~GlPersistentBuffer();
+
+  // Returns the CPU pointer to the buffer contents (nullptr if not mapped).
+  void* data() { return data_; }
+
+ private:
+  void* data_ = nullptr;
+};
+
+// Creates read-write persistent buffer of `size` bytes with valid CPU pointer.
+// Returns an error if glBufferStorageEXT is not available or a GL call fails.
+Status CreatePersistentBuffer(size_t size, GlPersistentBuffer* gl_buffer);
+
 ////////////////////////////////////////////////////////////////////////////////
 // Implementation details are below.
 
diff --git a/tensorflow/lite/delegates/gpu/gl/gl_sync.cc b/tensorflow/lite/delegates/gpu/gl/gl_sync.cc
index 889e8dda428..92caaa5c78a 100644
--- a/tensorflow/lite/delegates/gpu/gl/gl_sync.cc
+++ b/tensorflow/lite/delegates/gpu/gl/gl_sync.cc
@@ -78,6 +78,51 @@ Status GlActiveSyncWait() {
   }
 }
 
+// Creates the persistent flag buffer and the compute program that writes 1 into
+// the first element of the buffer bound at index 0. `gl_sync` is assigned only
+// if all steps succeed.
+Status GlShaderSync::NewSync(GlShaderSync* gl_sync) {
+  GlShaderSync sync;
+  RETURN_IF_ERROR(CreatePersistentBuffer(sizeof(int), &sync.flag_buffer_));
+  static const std::string* kCode = new std::string(R"(#version 310 es
+  layout(local_size_x = 1, local_size_y = 1) in;
+  layout(std430) buffer;
+  layout(binding = 0) buffer Output {
+    int elements[];
+  } output_data;
+  void main() {
+    output_data.elements[0] = 1;
+  })");
+  GlShader shader;
+  RETURN_IF_ERROR(GlShader::CompileShader(GL_COMPUTE_SHADER, *kCode, &shader));
+  RETURN_IF_ERROR(GlProgram::CreateWithShader(shader, &sync.flag_program_));
+  *gl_sync = std::move(sync);
+  return OkStatus();
+}
+
+// How it works: GPU writes a buffer and CPU checks the buffer value to be
+// changed. The buffer is accessible for writing by GPU and reading by CPU
+// simultaneously - persistent buffer or buffer across child context can be used
+// for that.
+Status GlShaderSync::Wait() {
+  if (!flag_buffer_.is_valid()) {
+    return UnavailableError("GlShaderSync is not initialized.");
+  }
+  RETURN_IF_ERROR(flag_buffer_.BindToIndex(0));
+  // volatile forces a fresh read of the mapped memory on every loop iteration.
+  volatile int* flag_ptr = static_cast<volatile int*>(flag_buffer_.data());
+  *flag_ptr = 0;
+  RETURN_IF_ERROR(flag_program_.Dispatch({1, 1, 1}));
+  // glFlush must be called to upload GPU task. Adreno won't start executing
+  // the task without glFlush.
+  glFlush();
+  // Spin until the shader has updated the value. NOTE: there is no timeout, so
+  // this never returns if the shader does not run.
+  while (*flag_ptr != 1) {
+  }
+  return OkStatus();
+}
+
 }  // namespace gl
 }  // namespace gpu
 }  // namespace tflite
diff --git a/tensorflow/lite/delegates/gpu/gl/gl_sync.h b/tensorflow/lite/delegates/gpu/gl/gl_sync.h
index a00a0c2b048..10bab988d5e 100644
--- a/tensorflow/lite/delegates/gpu/gl/gl_sync.h
+++ b/tensorflow/lite/delegates/gpu/gl/gl_sync.h
@@ -17,7 +17,9 @@ limitations under the License.
 #define TENSORFLOW_LITE_DELEGATES_GPU_GL_GL_SYNC_H_
 
 #include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/delegates/gpu/gl/gl_buffer.h"
 #include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
+#include "tensorflow/lite/delegates/gpu/gl/gl_program.h"
 #include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h"
 
 namespace tflite {
@@ -75,10 +77,35 @@ class GlSync {
 // Waits until GPU is done with processing.
 Status GlSyncWait();
 
-// Performs active waiting by spinning a thread and checking sync status. It
-// leads to shorter wait time (up to tens of ms) but consumes more CPU.
+// Waits until all commands are flushed and then performs active waiting by
+// spinning a thread and checking sync status. It leads to shorter wait time
+// (up to tens of ms) but consumes more CPU.
 Status GlActiveSyncWait();
 
+// Performs the best available minimum latency finish. A calling thread is not
+// going to sleep keeping active busy wait.
+// 1) CPU checks the value in the buffer that is going to be written by GPU. The
+//    persistent buffer is used if the extension is available.
+// 2) glSync is checked for the signalling state in a loop.
+// 3) glFinish() is performed if all other methods are not available.
+// NOTE: only method 1 is implemented by this class: NewSync() fails when the
+// persistent buffer cannot be created.
+class GlShaderSync {
+ public:
+  // Creates an initialized object in `gl_sync`. On failure `gl_sync` is left
+  // unchanged.
+  static Status NewSync(GlShaderSync* gl_sync);
+  GlShaderSync() = default;
+
+  // Busy-waits until the GPU has executed the flag shader. Returns
+  // UnavailableError if the flag buffer is not valid.
+  Status Wait();
+
+ private:
+  GlProgram flag_program_;
+  GlPersistentBuffer flag_buffer_;
+};
+
 }  // namespace gl
 }  // namespace gpu
 }  // namespace tflite