Implemented: GlPersistentBuffer, GlShaderSync

PiperOrigin-RevId: 249544610
2019-05-22 16:27:51 -07:00 · 2019-05-22 16:27:51 -07:00 · 87ae6fb8ee
commit 87ae6fb8ee
parent c582d56010
5 changed files with 138 additions and 2 deletions
--- a/tensorflow/lite/delegates/gpu/gl/BUILD
+++ b/tensorflow/lite/delegates/gpu/gl/BUILD
@ -260,8 +260,10 @@ cc_library(
    srcs = ["gl_sync.cc"],
    hdrs = ["gl_sync.h"],
    deps = [
        ":gl_buffer",
        ":gl_call",
        ":gl_errors",
        ":gl_program",
        ":portable",
        "//tensorflow/lite/delegates/gpu/common:status",
    ],
--- a/tensorflow/lite/delegates/gpu/gl/gl_buffer.cc
+++ b/tensorflow/lite/delegates/gpu/gl/gl_buffer.cc
@ -84,6 +84,54 @@ GlBuffer GlBuffer::MakeRef() {
                  /* has_ownership = */ false);
 }
 GlPersistentBuffer::GlPersistentBuffer(GLenum target, GLuint id,
                                       size_t bytes_size, size_t offset,
                                       bool has_ownership, void* data)
    : GlBuffer(target, id, bytes_size, offset, has_ownership), data_(data) {}
 // Default state: GL_INVALID_ENUM target, GL_INVALID_INDEX id, zero size and
 // offset, no ownership and no CPU pointer.
 GlPersistentBuffer::GlPersistentBuffer()
     : GlPersistentBuffer(/* target = */ GL_INVALID_ENUM,
                          /* id = */ GL_INVALID_INDEX,
                          /* bytes_size = */ 0,
                          /* offset = */ 0,
                          /* has_ownership = */ false,
                          /* data = */ nullptr) {}
 // Move constructor: the GlBuffer base is move-constructed from `buffer` and
 // the CPU pointer is taken over.
 GlPersistentBuffer::GlPersistentBuffer(GlPersistentBuffer&& buffer)
     : GlBuffer(std::move(buffer)), data_(buffer.data_) {
  // The source must forget the pointer. ~GlPersistentBuffer() unmaps whenever
  // data_ is non-null, so leaving it set would make the moved-from object bind
  // its own id() and call glUnmapBuffer on destruction.
  buffer.data_ = nullptr;
 }
 // Move assignment: takes over the CPU pointer and the GlBuffer state of
 // `buffer`. Self-assignment is a no-op.
 // NOTE(review): a mapping previously held by *this is not explicitly unmapped
 // here; presumably GlBuffer::operator= releases the old GL buffer - confirm.
 GlPersistentBuffer& GlPersistentBuffer::operator=(GlPersistentBuffer&& buffer) {
  if (this != &buffer) {
    data_ = buffer.data_;
    // The source must forget the pointer. ~GlPersistentBuffer() unmaps
    // whenever data_ is non-null, so leaving it set would make the moved-from
    // object (e.g. a temporary) bind its own id() and call glUnmapBuffer on
    // destruction.
    buffer.data_ = nullptr;
    GlBuffer::operator=(std::move(buffer));
  }
  return *this;
 }
 // Unmaps the buffer when a CPU pointer is held; does nothing otherwise.
 GlPersistentBuffer::~GlPersistentBuffer() {
  if (data_ != nullptr) {
    // glUnmapBuffer acts on the buffer bound to the target, so bind id() to
    // GL_SHADER_STORAGE_BUFFER for the duration of the call.
    gl_buffer_internal::BufferBinder scoped_bind(GL_SHADER_STORAGE_BUFFER,
                                                 id());
    glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
  }
 }
 // Creates a GL_SHADER_STORAGE_BUFFER of `size` bytes whose storage is
 // allocated with glBufferStorageEXT and mapped for reading and writing, then
 // stores the resulting owning buffer in `*gl_buffer`.
 //
 // Returns UnavailableError when the glBufferStorageEXT entry point cannot be
 // resolved, or the status of the first failing GL call. `*gl_buffer` is only
 // assigned on success.
 Status CreatePersistentBuffer(size_t size, GlPersistentBuffer* gl_buffer) {
  // The extension entry point is resolved at run time. The local keeps the GL
  // function's name because it is passed by name to TFLITE_GPU_CALL_GL below.
  PFNGLBUFFERSTORAGEEXTPROC glBufferStorageEXT =
      reinterpret_cast<PFNGLBUFFERSTORAGEEXTPROC>(
          eglGetProcAddress("glBufferStorageEXT"));
  if (glBufferStorageEXT == nullptr) {
    return UnavailableError("glBufferStorageEXT is not supported");
  }

  // Storage is requested as coherent; the mapping itself does not repeat that
  // bit.
  constexpr GLbitfield kMapFlags =
      GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT_EXT;
  constexpr GLbitfield kStorageFlags = GL_MAP_COHERENT_BIT_EXT | kMapFlags;

  gl_buffer_internal::BufferId id;
  gl_buffer_internal::BufferBinder binder(GL_SHADER_STORAGE_BUFFER, id.id());
  RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glBufferStorageEXT,
                                     GL_SHADER_STORAGE_BUFFER, size, nullptr,
                                     kStorageFlags));

  void* data = nullptr;
  RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glMapBufferRange, &data,
                                     GL_SHADER_STORAGE_BUFFER, 0, size,
                                     kMapFlags));

  // Hand the id over to the result; from here on the GlPersistentBuffer owns
  // it.
  *gl_buffer = GlPersistentBuffer(GL_SHADER_STORAGE_BUFFER, id.Release(), size,
                                  /* offset = */ 0,
                                  /* has_ownership = */ true, data);
  return OkStatus();
 }
 }  // namespace gl
 }  // namespace gpu
 }  // namespace tflite
--- a/tensorflow/lite/delegates/gpu/gl/gl_buffer.h
+++ b/tensorflow/lite/delegates/gpu/gl/gl_buffer.h
@ -141,6 +141,32 @@ Status AppendFromBuffer(const GlBuffer& buffer, std::vector<T>* data) {
      absl::MakeSpan(data->data() + data->size() - num_elements, num_elements));
 }
 // A GlBuffer that also exposes a CPU pointer to its storage; the pointer is
 // intended to stay valid for as long as the buffer exists. The class performs
 // no synchronization: callers must coordinate CPU-side and GPU-side accesses
 // to the buffer themselves.
 class GlPersistentBuffer : public GlBuffer {
  public:
   // Placeholder object that holds no CPU pointer.
   GlPersistentBuffer();

   // Wraps the GL buffer described by the first five arguments together with
   // `data`, the CPU pointer to its storage.
   GlPersistentBuffer(GLenum target, GLuint id, size_t bytes_size, size_t offset,
                      bool has_ownership, void* data);

   // Unmaps the buffer if a CPU pointer is held.
   ~GlPersistentBuffer();

   // Copying is disabled; the type is move-only.
   GlPersistentBuffer(const GlPersistentBuffer&) = delete;
   GlPersistentBuffer& operator=(const GlPersistentBuffer&) = delete;
   GlPersistentBuffer(GlPersistentBuffer&& buffer);
   GlPersistentBuffer& operator=(GlPersistentBuffer&& buffer);

   // CPU pointer to the buffer storage; null when none is held.
   void* data() { return data_; }

  private:
   void* data_;
 };
 // Creates a read-write persistent buffer of `size` bytes and stores it in
 // `*gl_buffer`; on success `gl_buffer->data()` is the CPU pointer to the
 // mapped storage. Returns UnavailableError when glBufferStorageEXT cannot be
 // resolved, or the error of the failing GL call.
 Status CreatePersistentBuffer(size_t size, GlPersistentBuffer* gl_buffer);
 ////////////////////////////////////////////////////////////////////////////////
 // Implementation details are below.
--- a/tensorflow/lite/delegates/gpu/gl/gl_sync.cc
+++ b/tensorflow/lite/delegates/gpu/gl/gl_sync.cc
@ -78,6 +78,46 @@ Status GlActiveSyncWait() {
  }
 }
 // Builds a fully initialized GlShaderSync and moves it into `*gl_sync`.
 // Initialization allocates an int-sized persistent flag buffer and compiles a
 // compute program that stores 1 into the first element of the buffer bound at
 // binding 0. On any failure the error is returned and `*gl_sync` is left
 // untouched.
 Status GlShaderSync::NewSync(GlShaderSync* gl_sync) {
  GlShaderSync result;
  RETURN_IF_ERROR(CreatePersistentBuffer(sizeof(int), &result.flag_buffer_));
  // Shader source shared by all calls; allocated once and never freed.
  static const std::string* kCode = new std::string(R"(#version 310 es
  layout(local_size_x = 1, local_size_y = 1) in;
  layout(std430) buffer;
  layout(binding = 0) buffer Output {
    int elements[];
  } output_data;
  void main() {
    output_data.elements[0] = 1;
  })");
  GlShader flag_shader;
  RETURN_IF_ERROR(
      GlShader::CompileShader(GL_COMPUTE_SHADER, *kCode, &flag_shader));
  RETURN_IF_ERROR(
      GlProgram::CreateWithShader(flag_shader, &result.flag_program_));
  *gl_sync = std::move(result);
  return OkStatus();
 }
 // How it works: the GPU writes to a buffer and the CPU polls that buffer until
 // the value changes. The buffer has to be writable by the GPU and readable by
 // the CPU at the same time - a persistent buffer or a buffer shared across a
 // child context can be used for that.
 //
 // Returns UnavailableError if the flag buffer is not valid, or the error of a
 // failing bind/dispatch. Otherwise spins until the shader has written 1.
 Status GlShaderSync::Wait() {
  if (!flag_buffer_.is_valid()) {
    return UnavailableError("GlShaderSync is not initialized.");
  }
  RETURN_IF_ERROR(flag_buffer_.BindToIndex(0));
  // volatile forces every loop iteration below to re-read the mapped memory.
  volatile int* const flag = static_cast<volatile int*>(flag_buffer_.data());
  // Reset the flag before asking the GPU to set it.
  *flag = 0;
  RETURN_IF_ERROR(flag_program_.Dispatch({1, 1, 1}));
  // glFlush must be called to submit the GPU task. Adreno won't start
  // executing the task without glFlush.
  glFlush();
  // Busy-wait until the shader has updated the value.
  while (true) {
    if (*flag == 1) break;
  }
  return OkStatus();
 }
 }  // namespace gl
 }  // namespace gpu
 }  // namespace tflite
--- a/tensorflow/lite/delegates/gpu/gl/gl_sync.h
+++ b/tensorflow/lite/delegates/gpu/gl/gl_sync.h
@ -17,7 +17,9 @@ limitations under the License.
 #define TENSORFLOW_LITE_DELEGATES_GPU_GL_GL_SYNC_H_
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 #include "tensorflow/lite/delegates/gpu/gl/gl_buffer.h"
 #include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
 #include "tensorflow/lite/delegates/gpu/gl/gl_program.h"
 #include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h"
 namespace tflite {
@ -75,10 +77,28 @@ class GlSync {
 // Waits until GPU is done with processing.
 Status GlSyncWait();
-// Performs active waiting by spinning a thread and checking sync status. It
+// Waits until all commands are flushed and then performs active waiting by
-// leads to shorter wait time (up to tens of ms) but consumes more CPU.
+// spinning a thread and checking sync status. It leads to shorter wait time
 // (up to tens of ms) but consumes more CPU.
 Status GlActiveSyncWait();
 // Performs the best available minimum-latency finish. The calling thread does
 // not go to sleep; it keeps busy-waiting instead.
 // 1) The CPU checks a value in a buffer that is going to be written by the
 //    GPU. A persistent buffer is used if the extension is available.
 // 2) glSync is checked for the signalled state in a loop.
 // 3) glFinish() is performed if none of the other methods is available.
 // NOTE(review): the Wait() implementation in gl_sync.cc appears to cover
 // strategy 1 only - confirm whether 2) and 3) are planned fallbacks.
 class GlShaderSync {
  public:
   // Leaves the object uninitialized; use NewSync() to obtain a usable one.
   GlShaderSync() = default;

   // Creates an initialized instance and moves it into `*gl_sync`.
   static Status NewSync(GlShaderSync* gl_sync);

   // Blocks the calling thread (busy-wait) until the GPU has run the flag
   // program.
   Status Wait();

  private:
   GlProgram flag_program_;
   GlPersistentBuffer flag_buffer_;
 };
 }  // namespace gl
 }  // namespace gpu
 }  // namespace tflite