Implemented: GlPersistentBuffer, GlShaderSync

PiperOrigin-RevId: 249544610
This commit is contained in:
A. Unique TensorFlower 2019-05-22 16:27:51 -07:00 committed by TensorFlower Gardener
parent c582d56010
commit 87ae6fb8ee
5 changed files with 138 additions and 2 deletions

View File

@ -260,8 +260,10 @@ cc_library(
srcs = ["gl_sync.cc"], srcs = ["gl_sync.cc"],
hdrs = ["gl_sync.h"], hdrs = ["gl_sync.h"],
deps = [ deps = [
":gl_buffer",
":gl_call", ":gl_call",
":gl_errors", ":gl_errors",
":gl_program",
":portable", ":portable",
"//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:status",
], ],

View File

@ -84,6 +84,54 @@ GlBuffer GlBuffer::MakeRef() {
/* has_ownership = */ false); /* has_ownership = */ false);
} }
GlPersistentBuffer::GlPersistentBuffer(GLenum target, GLuint id,
size_t bytes_size, size_t offset,
bool has_ownership, void* data)
: GlBuffer(target, id, bytes_size, offset, has_ownership), data_(data) {}
GlPersistentBuffer::GlPersistentBuffer()
: GlPersistentBuffer(GL_INVALID_ENUM, GL_INVALID_INDEX, 0, 0, false,
nullptr) {}
// Move constructor: takes over both the GL buffer state (via GlBuffer's move
// constructor) and the mapped CPU pointer of `buffer`.
//
// The source's pointer is cleared afterwards. The destructor unmaps whenever
// data_ is non-null, so leaving the pointer in the moved-from object would
// make its destructor unmap the mapping that now belongs to this object.
GlPersistentBuffer::GlPersistentBuffer(GlPersistentBuffer&& buffer)
    : GlBuffer(std::move(buffer)), data_(buffer.data_) {
  // Only the GlBuffer base was moved from above; buffer.data_ is still intact
  // at this point and is handed over here.
  buffer.data_ = nullptr;
}
// Move assignment: releases the mapping currently held by this object (if
// any), then takes over the mapped pointer and the GL buffer state of
// `buffer`. Self-assignment is a no-op.
//
// The source's pointer is cleared so that its destructor, which unmaps
// whenever data_ is non-null, does not unmap the mapping that was just
// transferred (e.g. when assigning from a temporary).
GlPersistentBuffer& GlPersistentBuffer::operator=(GlPersistentBuffer&& buffer) {
  if (this != &buffer) {
    if (data_) {
      // Same release sequence as the destructor; must run while id() still
      // refers to the buffer this object held before the assignment.
      gl_buffer_internal::BufferBinder binder(GL_SHADER_STORAGE_BUFFER, id());
      glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
    }
    data_ = buffer.data_;
    buffer.data_ = nullptr;
    GlBuffer::operator=(std::move(buffer));
  }
  return *this;
}
// Unmaps the buffer if this object holds a mapped pointer; does nothing
// otherwise. The buffer is bound to GL_SHADER_STORAGE_BUFFER through a
// BufferBinder for the duration of the unmap call.
GlPersistentBuffer::~GlPersistentBuffer() {
  if (data_ != nullptr) {
    gl_buffer_internal::BufferBinder binder(GL_SHADER_STORAGE_BUFFER, id());
    glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
  }
}
// Creates a shader storage buffer of `size` bytes with immutable storage and
// maps it persistently for reading and writing on the CPU.
//
// On success `*gl_buffer` owns the new buffer object and exposes the mapped
// pointer through data(). Returns UnavailableError when the
// glBufferStorageEXT entry point cannot be resolved, or the error reported by
// the failing GL call.
Status CreatePersistentBuffer(size_t size, GlPersistentBuffer* gl_buffer) {
  // glBufferStorageEXT is an extension entry point and has to be looked up at
  // runtime.
  PFNGLBUFFERSTORAGEEXTPROC glBufferStorageEXT = nullptr;
  glBufferStorageEXT = reinterpret_cast<PFNGLBUFFERSTORAGEEXTPROC>(
      eglGetProcAddress("glBufferStorageEXT"));
  if (!glBufferStorageEXT) {
    return UnavailableError("glBufferStorageEXT is not supported");
  }
  // The same flags are used for the storage and for the mapping. The mapping
  // must request GL_MAP_COHERENT_BIT_EXT as well: without it a persistent
  // mapping is not coherent and GPU writes are only guaranteed to become
  // visible to the CPU after an explicit barrier/flush, which users polling
  // the mapped memory do not issue.
  const GLbitfield kAccessFlags = GL_MAP_COHERENT_BIT_EXT | GL_MAP_READ_BIT |
                                  GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT_EXT;
  gl_buffer_internal::BufferId id;
  gl_buffer_internal::BufferBinder binder(GL_SHADER_STORAGE_BUFFER, id.id());
  RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glBufferStorageEXT,
                                     GL_SHADER_STORAGE_BUFFER, size, nullptr,
                                     kAccessFlags));
  void* data = nullptr;
  RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glMapBufferRange, &data,
                                     GL_SHADER_STORAGE_BUFFER, 0, size,
                                     kAccessFlags));
  // Ownership of the GL object moves from `id` to the returned buffer.
  *gl_buffer = GlPersistentBuffer{
      GL_SHADER_STORAGE_BUFFER, id.Release(), size, 0, true, data};
  return OkStatus();
}
} // namespace gl } // namespace gl
} // namespace gpu } // namespace gpu
} // namespace tflite } // namespace tflite

View File

@ -141,6 +141,32 @@ Status AppendFromBuffer(const GlBuffer& buffer, std::vector<T>* data) {
absl::MakeSpan(data->data() + data->size() - num_elements, num_elements)); absl::MakeSpan(data->data() + data->size() - num_elements, num_elements));
} }
// A GlBuffer that additionally carries a CPU pointer to its mapped contents.
// The pointer is intended to stay valid for the whole lifetime of the buffer,
// so the user is responsible for synchronizing CPU and GPU accesses to it.
class GlPersistentBuffer : public GlBuffer {
 public:
  // Creates an empty buffer without a mapped pointer.
  GlPersistentBuffer();

  // Wraps the given GL buffer attributes together with the mapped pointer
  // `data`.
  GlPersistentBuffer(GLenum target, GLuint id, size_t bytes_size, size_t offset,
                     bool has_ownership, void* data);

  ~GlPersistentBuffer();

  // The type is move-only: copying is disabled, moving is supported.
  GlPersistentBuffer(const GlPersistentBuffer&) = delete;
  GlPersistentBuffer& operator=(const GlPersistentBuffer&) = delete;
  GlPersistentBuffer(GlPersistentBuffer&& buffer);
  GlPersistentBuffer& operator=(GlPersistentBuffer&& buffer);

  // Returns the CPU pointer to the mapped buffer contents.
  void* data() { return data_; }

 private:
  // CPU address of the mapping; nullptr when the buffer is not mapped.
  void* data_;
};
// Creates a read-write persistent buffer of `size` bytes and stores it in
// `gl_buffer`; on success the buffer exposes a valid CPU pointer via data().
// Returns a non-OK status if the buffer cannot be created.
Status CreatePersistentBuffer(size_t size, GlPersistentBuffer* gl_buffer);
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Implementation details are below. // Implementation details are below.

View File

@ -78,6 +78,46 @@ Status GlActiveSyncWait() {
} }
} }
// Builds a ready-to-use GlShaderSync and moves it into `*gl_sync`.
//
// Two resources are prepared, in this order: a persistent buffer holding a
// single int that serves as the flag, and a compute program whose only job is
// to store 1 into the first element of the buffer bound at index 0. Any
// failure is returned to the caller and `*gl_sync` is left untouched.
Status GlShaderSync::NewSync(GlShaderSync* gl_sync) {
  GlShaderSync result;
  RETURN_IF_ERROR(CreatePersistentBuffer(sizeof(int), &result.flag_buffer_));

  // The shader source is allocated once and kept for the lifetime of the
  // process.
  static const std::string* kCode = new std::string(R"(#version 310 es
layout(local_size_x = 1, local_size_y = 1) in;
layout(std430) buffer;
layout(binding = 0) buffer Output {
int elements[];
} output_data;
void main() {
output_data.elements[0] = 1;
})");

  GlShader compute_shader;
  RETURN_IF_ERROR(
      GlShader::CompileShader(GL_COMPUTE_SHADER, *kCode, &compute_shader));
  RETURN_IF_ERROR(
      GlProgram::CreateWithShader(compute_shader, &result.flag_program_));

  *gl_sync = std::move(result);
  return OkStatus();
}
// How it works: the GPU writes a buffer and the CPU checks the buffer value
// to be changed. The buffer is accessible for writing by the GPU and reading
// by the CPU simultaneously - a persistent buffer or a buffer across a child
// context can be used for that.
//
// Returns UnavailableError if the sync object has not been initialized or its
// flag buffer has no mapped CPU pointer, or the error of the failing GL call.
// Otherwise blocks the calling thread in a busy loop until the shader has
// written the flag; there is no timeout.
Status GlShaderSync::Wait() {
  if (!flag_buffer_.is_valid()) {
    return UnavailableError("GlShaderSync is not initialized.");
  }
  // volatile: the flag is modified by the GPU, outside of the compiler's
  // view, so every read in the loop below must really hit memory.
  volatile int* flag_ptr = static_cast<volatile int*>(flag_buffer_.data());
  if (flag_ptr == nullptr) {
    // A valid buffer without a mapping cannot be polled from the CPU.
    return UnavailableError("GlShaderSync flag buffer is not mapped.");
  }
  RETURN_IF_ERROR(flag_buffer_.BindToIndex(0));
  // Reset the flag before the shader that sets it to 1 is dispatched.
  *flag_ptr = 0;
  RETURN_IF_ERROR(flag_program_.Dispatch({1, 1, 1}));
  // glFlush must be called to upload GPU task. Adreno won't start executing
  // the task without glFlush.
  glFlush();
  // Wait until the value has been updated by the shader.
  while (*flag_ptr != 1) {
  }
  return OkStatus();
}
} // namespace gl } // namespace gl
} // namespace gpu } // namespace gpu
} // namespace tflite } // namespace tflite

View File

@ -17,7 +17,9 @@ limitations under the License.
#define TENSORFLOW_LITE_DELEGATES_GPU_GL_GL_SYNC_H_ #define TENSORFLOW_LITE_DELEGATES_GPU_GL_GL_SYNC_H_
#include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_buffer.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_call.h" #include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_program.h"
#include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h" #include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h"
namespace tflite { namespace tflite {
@ -75,10 +77,28 @@ class GlSync {
// Waits until GPU is done with processing. // Waits until GPU is done with processing.
Status GlSyncWait(); Status GlSyncWait();
// Performs active waiting by spinning a thread and checking sync status. It // Waits until all commands are flushed and then performs active waiting by
// leads to shorter wait time (up to tens of ms) but consumes more CPU. // spinning a thread and checking sync status. It leads to shorter wait time
// (up to tens of ms) but consumes more CPU.
Status GlActiveSyncWait(); Status GlActiveSyncWait();
// Performs the best available minimum latency finish. A calling thread is not
// going to sleep keeping active busy wait.
// 1) CPU checks the value in the buffer that is going to be written by GPU. The
// persistent buffer is used if the extension is available.
// 2) glSync is checked for the signalling state in a loop.
// 3) glFinish() is performed if all other methods are not available
class GlShaderSync {
 public:
  // Creates an uninitialized sync object; use NewSync() to obtain a usable
  // one.
  GlShaderSync() = default;

  // Initializes `gl_sync` with the flag buffer and the flag-writing program.
  static Status NewSync(GlShaderSync* gl_sync);

  // Dispatches the flag-writing program and busy-waits until the flag is set.
  // NOTE(review): the implementation in this change only performs the
  // shader-flag wait and returns an error when uninitialized - confirm whether
  // further fallbacks are expected here.
  Status Wait();

 private:
  // Compute program that writes the flag value.
  GlProgram flag_program_;
  // Persistent buffer holding the flag polled by the CPU.
  GlPersistentBuffer flag_buffer_;
};
} // namespace gl } // namespace gl
} // namespace gpu } // namespace gpu
} // namespace tflite } // namespace tflite