Implemented: GlPersistentBuffer, GlShaderSync
PiperOrigin-RevId: 249544610
This commit is contained in:
parent
c582d56010
commit
87ae6fb8ee
@ -260,8 +260,10 @@ cc_library(
|
||||
srcs = ["gl_sync.cc"],
|
||||
hdrs = ["gl_sync.h"],
|
||||
deps = [
|
||||
":gl_buffer",
|
||||
":gl_call",
|
||||
":gl_errors",
|
||||
":gl_program",
|
||||
":portable",
|
||||
"//tensorflow/lite/delegates/gpu/common:status",
|
||||
],
|
||||
|
@ -84,6 +84,54 @@ GlBuffer GlBuffer::MakeRef() {
|
||||
/* has_ownership = */ false);
|
||||
}
|
||||
|
||||
GlPersistentBuffer::GlPersistentBuffer(GLenum target, GLuint id,
|
||||
size_t bytes_size, size_t offset,
|
||||
bool has_ownership, void* data)
|
||||
: GlBuffer(target, id, bytes_size, offset, has_ownership), data_(data) {}
|
||||
|
||||
// Default state: invalid target and id, zero size and offset, no ownership and
// no CPU pointer. With a null pointer the destructor has nothing to unmap.
GlPersistentBuffer::GlPersistentBuffer()
    : GlPersistentBuffer(GL_INVALID_ENUM, GL_INVALID_INDEX,
                         /* bytes_size = */ 0, /* offset = */ 0,
                         /* has_ownership = */ false, /* data = */ nullptr) {}
|
||||
|
||||
// Move constructor: takes over both the GL buffer (via the GlBuffer move
// constructor) and the CPU pointer of `buffer`.
//
// The source's pointer is cleared afterwards. ~GlPersistentBuffer() unmaps the
// buffer whenever data_ is non-null, so leaving the pointer in the moved-from
// object would make its destructor unmap the mapping that now belongs to the
// newly constructed object.
GlPersistentBuffer::GlPersistentBuffer(GlPersistentBuffer&& buffer)
    : GlBuffer(std::move(buffer)), data_(buffer.data_) {
  // Reading buffer.data_ after the base-class move is fine: the base move only
  // receives the GlBuffer sub-object and cannot reach this derived member.
  buffer.data_ = nullptr;
}
|
||||
|
||||
// Move assignment: takes over the CPU pointer and the GL buffer of `buffer`.
// Self-assignment is a no-op.
//
// The source's pointer is cleared so that the destructor of the moved-from
// object (which unmaps whenever data_ is non-null) does not unmap the mapping
// that this object has just received. Without this, patterns such as
// `*dst = GlPersistentBuffer{...}` leave `dst` with a pointer that is unmapped
// as soon as the temporary is destroyed.
//
// NOTE(review): a mapping previously held by *this is not unmapped explicitly
// here; this relies on GlBuffer::operator= releasing the old buffer - confirm.
GlPersistentBuffer& GlPersistentBuffer::operator=(GlPersistentBuffer&& buffer) {
  if (this != &buffer) {
    data_ = buffer.data_;
    buffer.data_ = nullptr;
    GlBuffer::operator=(std::move(buffer));
  }
  return *this;
}
|
||||
|
||||
// Unmaps the buffer if a CPU pointer is held. The buffer is bound to
// GL_SHADER_STORAGE_BUFFER for the duration of the unmap call.
GlPersistentBuffer::~GlPersistentBuffer() {
  if (data_ != nullptr) {
    gl_buffer_internal::BufferBinder binder(GL_SHADER_STORAGE_BUFFER, id());
    glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
  }
}
|
||||
|
||||
// Creates a shader storage buffer of `size` bytes through glBufferStorageEXT,
// maps it for persistent read/write access and stores the result, including
// the mapped CPU pointer, in `gl_buffer`.
//
// Returns UnavailableError if the glBufferStorageEXT entry point cannot be
// resolved, or the error reported by the failing GL call otherwise.
Status CreatePersistentBuffer(size_t size, GlPersistentBuffer* gl_buffer) {
  // The entry point is resolved at runtime. The local keeps the GL function
  // name because it is handed to TFLITE_GPU_CALL_GL below.
  PFNGLBUFFERSTORAGEEXTPROC glBufferStorageEXT =
      reinterpret_cast<PFNGLBUFFERSTORAGEEXTPROC>(
          eglGetProcAddress("glBufferStorageEXT"));
  if (glBufferStorageEXT == nullptr) {
    return UnavailableError("glBufferStorageEXT is not supported");
  }

  gl_buffer_internal::BufferId id;
  gl_buffer_internal::BufferBinder binder(GL_SHADER_STORAGE_BUFFER, id.id());

  // Allocate storage that may stay mapped while it is in use.
  RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(
      glBufferStorageEXT, GL_SHADER_STORAGE_BUFFER, size, nullptr,
      GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT_EXT |
          GL_MAP_COHERENT_BIT_EXT));

  // Map the whole range; the pointer is kept by the resulting buffer object.
  void* data = nullptr;
  RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(
      glMapBufferRange, &data, GL_SHADER_STORAGE_BUFFER, 0, size,
      GL_MAP_PERSISTENT_BIT_EXT | GL_MAP_READ_BIT | GL_MAP_WRITE_BIT));

  // The id is released from the local holder and passed to the result, which
  // is created with ownership.
  *gl_buffer = GlPersistentBuffer(GL_SHADER_STORAGE_BUFFER, id.Release(), size,
                                  /* offset = */ 0,
                                  /* has_ownership = */ true, data);
  return OkStatus();
}
|
||||
|
||||
} // namespace gl
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
@ -141,6 +141,32 @@ Status AppendFromBuffer(const GlBuffer& buffer, std::vector<T>* data) {
|
||||
absl::MakeSpan(data->data() + data->size() - num_elements, num_elements));
|
||||
}
|
||||
|
||||
// Persistent buffer provides CPU pointer to the buffer that is valid all the
|
||||
// time. A user should properly synchronize the access to the buffer on CPU and
|
||||
// GPU sides.
|
||||
class GlPersistentBuffer : public GlBuffer {
|
||||
public:
|
||||
GlPersistentBuffer(GLenum target, GLuint id, size_t bytes_size, size_t offset,
|
||||
bool has_ownership, void* data);
|
||||
GlPersistentBuffer();
|
||||
|
||||
// Move-only
|
||||
GlPersistentBuffer(GlPersistentBuffer&& buffer);
|
||||
GlPersistentBuffer& operator=(GlPersistentBuffer&& buffer);
|
||||
GlPersistentBuffer(const GlPersistentBuffer&) = delete;
|
||||
GlPersistentBuffer& operator=(const GlPersistentBuffer&) = delete;
|
||||
|
||||
~GlPersistentBuffer();
|
||||
|
||||
void* data() { return data_; }
|
||||
|
||||
private:
|
||||
void* data_;
|
||||
};
|
||||
|
||||
// Creates a read-write persistent buffer of `size` bytes with a valid CPU
// pointer and stores it in `gl_buffer`.
|
||||
Status CreatePersistentBuffer(size_t size, GlPersistentBuffer* gl_buffer);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Implementation details are below.
|
||||
|
||||
|
@ -78,6 +78,46 @@ Status GlActiveSyncWait() {
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a ready-to-use GlShaderSync in `gl_sync`: a persistent buffer that
// holds a single int flag, and a one-invocation compute program that writes 1
// into the first element of the buffer bound at binding 0.
//
// Returns the first error from buffer creation, shader compilation or program
// creation; `gl_sync` is only assigned on success.
Status GlShaderSync::NewSync(GlShaderSync* gl_sync) {
  GlShaderSync result;
  RETURN_IF_ERROR(CreatePersistentBuffer(sizeof(int), &result.flag_buffer_));

  // Heap-allocated once and never deleted, so the string stays alive for the
  // whole lifetime of the process.
  static const std::string* kCode = new std::string(R"(#version 310 es
layout(local_size_x = 1, local_size_y = 1) in;
layout(std430) buffer;
layout(binding = 0) buffer Output {
int elements[];
} output_data;
void main() {
output_data.elements[0] = 1;
})");

  GlShader compute_shader;
  RETURN_IF_ERROR(
      GlShader::CompileShader(GL_COMPUTE_SHADER, *kCode, &compute_shader));
  RETURN_IF_ERROR(
      GlProgram::CreateWithShader(compute_shader, &result.flag_program_));

  *gl_sync = std::move(result);
  return OkStatus();
}
|
||||
|
||||
// How it works: GPU writes a buffer and CPU checks the buffer value to be
|
||||
// changed. The buffer is accessible for writing by GPU and reading by CPU
|
||||
// simultaneously - persistent buffer or buffer across shild context can be used
|
||||
// for that.
|
||||
Status GlShaderSync::Wait() {
|
||||
if (!flag_buffer_.is_valid()) {
|
||||
return UnavailableError("GlShaderSync is not initialized.");
|
||||
}
|
||||
RETURN_IF_ERROR(flag_buffer_.BindToIndex(0));
|
||||
volatile int* flag_ptr_ = reinterpret_cast<int*>(flag_buffer_.data());
|
||||
*flag_ptr_ = 0;
|
||||
RETURN_IF_ERROR(flag_program_.Dispatch({1, 1, 1}));
|
||||
// glFlush must be called to upload GPU task. Adreno won't start executing
|
||||
// the task without glFlush.
|
||||
glFlush();
|
||||
// Wait for the value is being updated by the shader.
|
||||
while (*flag_ptr_ != 1) {
|
||||
}
|
||||
return OkStatus();
|
||||
}
|
||||
|
||||
} // namespace gl
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
@ -17,7 +17,9 @@ limitations under the License.
|
||||
#define TENSORFLOW_LITE_DELEGATES_GPU_GL_GL_SYNC_H_
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
#include "tensorflow/lite/delegates/gpu/gl/gl_buffer.h"
|
||||
#include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
|
||||
#include "tensorflow/lite/delegates/gpu/gl/gl_program.h"
|
||||
#include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h"
|
||||
|
||||
namespace tflite {
|
||||
@ -75,10 +77,28 @@ class GlSync {
|
||||
// Waits until GPU is done with processing.
|
||||
Status GlSyncWait();
|
||||
|
||||
// Performs active waiting by spinning a thread and checking sync status. It
|
||||
// leads to shorter wait time (up to tens of ms) but consumes more CPU.
|
||||
// Waits until all commands are flushed and then performs active waiting by
|
||||
// spinning a thread and checking sync status. It leads to shorter wait time
|
||||
// (up to tens of ms) but consumes more CPU.
|
||||
Status GlActiveSyncWait();
|
||||
|
||||
// Performs the best available minimum latency finish. A calling thread is not
|
||||
// going to sleep keeping active busy wait.
|
||||
// 1) CPU checks the value in the buffer that is going to be written by GPU. The
|
||||
// persistent buffer is used if the extension is available.
|
||||
// 2) glSync is checked for the signalling state in a loop.
|
||||
// 3) glFinish() is performed if all other methods are not available
|
||||
class GlShaderSync {
|
||||
public:
|
||||
static Status NewSync(GlShaderSync* gl_sync);
|
||||
GlShaderSync() {}
|
||||
Status Wait();
|
||||
|
||||
private:
|
||||
GlProgram flag_program_;
|
||||
GlPersistentBuffer flag_buffer_;
|
||||
};
|
||||
|
||||
} // namespace gl
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
Loading…
Reference in New Issue
Block a user