Implemented: GlPersistentBuffer, GlShaderSync
PiperOrigin-RevId: 249544610
This commit is contained in:
parent
c582d56010
commit
87ae6fb8ee
@ -260,8 +260,10 @@ cc_library(
|
|||||||
srcs = ["gl_sync.cc"],
|
srcs = ["gl_sync.cc"],
|
||||||
hdrs = ["gl_sync.h"],
|
hdrs = ["gl_sync.h"],
|
||||||
deps = [
|
deps = [
|
||||||
|
":gl_buffer",
|
||||||
":gl_call",
|
":gl_call",
|
||||||
":gl_errors",
|
":gl_errors",
|
||||||
|
":gl_program",
|
||||||
":portable",
|
":portable",
|
||||||
"//tensorflow/lite/delegates/gpu/common:status",
|
"//tensorflow/lite/delegates/gpu/common:status",
|
||||||
],
|
],
|
||||||
|
@ -84,6 +84,54 @@ GlBuffer GlBuffer::MakeRef() {
|
|||||||
/* has_ownership = */ false);
|
/* has_ownership = */ false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GlPersistentBuffer::GlPersistentBuffer(GLenum target, GLuint id,
|
||||||
|
size_t bytes_size, size_t offset,
|
||||||
|
bool has_ownership, void* data)
|
||||||
|
: GlBuffer(target, id, bytes_size, offset, has_ownership), data_(data) {}
|
||||||
|
|
||||||
|
GlPersistentBuffer::GlPersistentBuffer()
|
||||||
|
: GlPersistentBuffer(GL_INVALID_ENUM, GL_INVALID_INDEX, 0, 0, false,
|
||||||
|
nullptr) {}
|
||||||
|
|
||||||
|
GlPersistentBuffer::GlPersistentBuffer(GlPersistentBuffer&& buffer)
|
||||||
|
: GlBuffer(std::move(buffer)), data_(buffer.data_) {}
|
||||||
|
|
||||||
|
GlPersistentBuffer& GlPersistentBuffer::operator=(GlPersistentBuffer&& buffer) {
|
||||||
|
if (this != &buffer) {
|
||||||
|
data_ = buffer.data_;
|
||||||
|
GlBuffer::operator=(std::move(buffer));
|
||||||
|
}
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
GlPersistentBuffer::~GlPersistentBuffer() {
|
||||||
|
if (!data_) return;
|
||||||
|
gl_buffer_internal::BufferBinder binder(GL_SHADER_STORAGE_BUFFER, id());
|
||||||
|
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
|
||||||
|
}
|
||||||
|
|
||||||
|
Status CreatePersistentBuffer(size_t size, GlPersistentBuffer* gl_buffer) {
|
||||||
|
PFNGLBUFFERSTORAGEEXTPROC glBufferStorageEXT = nullptr;
|
||||||
|
glBufferStorageEXT = reinterpret_cast<PFNGLBUFFERSTORAGEEXTPROC>(
|
||||||
|
eglGetProcAddress("glBufferStorageEXT"));
|
||||||
|
if (!glBufferStorageEXT) {
|
||||||
|
return UnavailableError("glBufferStorageEXT is not supported");
|
||||||
|
}
|
||||||
|
gl_buffer_internal::BufferId id;
|
||||||
|
gl_buffer_internal::BufferBinder binder(GL_SHADER_STORAGE_BUFFER, id.id());
|
||||||
|
RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(
|
||||||
|
glBufferStorageEXT, GL_SHADER_STORAGE_BUFFER, size, nullptr,
|
||||||
|
GL_MAP_COHERENT_BIT_EXT | GL_MAP_READ_BIT | GL_MAP_WRITE_BIT |
|
||||||
|
GL_MAP_PERSISTENT_BIT_EXT));
|
||||||
|
void* data = nullptr;
|
||||||
|
RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(
|
||||||
|
glMapBufferRange, &data, GL_SHADER_STORAGE_BUFFER, 0, size,
|
||||||
|
GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT_EXT));
|
||||||
|
*gl_buffer = GlPersistentBuffer{
|
||||||
|
GL_SHADER_STORAGE_BUFFER, id.Release(), size, 0, true, data};
|
||||||
|
return OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace gl
|
} // namespace gl
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
} // namespace tflite
|
} // namespace tflite
|
||||||
|
@ -141,6 +141,32 @@ Status AppendFromBuffer(const GlBuffer& buffer, std::vector<T>* data) {
|
|||||||
absl::MakeSpan(data->data() + data->size() - num_elements, num_elements));
|
absl::MakeSpan(data->data() + data->size() - num_elements, num_elements));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Persistent buffer provides CPU pointer to the buffer that is valid all the
|
||||||
|
// time. A user should properly synchronize the access to the buffer on CPU and
|
||||||
|
// GPU sides.
|
||||||
|
class GlPersistentBuffer : public GlBuffer {
|
||||||
|
public:
|
||||||
|
GlPersistentBuffer(GLenum target, GLuint id, size_t bytes_size, size_t offset,
|
||||||
|
bool has_ownership, void* data);
|
||||||
|
GlPersistentBuffer();
|
||||||
|
|
||||||
|
// Move-only
|
||||||
|
GlPersistentBuffer(GlPersistentBuffer&& buffer);
|
||||||
|
GlPersistentBuffer& operator=(GlPersistentBuffer&& buffer);
|
||||||
|
GlPersistentBuffer(const GlPersistentBuffer&) = delete;
|
||||||
|
GlPersistentBuffer& operator=(const GlPersistentBuffer&) = delete;
|
||||||
|
|
||||||
|
~GlPersistentBuffer();
|
||||||
|
|
||||||
|
void* data() { return data_; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
void* data_;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Creates read-write persistent buffer with valid CPU pointer
|
||||||
|
Status CreatePersistentBuffer(size_t size, GlPersistentBuffer* gl_buffer);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Implementation details are below.
|
// Implementation details are below.
|
||||||
|
|
||||||
|
@ -78,6 +78,46 @@ Status GlActiveSyncWait() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Status GlShaderSync::NewSync(GlShaderSync* gl_sync) {
|
||||||
|
GlShaderSync sync;
|
||||||
|
RETURN_IF_ERROR(CreatePersistentBuffer(sizeof(int), &sync.flag_buffer_));
|
||||||
|
static const std::string* kCode = new std::string(R"(#version 310 es
|
||||||
|
layout(local_size_x = 1, local_size_y = 1) in;
|
||||||
|
layout(std430) buffer;
|
||||||
|
layout(binding = 0) buffer Output {
|
||||||
|
int elements[];
|
||||||
|
} output_data;
|
||||||
|
void main() {
|
||||||
|
output_data.elements[0] = 1;
|
||||||
|
})");
|
||||||
|
GlShader shader;
|
||||||
|
RETURN_IF_ERROR(GlShader::CompileShader(GL_COMPUTE_SHADER, *kCode, &shader));
|
||||||
|
RETURN_IF_ERROR(GlProgram::CreateWithShader(shader, &sync.flag_program_));
|
||||||
|
*gl_sync = std::move(sync);
|
||||||
|
return OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
// How it works: GPU writes a buffer and CPU checks the buffer value to be
|
||||||
|
// changed. The buffer is accessible for writing by GPU and reading by CPU
|
||||||
|
// simultaneously - persistent buffer or buffer across shild context can be used
|
||||||
|
// for that.
|
||||||
|
Status GlShaderSync::Wait() {
|
||||||
|
if (!flag_buffer_.is_valid()) {
|
||||||
|
return UnavailableError("GlShaderSync is not initialized.");
|
||||||
|
}
|
||||||
|
RETURN_IF_ERROR(flag_buffer_.BindToIndex(0));
|
||||||
|
volatile int* flag_ptr_ = reinterpret_cast<int*>(flag_buffer_.data());
|
||||||
|
*flag_ptr_ = 0;
|
||||||
|
RETURN_IF_ERROR(flag_program_.Dispatch({1, 1, 1}));
|
||||||
|
// glFlush must be called to upload GPU task. Adreno won't start executing
|
||||||
|
// the task without glFlush.
|
||||||
|
glFlush();
|
||||||
|
// Wait for the value is being updated by the shader.
|
||||||
|
while (*flag_ptr_ != 1) {
|
||||||
|
}
|
||||||
|
return OkStatus();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace gl
|
} // namespace gl
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
} // namespace tflite
|
} // namespace tflite
|
||||||
|
@ -17,7 +17,9 @@ limitations under the License.
|
|||||||
#define TENSORFLOW_LITE_DELEGATES_GPU_GL_GL_SYNC_H_
|
#define TENSORFLOW_LITE_DELEGATES_GPU_GL_GL_SYNC_H_
|
||||||
|
|
||||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||||
|
#include "tensorflow/lite/delegates/gpu/gl/gl_buffer.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
|
#include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
|
||||||
|
#include "tensorflow/lite/delegates/gpu/gl/gl_program.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h"
|
#include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h"
|
||||||
|
|
||||||
namespace tflite {
|
namespace tflite {
|
||||||
@ -75,10 +77,28 @@ class GlSync {
|
|||||||
// Waits until GPU is done with processing.
|
// Waits until GPU is done with processing.
|
||||||
Status GlSyncWait();
|
Status GlSyncWait();
|
||||||
|
|
||||||
// Performs active waiting by spinning a thread and checking sync status. It
|
// Waits until all comands are flushed and then performs active waiting by
|
||||||
// leads to shorter wait time (up to tens of ms) but consumes more CPU.
|
// spinning a thread and checking sync status. It leads to shorter wait time
|
||||||
|
// (up to tens of ms) but consumes more CPU.
|
||||||
Status GlActiveSyncWait();
|
Status GlActiveSyncWait();
|
||||||
|
|
||||||
|
// Performs the best available minimum latency finish. A calling thread is not
|
||||||
|
// going to sleep keeping active busy wait.
|
||||||
|
// 1) CPU checks the value in the buffer that is going to be written by GPU. The
|
||||||
|
// persistent buffer is used if the extension is available.
|
||||||
|
// 2) glSync is checked for the signalling state in a loop.
|
||||||
|
// 3) glFinish() is performed if all other methods are not available
|
||||||
|
class GlShaderSync {
|
||||||
|
public:
|
||||||
|
static Status NewSync(GlShaderSync* gl_sync);
|
||||||
|
GlShaderSync() {}
|
||||||
|
Status Wait();
|
||||||
|
|
||||||
|
private:
|
||||||
|
GlProgram flag_program_;
|
||||||
|
GlPersistentBuffer flag_buffer_;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace gl
|
} // namespace gl
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
} // namespace tflite
|
} // namespace tflite
|
||||||
|
Loading…
Reference in New Issue
Block a user