STT-tensorflow/tensorflow/lite/delegates/gpu/gl/command_queue.cc
Juhyun Lee fb772b781b Published the GPU delegates.
PiperOrigin-RevId: 240848313
2019-03-28 14:09:57 -07:00

86 lines
2.8 KiB
C++

/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/gpu/gl/command_queue.h"
#include "absl/memory/memory.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_sync.h"
#include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h"
namespace tflite {
namespace gpu {
namespace gl {
namespace {
// Baseline command queue: every dispatch is followed by a full GL memory
// barrier, and waiting is delegated to GlActiveSyncWait().
class DefaultCommandQueue : public CommandQueue {
 public:
  // Dispatches `program` over `workgroups` and then issues
  // glMemoryBarrier(GL_ALL_BARRIER_BITS). The first failing step's status is
  // returned; otherwise the result is OkStatus().
  Status Dispatch(const GlProgram& program, const uint3& workgroups) override {
    RETURN_IF_ERROR(program.Dispatch(workgroups));
    RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glMemoryBarrier, GL_ALL_BARRIER_BITS));
    return OkStatus();
  }

  // Waits using GlActiveSyncWait() and returns its status.
  Status WaitForCompletion() override {
    // TODO(akulik): maybe let the user choose which wait method to use.
    return GlActiveSyncWait();
  }
};
// Command queue for Adreno GPUs that calls glFlush() periodically.
//
// On Adreno the flush cadence affects performance: the command queue needs to
// be managed manually to ensure that accumulated work goes to the GPU as fast
// as it can.
//
// Also, on older Adreno devices glFlush is required after every memory barrier
// to avoid hitting a GPU driver bug.
class AdrenoCommandQueue : public DefaultCommandQueue {
 public:
  // `flush_every_n` is the number of successful dispatches between two
  // glFlush() calls. Values below 1 are treated as 1 (flush after every
  // dispatch) so that the interval is always well defined.
  explicit AdrenoCommandQueue(int flush_every_n)
      : flush_every_n_(flush_every_n > 0 ? flush_every_n : 1) {}

  // Performs the default dispatch (program + memory barrier) and, on every
  // `flush_every_n_`-th successful dispatch, calls glFlush(). A failed dispatch
  // returns its error status and does not advance the counter.
  Status Dispatch(const GlProgram& program, const uint3& workgroups) final {
    RETURN_IF_ERROR(DefaultCommandQueue::Dispatch(program, workgroups));
    // The counter is reset on each flush instead of growing without bound, so
    // it always stays in [0, flush_every_n_) and can never overflow.
    if (++program_counter_ >= flush_every_n_) {
      program_counter_ = 0;
      // glFlush() returns void; no GL error check is performed here.
      glFlush();
    }
    return OkStatus();
  }

 private:
  // Number of dispatches between flushes; always >= 1.
  const int flush_every_n_;
  // Successful dispatches since the last flush.
  int program_counter_ = 0;
};
} // namespace
// Creates the command queue appropriate for the GPU described by `gpu_info`:
// an AdrenoCommandQueue for Adreno GPUs, a DefaultCommandQueue otherwise.
std::unique_ptr<CommandQueue> NewCommandQueue(const GpuInfo& gpu_info) {
  if (gpu_info.type != GpuType::ADRENO) {
    return absl::make_unique<DefaultCommandQueue>();
  }

  // On Adreno 630 and Adreno 505 there is up to 2x performance boost when
  // glFlush happens less often; every other Adreno model flushes after each
  // dispatch.
  const bool prefers_rare_flush = gpu_info.gpu_model == GpuModel::ADRENO630 ||
                                  gpu_info.gpu_model == GpuModel::ADRENO505;
  const int flush_every_n = prefers_rare_flush ? 10 : 1;
  return absl::make_unique<AdrenoCommandQueue>(flush_every_n);
}
} // namespace gl
} // namespace gpu
} // namespace tflite